
Comments (7)

simonw commented on September 22, 2024

Relevant code:

```python
if self._history is not None:
    # check if there is only one message, i.e. system prompt:
    reset = len(self._history) == 1
    self._history.append({"role": "user", "content": prompt})

    fct_func = self._format_chat_prompt_template.__func__  # type: ignore[attr-defined]
    if fct_func is GPT4All._format_chat_prompt_template:
        if reset:
            # ingest system prompt
            # use "%1%2" and not "%1" to avoid implicit whitespace
            self.model.prompt_model(self._history[0]["content"], "%1%2",
                                    empty_response_callback,
                                    n_batch=n_batch, n_predict=0, reset_context=True, special=True)
        prompt_template = self._current_prompt_template.format("%1", "%2")
    else:
        warnings.warn(
            "_format_chat_prompt_template is deprecated. Please use a chat session with a prompt template.",
            DeprecationWarning,
        )
        # special tokens won't be processed
        prompt = self._format_chat_prompt_template(
            self._history[-1:],
            self._history[0]["content"] if reset else "",
        )
        prompt_template = "%1"
    generate_kwargs["reset_context"] = reset
else:
    prompt_template = "%1"
    generate_kwargs["reset_context"] = True

# Prepare the callback, process the model response
output_collector: list[MessageType]
output_collector = [
    {"content": ""}
]  # placeholder for the self._history if chat session is not activated

if self._history is not None:
    self._history.append({"role": "assistant", "content": ""})
    output_collector = self._history

def _callback_wrapper(
    callback: ResponseCallbackType,
    output_collector: list[MessageType],
) -> ResponseCallbackType:
    def _callback(token_id: int, response: str) -> bool:
        nonlocal callback, output_collector
        output_collector[-1]["content"] += response
        return callback(token_id, response)

    return _callback

# Send the request to the model
if streaming:
    return self.model.prompt_model_streaming(
        prompt,
        prompt_template,
        _callback_wrapper(callback, output_collector),
        **generate_kwargs,
    )

self.model.prompt_model(
    prompt,
    prompt_template,
    _callback_wrapper(callback, output_collector),
    **generate_kwargs,
)

return output_collector[-1]["content"]
```
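The `_callback_wrapper` closure here just accumulates streamed tokens into the last entry of whatever list it was handed (the shared `_history`, or the placeholder). A standalone sketch of that pattern, with stand-in types since `MessageType` and `ResponseCallbackType` come from the bindings:

```python
from typing import Callable

MessageType = dict[str, str]  # stand-in for the bindings' MessageType
ResponseCallbackType = Callable[[int, str], bool]

def _callback_wrapper(
    callback: ResponseCallbackType,
    output_collector: list[MessageType],
) -> ResponseCallbackType:
    # Wrap the user callback so every streamed token is also
    # appended to the last message in output_collector.
    def _callback(token_id: int, response: str) -> bool:
        output_collector[-1]["content"] += response
        return callback(token_id, response)
    return _callback

history: list[MessageType] = [{"role": "assistant", "content": ""}]
cb = _callback_wrapper(lambda token_id, response: True, history)
for i, tok in enumerate(["Hello", ",", " world"]):
    cb(i, tok)
print(history[-1]["content"])  # → "Hello, world"
```

Because `output_collector` aliases `_history` when a chat session is active, the assistant message ends up recorded in the session automatically.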

My best guess is that self.model.prompt_model_streaming has its own internal state, which is why my attempts to manipulate the state in the outer layer are having no effect.

from gpt4all.

simonw commented on September 22, 2024

Maybe the previous tokens are accumulated in this low-level tokens C array, and that's the thing that isn't updated if you add stuff to _history?

```python
class LLModelPromptContext(ctypes.Structure):
    _fields_ = [
        ("logits", ctypes.POINTER(ctypes.c_float)),
        ("logits_size", ctypes.c_size_t),
        ("tokens", ctypes.POINTER(ctypes.c_int32)),
        ("tokens_size", ctypes.c_size_t),
        ("n_past", ctypes.c_int32),
        ("n_ctx", ctypes.c_int32),
        ("n_predict", ctypes.c_int32),
        ("top_k", ctypes.c_int32),
        ("top_p", ctypes.c_float),
        ("min_p", ctypes.c_float),
        ("temp", ctypes.c_float),
        ("n_batch", ctypes.c_int32),
        ("repeat_penalty", ctypes.c_float),
        ("repeat_last_n", ctypes.c_int32),
        ("context_erase", ctypes.c_float),
    ]
```
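If that guess is right, the mutable generation state lives in this C struct (notably `n_past`, the count of tokens already in the context) rather than in the Python-level `_history`. A minimal sketch showing that this is plain mutable ctypes state, using a subset of the fields above with hypothetical values:

```python
import ctypes

class LLModelPromptContext(ctypes.Structure):
    # Subset of the fields shown above; enough for the sketch.
    _fields_ = [
        ("n_past", ctypes.c_int32),    # tokens already ingested into the context
        ("n_ctx", ctypes.c_int32),     # context window size
        ("n_predict", ctypes.c_int32), # tokens to generate
    ]

ctx = LLModelPromptContext(n_past=128, n_ctx=2048, n_predict=256)
# Presumably reset_context ends up zeroing n_past on the C side --
# editing the Python-level _history alone never touches this struct.
ctx.n_past = 0
print(ctx.n_past)  # → 0
```

So appending to `_history` changes what the bindings *format*, but not what the backend has already *ingested*, which would explain why the manipulation is ignored.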


woheller69 commented on September 22, 2024

I asked for something similar today: #2358
I tried calling clear() on current_chat_session to start a new chat without leaving the context manager, but that is also ignored.

My simple GUI: https://github.com/woheller69/gpt4all-TK-CHAT


simonw commented on September 22, 2024

Aha: spotted this, which happens only when self._history is None:

```python
generate_kwargs["reset_context"] = True
```

That must be the mechanism that resets the internal token state.


simonw commented on September 22, 2024

More details on why I need this here:

My LLM tool works by logging messages and responses to a SQLite database, so you can do things like this:

```
llm "three names for a pet pelican"
# Outputs three names
llm -c "2 more"  # -c means continue previous thread
# Outputs two more names
```

In order to get GPT4All working correctly as a plugin for my tool I need the ability to instantiate a new model and then start a chat session with the previous context populated from my persisted SQLite version - but I can't figure out a way to do that.
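The persistence half of this is straightforward; the missing piece is feeding the restored messages back into GPT4All. A sketch of loading a prior thread from SQLite into the `[{"role": ..., "content": ...}]` message-list shape the chat session uses (the table name, columns, and demo data are hypothetical, not LLM's actual schema):

```python
import os
import sqlite3
import tempfile

def load_history(db_path: str, thread_id: str) -> list[dict[str, str]]:
    # Rebuild a [{"role": ..., "content": ...}, ...] message list
    # from a persisted conversation thread.
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            "SELECT role, content FROM messages "
            "WHERE thread_id = ? ORDER BY id",
            (thread_id,),
        ).fetchall()
    finally:
        conn.close()
    return [{"role": role, "content": content} for role, content in rows]

# Demo: persist a thread, then restore it.
path = os.path.join(tempfile.mkdtemp(), "logs.db")
conn = sqlite3.connect(path)
conn.execute(
    "CREATE TABLE messages "
    "(id INTEGER PRIMARY KEY, thread_id TEXT, role TEXT, content TEXT)"
)
conn.executemany(
    "INSERT INTO messages (thread_id, role, content) VALUES (?, ?, ?)",
    [("t1", "user", "three names for a pet pelican"),
     ("t1", "assistant", "Percy, Pelly, Captain Beak")],
)
conn.commit()
conn.close()

history = load_history(path, "t1")
print(len(history))  # → 2
```

What I can't find is a supported way to hand that list to a fresh chat session so the backend actually ingests it as prior context.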


woheller69 commented on September 22, 2024

You might use llama-cpp-agent (https://github.com/Maximilian-Winter/llama-cpp-agent) and llama-cpp-python instead of gpt4all.
I am also experimenting with it: https://github.com/woheller69/LLAMA_TK_CHAT/blob/main/LLAMA_TK_GUI.py

There you can do things like: self.llama_cpp_agent.chat_history.get_message_store().add_assistant_message(...)


iimez commented on September 22, 2024

The way we accomplished support for initial chat session messages in the node bindings is using fake_reply. But I think it's not exposed/documented as a user-facing parameter in the Python bindings. That looks intentional, though I don't know the exact reasoning. We may want to expose it, or add some other way to provide that "conversation restore" functionality that encapsulates fake_reply. I believe it was originally added to support similar functionality in gpt4all-chat.

There might also be an alternative way to hack around it using the prompt template parameter plus special=True: send in all the prior turns "pre-templated", including the assistant responses, with n_predict=0 so nothing is generated.
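That second approach means rendering every prior turn through the template yourself before handing the result to the model. A sketch of the templating half (the template strings are an illustrative ChatML-style example, not any specific model's):

```python
# Render prior turns into one "pre-templated" string. The templates here
# are illustrative ChatML-style examples, not any specific model's.
USER_TMPL = "<|im_start|>user\n{}<|im_end|>\n"
ASSISTANT_TMPL = "<|im_start|>assistant\n{}<|im_end|>\n"

def pretemplate(history: list[dict[str, str]]) -> str:
    out = []
    for msg in history:
        tmpl = USER_TMPL if msg["role"] == "user" else ASSISTANT_TMPL
        out.append(tmpl.format(msg["content"]))
    return "".join(out)

restored = pretemplate([
    {"role": "user", "content": "three names for a pet pelican"},
    {"role": "assistant", "content": "Percy, Pelly, Captain Beak"},
])
# This string would then be fed with prompt_template="%1", special=True,
# and n_predict=0, so the backend ingests it without generating anything.
print(restored.count("<|im_start|>"))  # → 2
```

The special=True part matters because, per the deprecation branch in the code above, special tokens are otherwise not processed.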

