This may be a misuse of the model on my part, but let me briefly describe my use case. I have a custom prompt, treated as a scenario that I define upfront; it is prepended as the starting prompt to the prompt MiniChat builds for the model. The user can then interact with the bot and receive a response.
Here is my sample usage, based on your sample code, which returns inconsistent responses:
```python
def generate_response(self, user_input, main_topic, subtopic):
    # Retrieve and print the system (scenario) prompt
    system_prompt = self.get_prompt(main_topic, subtopic)
    print("System Prompt:", system_prompt)
    if system_prompt is None:
        return "Prompt not found for the given topic and subtopic."

    # Append the user input to the conversation history and print it
    self.conv.append_message(self.conv.roles[0], user_input)
    print("Appended User Input:", user_input)

    # Build and print the conversation history prompt
    conversation_prompt = self.conv.get_prompt()
    print("Conversation History Prompt:", conversation_prompt)

    # Combine the system prompt with the conversation history and print the result
    combined_prompt = system_prompt + "\n" + conversation_prompt
    print("Combined Prompt for Model:", combined_prompt)

    # Tokenize the combined prompt into model input IDs
    input_ids = self.tokenizer([combined_prompt]).input_ids

    # Generate output from the model
    output_ids = self.model.generate(
        torch.as_tensor(input_ids).cuda(),
        do_sample=True,
        temperature=0.7,
        max_new_tokens=50,
    )

    # Strip the prompt tokens, then decode and print the chatbot's response
    output_ids = output_ids[0][len(input_ids[0]):]
    response = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip()
    print("Chatbot Response:", response)

    # Append the chatbot's response to the conversation history
    self.conv.append_message(self.conv.roles[1], response)
    return response
```
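
For context, here is roughly how the surrounding class is set up and driven. This is a minimal sketch under my own naming: `ChatBot` and the `prompts` lookup table are placeholders from my wrapper, `get_default_conv_template` is the helper shipped with your sample code, and `generate_response` is the method shown above.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from conversation import get_default_conv_template  # helper from the MiniChat sample code


class ChatBot:
    # Illustrative wrapper; `ChatBot` and `prompts` are placeholder names.
    def __init__(self, model_path, prompts):
        # prompts maps (main_topic, subtopic) -> the scenario prompt I defined upfront
        self.prompts = prompts
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path, torch_dtype=torch.float16, device_map="auto"
        ).eval()
        self.conv = get_default_conv_template("minichat")

    def get_prompt(self, main_topic, subtopic):
        # Look up the scenario prompt for the requested topic/subtopic
        return self.prompts.get((main_topic, subtopic))

    # generate_response(...) is exactly the method shown above


bot = ChatBot(
    "GeneZC/MiniChat-3B",
    {("travel", "booking"): "You are a travel-booking assistant. ..."},
)
print(bot.generate_response("I want to book a flight.", "travel", "booking"))
```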