diff --git a/models/dummy_model.py b/models/dummy_model.py
index a5bf388078f6cf35f418e4c033b3e691abdd9632..9cf969e4597db2a0258d56dad3958893eb52a91a 100644
--- a/models/dummy_model.py
+++ b/models/dummy_model.py
@@ -23,7 +23,7 @@
 AICROWD_RUN_SEED = int(os.getenv("AICROWD_RUN_SEED", 42*4096))
 
 # VLLM Parameters
-VLLM_TENSOR_PARALLEL_SIZE = 4 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
+VLLM_TENSOR_PARALLEL_SIZE = 1 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
 VLLM_GPU_MEMORY_UTILIZATION = 0.96 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
 
 AICROWD_SUBMISSION_BATCH_SIZE = VLLM_TENSOR_PARALLEL_SIZE*4 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
@@ -57,7 +57,7 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
         )
 
         # initialize the model with vllm
-        self.system_prompt = "You are a helpful and multilingual online shopping assistant. You can understand and respond to user queries in English, German, Italian, French, Japanese, Spanish, Chinese. You are knowledgeable about various products. NOTE:ONLY OUTPUT THE ANSWER!!\n\n"
+        self.system_prompt = "You are a helpful and multilingual online shopping assistant. You can understand and respond to user queries in English, German, Italian, French, Japanese, Spanish, Portuguese, Arabic, Hebrew, Korean, Chinese. You are knowledgeable about various products and adept at providing detailed information, recommendations, and assistance. Respond concisely and accurately to enhance the shopping experience. NOTE: ONLY OUTPUT THE ANSWER!!"
 
         self.llm = vllm.LLM(
             self.model_name,
@@ -72,7 +72,7 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
 
         self.faiss = self.load_rag_module(faiss_index_path="./models/index_0703_v1.2data.ivf")
         self.faiss_retrieve_topk = 7
-        self.faiss_score_filter = 0.882
+        self.faiss_score_filter = 0.885
 
         self.retrieve_task_description = "Given a online shopping user query, retrieve relevant Question-Answer that similar (type of task ,languages involved and product) to the query."
 
@@ -253,16 +253,19 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
             else:
                 prompt_example = '## Now answer the Question:' + prompt
 
-            messages = [
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": prompt_example}
-            ]
-            chat_prompt = self.tokenizer.apply_chat_template(
-                messages,
-                add_generation_prompt=True,
-                tokenize=False,
-                # return_tensors="pt"
-            )
+            if is_multiple_choice:
+                chat_prompt = self.system_prompt + prompt_example
+            else:
+                messages = [
+                    {"role": "system", "content": self.system_prompt},
+                    {"role": "user", "content": prompt_example}
+                ]
+                chat_prompt = self.tokenizer.apply_chat_template(
+                    messages,
+                    add_generation_prompt=True,
+                    tokenize=False,
+                    # return_tensors="pt"
+                )
             # if "llama" in self.model_name.lower():
             #     chat_prompt = chat_prompt[len(self.tokenizer.bos_token):]  # vllm tokenize will also add bos token
             formatted_prompts.append(chat_prompt)