From 1ae54c64c349e1f63cadae75c7c20cdb3e1dfe1f Mon Sep 17 00:00:00 2001
From: der2933 <13113683246@163.com>
Date: Fri, 5 Jul 2024 18:52:54 +0800
Subject: [PATCH] llama 3 0703 datav1.2 rag datav1.2

---
 models/dummy_model.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/models/dummy_model.py b/models/dummy_model.py
index a5bf388..9cf969e 100644
--- a/models/dummy_model.py
+++ b/models/dummy_model.py
@@ -23,7 +23,7 @@ AICROWD_RUN_SEED = int(os.getenv("AICROWD_RUN_SEED", 42*4096))
 
 
 # VLLM Parameters
-VLLM_TENSOR_PARALLEL_SIZE = 4 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
+VLLM_TENSOR_PARALLEL_SIZE = 1 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
 VLLM_GPU_MEMORY_UTILIZATION = 0.96 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
 
 AICROWD_SUBMISSION_BATCH_SIZE = VLLM_TENSOR_PARALLEL_SIZE*4 # TUNE THIS VARIABLE depending on the number of GPUs you are requesting and the size of your model.
@@ -57,7 +57,7 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
         )
 
         # initialize the model with vllm
-        self.system_prompt = "You are a helpful and multilingual online shopping assistant. You can understand and respond to user queries in English, German, Italian, French, Japanese, Spanish, Chinese. You are knowledgeable about various products. NOTE:ONLY OUTPUT THE ANSWER!!\n\n"
+        self.system_prompt = "You are a helpful and multilingual online shopping assistant. You can understand and respond to user queries in English, German, Italian, French, Japanese, Spanish, Portuguese, Arabic, Hebrew, Korean, Chinese. You are knowledgeable about various products and adept at providing detailed information, recommendations, and assistance. Respond concisely and accurately to enhance the shopping experience. NOTE: ONLY OUTPUT THE ANSWER!!"
 
         self.llm = vllm.LLM(
             self.model_name,
@@ -72,7 +72,7 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
 
         self.faiss = self.load_rag_module(faiss_index_path="./models/index_0703_v1.2data.ivf")
         self.faiss_retrieve_topk = 7
-        self.faiss_score_filter = 0.882
+        self.faiss_score_filter = 0.885
 
         self.retrieve_task_description = "Given a online shopping user query, retrieve relevant Question-Answer that similar (type of task ,languages involved and product) to the query."
 
@@ -253,16 +253,19 @@ class llama3_8b_FewShot_vllm(ShopBenchBaseModel):
             else:
                 prompt_example = '## Now answer the Question:' + prompt
 
-            messages = [
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": prompt_example}
-            ]
-            chat_prompt = self.tokenizer.apply_chat_template(
-                messages,
-                add_generation_prompt=True,
-                tokenize=False,
-                # return_tensors="pt"
-            )
+            if is_multiple_choice:
+                chat_prompt = self.system_prompt + prompt_example
+            else:
+                messages = [
+                    {"role": "system", "content": self.system_prompt},
+                    {"role": "user", "content": prompt_example}
+                ]
+                chat_prompt = self.tokenizer.apply_chat_template(
+                    messages,
+                    add_generation_prompt=True,
+                    tokenize=False,
+                    # return_tensors="pt"
+                )
             # if "llama" in self.model_name.lower():
             #     chat_prompt = chat_prompt[len(self.tokenizer.bos_token):]  # vllm tokenize will also add bos token
             formatted_prompts.append(chat_prompt)
-- 
GitLab