diff --git a/models/rag_llama_baseline.py b/models/rag_llama_baseline.py
index 1e1f11d9c89ed91dd447788beb07c4204765247c..0f16d3aa4e7429c3ebf19c8b335df5fc8676329b 100644
--- a/models/rag_llama_baseline.py
+++ b/models/rag_llama_baseline.py
@@ -126,9 +126,7 @@ class RAGModel:
         # Process each HTML text from the search results to extract text content.
         for html_text in search_results:
             # Parse the HTML content to extract text.
-            soup = BeautifulSoup(
-                html_text["page_result"], features="html.parser"
-            )
+            soup = BeautifulSoup(html_text["page_result"], features="lxml")
             text = soup.get_text().replace("\n", "")
             if len(text) > 0:
                 # Convert the text into sentences and extract their offsets.
@@ -179,7 +177,7 @@ class RAGModel:
             # If the model fails to generate an answer, return a default response.
             answer = "I don't know"
 
-        # Trim the prediction to a maximum of 75 tokens (this function needs to be defined).
+        # Trim the prediction to a maximum of 75 tokens to meet the submission requirements.
         trimmed_answer = trim_predictions_to_max_token_length(answer)
 
         return trimmed_answer
diff --git a/requirements.txt b/requirements.txt
index 951fd3e954838fd7a88b514022677be3df327396..5fc9efd1f15725d33c6775af8f682525d0517ade 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,7 @@ blingfire
 hf-transfer
 huggingface-hub
 loguru
+lxml
 openai==1.13.3
 sentence_transformers
 torch
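For reference, a minimal standalone sketch of the parser swap above: extracting plain text from an HTML page result with BeautifulSoup's lxml backend (which is why lxml is added to requirements.txt). The sample HTML and the search_results structure here are illustrative placeholders, not data from the baseline.

    # Sketch only: assumes bs4 and lxml are installed; the sample data is made up.
    from bs4 import BeautifulSoup

    search_results = [
        {"page_result": "<html><body><p>First sentence.</p><p>Second sentence.</p></body></html>"}
    ]

    for html_text in search_results:
        # lxml is generally faster than the stdlib html.parser and more tolerant of malformed markup.
        soup = BeautifulSoup(html_text["page_result"], features="lxml")
        text = soup.get_text().replace("\n", "")
        if len(text) > 0:
            print(text)  # -> First sentence.Second sentence.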