diff --git a/local_evaluation.py b/local_evaluation.py
index 3f12a9dc68b2e669a0a36a4e86a27dcca84fc38e..4dafefe09e1e2c6dcdabc388230498adbd58b626 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -92,7 +92,7 @@ def generate_predictions(dataset_path, participant_model):
     for query_dict, query_web_search_results in tqdm(zip(qa, web_results), total=len(qa), desc="Generating Predictions"):
         query = query_dict["query"]
         prediction = participant_model.generate_answer(
-            query, query_web_search_results
+            query, query_web_search_results['search_response']
         )
         # trim prediction to 75 tokens
         prediction = trim_predictions_to_max_token_length(prediction)