diff --git a/local_evaluation.py b/local_evaluation.py index fda45b2f9241248dfba07ee73f2eefc00f60ff60..3f12a9dc68b2e669a0a36a4e86a27dcca84fc38e 100644 --- a/local_evaluation.py +++ b/local_evaluation.py @@ -125,7 +125,7 @@ def evaluate_predictions(predictions, evaluation_model_name, openai_client): "content": f"Question: {query}\n Ground truth: {ground_truth}\n Prediction: {prediction}\n", }, ] - if prediction == "i don't know": + if prediction == "i don't know" or prediction == "i don't know.": n_miss += 1 continue if prediction == ground_truth: