diff --git a/local_evaluation.py b/local_evaluation.py index 995cf2f7bebccb428fff58e346f207e294543f94..7c5eb86167ed5ce4bf48fd3dde5668228c545b71 100644 --- a/local_evaluation.py +++ b/local_evaluation.py @@ -57,6 +57,8 @@ def generate_model_outputs(data_df, model): data_df.iterrows(), total=len(data_df), desc="Generating Responses" ): is_multiple_choice = row["task_type"] == "multiple-choice" + # NOTE: the 'task_type' column won't be available during evaluation, so you should use something like + # `is_multiple_choice = row['is_multiple_choice']` instead of the comparison above. prompt = row["input_field"] model_output = model.predict(prompt, is_multiple_choice) outputs.append(model_output)