diff --git a/local_evaluation.py b/local_evaluation.py index 6ab525aa7c0e467cfd29239527f932dfed9ee442..9e970e54f6f2b434dc1a8d53b6b9d456c8793b89 100644 --- a/local_evaluation.py +++ b/local_evaluation.py @@ -108,9 +108,9 @@ def evaluate_outputs(data_df, outputs, log_every_n_steps=1): per_task_metrics[task_name]["sample_score"].append(metric_score) - if row_idx % log_every_n_steps == 0: + if (row_idx + 1) % log_every_n_steps == 0: print_sample( - row_idx, model_output, ground_truth, metric, metric_score + row_idx + 1, model_output, ground_truth, metric, metric_score ) return per_task_metrics