diff --git a/local_evaluation.py b/local_evaluation.py
index 7dd899c9488c7d2245e6bfc6b9181fa9e24b6771..8ed8dd1740c7a0a64578c047fffe847fa98d301e 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -117,14 +117,14 @@ def evaluate(LocalEvalConfig):
                               index=instructions_df.InputInstructionWithGameID).to_dict()
     cpreds, cgt = [], []
     for instructionWithGameID, instruction_is_clear in classifier_gt.items():
-        cgt.append(int(instruction_is_clear.lower() == 'yes'))
+        cgt.append(int(instruction_is_clear.lower() == 'no'))
         pred = classifer_preds.get(instructionWithGameID, None)
         if pred is not None:
            cpreds.append(pred)
         else:
             warnings.warn(f"No prediction for instruction + game id {instructionWithGameID}")
-            # if any instruction is not predicted, default value will be taken as 1
-            cpred.append(1)
+            # if any instruction is not predicted, the default value will be taken as 0
+            cpreds.append(0)
 
     clariq_f1_score = f1_score(y_true=cgt, y_pred=cpreds, average='macro')
 
@@ -158,4 +158,4 @@ if __name__ == "__main__":
     RANKER_RESULTS_FILE = './local-eval-ranker-results.json'
     DATA_FOLDER = './public_data'
 
-    evaluate(LocalEvalConfig)
\ No newline at end of file
+    evaluate(LocalEvalConfig)
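
For reference, here is a minimal standalone sketch of the corrected scoring loop after this change. It assumes, per the diff context, that `classifier_gt` maps "instruction + game id" keys to 'yes'/'no' clarity labels and `classifer_preds` maps the same keys to 0/1 predictions; the toy dictionaries below are hypothetical, not from the repository.

```python
import warnings
from sklearn.metrics import f1_score

# Hypothetical ground truth: 'no' means the instruction needs clarification.
classifier_gt = {"inst-1|game-A": "yes", "inst-2|game-A": "no", "inst-3|game-B": "no"}
# Hypothetical predictions; "inst-3|game-B" is deliberately missing to
# exercise the default-value branch.
classifer_preds = {"inst-1|game-A": 0, "inst-2|game-A": 1}

cpreds, cgt = [], []
for instructionWithGameID, instruction_is_clear in classifier_gt.items():
    # After the fix, 'no' (unclear instruction) maps to the positive class 1,
    # so predictions must use the same polarity.
    cgt.append(int(instruction_is_clear.lower() == 'no'))
    pred = classifer_preds.get(instructionWithGameID, None)
    if pred is not None:
        cpreds.append(pred)
    else:
        warnings.warn(f"No prediction for instruction + game id {instructionWithGameID}")
        # Missing predictions now default to 0 ('clear'), consistent with
        # the flipped label encoding above.
        cpreds.append(0)

# Macro-averaged F1 over the two classes, as in local_evaluation.py.
clariq_f1_score = f1_score(y_true=cgt, y_pred=cpreds, average='macro')
print(clariq_f1_score)
```

Keeping the missing-prediction default aligned with the label encoding matters here: with 'no' encoded as 1, defaulting absent predictions to 1 would silently reward submissions that skip hard instructions, whereas defaulting to 0 penalizes the skipped item under macro F1.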