diff --git a/agents/bart_agent.py b/agents/bart_agent.py
index 36864f501c799866334b2a1bf0ebacd138fc32e2..6602f50a06698b2cdcc01be19809bde2e1c39855 100644
--- a/agents/bart_agent.py
+++ b/agents/bart_agent.py
@@ -74,8 +74,7 @@ class BARTResponseAgent(object):
-    def generate_responses(self, test_data: List[Dict]) -> List[str]:
+    def generate_responses(self, test_data: List[Dict]) -> Dict:
         """
-        You will be provided with a batch of upto 50 independent conversations
-        Return a string for every conversation
-        
+        You will be provided with a batch of up to 50 independent conversations
+
         Input 1
         [
             {"persona A": ..., "persona B": ... "dialogue": ... }, # conversation 1  Turn 1
@@ -96,9 +95,16 @@ class BARTResponseAgent(object):
 
         Note: Turn numbers will NOT be provided as input
 
+        Return a dictionary with the following format:
+
+        "use_api": True/False                                                               - Note that the API cannot be used when running on GPU
+        "prompts": [ <list of the prompts that go as "content" to the API> ]                - Note that every call is independent and calls are not threaded
+        "max_generated_tokens": [ <list of ints, the max generation limit for each call> ]  - Note that the submission will fail if the total generation limit is exceeded
+        "final_responses": [ <list of strings with the final responses> ]                   - Only used when use_api is set to False
+
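+        For illustration, a hypothetical API-style return could look like this
+        (the prompt strings and token limits are placeholders, not values
+        required by the evaluator):
+
+        {
+            "use_api": True,
+            "prompts": ["<content for call 1>", "<content for call 2>", ...],
+            "max_generated_tokens": [64, 64, ...],
+            "final_responses": []    # ignored when use_api is True
+        }
+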
         """
 
-        all_responses = []
+        final_responses = []
 
         for conversation in tqdm(test_data):
             tensor_input_ids, tensor_attention_mask = self.prepare_tensors(conversation)
@@ -116,8 +122,13 @@ class BARTResponseAgent(object):
                     spaces_between_special_tokens=False,
                     clean_up_tokenization_spaces=False
             )[0].strip()
-            all_responses.append(response)
+            final_responses.append(response)
 
         self.turn_id = self.turn_id % 7 + 1 # Turn id goes from 1 to 7
 
-        return all_responses
+        response = {
+            "use_api": False,                                    # Ignored if GPU true is set in aicrowd.json
+            "prompts": ["" for _ in test_data],                  # Ignored if GPU true is set in aicrowd.json
+            "final_responses": final_responses
+        }
+        return response
diff --git a/agents/dummy_agent.py b/agents/dummy_agent.py
index ec6d451fcdcb60c6da20fa5fbbefd096068799c5..59235031320333b97d6c49d915093844d42f20d9 100644
--- a/agents/dummy_agent.py
+++ b/agents/dummy_agent.py
@@ -8,7 +8,6 @@ class DummyResponseAgent(object):
-    def generate_responses(self, test_data: List[Dict]) -> List[str]:
+    def generate_responses(self, test_data: List[Dict]) -> Dict:
         """
-        You will be provided with a batch of upto 50 independent conversations
-        Return a string for every conversation
+        You will be provided with a batch of up to 50 independent conversations
         
         Input 1
         [
@@ -30,6 +29,19 @@ class DummyResponseAgent(object):
 
         Note: Turn numbers will NOT be provided as input
 
+        Return a dictionary with the following format:
+
+        "use_api": True/False                                                               - Note that the API cannot be used when running on GPU
+        "prompts": [ <list of the prompts that go as "content" to the API> ]                - Note that every call is independent and calls are not threaded
+        "max_generated_tokens": [ <list of ints, the max generation limit for each call> ]  - Note that the submission will fail if the total generation limit is exceeded
+        "final_responses": [ <list of strings with the final responses> ]                   - Only used when use_api is set to False
+
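+        For illustration, a hypothetical API-style return could look like this
+        (the prompt strings and token limits are placeholders, not values
+        required by the evaluator):
+
+        {
+            "use_api": True,
+            "prompts": ["<content for call 1>", "<content for call 2>", ...],
+            "max_generated_tokens": [64, 64, ...],
+            "final_responses": []    # ignored when use_api is True
+        }
+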
         """
         # print(f"{len(test_data)=}, {test_data[0].keys()=}, {len(test_data[-1]['dialogue'])}")
-        return ["THIS IS A TEST REPLY" for _ in test_data]
\ No newline at end of file
+
+        response = {
+            "use_api": False,                                    # Ignored if GPU true is set in aicrowd.json
+            "prompts": ["" for _ in test_data],                  # Ignored if GPU true is set in aicrowd.json
+            "final_responses": ["THIS IS A TEST REPLY" for _ in test_data]
+        }
+        return response
diff --git a/local_evaluation.py b/local_evaluation.py
index 16da38396920e49049683ce02649e675bfa12e76..cc26054251eb6b04d505fac1f312749edff1e13a 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -36,7 +36,7 @@ def get_responses(agent, test_data, BATCH_SIZE):
     for batch_idx in np.array_split(range(len(test_data)), split_size):
         for turn_id in range(7):
             batch_inputs = [test_data[i][f"turn_{turn_id}"] for i in batch_idx]
-            responses = agent.generate_responses(batch_inputs)
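+            # generate_responses now returns a dict; local evaluation reads only
+            # the "final_responses" field (the API-related fields are unused here)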
+            responses = agent.generate_responses(batch_inputs)['final_responses']
             for bi, resp in zip(batch_idx, responses):
                 all_responses[bi][f"turn_{turn_id}"] = resp
     return all_responses