From 37c567c5216ae38d64b059630581e089fd5feefa Mon Sep 17 00:00:00 2001
From: Dipam Chakraborty <dipamc77@gmail.com>
Date: Thu, 7 Dec 2023 20:06:26 +0530
Subject: [PATCH] add docs for prompt engineering track

---
 README.md             | 67 ++++++++++++++++++++++++++++++++++---------
 agents/README.md      | 43 +++++++++++++++++++++++++--
 agents/user_config.py |  4 +--
 3 files changed, 96 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 76c7986..2a060a8 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,21 @@ We also provide a list of other resources that may be related to this task:
 
+# GPU and Prompt Engineering Tracks
+
+We provide two separate tracks for participants to choose from: the GPU Track and the Prompt Engineering Track.
+
+## GPU Track
+
+In this track, we provide participants with access to a single GPU with 24 GB VRAM, allowing them to fine-tune and submit their own LLMs specific to this task.
+
+## Prompt Engineering Track
+
+In the Prompt Engineering Track, we provide participants with access to the OpenAI API. This allows anyone to test their prompt engineering skills with a powerful LLM and to combine it with advanced retrieval-based methods to generate context.
+
+## Can I participate in both tracks?
+
+Yes, anyone can participate in both tracks; the prize pool is shared. The submission limits apply to both tracks combined. See below for details on how to specify the track for your submissions.
 # Getting Started
 
 1. **Sign up** to join the competition [on the AIcrowd website](https://www.aicrowd.com/challenges/commonsense-persona-grounded-dialogue-challenge-2023/problems/task-1-commonsense-dialogue-response-generation).
 
@@ -90,7 +105,7 @@ We also provide a list of other resources that may be related to this task:
 We recommend that you place the code for all your models in the `agents/` directory (though it is not mandatory). You should implement the following
 
-- `generate_responses` - This function is called to generate the response of a conversation given two persona information.
+- `generate_responses` - This function is called to generate the response of a conversation given the persona information.
 
 **Add your agent name in** `agent/user_config.py`, this is what will be used for the evaluations.
 
@@ -108,19 +123,19 @@ You can add your SSH Keys to your GitLab account by going to your profile settin
 
 2. **Clone the repository**
 
-    ```h
+    ```bash
     git clone git@gitlab.aicrowd.com:aicrowd/challenges/commonsense-persona-grounded-dialogue-challenge-2023/commonsense-persona-grounded-dialogue-challenge-task-1-starter-kit
    ```
 
 3. **Install** competition specific dependencies!
 
-    ```
+    ```bash
     cd commonsense-persona-grounded-dialogue-challenge-task-1-starter-kit
     pip install -r requirements.txt
    ```
 
 4. Write your own model as described in [How to write your own model](#how-to-write-your-own-model) section.
 
-5. Test your model locally using `python local_evaluation.py`
+5. Test your model locally using `python local_evaluation.py` or `python local_evaluation_with_api.py`
 
 6. Make a submission as described in [How to make a submission](#how-to-make-a-submission) section.
 
@@ -141,13 +156,14 @@ The different files and directories have following meaning:
 
 ```
 .
-├── aicrowd.json            # Submission meta information - like your username
-├── apt.txt                 # Linux packages to be installed inside docker image
-├── requirements.txt        # Python packages to be installed
-├── local_evaluation.py     # Use this to check your model evaluation flow locally
-├── dummy_data_task1.json   # A set of dummy conversations you can use for integration testing
-└── agents                  # Place your models related code here
-    ├── dummy_agent.py      # Dummy agent for example interface
+├── aicrowd.json                  # Submission meta information - like your username
+├── apt.txt                       # Linux packages to be installed inside docker image
+├── requirements.txt              # Python packages to be installed
+├── local_evaluation.py           # Use this to check your model evaluation flow locally
+├── local_evaluation_with_api.py  # Use this to check your model evaluation flow locally with the API (Prompt Engineering Track)
+├── dummy_data_task1.json         # A set of dummy conversations you can use for integration testing
+└── agents                        # Place your models related code here
+    ├── dummy_agent.py            # Dummy agent for example interface
     └── user_config.py            # IMPORTANT: Add your agent name here
 ```
 
 Finally, **you must specify an AIcrowd submission JSON in `aicrowd.json` to be scored!**
 
 The `aicrowd.json` of each submission should contain the following content:
 
+**For GPU Track** - Set the `gpu` flag to `true`:
+```json
+{
+    "challenge_id": "task-1-commonsense-dialogue-response-generation",
+    "authors": ["your-aicrowd-username"],
+    "gpu": true,
+    "description": "(optional) description about your awesome model"
+}
+```
+
+**For Prompt Engineering Track** - Set the `gpu` flag to `false`:
 ```json
 {
     "challenge_id": "task-1-commonsense-dialogue-response-generation",
     "authors": ["your-aicrowd-username"],
-    "gpu": true,
+    "gpu": false,
     "description": "(optional) description about your awesome model"
 }
 ```
 
 This JSON is used to map your submission to the challenge - so please remember to use the correct `challenge_id` as specified above.
 
 ### Evaluation Metrics
 
-### Time and compute constraints
+### Time, compute, and API constraints
 
 You will be provided conversations with 7 turns each in `batches of upto 50 conversations`. For each batch of conversations, the first set of turns will be provided to your model. After the response is receieved the further turns of the same conversation will be provided. Each conversation will have exactly 7 turns. Your model needs to `complete all 7 responses of 50 conversations within **1 hour**`. The number of batches of conversation your model will process will vary based on the challenge round.
 
 Before running on the challenge dataset, your model will be run on the dummy data, as a sanity check. This will show up as the `convai-validation` phase on your submission pages. The dummy data will contain `5 conversations of 7 turns each`, your model needs to `complete the validation phase within **15 minutes**`.
 
+Before your model starts processing conversations, it is given up to *5 minutes* of additional time to load models or preprocess any data if needed.
+
+## GPU Track
 Your model will be run on an AWS g5.2xlarge node. This node has **8 vCPUs, 32 GB RAM, and one Nvidia A10G GPU with 24 GB VRAM**.
 
-Before your model starts processing conversations, it is provided an additional time upto *5 minutes* to load models or preprocess any data if needed.
+## Prompt Engineering Track
+Your model will be run on an AWS m5.xlarge node. This node has **4 vCPUs and 16 GB RAM**.
+
+For API usage, the following constraints will apply:
+
+* A maximum of 2 API calls per utterance is allowed.
+* Input token limit per dialog (the combined number of input tokens for the 7 utterances): 10,000
+* Output token limit per dialog (the combined number of output tokens for the 7 utterances): 1,000
 
 ## Local Evaluation
 
 Participants can run the evaluation protocol for their model locally with or without any constraint posed by the challenge to benchmark their models privately. See `local_evaluation.py` for details. You can change it as you like, your changes to `local_evaluation.py` will **NOT** be used for the competition.
 
+To test your submission for the Prompt Engineering Track, please use `local_evaluation_with_api.py`.
+
 ## Note about Dummy test data
 
 The file `dummy_data_task1.json` is a dummy test dataset to test your code before submission. All dialogues in the dataset based on a same pair of persona A and persona B, but the actual test dataset for evaluation is not like this and was created based on different pairs of personas.
diff --git a/agents/README.md b/agents/README.md
index b97d803..c087b33 100644
--- a/agents/README.md
+++ b/agents/README.md
@@ -1,6 +1,16 @@
 ## How to write your own agents
 
-We recommend that you place the code for all your agents in the `agents` directory (though it is not mandatory). All your submissions should contain an Agent class. We have added dummy agent example in [`dummy_agent.py`](dummy_agent.py). The agent class should contain the `generate_responses`
+We recommend that you place the code for all your agents in the `agents` directory (though it is not mandatory). All your submissions should contain an Agent class. We have added a dummy agent example in [`dummy_agent.py`](dummy_agent.py) and an API usage example in [`prompt_agent.py`](prompt_agent.py). The agent class should contain the `generate_responses` function.
+
+## How to participate in GPU Track
+
+Set `"gpu": true` in `aicrowd.json`. While the `gpu` flag is set to true, the API will not be usable.
+
+## How to participate in Prompt Engineering Track
+
+Set `"gpu": false` in `aicrowd.json`. API usage is enabled only when the GPU is not used.
+
+## Submission details
 
 **Add your agent class name in** [`user_config.py`](user_config.py) as UserAgent
 
@@ -27,5 +37,34 @@ Input 7
 ...
 {"persona A": ..., "persona B": ... "dialogue": ... } # conversation 50 Turn 7
 ]
-Model should return 50 responses for Turn 7
 ```
+
+## Output format
+
+The `generate_responses` function should return a dictionary with the following data:
+
+```python
+{
+    "use_api": True,                  # or False; whether the API should be called with the prompts below
+    "prompts": [...],                 # list of the prompts that go as "content" to the API
+    "max_generated_tokens": [...],    # list of ints for the max generation limit on each call
+    "final_responses": [...],         # list of strings with the final responses
+}
+```
+
+When passing the final responses, the `use_api` flag should be set to `False`.
+
+## How to use the API?
+
+Since the code isn't connected to the internet, we provide a passthrough to the API via the `generate_responses` function: its return value can set the `use_api` flag and pass a set of prompts, and the evaluator runs those prompts and feeds the results back to your agent.
+
+These are the steps to use the API:
+
+1. Set the `use_api` flag to `True`.
+2. Create the prompts for all the utterances and pass them as a list in the `prompts` key.
+3. Specify `max_generated_tokens` for each of the prompts; these can be different for each prompt.
+4. The evaluator will run these prompts and return the results in the `api_responses` input argument of `generate_responses`.
+
+## Which model is used for the API?
+
+We use `gpt-3.5-turbo-1106`, with each prompt sent from the `user` role.
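+
+## Putting it together
+
+Below is a minimal sketch of an agent that uses the API passthrough described above. It is illustrative only: it assumes the evaluator calls `generate_responses` again for the same turn once the API results are available, the exact shape of `test_data` and `api_responses` should be checked against [`dummy_agent.py`](dummy_agent.py) and [`prompt_agent.py`](prompt_agent.py), and `build_prompt` is a hypothetical helper.
+
+```python
+class MyPromptAgent:
+    def __init__(self):
+        self.awaiting_api = False  # True while we wait for API results
+
+    def generate_responses(self, test_data, api_responses):
+        if not self.awaiting_api:
+            # First pass: ask the evaluator to run one prompt per conversation.
+            self.awaiting_api = True
+            prompts = [self.build_prompt(conv) for conv in test_data]
+            return {
+                "use_api": True,
+                "prompts": prompts,
+                "max_generated_tokens": [64] * len(prompts),  # one limit per call
+                "final_responses": [],
+            }
+        # Second pass: api_responses holds the model output for each prompt.
+        self.awaiting_api = False
+        return {
+            "use_api": False,
+            "prompts": [],
+            "max_generated_tokens": [],
+            "final_responses": [response.strip() for response in api_responses],
+        }
+
+    def build_prompt(self, conv):
+        # Hypothetical helper: fold the personas and the dialogue so far into
+        # a single prompt string (the real test-data fields may differ).
+        return (
+            f"Persona A: {conv['persona A']}\n"
+            f"Persona B: {conv['persona B']}\n"
+            f"Dialogue so far: {conv['dialogue']}\n"
+            "Write the next response for Persona B:"
+        )
+```
+
+Remember to point `UserAgent` at your class in `user_config.py` (as shown in the next file of this patch), and stay within the 2-calls-per-utterance and token limits described in the main README.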
diff --git a/agents/user_config.py b/agents/user_config.py
index 6a00cc6..f67f832 100644
--- a/agents/user_config.py
+++ b/agents/user_config.py
@@ -3,5 +3,5 @@ from agents.prompt_agent import DummyPromptAgent
 from agents.bart_agent import BARTResponseAgent
 
 # UserAgent = DummyResponseAgent
-# UserAgent = DummyPromptAgent
-UserAgent = BARTResponseAgent
\ No newline at end of file
+UserAgent = DummyPromptAgent
+# UserAgent = BARTResponseAgent
\ No newline at end of file
--
GitLab