Skip to content
Snippets Groups Projects
Commit 64e4f64e authored by Dipam Chakraborty's avatar Dipam Chakraborty
Browse files

initial commit

parents
No related branches found
No related merge requests found
__pycache__
\ No newline at end of file
# TODO
from typing import List, Dict
class DummyResponseAgent:
    """Baseline agent that answers every conversation with the same fixed text."""

    def __init__(self):
        """Nothing is set up here; a real agent would load its model(s)."""

    def generate_responses(self, test_data: List[Dict]) -> List[str]:
        """
        Produce one reply string per conversation in the batch.

        Each call receives a batch of up to 50 independent conversations and
        must return exactly one string for every conversation.

        :param test_data: batch of conversations (their content is ignored here)
        :return: list with one canned reply per conversation
        """
        replies = []
        for _conversation in test_data:
            replies.append("THIS IS A TEST REPLY")
        return replies
\ No newline at end of file
from agents.dummy_agent import DummyResponseAgent
# Alias under which the agent class is exposed. The evaluation script in this
# listing imports `UserAgent` from `agents.user_config` and instantiates it with
# no arguments (presumably this module is that file; confirm the path).
UserAgent = DummyResponseAgent
\ No newline at end of file
{
"challenge_id": "task-1-commonsense-dialogue-response-generation",
"authors": [
"aicrowd-bot"
],
"description": "(optional) description about your awesome agent"
}
\ No newline at end of file
git
\ No newline at end of file
This diff is collapsed.
from typing import List, Dict
import json
import numpy as np
from metrics import word_f1, bleu
from agents.user_config import UserAgent
def load_json_data(file_path: str, keys: List[str], modifications: dict = None) -> List[Dict]:
    """
    Load dialogues from a JSON file, keeping only selected fields of each turn.

    The file must contain a list of dialogues, each one a mapping from a turn
    id to a sample mapping.

    :param file_path: path of the JSON file to read
    :param keys: field names copied from every sample; a field missing from a
        sample raises KeyError
    :param modifications: optional mapping from field name to a callable; the
        callable is applied to that field's value after the copy (the field
        must be one of ``keys``)
    :return: one dict per dialogue, mapping turn id to the reduced sample
    """
    # JSON text is UTF-8 by specification; reading with the platform's locale
    # encoding would garble (or fail on) non-ASCII dialogue text.
    with open(file_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    result = []
    for dialogue in data:
        updated_dialogue = {}
        for turn_id, sample in dialogue.items():
            sample_data = {key: sample[key] for key in keys}
            if modifications:
                for key, transform in modifications.items():
                    sample_data[key] = transform(sample_data[key])
            updated_dialogue[turn_id] = sample_data
        result.append(updated_dialogue)
    return result
def load_data(file_path: str) -> List[Dict]:
    """
    Load the evaluation dialogues from ``file_path``.

    Only the persona, dialogue and gold-reference fields of each turn are
    kept, and the last entry of every "dialogue" field is dropped
    (presumably the turn the agent is expected to generate; confirm against
    the data format).

    :param file_path: path of the JSON data file
    :return: one dict per dialogue, mapping turn id to the reduced sample
    """
    def _drop_last(turns):
        return turns[:-1]

    return load_json_data(
        file_path,
        ["persona A", "persona B", "dialogue", "gold_reference"],
        {"dialogue": _drop_last},
    )
def get_responses(agent, test_data, BATCH_SIZE, num_turns=7):
    """
    Query the agent for a response to every turn of every conversation.

    Conversations are processed in consecutive batches of at most
    ``BATCH_SIZE`` items. For each batch the agent is called once per turn
    with the ``turn_<n>`` samples of the conversations in that batch.

    :param agent: object exposing ``generate_responses(batch) -> list of str``
    :param test_data: list of conversations, each mapping ``turn_<n>`` to a sample
    :param BATCH_SIZE: maximum number of conversations sent to the agent at once
    :param num_turns: number of turns per conversation (``turn_0`` .. ``turn_<num_turns-1>``)
    :return: list aligned with ``test_data``; each item maps ``turn_<n>`` to
        the agent's response for that conversation and turn
    :raises ValueError: if ``BATCH_SIZE`` is not positive, or the agent returns
        a different number of responses than conversations in the batch
    """
    if BATCH_SIZE < 1:
        raise ValueError("BATCH_SIZE must be a positive integer")

    all_responses = [{} for _ in test_data]
    total = len(test_data)
    # Step through the data in slices of BATCH_SIZE conversations, so that
    # BATCH_SIZE is the size of a batch and not the number of batches.
    for start in range(0, total, BATCH_SIZE):
        batch_idx = range(start, min(start + BATCH_SIZE, total))
        for turn_id in range(num_turns):
            turn_key = f"turn_{turn_id}"
            batch_inputs = [test_data[i][turn_key] for i in batch_idx]
            responses = list(agent.generate_responses(batch_inputs))
            if len(responses) != len(batch_inputs):
                raise ValueError(
                    f"agent returned {len(responses)} responses "
                    f"for a batch of {len(batch_inputs)} conversations"
                )
            # Pair each response with its own conversation; a nested loop
            # would overwrite every conversation with the last response.
            for bi, resp in zip(batch_idx, responses):
                all_responses[bi][turn_key] = resp
    return all_responses
def evaluate(responses, test_data):
    """
    Score generated responses against the gold references.

    For every conversation and each of the turns ``turn_0`` .. ``turn_6`` the
    response is compared with that turn's ``gold_reference`` using word-level
    F1 and BLEU; both metrics are then averaged over all scored turns.

    :param responses: per-conversation dicts mapping ``turn_<n>`` to a response
    :param test_data: per-conversation dicts mapping ``turn_<n>`` to a sample
        that contains ``gold_reference``
    :return: tuple (mean word F1, mean BLEU)
    """
    f1_scores, bleu_scores = [], []
    for response, test_data_single in zip(responses, test_data):
        for turn_key in (f"turn_{n}" for n in range(7)):
            prediction = response[turn_key]
            # Both metrics take a list of references; there is one per turn.
            references = [test_data_single[turn_key]['gold_reference']]
            f1_scores.append(word_f1(prediction, references))
            bleu_scores.append(bleu(prediction, references))
    mean_f1 = np.mean(f1_scores)
    mean_bleu = np.mean(bleu_scores)
    return mean_f1, mean_bleu
if __name__ == "__main__":
    # Local run: load the dummy data, let the configured agent answer every
    # turn in batches of two conversations, then print the averaged metrics.
    dialogues = load_data('dummy_data_task1.json')
    predictions = get_responses(UserAgent(), dialogues, 2)
    mean_f1, mean_bleu = evaluate(predictions, dialogues)
    print("Word F1 Score:", mean_f1)
    print("Word Bleu Score:", mean_bleu)
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
from collections import Counter
from nltk.translate import bleu_score as nltkbleu
import re
from typing import (
List,
Tuple,
)
# Matches the English articles as whole words.
re_art = re.compile(r'\b(a|an|the)\b')
# Matches a single ASCII punctuation character.
re_punc = re.compile(r'[!"#$%&()*+,-./:;<=>?@\[\]\\^`{|}~_\']')


def normalize_answer(s):
    """
    Canonicalise text before token-level comparison.

    The text is lower-cased, punctuation characters and the articles
    a/an/the are replaced by spaces, and runs of whitespace are collapsed
    into single spaces (leading/trailing whitespace is removed).
    """
    without_punc = re_punc.sub(' ', s.lower())
    without_articles = re_art.sub(' ', without_punc)
    return ' '.join(without_articles.split())
def _word_prec_recall_f1_score(pred_items, gold_items) -> Tuple[float, float, float]:
    """
    Compute multiset precision, recall and F1 between predicted and gold items.

    Items are matched with multiplicity: an item counts as shared as many
    times as it occurs in both collections.

    :param pred_items: sized collection of predicted values
    :param gold_items: sized collection of gold values
    :return: tuple (p, r, f1); all three are 0 when nothing is shared
    """
    shared = Counter(gold_items) & Counter(pred_items)
    overlap = sum(shared.values())
    if not overlap:
        return 0, 0, 0

    precision = float(overlap) / len(pred_items)
    recall = float(overlap) / len(gold_items)
    # Harmonic mean of precision and recall.
    f1 = (2 * precision * recall) / (precision + recall)
    return precision, recall, f1
def word_f1(pred_label: str, gold_labels: List[str], expose_p_and_r: bool = False) -> float:
    """
    Compute the best word-level F1 of a prediction over a set of references.

    Prediction and references are normalised and split on whitespace, then
    scored pairwise. The maximum of each statistic over the references is
    kept; precision, recall and F1 maxima are taken independently, so they
    may come from different references.

    :param pred_label: predicted text, or None
    :param gold_labels: reference texts, or None
    :param expose_p_and_r: when True, return ``(precision, recall, f1)``
        instead of just the F1 value
    :return: the maximum F1, or the tuple of maxima when ``expose_p_and_r`` is
        True; zero value(s) when either input is None or there are no references
    """
    max_p, max_r, max_f1 = 0, 0, 0
    # A missing prediction or reference list scores zero. Falling through to
    # the shared return below keeps the result shape consistent with
    # ``expose_p_and_r`` (a bare 0 here would break callers unpacking a tuple).
    if pred_label is not None and gold_labels is not None:
        pred_tokens = normalize_answer(pred_label).split()
        for gold in gold_labels:
            p, r, f1 = _word_prec_recall_f1_score(pred_tokens, normalize_answer(gold).split())
            max_p, max_r, max_f1 = max(max_p, p), max(max_r, r), max(f1, max_f1)

    if expose_p_and_r:
        return max_p, max_r, max_f1
    return max_f1
def bleu(guess: str, answers: List[str], k: int = 4) -> float:
    """
    Compute the cumulative k-gram sentence BLEU of ``guess`` against ``answers``.

    Texts are normalised and split on single spaces. N-gram orders 1..k are
    weighted uniformly, and NLTK's ``method1`` smoothing is used with
    epsilon 1e-12.

    :param guess: predicted text
    :param answers: reference texts
    :param k: highest n-gram order included in the score (4 by default)
    :return: the BLEU score computed by ``nltk.translate.bleu_score.sentence_bleu``
    """
    # Uniform weight for each n-gram order from 1 to k.
    weights = [1 / k for _ in range(k)]
    references = [normalize_answer(a).split(" ") for a in answers]
    hypothesis = normalize_answer(guess).split(" ")
    smoothing = nltkbleu.SmoothingFunction(epsilon=1e-12).method1
    return nltkbleu.sentence_bleu(
        references,
        hypothesis,
        smoothing_function=smoothing,
        weights=weights,
    )
\ No newline at end of file
numpy
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment