From 3fd2dac2ea6722b0e556d9379082e40612e3bb9e Mon Sep 17 00:00:00 2001 From: yilun_jin <jyl.jal123@gmail.com> Date: Sat, 23 Mar 2024 20:39:33 +0000 Subject: [PATCH] Update parsers.py --- parsers.py | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/parsers.py b/parsers.py index f3cbe60..a29ef43 100644 --- a/parsers.py +++ b/parsers.py @@ -80,8 +80,11 @@ class ShoppingBenchTaskParsers: An integer representing the selected option. Returns -1 if the parsing fails due to an invalid response format. """ + response = response.strip() + if response == '': + return -1 try: - return int(response.strip()[0]) + return int(response[0]) except ValueError: return -1 @@ -108,7 +111,10 @@ class ShoppingBenchTaskParsers: for item in cleaned_response.split(","): try: # Attempt to convert each item to an integer and add it to the list. - ranked_items.append(int(item)) + int_item = int(item) + if int_item > 5: + continue + ranked_items.append(int_item) except ValueError: pass # Skip non-numeric items. @@ -191,12 +197,13 @@ class ShoppingBenchTaskParsers: ) except (SyntaxError, ValueError): # Fallback: split the string by commas and strip whitespace. - return [entity.strip() for entity in response.split(",")] + # we remove empty entities. it will not cause bug, just an implementation choice. + return [entity.strip() for entity in response.split(",") if entity.strip() != ''] if __name__ == "__main__": # Example usage of the ShoppingBenchTaskParsers class for various task types. - + # MULTICHOICE EXAMPLE multic_choice_parser = ShoppingBenchTaskParsers("multichoice") print("Multichoice Example:") @@ -204,8 +211,15 @@ if __name__ == "__main__": print( multic_choice_parser.parse("a") ) # Expected output (failure case): -1 + print(multic_choice_parser.parse('\n')) # Should be -1 + print(multic_choice_parser.parse(' ')) # Should also be -1 + print(multic_choice_parser.parse(' 2')) # Should be 2 + print(multic_choice_parser.parse('\n1')) # Should be 1 + print(multic_choice_parser.parse('\n 3')) # Should be 3 + print(multic_choice_parser.parse('\n ')) # Should be -1 print() - + # MULTI CHOICE EXAMPLE TEST COMPLETE + # RANKING EXAMPLE ranking_parser = ShoppingBenchTaskParsers("ranking") print("Ranking Example:") @@ -221,17 +235,28 @@ if __name__ == "__main__": print( ranking_parser.parse("1, 4, 5, aicrowd, 6") ) # Expected output: [1, 4, 5, 6] # remove alphanumeric chars + print( + ranking_parser.parse('\n')) # Should be empty list + print(ranking_parser.parse(' \n')) + print(ranking_parser.parse('The answer is: 1, 2, 3, 4, 5')) # Should be 1, 2, 3, 4, 5 + print(ranking_parser.parse(',1,2,3,4,5')) # Should be 1, 2, 3, 4, 5 + print(ranking_parser.parse('1 2'))# Should be empty list print() - + # RANKING TEST COMPLETE + # GENERATION EXAMPLE generation_parser = ShoppingBenchTaskParsers("generation") print("Generation Example:") print( generation_parser.parse("This is a generated response") ) # Expected output: 'This is a generated response.' + print(generation_parser.parse("\nThe answer is \n\n good.\n\n\n\n\n\n\n")) # Expected: The answer is \n\n good. + print(generation_parser.parse('\n \n')) # Should be nothing. print() + # GENERATION TEST COMPLETE + # RETRIEVAL EXAMPLE retrieval_parser = ShoppingBenchTaskParsers("retrieval") print("Retrieval Example:") @@ -244,12 +269,16 @@ if __name__ == "__main__": print( retrieval_parser.parse("100, 200, jjhg") ) # Expected output (removed alphhanumeric chars): [100, 200] + print(retrieval_parser.parse('100, 200, \n\n\n 300')) # Expected: [100, 200, 300] print( retrieval_parser.parse("100, 200, 300, 400") ) # Expected output (only consider first 3 elems): [100, 200, 300] - + print(retrieval_parser.parse('\n 100, 200, 300')) # Should be 100, 200, 300 + print(retrieval_parser.parse('\n \n \n')) # Should be empty list print() - + print() + # RETRIEVAL TEST COMPLETE + # NAMED ENTITY RECOGNITION EXAMPLE ner_parser = ShoppingBenchTaskParsers("named_entity_recognition") print("Named Entity Recognition Example:") @@ -264,3 +293,7 @@ if __name__ == "__main__": ) # failure case - not tolerant to [ if quotes not used # - extra '[' characters added to boundary elems]): ['[New York', 'ShopBench', 'Amazon]'] # Expected output: ['[New York', 'ShopBench', 'Amazon]'] + print(ner_parser.parse('\n, New York, ShopBench')) # Should be ['New York', 'ShopBench'] + print(ner_parser.parse(' ')) # Should be [] + print(ner_parser.parse('\n \n')) # Should be [] + -- GitLab