diff --git a/aicrowd.json b/aicrowd.json
index 3fe7d2e3e00e8123d64fd5bf61ef47bd8bf28ece..71fd837392a7975c5405958243c702f869e521b5 100755
--- a/aicrowd.json
+++ b/aicrowd.json
@@ -1,8 +1,8 @@
 {
     "challenge_id": "esci-challenge-for-improving-product-search",
-    "task": "task_1_query-product_ranking",
+    "task": "task_2_multiclass_product_classification",
     "gpu": true,
     "authors": ["aicrowd-username"],
-    "description": "task_3_product_substitute_identification, task_2_multiclass_product_classification",
+    "description": "task_3_product_substitute_identification, task_1_query-product_ranking",
     "license": "MIT"
 }
\ No newline at end of file
diff --git a/utils/lgb_predict_task2.py b/utils/lgb_predict_task2.py
index ad66fcf95f62e9f14c459f39053072aec2867822..c6f325981ddd688986b8a1b1cc0f26ceb42673a3 100755
--- a/utils/lgb_predict_task2.py
+++ b/utils/lgb_predict_task2.py
@@ -10,7 +10,7 @@ LGB_CONFIG = {
         "model_file": "/models/lgb-us-task-2.txt",
         "features": ['pred_0','pred_1','pred_2','pred_3',
                      'label_0','label_1','label_2','label_3',
-                     'query_count','is_isbn','has_isbn','fold']
+                     'query_count','is_isbn','has_isbn', 'fold', 'model']
     },
     "jp": {
         "product_feat": "/models/es-jp-product-feat-remove-intersection.csv",
@@ -48,11 +48,10 @@ def lgb_predict_task2(df, locale):
     )
     temp = (
         df.groupby("query")["example_id"]
-        .count()
+        .nunique()
         .reset_index()
         .rename({"example_id": "query_count"}, axis=1)
     )
-    temp["query_count"] = temp["query_count"] // df["fold"].nunique()
     df = pd.merge(left=df, right=temp, on="query", how="left") # query_count
     df["is_isbn"] = df["product_id"].apply(lambda x: int(x[0] != "B")) # is_isbn
     temp = (
diff --git a/utils/lgb_predict_task3.py b/utils/lgb_predict_task3.py
index 26e79b3de8ce3f2ab899d9156d782cd21adf11ae..f972a4b3097bd79002555e3ec65dcc871e3d48b4 100755
--- a/utils/lgb_predict_task3.py
+++ b/utils/lgb_predict_task3.py
@@ -7,7 +7,7 @@ LGB_CONFIG = {
         "model_file": "/models/lgb-us-task-3.txt",
         "features": ['pred_0','pred_1','pred_2','pred_3',
                      'label_0','label_1','label_2','label_3',
-                     'query_count','is_isbn','has_isbn','fold']
+                     'query_count','is_isbn','has_isbn', 'fold', 'model']
     },
     "jp": {
         "product_feat": "/models/es-jp-product-feat-remove-intersection.csv",
@@ -46,11 +46,10 @@ def lgb_predict_task3(df, locale):
     )
     temp = (
         df.groupby("query")["example_id"]
-        .count()
+        .nunique()
        .reset_index()
         .rename({"example_id": "query_count"}, axis=1)
     )
-    temp["query_count"] = temp["query_count"] // df["fold"].nunique()
     df = pd.merge(left=df, right=temp, on="query", how="left") # query_count
     df["is_isbn"] = df["product_id"].apply(lambda x: int(x[0] != "B")) # is_isbn
     temp = (
diff --git a/utils/run_task2.py b/utils/run_task2.py
index 3956f073a33caa4d2bf673e7fdb0826bc800184d..85001b2274b704c75960e7138900f638f869b48d 100755
--- a/utils/run_task2.py
+++ b/utils/run_task2.py
@@ -5,7 +5,6 @@ from .onnx_predict import onnx_predict
 from .lgb_predict_task2 import lgb_predict_task2
 from scipy.special import softmax
 
-# ensemble 4 models for task 2
 CONFIG = {
     "us": [
         {
@@ -59,7 +58,7 @@ CONFIG = {
                 "product_brand": 4609,
                 "product_color_name": 3225,
             },
-            "model": ["/models/us-bigbird-des-0-fp16.onnx", "/models/us-bigbird-des-1-fp16.onnx"],
+            "model": ["/models/us-bigbird-kd-0-fp16.onnx", "/models/us-bigbird-kd-1-fp16.onnx"],
             "product": "/models/bigbird.h5",
             "key": "us",
             "type": "bigbird",
@@ -82,12 +81,12 @@ CONFIG = {
                 "product_color_name": 2551,
             },
             "tokenizer": "/models/spm-jp-es.model",
-            "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'],
+            "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'],
             "product": "/models/product.h5",
             "key": "jp",
             "type": "mdeberta",
             "fold_offset": 0,
-            "description":False,
+            "description":True,
         },
     ],
     "es": [
@@ -104,12 +103,12 @@ CONFIG = {
                 "product_color_name": 6776,
             },
             "tokenizer": "/models/spm-jp-es.model",
-            "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'],
+            "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'],
             "product": "/models/product.h5",
             "key": "es",
             "type": "mdeberta",
             "fold_offset": 0,
-            "description": False,
+            "description": True,
         },
     ],
 }
@@ -155,6 +154,7 @@ class Task2Predictor(BasePredictor):
             pred = onnx_predict(sub_test, config)
             pred[list(range(4))] = softmax(pred[list(range(4))].values, 1) # type: ignore
             pred.columns = ["example_id", "fold"] + ["pred_0",'pred_1','pred_2','pred_3']
+            pred['model'] = i
             onnx_pred.append(pred.copy())
             progress += 0.1
             register_progress(progress)
diff --git a/utils/run_task3.py b/utils/run_task3.py
index f5bf0b4fd0ae484569ecc26a18499a72f87d6b54..3d23762c35816a57a6e4bb69ba46133dbaa54c22 100755
--- a/utils/run_task3.py
+++ b/utils/run_task3.py
@@ -4,6 +4,7 @@ from .clean import DeBertaCleanV2, ESclean, JSClean # type: ignore
 from .onnx_predict import onnx_predict
 from .lgb_predict_task3 import lgb_predict_task3
 from scipy.special import softmax
+import time
 CONFIG = {
     "us": [
         {
@@ -58,7 +59,7 @@ CONFIG = {
                 "product_brand": 4609,
                 "product_color_name": 3225,
             },
-            "model": ["/models/us-bigbird-des-0-fp16.onnx", "/models/us-bigbird-des-1-fp16.onnx"],
+            "model": ["/models/us-bigbird-kd-0-fp16.onnx", "/models/us-bigbird-kd-1-fp16.onnx"],
             "product": "/models/bigbird.h5",
             "key": "us",
             "type": "bigbird",
@@ -81,12 +82,12 @@ CONFIG = {
                 "product_color_name": 2551,
             },
             "tokenizer": "/models/spm-jp-es.model",
-            "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'],
+            "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'],
             "product": "/models/product.h5",
             "key": "jp",
             "type": "mdeberta",
             "fold_offset": 0,
-            "description":False,
+            "description":True,
         },
     ],
     "es": [
@@ -103,12 +104,12 @@ CONFIG = {
                 "product_color_name": 6776,
             },
             "tokenizer": "/models/spm-jp-es.model",
-            "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'],
+            "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'],
             "product": "/models/product.h5",
             "key": "es",
             "type": "mdeberta",
             "fold_offset": 0,
-            "description": False,
+            "description": True,
         },
     ],
 }
@@ -154,6 +155,7 @@ class Task3Predictor(BasePredictor):
             pred = onnx_predict(sub_test, config)
             pred[list(range(4))] = softmax(pred[list(range(4))].values, 1) # type: ignore
             pred.columns = ["example_id", "fold"] + ["pred_0",'pred_1','pred_2','pred_3']
+            pred['model'] = i
             onnx_pred.append(pred.copy())
             progress += 0.1
             register_progress(progress)
@@ -165,4 +167,4 @@ class Task3Predictor(BasePredictor):
         predictions_df = pd.concat(all_output).reset_index(drop=True)
         print("Writing Task-3 Predictions to : ", predictions_output_path)
         predictions_df.to_csv(predictions_output_path, index=False, header=True)
-        register_progress(1)
+        register_progress(1)
\ No newline at end of file