From 7c54e6449fb76cf2ff5c9958f8cfcf9c38b36b02 Mon Sep 17 00:00:00 2001 From: wufanyou <fanyou.wu@outlook.com> Date: Wed, 13 Jul 2022 13:58:16 +0800 Subject: [PATCH] update --- aicrowd.json | 4 ++-- utils/lgb_predict_task2.py | 5 ++--- utils/lgb_predict_task3.py | 5 ++--- utils/run_task2.py | 12 ++++++------ utils/run_task3.py | 14 ++++++++------ 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/aicrowd.json b/aicrowd.json index 3fe7d2e..71fd837 100755 --- a/aicrowd.json +++ b/aicrowd.json @@ -1,8 +1,8 @@ { "challenge_id": "esci-challenge-for-improving-product-search", - "task": "task_1_query-product_ranking", + "task": "task_2_multiclass_product_classification", "gpu": true, "authors": ["aicrowd-username"], - "description": "task_3_product_substitute_identification, task_2_multiclass_product_classification", + "description": "task_3_product_substitute_identification, task_1_query-product_ranking", "license": "MIT" } \ No newline at end of file diff --git a/utils/lgb_predict_task2.py b/utils/lgb_predict_task2.py index ad66fcf..c6f3259 100755 --- a/utils/lgb_predict_task2.py +++ b/utils/lgb_predict_task2.py @@ -10,7 +10,7 @@ LGB_CONFIG = { "model_file": "/models/lgb-us-task-2.txt", "features": ['pred_0','pred_1','pred_2','pred_3', 'label_0','label_1','label_2','label_3', - 'query_count','is_isbn','has_isbn','fold'] + 'query_count','is_isbn','has_isbn', 'fold', 'model'] }, "jp": { "product_feat": "/models/es-jp-product-feat-remove-intersection.csv", @@ -48,11 +48,10 @@ def lgb_predict_task2(df, locale): ) temp = ( df.groupby("query")["example_id"] - .count() + .nunique() .reset_index() .rename({"example_id": "query_count"}, axis=1) ) - temp["query_count"] = temp["query_count"] // df["fold"].nunique() df = pd.merge(left=df, right=temp, on="query", how="left") # query_count df["is_isbn"] = df["product_id"].apply(lambda x: int(x[0] != "B")) # is_isbn temp = ( diff --git a/utils/lgb_predict_task3.py b/utils/lgb_predict_task3.py index 26e79b3..f972a4b 100755 --- a/utils/lgb_predict_task3.py +++ b/utils/lgb_predict_task3.py @@ -7,7 +7,7 @@ LGB_CONFIG = { "model_file": "/models/lgb-us-task-3.txt", "features": ['pred_0','pred_1','pred_2','pred_3', 'label_0','label_1','label_2','label_3', - 'query_count','is_isbn','has_isbn','fold'] + 'query_count','is_isbn','has_isbn', 'fold', 'model'] }, "jp": { "product_feat": "/models/es-jp-product-feat-remove-intersection.csv", @@ -46,11 +46,10 @@ def lgb_predict_task3(df, locale): ) temp = ( df.groupby("query")["example_id"] - .count() + .nunique() .reset_index() .rename({"example_id": "query_count"}, axis=1) ) - temp["query_count"] = temp["query_count"] // df["fold"].nunique() df = pd.merge(left=df, right=temp, on="query", how="left") # query_count df["is_isbn"] = df["product_id"].apply(lambda x: int(x[0] != "B")) # is_isbn temp = ( diff --git a/utils/run_task2.py b/utils/run_task2.py index 3956f07..85001b2 100755 --- a/utils/run_task2.py +++ b/utils/run_task2.py @@ -5,7 +5,6 @@ from .onnx_predict import onnx_predict from .lgb_predict_task2 import lgb_predict_task2 from scipy.special import softmax -# ensemble 4 models for task 2 CONFIG = { "us": [ { @@ -59,7 +58,7 @@ CONFIG = { "product_brand": 4609, "product_color_name": 3225, }, - "model": ["/models/us-bigbird-des-0-fp16.onnx", "/models/us-bigbird-des-1-fp16.onnx"], + "model": ["/models/us-bigbird-kd-0-fp16.onnx", "/models/us-bigbird-kd-1-fp16.onnx"], "product": "/models/bigbird.h5", "key": "us", "type": "bigbird", @@ -82,12 +81,12 @@ CONFIG = { "product_color_name": 2551, }, "tokenizer": "/models/spm-jp-es.model", - "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'], + "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'], "product": "/models/product.h5", "key": "jp", "type": "mdeberta", "fold_offset": 0, - "description":False, + "description":True, }, ], "es": [ @@ -104,12 +103,12 @@ CONFIG = { "product_color_name": 6776, }, "tokenizer": "/models/spm-jp-es.model", - "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'], + "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'], "product": "/models/product.h5", "key": "es", "type": "mdeberta", "fold_offset": 0, - "description": False, + "description": True, }, ], } @@ -155,6 +154,7 @@ class Task2Predictor(BasePredictor): pred = onnx_predict(sub_test, config) pred[list(range(4))] = softmax(pred[list(range(4))].values, 1) # type: ignore pred.columns = ["example_id", "fold"] + ["pred_0",'pred_1','pred_2','pred_3'] + pred['model'] = i onnx_pred.append(pred.copy()) progress += 0.1 register_progress(progress) diff --git a/utils/run_task3.py b/utils/run_task3.py index f5bf0b4..3d23762 100755 --- a/utils/run_task3.py +++ b/utils/run_task3.py @@ -4,6 +4,7 @@ from .clean import DeBertaCleanV2, ESclean, JSClean # type: ignore from .onnx_predict import onnx_predict from .lgb_predict_task3 import lgb_predict_task3 from scipy.special import softmax +import time CONFIG = { "us": [ @@ -58,7 +59,7 @@ CONFIG = { "product_brand": 4609, "product_color_name": 3225, }, - "model": ["/models/us-bigbird-des-0-fp16.onnx", "/models/us-bigbird-des-1-fp16.onnx"], + "model": ["/models/us-bigbird-kd-0-fp16.onnx", "/models/us-bigbird-kd-1-fp16.onnx"], "product": "/models/bigbird.h5", "key": "us", "type": "bigbird", @@ -81,12 +82,12 @@ CONFIG = { "product_color_name": 2551, }, "tokenizer": "/models/spm-jp-es.model", - "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'], + "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'], "product": "/models/product.h5", "key": "jp", "type": "mdeberta", "fold_offset": 0, - "description":False, + "description":True, }, ], "es": [ @@ -103,12 +104,12 @@ CONFIG = { "product_color_name": 6776, }, "tokenizer": "/models/spm-jp-es.model", - "model": ['/models/us-es-jp-mdeberta-0-fp16.onnx', '/models/us-es-jp-mdeberta-1-fp16.onnx'], + "model":['/models/us-es-jp-mdeberta-des-0-fp16.onnx', '/models/us-es-jp-mdeberta-des-1-fp16.onnx'], "product": "/models/product.h5", "key": "es", "type": "mdeberta", "fold_offset": 0, - "description": False, + "description": True, }, ], } @@ -154,6 +155,7 @@ class Task3Predictor(BasePredictor): pred = onnx_predict(sub_test, config) pred[list(range(4))] = softmax(pred[list(range(4))].values, 1) # type: ignore pred.columns = ["example_id", "fold"] + ["pred_0",'pred_1','pred_2','pred_3'] + pred['model'] = i onnx_pred.append(pred.copy()) progress += 0.1 register_progress(progress) @@ -165,4 +167,4 @@ class Task3Predictor(BasePredictor): predictions_df = pd.concat(all_output).reset_index(drop=True) print("Writing Task-3 Predictions to : ", predictions_output_path) predictions_df.to_csv(predictions_output_path, index=False, header=True) - register_progress(1) + register_progress(1) \ No newline at end of file -- GitLab