From f201b911afe42f12f0d15774266b72a15cef36f2 Mon Sep 17 00:00:00 2001
From: "S.P. Mohanty" <spmohanty91@gmail.com>
Date: Thu, 21 Mar 2024 16:23:51 +0000
Subject: [PATCH] Add scripts for local debug with docker

---
 .dockerignore       |  1 +
 Dockerfile          | 40 ++++++++++++++++++++++++++++++++++++++++
 apt.txt             |  1 +
 docker_run.sh       | 42 ++++++++++++++++++++++++++++++++++++++++++
 local_evaluation.py | 29 ++++++++++++++---------------
 5 files changed, 98 insertions(+), 15 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 apt.txt
 create mode 100755 docker_run.sh

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f3195f9
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+models/**
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8e869df
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,40 @@
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8 \
+    USER_NAME=aicrowd \
+    HOME_DIR=/home/aicrowd \
+    CONDA_DIR=/home/aicrowd/.conda \
+    PATH=/home/aicrowd/.conda/bin:${PATH} \
+    SHELL=/bin/bash
+
+# Install system dependencies and clean up in one layer
+COPY apt.txt /tmp/apt.txt
+RUN apt -qq update && apt -qq install -y --no-install-recommends `cat /tmp/apt.txt` locales wget \
+    && locale-gen en_US.UTF-8 \
+    && rm -rf /var/cache/apt/* /var/lib/apt/lists/* \
+    && apt clean
+
+# Set up user
+RUN groupadd -g 1001 aicrowd && \
+    useradd -m -s /bin/bash -u 1001 -g aicrowd -G sudo aicrowd
+
+USER ${USER_NAME}
+WORKDIR ${HOME_DIR}
+
+# Install Miniconda and Python packages
+RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh \
+    && bash miniconda.sh -b -p ${CONDA_DIR} \
+    && . ${CONDA_DIR}/etc/profile.d/conda.sh \
+    && conda install cmake -y \
+    && conda clean -y -a \
+    && rm -rf miniconda.sh
+
+COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
+RUN pip install -r requirements.txt --no-cache-dir
+COPY --chown=1001:1001 requirements_eval.txt ${HOME_DIR}/requirements_eval.txt
+RUN pip install -r requirements_eval.txt --no-cache-dir
+
+## Add your custom commands below
diff --git a/apt.txt b/apt.txt
new file mode 100644
index 0000000..0899c29
--- /dev/null
+++ b/apt.txt
@@ -0,0 +1 @@
+git
\ No newline at end of file
diff --git a/docker_run.sh b/docker_run.sh
new file mode 100755
index 0000000..a93b850
--- /dev/null
+++ b/docker_run.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+#!/bin/bash
+
+# This script builds a Docker image from the current directory
+# and runs a container from this image, executing local_evaluation.py
+# with the current directory mounted at /submission inside the container.
+
+# Step 1: Define the name of the Docker image.
+LAST_COMMIT_HASH=$(git rev-parse HEAD)
+IMAGE_NAME="aicrowd/amazon-kddcup24-submission:${LAST_COMMIT_HASH}"
+
+# Step 2: Build the Docker image.
+# The '.' at the end specifies that the Docker context is the current directory.
+# This means Docker will look for a Dockerfile in the current directory to build the image.
+START_TIME=$(date +%s)
+DOCKER_BUILDKIT=1 docker build -t $IMAGE_NAME .
+END_TIME=$(date +%s)
+BUILD_TIME=$((END_TIME - START_TIME))
+echo "Total build time: $BUILD_TIME seconds"
+
+# Step 3: Run the Docker container.
+# -v "$(pwd)":/submission mounts the current directory ($(pwd) outputs the current directory path)
+# to /submission inside the container. This way, the container can access the contents
+# of the current directory as if they were located at /submission inside the container.
+# 'python local_evaluation.py' is the command executed inside the container.
+# The -w flag sets the working directory to /submission.
+# It then runs local_evaluation.py using the software runtime set up in the Dockerfile.
+docker run -v "$(pwd)":/submission -w /submission $IMAGE_NAME python local_evaluation.py
+
+
+# Note 1: Please refer to the Dockerfile to understand how the software runtime is set up.
+# The Dockerfile should include all necessary commands to install Python, the required
+# dependencies, and any other software needed to run local_evaluation.py.
+
+# Note 2: See the .dockerignore file in the root of this directory.
+# In the .dockerignore file, specify any files or directories that should not be included
+# in the Docker context. This typically includes large files, models, or datasets that
+# are not necessary for building the Docker image. Excluding these can significantly
+# speed up the build process by reducing the size of the build context sent to the Docker daemon.
+
+# Ensure your Dockerfile and .dockerignore are properly set up before running this script.
diff --git a/local_evaluation.py b/local_evaluation.py
index 9e970e5..c4cf6ba 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -1,12 +1,11 @@
-import pandas as pd
-from tqdm import tqdm
-import torch
-import numpy as np
 import os
-
 
 import metrics
+import numpy as np
+import pandas as pd
 import parsers
+import torch
+from tqdm import tqdm
 
 
 VERSION = "0.1.0"
@@ -168,22 +167,22 @@ def get_evaluation_methods():
         "hit rate@3": metrics.calculate_hit_rate_3,
         "rougel": metrics.calculate_rougel,
         "sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
-            generated_text=generated_text,
-            reference_texts=reference_texts,
-            model_name="all-MiniLM-L6-v2"
+            generated_text=generated_text,
+            reference_texts=reference_texts,
+            model_name="all-MiniLM-L6-v2",
         ),
         "multilingual-sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
-            generated_text=generated_text,
-            reference_texts=reference_texts,
-            model_name="paraphrase-multilingual-MiniLM-L12-v2"
+            generated_text=generated_text,
+            reference_texts=reference_texts,
+            model_name="paraphrase-multilingual-MiniLM-L12-v2",
         ),
-        "micro f1": metrics.calculate_true_positive_false_positives_false_negatives,
+        "micro f1": metrics.calculate_true_positive_false_positives_false_negatives,
         "ndcg": metrics.calculate_ndcg,
         "bleu": metrics.calculate_bleu_score,
         "jp-bleu": lambda generated_text, reference_text: metrics.calculate_bleu_score(
-            generated_text=generated_text,
-            reference_text=reference_text,
-            is_japanese=True
+            generated_text=generated_text,
+            reference_text=reference_text,
+            is_japanese=True,
         ),
     }
 
--
GitLab
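A minimal usage sketch for the patch above, assuming Docker 19.03+ with the NVIDIA Container Toolkit and git are available on the host. The --gpus flag and the interactive-shell variant are illustrative additions for local debugging, not part of docker_run.sh itself:

# Build the image for the current commit and run the evaluation end to end:
./docker_run.sh

# For interactive debugging, reuse the tag that docker_run.sh builds for the current
# commit and open a shell (with GPU access) instead of running local_evaluation.py:
IMAGE_NAME="aicrowd/amazon-kddcup24-submission:$(git rev-parse HEAD)"
docker run -it --gpus all -v "$(pwd)":/submission -w /submission "$IMAGE_NAME" /bin/bash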