From f201b911afe42f12f0d15774266b72a15cef36f2 Mon Sep 17 00:00:00 2001
From: "S.P. Mohanty" <spmohanty91@gmail.com>
Date: Thu, 21 Mar 2024 16:23:51 +0000
Subject: [PATCH] Add scripts for local debug with docker

---
 .dockerignore       |  1 +
 Dockerfile          | 40 ++++++++++++++++++++++++++++++++++++++++
 apt.txt             |  1 +
 docker_run.sh       | 42 ++++++++++++++++++++++++++++++++++++++++++
 local_evaluation.py | 29 ++++++++++++++---------------
 5 files changed, 98 insertions(+), 15 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 apt.txt
 create mode 100755 docker_run.sh

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f3195f9
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+models/**
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..8e869df
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,40 @@
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=en_US.UTF-8 \
+    LANGUAGE=en_US:en \
+    LC_ALL=en_US.UTF-8 \
+    USER_NAME=aicrowd \
+    HOME_DIR=/home/aicrowd \
+    CONDA_DIR=/home/aicrowd/.conda \
+    PATH=/home/aicrowd/.conda/bin:${PATH} \
+    SHELL=/bin/bash
+
+# Install the packages listed in apt.txt plus locales and wget, generate the en_US.UTF-8 locale, and clean apt caches in the same layer
+COPY apt.txt /tmp/apt.txt
+RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends $(cat /tmp/apt.txt) locales wget \
+    && locale-gen en_US.UTF-8 \
+    && apt-get clean \
+    && rm -rf /var/cache/apt/* /var/lib/apt/lists/*
+
+# Create the unprivileged user (UID/GID 1001) that all remaining steps run as
+RUN groupadd -g 1001 aicrowd && \
+    useradd -m -s /bin/bash -u 1001 -g aicrowd -G sudo aicrowd
+
+USER ${USER_NAME}
+WORKDIR ${HOME_DIR}
+
+# Install Miniconda into CONDA_DIR (its bin directory is already on PATH), add cmake, and drop the installer and conda caches
+RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh \
+    && bash miniconda.sh -b -p ${CONDA_DIR} \
+    && . ${CONDA_DIR}/etc/profile.d/conda.sh \
+    && conda install cmake -y \
+    && conda clean -y -a \
+    && rm -rf miniconda.sh
+
+COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=1001:1001 requirements_eval.txt ${HOME_DIR}/requirements_eval.txt
+RUN pip install --no-cache-dir -r requirements_eval.txt
+
+## Add your custom commands below
diff --git a/apt.txt b/apt.txt
new file mode 100644
index 0000000..0899c29
--- /dev/null
+++ b/apt.txt
@@ -0,0 +1 @@
+git
\ No newline at end of file
diff --git a/docker_run.sh b/docker_run.sh
new file mode 100755
index 0000000..a93b850
--- /dev/null
+++ b/docker_run.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+set -euo pipefail  # abort on the first failing command so a failed build never runs a stale image
+
+# This script builds a Docker image from the current directory
+# and runs a container from this image, executing local_evaluation.py
+# with the current directory mounted at /submission inside the container.
+
+# Step 1: Define the name of the Docker image (tagged with the current git commit hash).
+LAST_COMMIT_HASH=$(git rev-parse HEAD)
+IMAGE_NAME="aicrowd/amazon-kddcup24-submission:${LAST_COMMIT_HASH}"
+
+# Step 2: Build the Docker image.
+# The '.' at the end specifies that the Docker context is the current directory.
+# This means Docker will look for a Dockerfile in the current directory to build the image.
+START_TIME=$(date +%s)
+DOCKER_BUILDKIT=1 docker build -t "$IMAGE_NAME" .
+END_TIME=$(date +%s)
+BUILD_TIME=$((END_TIME - START_TIME))
+echo "Total build time: $BUILD_TIME seconds"
+
+# Step 3: Run the Docker container.
+# -v "$(pwd)":/submission mounts the current directory ($(pwd) outputs the current directory path)
+# to /submission inside the container. This way, the container can access the contents
+# of the current directory as if they were located at /submission inside the container.
+# 'python local_evaluation.py' is the command executed inside the container.
+# The -w flag sets the working directory to /submission, so the script path resolves there.
+# The container then runs local_evaluation.py using the software runtime set up in the Dockerfile.
+docker run -v "$(pwd)":/submission -w /submission "$IMAGE_NAME" python local_evaluation.py
+
+
+# Note 1: Please refer to the Dockerfile to understand how the software runtime is set up.
+# The Dockerfile should include all necessary commands to install Python, the necessary
+# dependencies, and any other software required to run local_evaluation.py.
+
+# Note 2: Note the .dockerignore file in the root of this directory.
+# In the .dockerignore file, specify any files or directories that should not be included
+# in the Docker context. This typically includes large files, models, or datasets that
+# are not necessary for building the Docker image. Excluding these can significantly
+# speed up the build process by reducing the size of the build context sent to the Docker daemon.
+
+# Ensure your Dockerfile and .dockerignore are properly set up before running this script.
diff --git a/local_evaluation.py b/local_evaluation.py
index 9e970e5..c4cf6ba 100644
--- a/local_evaluation.py
+++ b/local_evaluation.py
@@ -1,12 +1,11 @@
-import pandas as pd
-from tqdm import tqdm
-import torch
-import numpy as np
 import os
 
-
 import metrics
+import numpy as np
+import pandas as pd
 import parsers
+import torch
+from tqdm import tqdm
 
 VERSION = "0.1.0"
 
@@ -168,22 +167,22 @@ def get_evaluation_methods():
         "hit rate@3": metrics.calculate_hit_rate_3,
         "rougel": metrics.calculate_rougel,
         "sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
-            generated_text=generated_text, 
-            reference_texts=reference_texts, 
-            model_name="all-MiniLM-L6-v2"
+            generated_text=generated_text,
+            reference_texts=reference_texts,
+            model_name="all-MiniLM-L6-v2",
         ),
         "multilingual-sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
-            generated_text=generated_text, 
-            reference_texts=reference_texts, 
-            model_name="paraphrase-multilingual-MiniLM-L12-v2"
+            generated_text=generated_text,
+            reference_texts=reference_texts,
+            model_name="paraphrase-multilingual-MiniLM-L12-v2",
         ),
-        "micro f1": metrics.calculate_true_positive_false_positives_false_negatives, 
+        "micro f1": metrics.calculate_true_positive_false_positives_false_negatives,
         "ndcg": metrics.calculate_ndcg,
         "bleu": metrics.calculate_bleu_score,
         "jp-bleu": lambda generated_text, reference_text: metrics.calculate_bleu_score(
-            generated_text=generated_text, 
-            reference_text=reference_text, 
-            is_japanese=True
+            generated_text=generated_text,
+            reference_text=reference_text,
+            is_japanese=True,
         ),
     }
 
-- 
GitLab