Skip to content
Snippets Groups Projects
Commit f201b911 authored by spmohanty's avatar spmohanty
Browse files

Add scripts for local debug with docker

parent 594b52b0
No related branches found
No related tags found
No related merge requests found
models/**
\ No newline at end of file
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04

# Build-time only: suppress debconf prompts during apt installs without
# baking DEBIAN_FRONTEND into the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive

ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8 \
    USER_NAME=aicrowd \
    HOME_DIR=/home/aicrowd \
    CONDA_DIR=/home/aicrowd/.conda \
    PATH=/home/aicrowd/.conda/bin:${PATH} \
    SHELL=/bin/bash

# Install system dependencies listed in apt.txt and clean up in the same layer.
# Use apt-get (stable CLI) instead of apt, and xargs instead of an unquoted
# backtick expansion to feed the package list.
COPY apt.txt /tmp/apt.txt
RUN apt-get -qq update \
    && xargs -a /tmp/apt.txt apt-get -qq install -y --no-install-recommends locales wget \
    && locale-gen en_US.UTF-8 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/* /tmp/apt.txt

# Create a non-root user with a stable UID/GID (1001) so host-mounted files
# can be chown-matched, then drop privileges for all subsequent steps.
RUN groupadd -g 1001 aicrowd \
    && useradd -m -s /bin/bash -u 1001 -g aicrowd -G sudo aicrowd
USER ${USER_NAME}
WORKDIR ${HOME_DIR}

# Install a pinned Miniconda release plus cmake; remove the installer and
# conda caches in the same layer so they never persist in the image.
RUN wget -nv -O miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_22.11.1-1-Linux-x86_64.sh \
    && bash miniconda.sh -b -p ${CONDA_DIR} \
    && . ${CONDA_DIR}/etc/profile.d/conda.sh \
    && conda install -y cmake \
    && conda clean -y -a \
    && rm -f miniconda.sh

# Copy each requirements manifest separately so the dependency layers stay
# cached unless the corresponding file changes.
COPY --chown=1001:1001 requirements.txt ${HOME_DIR}/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=1001:1001 requirements_eval.txt ${HOME_DIR}/requirements_eval.txt
RUN pip install --no-cache-dir -r requirements_eval.txt

## Add your custom commands below
git
\ No newline at end of file
#!/bin/bash
# Build a Docker image from the current directory, then run a container from it
# that executes local_evaluation.py with the current directory mounted at
# /submission inside the container.
#
# Fail fast: abort on any command failure, unset variable, or pipeline error,
# so a broken build never silently proceeds to the run step.
set -euo pipefail

# Step 1: Tag the image with the current commit hash so every build is traceable.
LAST_COMMIT_HASH=$(git rev-parse HEAD)
IMAGE_NAME="aicrowd/amazon-kddcup24-submission:${LAST_COMMIT_HASH}"

# Step 2: Build the Docker image.
# The '.' makes the current directory the build context, so Docker looks for
# a Dockerfile here. Build time is measured and reported for convenience.
START_TIME=$(date +%s)
DOCKER_BUILDKIT=1 docker build -t "${IMAGE_NAME}" .
END_TIME=$(date +%s)
BUILD_TIME=$((END_TIME - START_TIME))
echo "Total build time: ${BUILD_TIME} seconds"

# Step 3: Run the Docker container.
# -v "$(pwd)":/submission mounts the current directory at /submission inside
# the container, and -w sets the working directory to /submission. The
# container then runs local_evaluation.py using the software runtime set up
# in the Dockerfile.
docker run -v "$(pwd)":/submission -w /submission "${IMAGE_NAME}" python local_evaluation.py

# Note 1: Please refer to the Dockerfile to understand how the software runtime
# is set up. The Dockerfile should include all necessary commands to install
# Python, the dependencies, and any other software required to run
# local_evaluation.py.
# Note 2: Note the .dockerignore file in the root of this directory. Specify
# there any files or directories that should not be included in the Docker
# context (large files, models, datasets). Excluding these can significantly
# speed up the build by shrinking the context sent to the Docker daemon.
# Ensure your Dockerfile and .dockerignore are properly set up before running
# this script.
import pandas as pd
from tqdm import tqdm
import torch
import numpy as np
import os
import metrics
import numpy as np
import pandas as pd
import parsers
import torch
from tqdm import tqdm
VERSION = "0.1.0"
......@@ -168,22 +167,22 @@ def get_evaluation_methods():
"hit rate@3": metrics.calculate_hit_rate_3,
"rougel": metrics.calculate_rougel,
"sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
generated_text=generated_text,
reference_texts=reference_texts,
model_name="all-MiniLM-L6-v2"
generated_text=generated_text,
reference_texts=reference_texts,
model_name="all-MiniLM-L6-v2",
),
"multilingual-sent-transformer": lambda generated_text, reference_texts: metrics.calculate_cosine_similarity(
generated_text=generated_text,
reference_texts=reference_texts,
model_name="paraphrase-multilingual-MiniLM-L12-v2"
generated_text=generated_text,
reference_texts=reference_texts,
model_name="paraphrase-multilingual-MiniLM-L12-v2",
),
"micro f1": metrics.calculate_true_positive_false_positives_false_negatives,
"micro f1": metrics.calculate_true_positive_false_positives_false_negatives,
"ndcg": metrics.calculate_ndcg,
"bleu": metrics.calculate_bleu_score,
"jp-bleu": lambda generated_text, reference_text: metrics.calculate_bleu_score(
generated_text=generated_text,
reference_text=reference_text,
is_japanese=True
generated_text=generated_text,
reference_text=reference_text,
is_japanese=True,
),
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment