Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • Archana_Badagi/food-round2
  • eric_a_scuccimarra/food-round2
  • joel_joseph/food-round2
  • darthgera123/food-round2
  • reshmarameshbabu/food-round2
  • nikhil_rayaprolu/food-round2
6 results
Show changes
Commits on Source (194)
Showing with 1279 additions and 254 deletions
*.pth filter=lfs diff=lfs merge=lfs -text
File moved
......@@ -21,10 +21,33 @@ please contact Kai Chen (chenkaidev[at]gmail[dot]com). We will much appreciate y
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf).
We use the following tools for linting and formatting:
- [flake8](http://flake8.pycqa.org/en/latest/): linter
- [yapf](https://github.com/google/yapf): formatter
- [isort](https://github.com/timothycrosley/isort): sort imports
Style configurations of yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`,
fixes `end-of-files`, sorts `requirments.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
After you clone the repository, you will need to install initialize pre-commit hook.
```
pip install -U pre-commit
```
From the repository folder
```
pre-commit install
```
After this on every commit check code linters and formatter will be enforced.
>Before you create a PR, make sure that your code lints and is formatted by yapf.
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
\ No newline at end of file
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
blank_issues_enabled: false
......@@ -25,13 +25,11 @@ A placeholder for the command.
3. What dataset did you use?
**Environment**
- OS: [e.g., Ubuntu 16.04.6]
- GCC [e.g., 5.4.0]
- PyTorch version [e.g., 1.1.0]
- How you installed PyTorch [e.g., pip, conda, source]
- GPU model [e.g., 1080Ti, V100]
- CUDA and CUDNN version
- [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
1. Please run `python tools/collect_env.py` to collect necessary environment infomation and paste it here.
2. You may add addition that may be helpful for locating the problem, such as
- How you installed PyTorch [e.g., pip, conda, source]
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
**Error traceback**
If applicable, paste the error trackback here.
......
......@@ -115,3 +115,4 @@ data
*.pkl.json
*.log.json
work_dirs/
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet
known_third_party = Cython,asynctest,cv2,matplotlib,mmcv,numpy,pycocotools,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.3
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.29.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
hooks:
- id: flake8
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
dist: xenial
dist: bionic # ubuntu 18.04
language: python
install:
- pip install flake8
python:
- "3.5"
- "3.6"
- "3.7"
env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1
cache: pip
# Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis
before_install:
- INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
- wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
- sudo dpkg -i ${INSTALLER}
- wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
- sudo apt-key add 7fa2af80.pub
- sudo apt update -qq
- sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
- sudo apt clean
- CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
- LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
- PATH=${CUDA_HOME}/bin:${PATH}
install:
- pip install Pillow==6.2.2 # remove this line when torchvision>=0.5
- pip install Cython torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2
- pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
- pip install -r requirements.txt
before_script:
- flake8 .
- isort -rc --check-only --diff mmdet/ tools/ tests/
- yapf -r -d --style .style.yapf mmdet/ tools/ tests/ configs/
script:
- flake8
- python setup.py check -m -s
- python setup.py build_ext --inplace
- coverage run --source mmdet -m py.test -v --xdoctest-modules tests mmdet
after_success:
- coverage report
ARG PYTORCH="1.1.0"
ARG CUDA="10.0"
ARG CUDNN="7.5"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
build-essential \
bzip2 \
cmake \
curl \
git \
g++ \
libboost-all-dev \
pkg-config \
rsync \
software-properties-common \
sudo \
tar \
timidity \
unzip \
wget \
locales \
zlib1g-dev \
python3-dev \
python3 \
python3-pip \
python3-tk \
libjpeg-dev \
libpng-dev
# Python3
RUN pip3 install pip --upgrade
RUN pip3 install utm cython aicrowd_api timeout_decorator \
numpy \
aicrowd-repo2docker \
pillow
RUN pip3 install git+https://github.com/AIcrowd/coco.git#subdirectory=PythonAPI
RUN conda install cython -y && conda clean --all
RUN git clone --branch v1.0rc1 https://github.com/open-mmlab/mmdetection.git /mmdetection
WORKDIR /mmdetection
RUN pip install --no-cache-dir -e .
RUN python3.6 -m pip install aicrowd_api aicrowd-repo2docker
# Unicode support:
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# Enables X11 sharing and creates user home directory
ENV USER_NAME aicrowd
ENV HOME_DIR /home/$USER_NAME
#
# Replace HOST_UID/HOST_GUID with your user / group id (needed for X11)
ENV HOST_UID 1000
ENV HOST_GID 1000
RUN export uid=${HOST_UID} gid=${HOST_GID} && \
mkdir -p ${HOME_DIR} && \
echo "$USER_NAME:x:${uid}:${gid}:$USER_NAME,,,:$HOME_DIR:/bin/bash" >> /etc/passwd && \
echo "$USER_NAME:x:${uid}:" >> /etc/group && \
echo "$USER_NAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USER_NAME && \
chmod 0440 /etc/sudoers.d/$USER_NAME && \
chown ${uid}:${gid} -R ${HOME_DIR}
USER ${USER_NAME}
WORKDIR ${HOME_DIR}
COPY . .
RUN sudo chown ${HOST_UID}:${HOST_GID} -R *
RUN sudo chmod 775 -R *
## Installation
### Requirements
- Linux
- Python 3.5+ ([Say goodbye to Python2](https://python3statement.org/))
- PyTorch 1.0+ or PyTorch-nightly
- CUDA 9.0+
- NCCL 2+
- GCC 4.9+
- [mmcv](https://github.com/open-mmlab/mmcv)
We have tested the following versions of OS and softwares:
- OS: Ubuntu 16.04/18.04 and CentOS 7.2
- CUDA: 9.0/9.2/10.0
- NCCL: 2.1.15/2.2.13/2.3.7/2.4.2
- GCC: 4.9/5.3/5.4/7.3
### Install mmdetection
a. Create a conda virtual environment and activate it. Then install Cython.
```shell
conda create -n open-mmlab python=3.7 -y
conda activate open-mmlab
conda install cython
```
b. Install PyTorch stable or nightly and torchvision following the [official instructions](https://pytorch.org/).
c. Clone the mmdetection repository.
```shell
git clone https://github.com/open-mmlab/mmdetection.git
cd mmdetection
```
d. Compile cuda extensions.
```shell
./compile.sh
```
e. Install mmdetection (other dependencies will be installed automatically).
```shell
python setup.py develop
# or "pip install -e ."
```
Note:
1. It is recommended that you run the step e each time you pull some updates from github. If there are some updates of the C/CUDA codes, you also need to run step d.
The git commit id will be written to the version number with step e, e.g. 0.6.0+2e7045c. The version will also be saved in trained models.
2. Following the above instructions, mmdetection is installed on `dev` mode, any modifications to the code will take effect without installing it again.
### Prepare COCO dataset.
It is recommended to symlink the dataset root to `$MMDETECTION/data`.
```
mmdetection
├── mmdet
├── tools
├── configs
├── data
│ ├── coco
│ │ ├── annotations
│ │ ├── train2017
│ │ ├── val2017
│ │ ├── test2017
│ ├── VOCdevkit
│ │ ├── VOC2007
│ │ ├── VOC2012
```
### Scripts
[Here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is
a script for setting up mmdetection with conda.
### Notice
You can run `python(3) setup.py develop` or `pip install -e .` to install mmdetection if you want to make modifications to it frequently.
If there are more than one mmdetection on your machine, and you want to use them alternatively.
Please insert the following code to the main file
```python
import os.path as osp
import sys
sys.path.insert(0, osp.join(osp.dirname(osp.abspath(__file__)), '../'))
```
or run the following command in the terminal of corresponding folder.
```shell
export PYTHONPATH=`pwd`:$PYTHONPATH
```
# food-recognition-challenge-mmdetection-baseline
![AIcrowd-Logo](https://raw.githubusercontent.com/AIcrowd/AIcrowd/master/app/assets/images/misc/aicrowd-horizontal.png)
# MMDetection
# Problem Statement
**News**: We released the technical report on [ArXiv](https://arxiv.org/abs/1906.07155).
The goal of this challenge is to train models which can look at images of food items and detect the individual food items present in them.
We provide a novel dataset of food images collected using the MyFoodRepo project where numerous volunteer Swiss users provide images of their daily food intake. The images have been hand labelled by a group of experts to map the individual food items to an ontology of Swiss Food items.
## Introduction
This is an evolving dataset, where we will release more data as the dataset grows in size.
The master branch works with **PyTorch 1.1** or higher.
![image1](https://i.imgur.com/zS2Nbf0.png)
mmdetection is an open source object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
# Baseline
MMdetection is an open source object detection toolbox based on PyTorch, with a large Model Zoo with many customised models that can be plugged and tested in with just a single config file modification. PYou can read more about it at: [mmdetection github](https://github.com/open-mmlab/mmdetection/)
![demo image](demo/coco_test_12510.jpg)
Follow the installation instructions as given in the above link.
# Installation
### Major features
Ensure you have `docker` and `nvidia-docker` installed by following the instructions here :
- **Modular Design**
* [Docker](https://docs.docker.com/install/)
* [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
**NOTE** : You do not need nvidia-docker if you do not want to use GPU when testing your submission locally
We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules.
[MMDetection Installation instructions](https://github.com/open-mmlab/mmdetection/blob/master/docs/INSTALL.md)
# Dataset
- **Support of multiple frameworks out of box**
The dataset for the [AIcrowd Food Recognition Challenge](https://www.aicrowd.com/challenges/food-recognition-challenge) is available at [https://www.aicrowd.com/challenges/food-recognition-challenge/dataset_files](https://www.aicrowd.com/challenges/food-recognition-challenge/dataset_files)
The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc.
This dataset contains :
* `train-v0.2.tar.gz` : This is the Training Set of **7949** (as RGB images) food images, along with their corresponding annotations in [MS-COCO format](http://cocodataset.org/#home)
- **High efficiency**
* `val-v0.2.tar.gz`: This is the suggested Validation Set of **418** (as RGB images) food images, along with their corresponding annotations in [MS-COCO format](http://cocodataset.org/#home)
All basic bbox and mask operations run on GPUs now. The training speed is faster than or comparable to other codebases, including [Detectron](https://github.com/facebookresearch/Detectron), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet).
* `test_images-v0.2.tar.gz` : This is the debug Test Set for Round-1, where you are provided the same images as the validation set.
- **State of the art**
To get started, we would advise you to download all the files, and untar them inside the `data/` folder of this repository, so that you have a directory structure like this :
The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward.
```bash
|-- data/
| |-- test_images/ (has all images for prediction)(**NOTE** : They are the same as the validation set images)
| |-- train/
| | |-- images (has all the images for training)
| | |__ annotation.json : Annotation of the data in MS COCO format
| | |__ annotation-small.json : Smaller version of the previous dataset
| |-- val/
| | |-- images (has all the images for training)
| | |__ annotation.json : Annotation of the data in MS COCO format
| | |__ annotation-small.json : Smaller version of the previous dataset
```
Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox.
We are also assuming that you have already installed all the requirements for this notebook, or you can still install them by :
## License
# Usage
This project is released under the [Apache 2.0 license](LICENSE).
# Training with MMDetection:
Let us look at training MMDetection using Hybrid Task Cascade [HTC research paper](https://arxiv.org/abs/1901.07518).
## Updates
A score of AP_50 of 0.526 and AR_50 of 0.729, can be achieved with Hybrid Task Cascade of Resnet50 Backbone.
v0.6.0 (14/04/2019)
- Up to 30% speedup compared to the model zoo.
- Support both PyTorch stable and nightly version.
- Replace NMS and SigmoidFocalLoss with Pytorch CUDA extensions.
MMDetection provides us with a config file especially for HTC, available at [HTC config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc)
v0.6rc0(06/02/2019)
- Migrate to PyTorch 1.0.
Also make sure you have downloaded the training data to a subfolder of your project.
v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
Modify your config file and point your dataset variables to your data folder.
v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
As given in [MMDetection Getting Started](https://github.com/open-mmlab/mmdetection/blob/master/docs/GETTING_STARTED.md),
You can use:
v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models.
- Refactoring for Samplers/Assigners and add OHEM.
- Add VOC dataset and evaluation scripts.
python tools/train.py ${CONFIG_FILE}
to train the model on a single GPU or
v0.5.4 (27/11/2018)
- Add SingleStageDetector and RetinaNet.
./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
to train the model on multiple GPUs.
v0.5.3 (26/11/2018)
- Add Cascade R-CNN and Cascade Mask R-CNN.
- Add support for Soft-NMS in config files.
Make sure you have edited the config file to point to the dataset and also have made changes to the number of classes.
If you are going to use the dataloader from the mmdetection.
v0.5.2 (21/10/2018)
- Add support for custom datasets.
- Add a script to convert PASCAL VOC annotations to the expected format.
## Testing with MMDetection:
To test your results with MMDetection,
you can use the commands:
```
*single-gpu testing*
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
v0.5.1 (20/10/2018)
- Add BBoxAssigner and BBoxSampler, the `train_cfg` field in config files are restructured.
- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.
*multi-gpu testing*
./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
```
## Benchmark and model zoo
**Log Analysis**
Supported methods and backbones are shown in the below table.
Results and models are available in the [Model zoo](MODEL_ZOO.md).
The training logs can be analyzed using the plot_curve provided with the mmdetection:
| | ResNet | ResNeXt | SENet | VGG | HRNet |
|--------------------|:--------:|:--------:|:--------:|:--------:|:-----:|
| RPN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Fast R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Faster R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Mask R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Cascade R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Cascade Mask R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| SSD | ✗ | ✗ | ✗ | ✓ | ✗ |
| RetinaNet | ✓ | ✓ | ☐ | ✗ | ✓ |
| GHM | ✓ | ✓ | ☐ | ✗ | ✓ |
| Mask Scoring R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| FCOS | ✓ | ✓ | ☐ | ✗ | ✓ |
| Grid R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Hybrid Task Cascade| ✓ | ✓ | ☐ | ✗ | ✓ |
| Libra R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Guided Anchoring | ✓ | ✓ | ☐ | ✗ | ✓ |
```
import os
import matplotlib
%matplotlib inline
from tools.analyze_logs import plot_curve
matplotlib.rcParams['figure.figsize'] = [20, 10]
args = {
'keys':['segm_mAP_50'],
'legend':'segm_mAP_50',
'backend': None,
'json_logs': [os.getcwd()+'/work_dirs/htc_r50_fpn/20191206_105437.log.json'],
'title': 'loss'
}
print(os.getcwd()+'/work_dirs/htc_r50_fpn/20191206_105437.log.json')
plot_curve([os.getcwd()+'/work_dirs/htc_r50_fpn/20191206_105437.log.json'], args)
```
Other features
- [x] DCNv2
- [x] Group Normalization
- [x] Weight Standardization
- [x] OHEM
- [x] Soft-NMS
- [x] Generalized Attention
- [x] GCNet
- [x] Mixed Precision (FP16) Training
## Installation
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
## Other Associated Notebooks
* [Dataset Utils](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb)
* [Import Dependencies](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Import-dependencies)
* [Configuration Variables](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Configuration-Variables)
* [Parsing Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Parsing-the-annotations)
* [Collecting and Visualizing Images](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Collecting-and-Visualizing-Images)
* [Understanding Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Understanding-Annotations)
* [Visualizing Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Visualizing-Annotations)
* [Advanced](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Advanced)
* [Convert poly segmentation to rle](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#1.-Convert-poly-segmentation-to-rle)
* [Convert segmentation to pixel level masks](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#2.-Convert-segmentation-to-pixel-level-masks)
* [Random Submission](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/run.py)
* [Locally test the evaluation function](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Local%20Evaluation.ipynb)
## Get Started
## Other Baselines
# Round 1
* [Colab Notebook for Data Analysis and Tutorial](https://colab.research.google.com/drive/1A5p9GX5X3n6OMtLjfhnH6Oeq13tWNtFO#scrollTo=ok54AWT_VoWV)
A notebook with data analysis on the Food Recognition Dataset and then a short tutorial on training with keras and pytorch. This lets you immediately jump onto the challenge and solve the challenge
### Pretrained Baselines
* [mmdetection (pytorch)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-pytorch-baseline)
* [matterport-maskrcnn (keras - tensorflow)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-recognition)
Please see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of MMDetection.
# Round 2
* [Colab Notebook for Data Analysis and Tutorial](https://colab.research.google.com/drive/1vXdv9quZ7CXO5lLCjhyz3jtejRzDq221)
A notebook with data analysis on the Food Recognition Dataset and then a short tutorial on training with keras and pytorch. This lets you immediately jump onto the challenge and solve the challenge
### Pretrained Baselines
* [mmdetection (pytorch)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-round2)
## Contributing
# Submission Instructions
We appreciate all contributions to improve MMDetection. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for the contributing guideline.
To submit to the challenge you'll need to ensure you've set up an appropriate repository structure, create a private git repository at https://gitlab.aicrowd.com with the contents of your submission, and push a git tag corresponding to the version of your repository you'd like to submit.
## Acknowledgement
## Repository Structure
We have created this sample submission repository which you can use as reference.
MMDetection is an open source project that is contributed by researchers and engineers from various colledges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks.
We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors.
#### aicrowd.json
Each repository should have a aicrowd.json file with the following fields:
```
{
"challenge_id" : "aicrowd-food-recognition-challenge",
"grader_id": "aicrowd-food-recognition-challenge",
"authors" : ["aicrowd-user"],
"description" : "Food Recognition Challenge Submission",
"license" : "MIT",
"gpu": true
}
```
This file is used to identify your submission as a part of the Food Recognition Challenge. You must use the `challenge_id` and `grader_id` specified above in the submission. The `gpu` key in the `aicrowd.json` lets your specify if your submission requires a GPU or not. In which case, a NVIDIA-K80 will be made available to your submission when evaluation the submission.
## Citation
#### Submission environment configuration
You can specify the software runtime of your code by modifying the included [Dockerfile](Dockerfile).
If you use this toolbox or benchmark in your research, please cite this project.
#### Code Entrypoint
The evaluator will use `/home/aicrowd/run.sh` as the entrypoint. Please remember to have a `run.sh` at the root which can instantiate any necessary environment variables and execute your code. This repository includes a sample `run.sh` file.
### Local Debug
```
@article{mmdetection,
title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark},
author = {Kai Chen, Jiaqi Wang, Jiangmiao Pang, Yuhang Cao, Yu Xiong, Xiaoxiao Li,
Shuyang Sun, Wansen Feng, Ziwei Liu, Jiarui Xu, Zheng Zhang, Dazhi Cheng,
Chenchen Zhu, Tianheng Cheng, Qijie Zhao, Buyu Li, Xin Lu, Rui Zhu, Yue Wu,
Jifeng Dai, Jingdong Wang, Jianping Shi, Wanli Ouyang, Chen Change Loy, Dahua Lin},
journal = {arXiv preprint arXiv:1906.07155},
year = {2019}
}
export TEST_IMAGES_PATH="../data/test_images" # or path to your local folder containing images
export IMAGE_NAME="aicrowd-food-recognition-challenge-submission"
./build.sh
./debug.sh
######################################
## NOTE :
##
## * If you do not wish to your a GPU when testing locally, please feel free to replace nvidia-docker with docker
##
## * If you want to test on images located at an alternate location, set the `TEST_IMAGES_PATH` environment variable accordingly before running `build.sh` and `debug.sh`.
```
### Submitting
To make a submission, you will have to create a private repository on [https://gitlab.aicrowd.com](https://gitlab.aicrowd.com).
You will have to add your SSH Keys to your GitLab account by following the instructions [here](https://docs.gitlab.com/ee/gitlab-basics/create-your-ssh-keys.html).
If you do not have SSH Keys, you will first need to [generate one](https://docs.gitlab.com/ee/ssh/README.html#generating-a-new-ssh-key-pair).
Then you can create a submission by making a *tag push* to your repository, adding the correct git remote and pushing to the remote:
```
git clone https://gitlab.aicrowd.com/nikhil_rayaprolu/food-round2
cd food-round2
# Add AICrowd git remote endpoint
git remote add aicrowd git@gitlab.aicrowd.com:<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline.git
git push aicrowd master
# Create a tag for your submission and push
git tag -am "submission-v0.1" submission-v0.1
git push aicrowd master
git push aicrowd submission-v0.1
# Note : If the contents of your repository (latest commit hash) does not change,
# then pushing a new tag will not trigger a new evaluation.
```
You now should be able to see the details of your submission at :
[gitlab.aicrowd.com/<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline/issues](gitlab.aicrowd.com/<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline/issues)
## Using http instead of ssh (Personal Access Token):
In order to use http to clone repositories and submit on gitlab:
a) Create a personal access token
1. Log in to GitLab.
2. In the upper-right corner, click your avatar and select Settings.
3. On the User Settings menu, select Access Tokens.
4. Choose a name and optional expiry date for the token.
5. Choose the desired scopes.
6. Click the Create personal access token button.
7. Save the personal access token somewhere safe, lets call it XXX for now.
Once you leave or refresh the page, you won’t be able to access it again.
b) to clone a repo use the following command:
git clone [https://oauth2:XXX@gitlab.aicrowd.com/(username)/(repo_name).git](https://oauth2:XXX@gitlab.aicrowd.com/(username)/(repo_name).git)
c)submit a solution:
```
cd into your submission repo on gitlab
cd (repo_name)
#Add AICrowd git remote endpoint
git remote add aicrowd https://oauth2:XXX@gitlab.aicrowd.com/(username)/(repo_name).git
git push aicrowd master
# Create a tag for your submission and push
git tag -am "submission-v0.1" submission-v0.1
git push aicrowd master
git push aicrowd submission-v0.1
# Note : If the contents of your repository (latest commit hash) does not change,
# then pushing a new tag will not trigger a new evaluation.
```
## Contact
**Best of Luck**
## Miscelaneous Resources
* [Convert Annotations from MS COCO format to PascalVOC format](https://github.com/CasiaFan/Dataset_to_VOC_converter/blob/master/anno_coco2voc.py)
## Credits
* Parts of the documentation for this baseline was taken from : https://github.com/AIcrowd/food-recognition-challenge-starter-kit
* and the baseline is built using MMDetection: https://github.com/open-mmlab/mmdetection/
# Author
**[Nikhil Rayaprolu](nikhil@aicrowd.com)**
This repo is currently maintained by Kai Chen ([@hellock](http://github.com/hellock)), Jiangmiao Pang ([@OceanPang](https://github.com/OceanPang)), Jiaqi Wang ([@myownskyW7](https://github.com/myownskyW7)) and Yuhang Cao ([@yhcao6](https://github.com/yhcao6)).
\ No newline at end of file
{
"challenge_id" : "aicrowd-food-recognition-challenge",
"grader_id": "aicrowd-food-recognition-challenge",
"authors" : ["nikhil13prs"],
"description" : "Food Recognition Challenge Submission",
"license" : "MIT",
"gpu": true
}
#!/usr/bin/env python
import aicrowd_api
import os
########################################################################
# Instatiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()
def execution_start():
########################################################################
# Register Evaluation Start event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_started",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_started"
}
)
def execution_progress(progress_payload):
image_ids = progress_payload["image_ids"]
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_progress",
"image_ids" : image_ids
}
)
def execution_success(payload):
predictions_output_path = payload["predictions_output_path"]
########################################################################
# Register Evaluation Complete event
########################################################################
expected_output_path = os.getenv("AICROWD_PREDICTIONS_OUTPUT_PATH", False)
if expected_output_path != predictions_output_path:
raise Exception("Please write the output to the path specified in the environment variable : AICROWD_PREDICTIONS_OUTPUT_PATH instead of {}".format(predictions_output_path))
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
message="execution_success",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_success",
"predictions_output_path" : predictions_output_path
},
blocking=True
)
def execution_error(error):
########################################################################
# Register Evaluation Complete event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_ERROR,
message="execution_error",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_error",
"error" : error
},
blocking=True
)
[
{
"id": 2578,
"name": "water",
"name_readable": "Water",
"supercategory": "food"
},
{
"id": 2939,
"name": "pizza-margherita-baked",
"name_readable": "Pizza, Margherita, baked",
"supercategory": "food"
},
{
"id": 1085,
"name": "broccoli",
"name_readable": "Broccoli",
"supercategory": "food"
},
{
"id": 1040,
"name": "salad-leaf-salad-green",
"name_readable": "Salad, leaf / salad, green",
"supercategory": "food"
},
{
"id": 1070,
"name": "zucchini",
"name_readable": "Zucchini",
"supercategory": "food"
},
{
"id": 2022,
"name": "egg",
"name_readable": "Egg",
"supercategory": "food"
},
{
"id": 2053,
"name": "butter",
"name_readable": "Butter",
"supercategory": "food"
},
{
"id": 1566,
"name": "bread-white",
"name_readable": "Bread, white",
"supercategory": "food"
},
{
"id": 1151,
"name": "apple",
"name_readable": "Apple",
"supercategory": "food"
},
{
"id": 2131,
"name": "dark-chocolate",
"name_readable": "Dark chocolate",
"supercategory": "food"
},
{
"id": 2521,
"name": "white-coffee-with-caffeine",
"name_readable": "White coffee, with caffeine",
"supercategory": "food"
},
{
"id": 1068,
"name": "sweet-pepper",
"name_readable": "Sweet pepper",
"supercategory": "food"
},
{
"id": 1026,
"name": "mixed-salad-chopped-without-sauce",
"name_readable": "Mixed salad (chopped without sauce)",
"supercategory": "food"
},
{
"id": 2738,
"name": "tomato-sauce",
"name_readable": "Tomato sauce",
"supercategory": "food"
},
{
"id": 1565,
"name": "bread-wholemeal",
"name_readable": "Bread, wholemeal",
"supercategory": "food"
},
{
"id": 2512,
"name": "coffee-with-caffeine",
"name_readable": "Coffee, with caffeine",
"supercategory": "food"
},
{
"id": 1061,
"name": "cucumber",
"name_readable": "Cucumber",
"supercategory": "food"
},
{
"id": 1311,
"name": "cheese",
"name_readable": "Cheese",
"supercategory": "food"
},
{
"id": 1505,
"name": "pasta-spaghetti",
"name_readable": "Pasta, spaghetti",
"supercategory": "food"
},
{
"id": 1468,
"name": "rice",
"name_readable": "Rice",
"supercategory": "food"
},
{
"id": 1967,
"name": "salmon",
"name_readable": "Salmon",
"supercategory": "food"
},
{
"id": 1078,
"name": "carrot",
"name_readable": "Carrot",
"supercategory": "food"
},
{
"id": 1116,
"name": "onion",
"name_readable": "Onion",
"supercategory": "food"
},
{
"id": 1022,
"name": "mixed-vegetables",
"name_readable": "Mixed vegetables",
"supercategory": "food"
},
{
"id": 2504,
"name": "espresso-with-caffeine",
"name_readable": "Espresso, with caffeine",
"supercategory": "food"
},
{
"id": 1154,
"name": "banana",
"name_readable": "Banana",
"supercategory": "food"
},
{
"id": 1163,
"name": "strawberries",
"name_readable": "Strawberries",
"supercategory": "food"
},
{
"id": 2750,
"name": "mayonnaise",
"name_readable": "Mayonnaise",
"supercategory": "food"
},
{
"id": 1210,
"name": "almonds",
"name_readable": "Almonds",
"supercategory": "food"
},
{
"id": 2620,
"name": "wine-white",
"name_readable": "Wine, white",
"supercategory": "food"
},
{
"id": 1310,
"name": "hard-cheese",
"name_readable": "Hard cheese",
"supercategory": "food"
},
{
"id": 1893,
"name": "ham-raw",
"name_readable": "Ham, raw",
"supercategory": "food"
},
{
"id": 1069,
"name": "tomato",
"name_readable": "Tomato",
"supercategory": "food"
},
{
"id": 1058,
"name": "french-beans",
"name_readable": "French beans",
"supercategory": "food"
},
{
"id": 1180,
"name": "mandarine",
"name_readable": "Mandarine",
"supercategory": "food"
},
{
"id": 2618,
"name": "wine-red",
"name_readable": "Wine, red",
"supercategory": "food"
},
{
"id": 1010,
"name": "potatoes-steamed",
"name_readable": "Potatoes steamed",
"supercategory": "food"
},
{
"id": 1588,
"name": "croissant",
"name_readable": "Croissant",
"supercategory": "food"
},
{
"id": 1879,
"name": "salami",
"name_readable": "Salami",
"supercategory": "food"
},
{
"id": 3080,
"name": "boisson-au-glucose-50g",
"name_readable": "Boisson au glucose 50g",
"supercategory": "food"
},
{
"id": 2388,
"name": "biscuits",
"name_readable": "Biscuits",
"supercategory": "food"
},
{
"id": 1108,
"name": "corn",
"name_readable": "Corn",
"supercategory": "food"
},
{
"id": 1032,
"name": "leaf-spinach",
"name_readable": "Leaf spinach",
"supercategory": "food"
},
{
"id": 2099,
"name": "jam",
"name_readable": "Jam",
"supercategory": "food"
},
{
"id": 2530,
"name": "tea-green",
"name_readable": "Tea, green",
"supercategory": "food"
},
{
"id": 1013,
"name": "chips-french-fries",
"name_readable": "Chips, french fries",
"supercategory": "food"
},
{
"id": 1323,
"name": "parmesan",
"name_readable": "Parmesan",
"supercategory": "food"
},
{
"id": 2634,
"name": "beer",
"name_readable": "Beer",
"supercategory": "food"
},
{
"id": 1056,
"name": "avocado",
"name_readable": "Avocado",
"supercategory": "food"
},
{
"id": 1520,
"name": "bread-french-white-flour",
"name_readable": "Bread, French (white flour)",
"supercategory": "food"
},
{
"id": 1788,
"name": "chicken",
"name_readable": "Chicken",
"supercategory": "food"
},
{
"id": 1352,
"name": "soft-cheese",
"name_readable": "Soft cheese",
"supercategory": "food"
},
{
"id": 2498,
"name": "tea",
"name_readable": "Tea",
"supercategory": "food"
},
{
"id": 2711,
"name": "sauce-savoury",
"name_readable": "Sauce (savoury)",
"supercategory": "food"
},
{
"id": 2103,
"name": "honey",
"name_readable": "Honey",
"supercategory": "food"
},
{
"id": 1554,
"name": "bread-whole-wheat",
"name_readable": "Bread, whole wheat",
"supercategory": "food"
},
{
"id": 1556,
"name": "bread-sourdough",
"name_readable": "Bread, sourdough",
"supercategory": "food"
},
{
"id": 1307,
"name": "gruyere",
"name_readable": "Gruyère",
"supercategory": "food"
},
{
"id": 1060,
"name": "pickle",
"name_readable": "Pickle",
"supercategory": "food"
},
{
"id": 1220,
"name": "mixed-nuts",
"name_readable": "Mixed nuts",
"supercategory": "food"
},
{
"id": 2580,
"name": "water-mineral",
"name_readable": "Water, mineral",
"supercategory": "food"
}
]
#!/bin/bash
docker build -t $IMAGE_NAME
\ No newline at end of file
#!/usr/bin/env bash
PYTHON=${PYTHON:-"python"}
echo "Building roi align op..."
cd mmdet/ops/roi_align
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
echo "Building roi pool op..."
cd ../roi_pool
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
echo "Building nms op..."
cd ../nms
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
echo "Building dcn..."
cd ../dcn
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
echo "Building sigmoid focal loss op..."
cd ../sigmoid_focal_loss
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
echo "Building masked conv op..."
cd ../masked_conv
if [ -d "build" ]; then
rm -r build
fi
$PYTHON setup.py build_ext --inplace
# model settings
model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=81,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
albu_train_transforms = [
dict(
type='ShiftScaleRotate',
shift_limit=0.0625,
scale_limit=0.0,
rotate_limit=0,
interpolation=1,
p=0.5),
dict(
type='RandomBrightnessContrast',
brightness_limit=[0.1, 0.3],
contrast_limit=[0.1, 0.3],
p=0.2),
dict(
type='OneOf',
transforms=[
dict(
type='RGBShift',
r_shift_limit=10,
g_shift_limit=10,
b_shift_limit=10,
p=1.0),
dict(
type='HueSaturationValue',
hue_shift_limit=20,
sat_shift_limit=30,
val_shift_limit=20,
p=1.0)
],
p=0.1),
dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
dict(type='ChannelShuffle', p=0.1),
dict(
type='OneOf',
transforms=[
dict(type='Blur', blur_limit=3, p=1.0),
dict(type='MedianBlur', blur_limit=3, p=1.0)
],
p=0.1),
]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='Pad', size_divisor=32),
dict(
type='Albu',
transforms=albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_labels'],
min_visibility=0.0,
filter_lost_elements=True),
keymap={
'img': 'image',
'gt_masks': 'masks',
'gt_bboxes': 'bboxes'
},
update_pad_shape=False,
skip_img_without_anno=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
'pad_shape', 'scale_factor'))
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
# Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection
## Introduction
```
@article{zhang2019bridging,
title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection},
author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.},
journal = {arXiv preprint arXiv:1912.02424},
year = {2019}
}
```
## Results and Models
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50 | pytorch | 1x | 3.6 | 0.357 | 12.8 | 39.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/atss/atss_r50_fpn_1x_20200113-a7aa251e.pth)|
# model settings
model = dict(
type='ATSS',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
extra_convs_on_inputs=False,
num_outs=5),
bbox_head=dict(
type='ATSSHead',
num_classes=81,
in_channels=256,
stacked_convs=4,
feat_channels=256,
octave_base_scale=8,
scales_per_octave=1,
anchor_ratios=[1.0],
anchor_strides=[8, 16, 32, 64, 128],
target_means=[.0, .0, .0, .0],
target_stds=[0.1, 0.1, 0.2, 0.2],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(type='ATSSAssigner', topk=9),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.6),
max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/atss_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]