Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Commits on Source (170)
Showing with 1344 additions and 213 deletions
*.pth filter=lfs diff=lfs merge=lfs -text
File moved
......@@ -21,10 +21,33 @@ please contact Kai Chen (chenkaidev[at]gmail[dot]com). We will much appreciate y
### Python
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf).
We use the following tools for linting and formatting:
- [flake8](http://flake8.pycqa.org/en/latest/): linter
- [yapf](https://github.com/google/yapf): formatter
- [isort](https://github.com/timothycrosley/isort): sort imports
Style configurations of yapf and isort can be found in [.style.yapf](../.style.yapf) and [.isort.cfg](../.isort.cfg).
We use a [pre-commit hook](https://pre-commit.com/) that runs `flake8`, `yapf`, and `isort`, removes trailing whitespace,
fixes end-of-file newlines, and sorts `requirements.txt` automatically on every commit.
The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
After you clone the repository, you will need to install and initialize the pre-commit hook.
```
pip install -U pre-commit
```
Then, from the repository folder, run
```
pre-commit install
```
After this, the code linters and formatter will be enforced on every commit.
> Before you create a PR, make sure that your code lints and is formatted by yapf.
### C++ and CUDA
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
blank_issues_enabled: false
......@@ -25,13 +25,11 @@ A placeholder for the command.
3. What dataset did you use?
**Environment**
- OS: [e.g., Ubuntu 16.04.6]
- GCC [e.g., 5.4.0]
- PyTorch version [e.g., 1.1.0]
- How you installed PyTorch [e.g., pip, conda, source]
- GPU model [e.g., 1080Ti, V100]
- CUDA and CUDNN version
- [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
1. Please run `python tools/collect_env.py` to collect necessary environment information and paste it here.
2. You may add additional information that may be helpful for locating the problem, such as
- How you installed PyTorch [e.g., pip, conda, source]
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
**Error traceback**
If applicable, paste the error traceback here.
......
......@@ -116,5 +116,3 @@ data
*.log.json
work_dirs/
# Pytorch
*.pth
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet
known_third_party = mmcv,numpy,matplotlib,pycocotools,six,seaborn,terminaltables,torch,torchvision
known_third_party = Cython,asynctest,cv2,matplotlib,mmcv,numpy,pycocotools,robustness_eval,roi_align,roi_pool,seaborn,six,terminaltables,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.3
hooks:
- id: seed-isort-config
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
hooks:
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.29.0
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
hooks:
- id: flake8
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
dist: xenial
dist: bionic # ubuntu 18.04
language: python
install:
- pip install isort flake8 yapf
python:
- "3.5"
- "3.6"
- "3.7"
env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1
cache: pip
# Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis
before_install:
- INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
- wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
- sudo dpkg -i ${INSTALLER}
- wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
- sudo apt-key add 7fa2af80.pub
- sudo apt update -qq
- sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
- sudo apt clean
- CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
- LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
- PATH=${CUDA_HOME}/bin:${PATH}
install:
- pip install Pillow==6.2.2 # remove this line when torchvision>=0.5
- pip install Cython torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2
- pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
- pip install -r requirements.txt
before_script:
- flake8 .
- isort -rc --check-only --diff mmdet/ tools/ tests/
- yapf -r -d --style .style.yapf mmdet/ tools/ tests/ configs/
script:
- flake8
- isort -rc --diff mmdet/ tools/
- yapf -r -d --style .style.yapf mmdet/ tools/
- python setup.py check -m -s
- python setup.py build_ext --inplace
- coverage run --source mmdet -m py.test -v --xdoctest-modules tests mmdet
after_success:
- coverage report
ARG PYTORCH="1.1.0"
ARG CUDA="10.0"
ARG CUDNN="7.5"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
build-essential \
bzip2 \
cmake \
curl \
git \
g++ \
libboost-all-dev \
pkg-config \
rsync \
software-properties-common \
sudo \
tar \
timidity \
unzip \
wget \
locales \
zlib1g-dev \
python3-dev \
python3 \
python3-pip \
python3-tk \
libjpeg-dev \
libpng-dev
# Python3
RUN pip3 install pip --upgrade
RUN pip3 install utm cython aicrowd_api timeout_decorator \
numpy \
aicrowd-repo2docker \
pillow
RUN pip3 install git+https://github.com/AIcrowd/coco.git#subdirectory=PythonAPI
RUN conda install cython -y && conda clean --all
RUN git clone --branch v1.0rc1 https://github.com/open-mmlab/mmdetection.git /mmdetection
WORKDIR /mmdetection
RUN pip install --no-cache-dir -e .
RUN python3.6 -m pip install aicrowd_api aicrowd-repo2docker
# Unicode support:
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# Enables X11 sharing and creates user home directory
ENV USER_NAME aicrowd
ENV HOME_DIR /home/$USER_NAME
#
# Replace HOST_UID/HOST_GID with your user / group id (needed for X11)
ENV HOST_UID 1000
ENV HOST_GID 1000
RUN export uid=${HOST_UID} gid=${HOST_GID} && \
mkdir -p ${HOME_DIR} && \
echo "$USER_NAME:x:${uid}:${gid}:$USER_NAME,,,:$HOME_DIR:/bin/bash" >> /etc/passwd && \
echo "$USER_NAME:x:${uid}:" >> /etc/group && \
echo "$USER_NAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USER_NAME && \
chmod 0440 /etc/sudoers.d/$USER_NAME && \
chown ${uid}:${gid} -R ${HOME_DIR}
USER ${USER_NAME}
WORKDIR ${HOME_DIR}
COPY . .
RUN sudo chown ${HOST_UID}:${HOST_GID} -R *
RUN sudo chmod 775 -R *
# food-recognition-challenge-mmdetection-baseline
![AIcrowd-Logo](https://raw.githubusercontent.com/AIcrowd/AIcrowd/master/app/assets/images/misc/aicrowd-horizontal.png)
# MMDetection
# Problem Statement
**News**: We released the technical report on [ArXiv](https://arxiv.org/abs/1906.07155).
The goal of this challenge is to train models which can look at images of food items and detect the individual food items present in them.
We provide a novel dataset of food images collected using the MyFoodRepo project where numerous volunteer Swiss users provide images of their daily food intake. The images have been hand labelled by a group of experts to map the individual food items to an ontology of Swiss Food items.
## Introduction
This is an evolving dataset, where we will release more data as the dataset grows in size.
The master branch works with **PyTorch 1.1** or higher.
![image1](https://i.imgur.com/zS2Nbf0.png)
mmdetection is an open source object detection toolbox based on PyTorch. It is
a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/).
# Baseline
MMDetection is an open source object detection toolbox based on PyTorch, with a large Model Zoo of customised models that can be plugged in and tested with just a single config file modification. You can read more about it at: [mmdetection github](https://github.com/open-mmlab/mmdetection/)
![demo image](demo/coco_test_12510.jpg)
Follow the installation instructions as given in the above link.
# Installation
### Major features
Ensure you have `docker` and `nvidia-docker` installed by following the instructions here :
- **Modular Design**
* [Docker](https://docs.docker.com/install/)
* [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
**NOTE**: You do not need nvidia-docker if you do not want to use a GPU when testing your submission locally.
We decompose the detection framework into different components and one can easily construct a customized object detection framework by combining different modules.
[MMDetection Installation instructions](https://github.com/open-mmlab/mmdetection/blob/master/docs/INSTALL.md)
# Dataset
- **Support of multiple frameworks out of box**
The dataset for the [AIcrowd Food Recognition Challenge](https://www.aicrowd.com/challenges/food-recognition-challenge) is available at [https://www.aicrowd.com/challenges/food-recognition-challenge/dataset_files](https://www.aicrowd.com/challenges/food-recognition-challenge/dataset_files)
The toolbox directly supports popular and contemporary detection frameworks, *e.g.* Faster RCNN, Mask RCNN, RetinaNet, etc.
This dataset contains :
* `train-v0.2.tar.gz` : This is the Training Set of **7949** food images (as RGB images), along with their corresponding annotations in [MS-COCO format](http://cocodataset.org/#home)
- **High efficiency**
* `val-v0.2.tar.gz`: This is the suggested Validation Set of **418** food images (as RGB images), along with their corresponding annotations in [MS-COCO format](http://cocodataset.org/#home)
All basic bbox and mask operations run on GPUs now. The training speed is faster than or comparable to other codebases, including [Detectron](https://github.com/facebookresearch/Detectron), [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark) and [SimpleDet](https://github.com/TuSimple/simpledet).
* `test_images-v0.2.tar.gz` : This is the debug Test Set for Round-1, where you are provided the same images as the validation set.
- **State of the art**
To get started, we would advise you to download all the files, and untar them inside the `data/` folder of this repository, so that you have a directory structure like this :
The toolbox stems from the codebase developed by the *MMDet* team, who won [COCO Detection Challenge](http://cocodataset.org/#detection-leaderboard) in 2018, and we keep pushing it forward.
```bash
|-- data/
| |-- test_images/ (has all images for prediction) (**NOTE**: they are the same as the validation set images)
| |-- train/
| | |-- images (has all the images for training)
| | |__ annotation.json : Annotation of the data in MS COCO format
| | |__ annotation-small.json : Smaller version of the previous dataset
| |-- val/
| | |-- images (has all the images for validation)
| | |__ annotation.json : Annotation of the data in MS COCO format
| | |__ annotation-small.json : Smaller version of the previous dataset
```
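To sanity-check that the archives were extracted to the right places, a minimal sketch using pycocotools (already among the baseline's dependencies) is shown below; the `annotation-small.json` files can be substituted for a quicker check:

```python
# Minimal sketch: verify the data/ layout above by loading the COCO-format annotations.
from pycocotools.coco import COCO

train_coco = COCO('data/train/annotation.json')
val_coco = COCO('data/val/annotation.json')

print('train images:', len(train_coco.getImgIds()))
print('val images:  ', len(val_coco.getImgIds()))
print('categories:  ', len(train_coco.getCatIds()))
# Peek at a few category names to confirm the food ontology loaded correctly.
print([cat['name'] for cat in train_coco.loadCats(train_coco.getCatIds())][:5])
```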
Apart from MMDetection, we also released a library [mmcv](https://github.com/open-mmlab/mmcv) for computer vision research, which is heavily depended on by this toolbox.
We also assume that you have already installed all the requirements for this notebook; if not, you can still install them, for example with `pip install -r requirements.txt`.
## License
# Usage
This project is released under the [Apache 2.0 license](LICENSE).
# Training with MMDetection:
Let us look at training MMDetection using Hybrid Task Cascade [HTC research paper](https://arxiv.org/abs/1901.07518).
## Updates
An AP_50 of 0.526 and an AR_50 of 0.729 can be achieved with Hybrid Task Cascade on a ResNet-50 backbone.
v1.0rc0 (27/07/2019)
- Implement lots of new methods and components (Mixed Precision Training, HTC, Libra R-CNN, Guided Anchoring, Empirical Attention, Mask Scoring R-CNN, Grid R-CNN (Plus), GHM, GCNet, FCOS, HRNet, Weight Standardization, etc.). Thank all collaborators!
- Support two additional datasets: WIDER FACE and Cityscapes.
- Refactoring for loss APIs and make it more flexible to adopt different losses and related hyper-parameters.
- Speed up multi-gpu testing.
- Integrate all compiling and installing in a single script.
MMDetection provides us with a config file especially for HTC, available at [HTC config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc)
v0.6.0 (14/04/2019)
- Up to 30% speedup compared to the model zoo.
- Support both PyTorch stable and nightly version.
- Replace NMS and SigmoidFocalLoss with Pytorch CUDA extensions.
Also make sure you have downloaded the training data to a subfolder of your project.
v0.6rc0(06/02/2019)
- Migrate to PyTorch 1.0.
Modify your config file and point your dataset variables to your data folder.
v0.5.7 (06/02/2019)
- Add support for Deformable ConvNet v2. (Many thanks to the authors and [@chengdazhi](https://github.com/chengdazhi))
- This is the last release based on PyTorch 0.4.1.
As given in [MMDetection Getting Started](https://github.com/open-mmlab/mmdetection/blob/master/docs/GETTING_STARTED.md), you can use:
v0.5.6 (17/01/2019)
- Add support for Group Normalization.
- Unify RPNHead and single stage heads (RetinaHead, SSDHead) with AnchorHead.
`python tools/train.py ${CONFIG_FILE}`
to train the model on a single GPU or
v0.5.5 (22/12/2018)
- Add SSD for COCO and PASCAL VOC.
- Add ResNeXt backbones and detection models.
- Refactoring for Samplers/Assigners and add OHEM.
- Add VOC dataset and evaluation scripts.
`./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]`
to train the model on multiple GPUs.
v0.5.4 (27/11/2018)
- Add SingleStageDetector and RetinaNet.
Make sure you have edited the config file to point to the dataset, and that you have changed the number of classes if you are going to use the dataloader from MMDetection.
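A hedged sketch of the kind of overrides involved is shown below; the paths and the class-count rule are assumptions that must be checked against your checkout and the categories in `annotation.json`:

```python
# Hypothetical excerpt of the edited HTC config (not the full file).
dataset_type = 'CocoDataset'
data_root = 'data/'

data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'train/annotation.json',
        img_prefix=data_root + 'train/images/'),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val/annotation.json',
        img_prefix=data_root + 'val/images/'),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val/annotation.json',
        img_prefix=data_root + 'val/images/'))

# In mmdetection v1.x, num_classes includes the background class, so set it to
# (number of food categories in annotation.json) + 1 in every bbox_head and
# mask_head of the HTC config. Keep the train/test pipelines from the original config.
```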
v0.5.3 (26/11/2018)
- Add Cascade R-CNN and Cascade Mask R-CNN.
- Add support for Soft-NMS in config files.
## Testing with MMDetection:
To test your results with MMDetection,
you can use the commands:
```
# single-gpu testing
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
v0.5.2 (21/10/2018)
- Add support for custom datasets.
- Add a script to convert PASCAL VOC annotations to the expected format.
# multi-gpu testing
./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
```
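If you prefer to run inference from Python rather than the shell scripts, mmdetection's high-level API (available in v1.x) can be used along the following lines; the config and checkpoint paths are placeholders for your trained HTC model:

```python
# Sketch of single-image inference with the mmdetection v1.x Python API.
from mmdet.apis import init_detector, inference_detector

config_file = 'configs/htc/htc_r50_fpn_1x.py'          # placeholder path
checkpoint_file = 'work_dirs/htc_r50_fpn/latest.pth'   # placeholder path

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'data/test_images/some_image.jpg')

# For models with a mask head, result is a (bbox_results, segm_results) tuple;
# bbox_results holds one Nx5 array (x1, y1, x2, y2, score) per class.
bbox_results = result[0] if isinstance(result, tuple) else result
for class_name, bboxes in zip(model.CLASSES, bbox_results):
    if len(bboxes):
        print(class_name, len(bboxes))
```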
v0.5.1 (20/10/2018)
- Add BBoxAssigner and BBoxSampler, the `train_cfg` field in config files are restructured.
- `ConvFCRoIHead` / `SharedFCRoIHead` are renamed to `ConvFCBBoxHead` / `SharedFCBBoxHead` for consistency.
**Log Analysis**
## Benchmark and model zoo
The training logs can be analyzed using the `plot_curve` utility provided with MMDetection:
Supported methods and backbones are shown in the below table.
Results and models are available in the [Model zoo](MODEL_ZOO.md).
```python
import os
from types import SimpleNamespace

import matplotlib
# Run this cell inside a Jupyter notebook.
%matplotlib inline

from tools.analyze_logs import plot_curve

matplotlib.rcParams['figure.figsize'] = [20, 10]

json_log = os.path.join(os.getcwd(), 'work_dirs/htc_r50_fpn/20191206_105437.log.json')
# plot_curve reads its options via attribute access, so wrap them in a namespace.
args = SimpleNamespace(
    keys=['segm_mAP_50'],
    legend=['segm_mAP_50'],
    backend=None,
    json_logs=[json_log],
    title='loss',
    out=None)
print(json_log)
# Depending on the mmdetection version, plot_curve may expect parsed log dicts
# (see tools/analyze_logs.py) rather than raw file paths.
plot_curve([json_log], args)
```
| | ResNet | ResNeXt | SENet | VGG | HRNet |
|--------------------|:--------:|:--------:|:--------:|:--------:|:-----:|
| RPN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Fast R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Faster R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Mask R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Cascade R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Cascade Mask R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| SSD | ✗ | ✗ | ✗ | ✓ | ✗ |
| RetinaNet | ✓ | ✓ | ☐ | ✗ | ✓ |
| GHM | ✓ | ✓ | ☐ | ✗ | ✓ |
| Mask Scoring R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| FCOS | ✓ | ✓ | ☐ | ✗ | ✓ |
| Grid R-CNN (Plus) | ✓ | ✓ | ☐ | ✗ | ✓ |
| Hybrid Task Cascade| ✓ | ✓ | ☐ | ✗ | ✓ |
| Libra R-CNN | ✓ | ✓ | ☐ | ✗ | ✓ |
| Guided Anchoring | ✓ | ✓ | ☐ | ✗ | ✓ |
Other features
- [x] DCNv2
- [x] Group Normalization
- [x] Weight Standardization
- [x] OHEM
- [x] Soft-NMS
- [x] Generalized Attention
- [x] GCNet
- [x] Mixed Precision (FP16) Training
## Installation
## Other Associated Notebooks
Please refer to [INSTALL.md](INSTALL.md) for installation and dataset preparation.
* [Dataset Utils](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb)
* [Import Dependencies](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Import-dependencies)
* [Configuration Variables](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Configuration-Variables)
* [Parsing Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Parsing-the-annotations)
* [Collecting and Visualizing Images](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Collecting-and-Visualizing-Images)
* [Understanding Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Understanding-Annotations)
* [Visualizing Annotations](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Visualizing-Annotations)
* [Advanced](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#Advanced)
* [Convert poly segmentation to rle](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#1.-Convert-poly-segmentation-to-rle)
* [Convert segmentation to pixel level masks](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Dataset%20Utils.ipynb#2.-Convert-segmentation-to-pixel-level-masks)
* [Random Submission](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/run.py)
* [Locally test the evaluation function](https://github.com/AIcrowd/food-recognition-challenge-starter-kit/blob/master/Local%20Evaluation.ipynb)
## Other Baselines
# Round 1
* [Colab Notebook for Data Analysis and Tutorial](https://colab.research.google.com/drive/1A5p9GX5X3n6OMtLjfhnH6Oeq13tWNtFO#scrollTo=ok54AWT_VoWV)
A notebook with data analysis on the Food Recognition Dataset, followed by a short tutorial on training with Keras and PyTorch. This lets you jump straight into the challenge and start solving it.
### Pretrained Baselines
* [mmdetection (pytorch)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-pytorch-baseline)
* [matterport-maskrcnn (keras - tensorflow)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-recognition)
## Get Started
# Round 2
* [Colab Notebook for Data Analysis and Tutorial](https://colab.research.google.com/drive/1vXdv9quZ7CXO5lLCjhyz3jtejRzDq221)
A notebook with data analysis on the Food Recognition Dataset, followed by a short tutorial on training with Keras and PyTorch. This lets you jump straight into the challenge and start solving it.
### Pretrained Baselines
* [mmdetection (pytorch)](https://gitlab.aicrowd.com/nikhil_rayaprolu/food-round2)
Please see [GETTING_STARTED.md](GETTING_STARTED.md) for the basic usage of MMDetection.
# Submission Instructions
## Contributing
To submit to the challenge you'll need to ensure you've set up an appropriate repository structure, create a private git repository at https://gitlab.aicrowd.com with the contents of your submission, and push a git tag corresponding to the version of your repository you'd like to submit.
We appreciate all contributions to improve MMDetection. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for the contributing guideline.
## Repository Structure
We have created this sample submission repository which you can use as a reference.
## Acknowledgement
#### aicrowd.json
Each repository should have an `aicrowd.json` file with the following fields:
MMDetection is an open source project that is contributed to by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedback.
We wish that the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new detectors.
```
{
"challenge_id" : "aicrowd-food-recognition-challenge",
"grader_id": "aicrowd-food-recognition-challenge",
"authors" : ["aicrowd-user"],
"description" : "Food Recognition Challenge Submission",
"license" : "MIT",
"gpu": true
}
```
This file is used to identify your submission as part of the Food Recognition Challenge. You must use the `challenge_id` and `grader_id` specified above in your submission. The `gpu` key in `aicrowd.json` lets you specify whether your submission requires a GPU; if it does, an NVIDIA K80 will be made available to your submission during evaluation.
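Before pushing a submission tag, a quick self-check that the manifest is well formed can save a failed evaluation; this is a hypothetical helper, not something the evaluator runs:

```python
# Hypothetical pre-submission check: verify aicrowd.json carries the required fields.
import json

with open('aicrowd.json') as f:
    manifest = json.load(f)

assert manifest['challenge_id'] == 'aicrowd-food-recognition-challenge'
assert manifest['grader_id'] == 'aicrowd-food-recognition-challenge'
assert isinstance(manifest.get('gpu'), bool), '"gpu" must be a boolean'
print('aicrowd.json looks valid:', manifest.get('description', ''))
```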
#### Submission environment configuration
You can specify the software runtime of your code by modifying the included [Dockerfile](Dockerfile).
## Citation
#### Code Entrypoint
The evaluator will use `/home/aicrowd/run.sh` as the entrypoint. Please remember to have a `run.sh` at the root which can set any necessary environment variables and execute your code. This repository includes a sample `run.sh` file.
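As a rough sketch, `run.sh` could simply invoke a Python script along these lines; `predict_one` is a placeholder for your actual inference code, and the event helpers are the ones defined in `aicrowd_helpers.py` further below:

```python
# Sketch of a Python entrypoint that run.sh could invoke.
import glob
import json
import os

import aicrowd_helpers


def predict_one(image_path):
    # Placeholder: run your detector here and return its predictions
    # for this image in the format expected by the evaluator.
    return []


def main():
    aicrowd_helpers.execution_start()
    test_images_path = os.getenv('TEST_IMAGES_PATH', 'data/test_images')
    output_path = os.getenv('AICROWD_PREDICTIONS_OUTPUT_PATH', 'predictions.json')
    predictions = []
    image_paths = sorted(glob.glob(os.path.join(test_images_path, '*.jpg')))
    for idx, image_path in enumerate(image_paths):
        predictions.extend(predict_one(image_path))
        if idx % 50 == 0:
            aicrowd_helpers.execution_progress({'image_ids': [os.path.basename(image_path)]})
    with open(output_path, 'w') as f:
        json.dump(predictions, f)
    aicrowd_helpers.execution_success({'predictions_output_path': output_path})


if __name__ == '__main__':
    main()
```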
If you use this toolbox or benchmark in your research, please cite this project.
### Local Debug
```
@article{mmdetection,
title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark},
author = {Kai Chen, Jiaqi Wang, Jiangmiao Pang, Yuhang Cao, Yu Xiong, Xiaoxiao Li,
Shuyang Sun, Wansen Feng, Ziwei Liu, Jiarui Xu, Zheng Zhang, Dazhi Cheng,
Chenchen Zhu, Tianheng Cheng, Qijie Zhao, Buyu Li, Xin Lu, Rui Zhu, Yue Wu,
Jifeng Dai, Jingdong Wang, Jianping Shi, Wanli Ouyang, Chen Change Loy, Dahua Lin},
journal = {arXiv preprint arXiv:1906.07155},
year = {2019}
}
export TEST_IMAGES_PATH="../data/test_images" # or path to your local folder containing images
export IMAGE_NAME="aicrowd-food-recognition-challenge-submission"
./build.sh
./debug.sh
######################################
## NOTE :
##
## * If you do not wish to use a GPU when testing locally, please feel free to replace nvidia-docker with docker
##
## * If you want to test on images located at an alternate location, set the `TEST_IMAGES_PATH` environment variable accordingly before running `build.sh` and `debug.sh`.
```
### Submitting
To make a submission, you will have to create a private repository on [https://gitlab.aicrowd.com](https://gitlab.aicrowd.com).
You will have to add your SSH Keys to your GitLab account by following the instructions [here](https://docs.gitlab.com/ee/gitlab-basics/create-your-ssh-keys.html).
If you do not have SSH Keys, you will first need to [generate one](https://docs.gitlab.com/ee/ssh/README.html#generating-a-new-ssh-key-pair).
Then you can create a submission by making a *tag push* to your repository, adding the correct git remote and pushing to the remote:
```
git clone https://gitlab.aicrowd.com/nikhil_rayaprolu/food-round2
cd food-round2
# Add AICrowd git remote endpoint
git remote add aicrowd git@gitlab.aicrowd.com:<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline.git
git push aicrowd master
# Create a tag for your submission and push
git tag -am "submission-v0.1" submission-v0.1
git push aicrowd master
git push aicrowd submission-v0.1
# Note : If the contents of your repository (latest commit hash) does not change,
# then pushing a new tag will not trigger a new evaluation.
```
You should now be able to see the details of your submission at:
[gitlab.aicrowd.com/<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline/issues](gitlab.aicrowd.com/<YOUR_AICROWD_USER_NAME>/food-challenge-pytorch-baseline/issues)
## Using HTTP instead of SSH (Personal Access Token):
In order to use HTTP to clone repositories and submit on GitLab:
a) Create a personal access token
1. Log in to GitLab.
2. In the upper-right corner, click your avatar and select Settings.
3. On the User Settings menu, select Access Tokens.
4. Choose a name and optional expiry date for the token.
5. Choose the desired scopes.
6. Click the Create personal access token button.
7. Save the personal access token somewhere safe; let's call it XXX for now.
Once you leave or refresh the page, you won’t be able to access it again.
b) To clone a repo, use the following command:
`git clone https://oauth2:XXX@gitlab.aicrowd.com/(username)/(repo_name).git`
c) Submit a solution:
```
# cd into your submission repo on gitlab
cd (repo_name)
#Add AICrowd git remote endpoint
git remote add aicrowd https://oauth2:XXX@gitlab.aicrowd.com/(username)/(repo_name).git
git push aicrowd master
# Create a tag for your submission and push
git tag -am "submission-v0.1" submission-v0.1
git push aicrowd master
git push aicrowd submission-v0.1
# Note : If the contents of your repository (latest commit hash) does not change,
# then pushing a new tag will not trigger a new evaluation.
```
**Best of Luck**
## Miscellaneous Resources
* [Convert Annotations from MS COCO format to PascalVOC format](https://github.com/CasiaFan/Dataset_to_VOC_converter/blob/master/anno_coco2voc.py)
## Credits
* Parts of the documentation for this baseline were taken from: https://github.com/AIcrowd/food-recognition-challenge-starter-kit
* and the baseline is built using MMDetection: https://github.com/open-mmlab/mmdetection/
## Contact
# Author
**[Nikhil Rayaprolu](mailto:nikhil@aicrowd.com)**
This repo is currently maintained by Kai Chen ([@hellock](http://github.com/hellock)), Jiangmiao Pang ([@OceanPang](https://github.com/OceanPang)), Jiaqi Wang ([@myownskyW7](https://github.com/myownskyW7)) and Yuhang Cao ([@yhcao6](https://github.com/yhcao6)).
{
"challenge_id" : "aicrowd-food-recognition-challenge",
"grader_id": "aicrowd-food-recognition-challenge",
"authors" : ["nikhil13prs"],
"description" : "Food Recognition Challenge Submission",
"license" : "MIT",
"gpu": true
}
#!/usr/bin/env python
import aicrowd_api
import os
########################################################################
# Instantiate Event Notifier
########################################################################
aicrowd_events = aicrowd_api.events.AIcrowdEvents()
def execution_start():
########################################################################
# Register Evaluation Start event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_started",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_started"
}
)
def execution_progress(progress_payload):
image_ids = progress_payload["image_ids"]
########################################################################
# Register Evaluation Progress event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_INFO,
message="execution_progress",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_progress",
"image_ids" : image_ids
}
)
def execution_success(payload):
predictions_output_path = payload["predictions_output_path"]
########################################################################
# Register Evaluation Complete event
########################################################################
expected_output_path = os.getenv("AICROWD_PREDICTIONS_OUTPUT_PATH", False)
if expected_output_path != predictions_output_path:
raise Exception("Please write the output to the path specified in the environment variable : AICROWD_PREDICTIONS_OUTPUT_PATH instead of {}".format(predictions_output_path))
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_SUCCESS,
message="execution_success",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_success",
"predictions_output_path" : predictions_output_path
},
blocking=True
)
def execution_error(error):
########################################################################
# Register Evaluation Complete event
########################################################################
aicrowd_events.register_event(
event_type=aicrowd_events.AICROWD_EVENT_ERROR,
message="execution_error",
payload={ #Arbitrary Payload
"event_type": "food_recognition_challenge:execution_error",
"error" : error
},
blocking=True
)
[
{
"id": 2578,
"name": "water",
"name_readable": "Water",
"supercategory": "food"
},
{
"id": 2939,
"name": "pizza-margherita-baked",
"name_readable": "Pizza, Margherita, baked",
"supercategory": "food"
},
{
"id": 1085,
"name": "broccoli",
"name_readable": "Broccoli",
"supercategory": "food"
},
{
"id": 1040,
"name": "salad-leaf-salad-green",
"name_readable": "Salad, leaf / salad, green",
"supercategory": "food"
},
{
"id": 1070,
"name": "zucchini",
"name_readable": "Zucchini",
"supercategory": "food"
},
{
"id": 2022,
"name": "egg",
"name_readable": "Egg",
"supercategory": "food"
},
{
"id": 2053,
"name": "butter",
"name_readable": "Butter",
"supercategory": "food"
},
{
"id": 1566,
"name": "bread-white",
"name_readable": "Bread, white",
"supercategory": "food"
},
{
"id": 1151,
"name": "apple",
"name_readable": "Apple",
"supercategory": "food"
},
{
"id": 2131,
"name": "dark-chocolate",
"name_readable": "Dark chocolate",
"supercategory": "food"
},
{
"id": 2521,
"name": "white-coffee-with-caffeine",
"name_readable": "White coffee, with caffeine",
"supercategory": "food"
},
{
"id": 1068,
"name": "sweet-pepper",
"name_readable": "Sweet pepper",
"supercategory": "food"
},
{
"id": 1026,
"name": "mixed-salad-chopped-without-sauce",
"name_readable": "Mixed salad (chopped without sauce)",
"supercategory": "food"
},
{
"id": 2738,
"name": "tomato-sauce",
"name_readable": "Tomato sauce",
"supercategory": "food"
},
{
"id": 1565,
"name": "bread-wholemeal",
"name_readable": "Bread, wholemeal",
"supercategory": "food"
},
{
"id": 2512,
"name": "coffee-with-caffeine",
"name_readable": "Coffee, with caffeine",
"supercategory": "food"
},
{
"id": 1061,
"name": "cucumber",
"name_readable": "Cucumber",
"supercategory": "food"
},
{
"id": 1311,
"name": "cheese",
"name_readable": "Cheese",
"supercategory": "food"
},
{
"id": 1505,
"name": "pasta-spaghetti",
"name_readable": "Pasta, spaghetti",
"supercategory": "food"
},
{
"id": 1468,
"name": "rice",
"name_readable": "Rice",
"supercategory": "food"
},
{
"id": 1967,
"name": "salmon",
"name_readable": "Salmon",
"supercategory": "food"
},
{
"id": 1078,
"name": "carrot",
"name_readable": "Carrot",
"supercategory": "food"
},
{
"id": 1116,
"name": "onion",
"name_readable": "Onion",
"supercategory": "food"
},
{
"id": 1022,
"name": "mixed-vegetables",
"name_readable": "Mixed vegetables",
"supercategory": "food"
},
{
"id": 2504,
"name": "espresso-with-caffeine",
"name_readable": "Espresso, with caffeine",
"supercategory": "food"
},
{
"id": 1154,
"name": "banana",
"name_readable": "Banana",
"supercategory": "food"
},
{
"id": 1163,
"name": "strawberries",
"name_readable": "Strawberries",
"supercategory": "food"
},
{
"id": 2750,
"name": "mayonnaise",
"name_readable": "Mayonnaise",
"supercategory": "food"
},
{
"id": 1210,
"name": "almonds",
"name_readable": "Almonds",
"supercategory": "food"
},
{
"id": 2620,
"name": "wine-white",
"name_readable": "Wine, white",
"supercategory": "food"
},
{
"id": 1310,
"name": "hard-cheese",
"name_readable": "Hard cheese",
"supercategory": "food"
},
{
"id": 1893,
"name": "ham-raw",
"name_readable": "Ham, raw",
"supercategory": "food"
},
{
"id": 1069,
"name": "tomato",
"name_readable": "Tomato",
"supercategory": "food"
},
{
"id": 1058,
"name": "french-beans",
"name_readable": "French beans",
"supercategory": "food"
},
{
"id": 1180,
"name": "mandarine",
"name_readable": "Mandarine",
"supercategory": "food"
},
{
"id": 2618,
"name": "wine-red",
"name_readable": "Wine, red",
"supercategory": "food"
},
{
"id": 1010,
"name": "potatoes-steamed",
"name_readable": "Potatoes steamed",
"supercategory": "food"
},
{
"id": 1588,
"name": "croissant",
"name_readable": "Croissant",
"supercategory": "food"
},
{
"id": 1879,
"name": "salami",
"name_readable": "Salami",
"supercategory": "food"
},
{
"id": 3080,
"name": "boisson-au-glucose-50g",
"name_readable": "Boisson au glucose 50g",
"supercategory": "food"
},
{
"id": 2388,
"name": "biscuits",
"name_readable": "Biscuits",
"supercategory": "food"
},
{
"id": 1108,
"name": "corn",
"name_readable": "Corn",
"supercategory": "food"
},
{
"id": 1032,
"name": "leaf-spinach",
"name_readable": "Leaf spinach",
"supercategory": "food"
},
{
"id": 2099,
"name": "jam",
"name_readable": "Jam",
"supercategory": "food"
},
{
"id": 2530,
"name": "tea-green",
"name_readable": "Tea, green",
"supercategory": "food"
},
{
"id": 1013,
"name": "chips-french-fries",
"name_readable": "Chips, french fries",
"supercategory": "food"
},
{
"id": 1323,
"name": "parmesan",
"name_readable": "Parmesan",
"supercategory": "food"
},
{
"id": 2634,
"name": "beer",
"name_readable": "Beer",
"supercategory": "food"
},
{
"id": 1056,
"name": "avocado",
"name_readable": "Avocado",
"supercategory": "food"
},
{
"id": 1520,
"name": "bread-french-white-flour",
"name_readable": "Bread, French (white flour)",
"supercategory": "food"
},
{
"id": 1788,
"name": "chicken",
"name_readable": "Chicken",
"supercategory": "food"
},
{
"id": 1352,
"name": "soft-cheese",
"name_readable": "Soft cheese",
"supercategory": "food"
},
{
"id": 2498,
"name": "tea",
"name_readable": "Tea",
"supercategory": "food"
},
{
"id": 2711,
"name": "sauce-savoury",
"name_readable": "Sauce (savoury)",
"supercategory": "food"
},
{
"id": 2103,
"name": "honey",
"name_readable": "Honey",
"supercategory": "food"
},
{
"id": 1554,
"name": "bread-whole-wheat",
"name_readable": "Bread, whole wheat",
"supercategory": "food"
},
{
"id": 1556,
"name": "bread-sourdough",
"name_readable": "Bread, sourdough",
"supercategory": "food"
},
{
"id": 1307,
"name": "gruyere",
"name_readable": "Gruyère",
"supercategory": "food"
},
{
"id": 1060,
"name": "pickle",
"name_readable": "Pickle",
"supercategory": "food"
},
{
"id": 1220,
"name": "mixed-nuts",
"name_readable": "Mixed nuts",
"supercategory": "food"
},
{
"id": 2580,
"name": "water-mineral",
"name_readable": "Water, mineral",
"supercategory": "food"
}
]
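For reference, a small sketch of how this category list could be turned into ordered class names and a contiguous label mapping (the `classes.json` filename is an assumption for wherever you keep this file):

```python
# Sketch: derive class names and a contiguous label mapping from the category list above.
import json

with open('classes.json') as f:   # assumed filename for the JSON list above
    categories = json.load(f)

# COCO category ids here are sparse (2578, 2939, ...); keep a stable order by id.
categories = sorted(categories, key=lambda cat: cat['id'])
CLASSES = tuple(cat['name'] for cat in categories)
ID_TO_LABEL = {cat['id']: idx for idx, cat in enumerate(categories)}

print(len(CLASSES), 'classes, e.g.', CLASSES[:3])
```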
#!/bin/bash
docker build -t "${IMAGE_NAME}" .
# model settings
model = dict(
type='MaskRCNN',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=81,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=81,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
albu_train_transforms = [
dict(
type='ShiftScaleRotate',
shift_limit=0.0625,
scale_limit=0.0,
rotate_limit=0,
interpolation=1,
p=0.5),
dict(
type='RandomBrightnessContrast',
brightness_limit=[0.1, 0.3],
contrast_limit=[0.1, 0.3],
p=0.2),
dict(
type='OneOf',
transforms=[
dict(
type='RGBShift',
r_shift_limit=10,
g_shift_limit=10,
b_shift_limit=10,
p=1.0),
dict(
type='HueSaturationValue',
hue_shift_limit=20,
sat_shift_limit=30,
val_shift_limit=20,
p=1.0)
],
p=0.1),
dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),
dict(type='ChannelShuffle', p=0.1),
dict(
type='OneOf',
transforms=[
dict(type='Blur', blur_limit=3, p=1.0),
dict(type='MedianBlur', blur_limit=3, p=1.0)
],
p=0.1),
]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='Pad', size_divisor=32),
dict(
type='Albu',
transforms=albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_labels'],
min_visibility=0.0,
filter_lost_elements=True),
keymap={
'img': 'image',
'gt_masks': 'masks',
'gt_bboxes': 'bboxes'
},
update_pad_shape=False,
skip_img_without_anno=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='DefaultFormatBundle'),
dict(
type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],
meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',
'pad_shape', 'scale_factor'))
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
# Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection
## Introduction
```
@article{zhang2019bridging,
title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection},
author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.},
journal = {arXiv preprint arXiv:1912.02424},
year = {2019}
}
```
## Results and Models
| Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download |
|:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:|
| R-50 | pytorch | 1x | 3.6 | 0.357 | 12.8 | 39.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/atss/atss_r50_fpn_1x_20200113-a7aa251e.pth)|
# model settings
model = dict(
type='ATSS',
pretrained='torchvision://resnet50',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs=True,
extra_convs_on_inputs=False,
num_outs=5),
bbox_head=dict(
type='ATSSHead',
num_classes=81,
in_channels=256,
stacked_convs=4,
feat_channels=256,
octave_base_scale=8,
scales_per_octave=1,
anchor_ratios=[1.0],
anchor_strides=[8, 16, 32, 64, 128],
target_means=[.0, .0, .0, .0],
target_stds=[0.1, 0.1, 0.2, 0.2],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))
# training and testing settings
train_cfg = dict(
assigner=dict(type='ATSSAssigner', topk=9),
allowed_border=-1,
pos_weight=-1,
debug=False)
test_cfg = dict(
nms_pre=1000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.6),
max_per_img=100)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/atss_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
......@@ -2,7 +2,7 @@
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained='modelzoo://resnet101',
pretrained='torchvision://resnet101',
backbone=dict(
type='ResNet',
depth=101,
......@@ -44,13 +44,8 @@ model = dict(
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
......@@ -62,13 +57,8 @@ model = dict(
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
......@@ -80,13 +70,8 @@ model = dict(
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0))
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
......@@ -189,13 +174,37 @@ test_cfg = dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
......@@ -203,35 +212,17 @@ data = dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......
......@@ -52,13 +52,8 @@ model = dict(
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
dict(
type='BBoxHead',
with_avg_pool=True,
......@@ -69,13 +64,8 @@ model = dict(
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
dict(
type='BBoxHead',
with_avg_pool=True,
......@@ -86,13 +76,8 @@ model = dict(
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0))
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
],
mask_roi_extractor=None,
mask_head=dict(
......@@ -191,49 +176,55 @@ test_cfg = dict(
score_thr=0.05,
nms=dict(type='nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
imgs_per_gpu=1,
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_label=False,
test_mode=True))
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
......