diff --git a/core/metrics b/core/metrics
deleted file mode 160000
index 3a3dc7eb623027d0081fadb7e43eeca3ddd9be8d..0000000000000000000000000000000000000000
--- a/core/metrics
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 3a3dc7eb623027d0081fadb7e43eeca3ddd9be8d
diff --git a/core/metrics/LICENSE b/core/metrics/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..b64064d04b00535f09ad9000d7d3613eff184715
--- /dev/null
+++ b/core/metrics/LICENSE
@@ -0,0 +1,3 @@
+Copyright 2020 Amazon.com, Inc. or its affiliates.
+
+https://cdla.dev/sharing-1-0/
diff --git a/core/metrics/README.md b/core/metrics/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..61fa8370c2e6d9dc4cfdd5512e6393dbdbe89597
--- /dev/null
+++ b/core/metrics/README.md
@@ -0,0 +1,51 @@
+__Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.__
+This package provides the evaluation code for the PrimeAir airborne detection challenge.
+The driving script is ```run_airborne_metrics.py```:
+```
+usage: run_airborne_metrics.py [-h] --dataset-folder DATASET_FOLDER
+                               --results-folder RESULTS_FOLDER
+                               [--summaries-folder SUMMARIES_FOLDER]
+                               [--min-score MIN_SCORE]
+                               [--min-track-len MIN_TRACK_LEN]
+                               [--log-level LOG_LEVEL]
+run_airborne_metrics.py: error: the following arguments are required: --dataset-folder/-d, --results-folder/-r
+```
+The metrics script takes a folder with the dataset ground truth file or files (```--dataset-folder```) 
+and a folder with results (```--results-folder```).
+It evaluates all the results present in the results folder and saves json files with evaluation 
+details into the summaries folder (```--summaries-folder``` if provided, otherwise a ```summaries``` folder
+is created in ```--results-folder```).
+Additionally, detections can be filtered by a minimum detection score and/or a minimum track length,
+by providing ```--min-score``` and/or ```--min-track-len``` accordingly.
+
+Before running the examples below you will need to place results in the results folder (named results_example below)
+and groundtruth.csv (preferred over .json) in the ground truth folder (named validation_gt).
+
+For example:
+```
+cd challenge_metrics 
+python3.6 -m pip install .  # need to install once unless you make changes to the code
+python3.6 run_airborne_metrics.py -d ./validation_gt  -r ./results_example -s ./summaries 
+```
+will evaluate all the detections
+OR
+```
+python3.6 run_airborne_metrics.py -d ./validation_gt  -r ./results_example -s ./summaries --min-track-len 10
+```
+will evaluate only detections that correspond to tracks with track_len of 10 and above (in an on-line fashion)
+
+The detection results json file should contain detection records per image (img_name) with the following fields:
+'img_name' - image name as it appears in the ground truth file
+'detections' - List[Dict] with the following fields:
+    'n' - name of the class (typically airborne)
+    'x' - x coordinate of the center of the bounding box
+    'y' - y coordinate of the center of the bounding box
+    'w' - width of the bounding box
+    'h' - height of the bounding box
+    's' - detection score
+    'track_id' / 'object_id' - optional track or object id associated with the detection
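+For illustration, a single record following this schema might look like (all values below are hypothetical):
+```
+{"img_name": "image_0001.png",
+ "detections": [{"n": "airborne", "x": 1602.0, "y": 1066.5,
+                 "w": 10.0, "h": 9.0, "s": 0.93, "track_id": 3}]}
+```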
+Please see sample detection results file in ```results_example```
+
+The results will be found in:
+1) the results folder, in a sub-folder named after the results with a 'metrics' suffix appended
+2) the summaries folder you provided as input
diff --git a/core/metrics/airborne_metrics/__init__.py b/core/metrics/airborne_metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/core/metrics/airborne_metrics/calculate_airborne_metrics.py b/core/metrics/airborne_metrics/calculate_airborne_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f9bd23f8ffc24d932752e33e3f4ecbbe48a85c3
--- /dev/null
+++ b/core/metrics/airborne_metrics/calculate_airborne_metrics.py
@@ -0,0 +1,883 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+"""This module computes airborne level metrics.
+The module assumes the following inputs are available:
+1) Ground truth enriched with encounters data frame (artifact of calculate_encounters.py)
+2) Ground truth vs. Detection matches data frame (artifact of match_groundtruth_results.py)
+3) Below/Mixed/Above horizon indicator for encounter (Below == -1, Above == 1, Mixed in (-1, 1))
+The module saves an artifact that provides detection information for each encounter:
+1) The range at which the encounter was detected 
+2) The latency (in frames) it took to detect the encounter
+""" 
+
+import argparse
+import json
+import logging
+import os
+from functools import partial
+from collections import Counter, OrderedDict
+
+import numpy as np
+import pandas as pd
+
+from airborne_metrics.calculate_encounters import exclude_encounters_from_evaluation, DEFAULT_MIN_DETECTION_RANGE_M, ENC_END_RANGE_SCALE 
+from airborne_metrics.pandas_utils import is_in_df_columns 
+from airborne_metrics.script_utils import setup_logging
+
+RESULTS_NAME_PREFIX_ = 'airborne_metrics'
+DEFAULT_MIN_DETECTION_SCORE_ = 0.0 # minimum possible score of detection algorithm
+DEFAULT_MAX_DETECTION_SCORE_ = 1.0 # maximum possible score of detection algorithm
+DEFAULT_TEMPORAL_WINDOW_FOR_FL_DR_ = 30 # frames to consider for frame level detection rate calculation
+DEFAULT_MIN_FL_DR_ = 0.5 # that means that within 30 frames at least 15 should be detected
+DEFAULT_METRICS_VALUE_PRECISION_ = 5 # required for binary search for target value of target metrics
+SCORE_PRECISION_ = 0.00001 # required for binary search for target value of target metrics
+MINS_PER_FLIGHT = 2 # each flight is two minutes long
+MINS_TO_HOURS = 1 / 60 
+TARGET_FAR_PER_HOUR = 1 / 2 # 1 False Alarm per 2 hours
+RANGES_TO_DETECT = [300] # the metrics is reported for detections before 300 m
+FAST_DETECTION_WINDOW_ = 30 # frames - an encounter is detected fast if it is detected within 3 secs.
+
+# Frame level detection rate calculation constants
+DEFAULT_MIN_OBJECT_AREA = 200 # minimum object area for frame level 
+NOT_BIRD_QUERY = 'id.str.contains("Flock") == False and id.str.contains("Bird") == False'
+GT_ABOVE_AREA_QUERY = 'gt_area > {min_area}'
+GT_BELOW_AREA_QUERY = 'gt_area <= {min_area}'
+DEFAULT_OBJECT_OF_INTEREST_QUERY = GT_ABOVE_AREA_QUERY + ' and ' + NOT_BIRD_QUERY
+
+# QUERIES 
+HORIZON_QUERIES = {'Below Horizon': 'is_above_horizon == -1', 
+                   'Mixed': '-1 < is_above_horizon < 1',
+                   'Above Horizon': 'is_above_horizon == 1',
+                   'All': 'is_above_horizon == is_above_horizon'}
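+# e.g. df_encounters.query(HORIZON_QUERIES['Mixed']) keeps encounters whose mean is_above_horizon
+# lies strictly between -1 and 1, i.e. encounters with frames both below and above the horizon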
+FALSE_DETECTION_QUERY = 'gt_det_no_match == 1'
+POSITIVE_DETECTION_QUERY = 'gt_det_match == 1'
+PLANNED_INTRUDERS = 'range_distance_m == range_distance_m' # that is range is a number (not NaN)
+NON_PLANNED_INTRUDERS = 'range_distance_m != range_distance_m' # that is range is NaN
+NON_PLANNED_AIRCRAFT = NON_PLANNED_INTRUDERS + ' and ' + NOT_BIRD_QUERY
+
+
+log = logging.getLogger(__name__)
+
+def add_flags(parser):
+    """Utility function adding command line arguments to the parser"""
+    # input files 
+    parser.add_argument('--groundtruth-results-matches-filename', '-m', required=True, 
+                        help='Path to the ground truth and detection matches data frame in .csv format')
+    parser.add_argument('--encounters-with-groundtruth-filename', '-e', required=True, 
+                        help='Path to the ground truth enriched with encounters data frame in .csv format')
+    # output  
+    parser.add_argument('--output-dir-path', '-o',
+                        help='Desired folder to save the output data frame with '
+                        'match/no match between groundtruth and detections')
+    parser.add_argument('--results-name-prefix', type=str, default=RESULTS_NAME_PREFIX_,
+                        help='Prefix for results filename')
+    parser.add_argument('--save-intermediate-results', default=False, action='store_true',
+                        help='Specify this if saving intermediate data frame with encounters and '
+                        'corresponding moving frame level detection rate is needed')
+    # working point parameters 
+    parser.add_argument('--min-object-area', type=float, default=DEFAULT_MIN_OBJECT_AREA,
+                        help='The minimum object area for average frame level detection rate calculation')
+    parser.add_argument('--min-enc-range-upper-bound', type=float,  
+                        default=ENC_END_RANGE_SCALE * DEFAULT_MIN_DETECTION_RANGE_M,
+                        help='The minimum range of the encounter should be not less than this value, '
+                        'default is {}'.format(ENC_END_RANGE_SCALE * DEFAULT_MIN_DETECTION_RANGE_M))
+    parser.add_argument('--max-enc-range-lower-bound', type=float, 
+                        help='The maximum range of the encounter should be not less than this value')
+    parser.add_argument('--target-metrics', '-t', type=str, choices=['far', 'fppi', 'fl_dr'], 
+                       help='Provide metrics, FAR or FPPI or FL_DR (frame-level detection rate), '
+                       'to determine a working point. This is useful when comparing to other algorithms. '
+                       'If None is provided, detection score threshold (default = 0) will be used')
+    parser.add_argument('--target-value', '-f', type=float, 
+                       help='Provide the value for the expected target metrics (if chosen). '
+                       'The default target values is calculated if target metrics is FAR. '
+                       'If target metrics is FPPI or FL_DR and target values is None - error will be thrown')
+    parser.add_argument('--target-value-precision', type=int, default=DEFAULT_METRICS_VALUE_PRECISION_,
+                       help='Precision with which to calculate targeted value. Provide this value '
+                       'if you want the metrics to calculate the score based on specific target metrics')
+    parser.add_argument('--min-det-score', type=float, default=DEFAULT_MIN_DETECTION_SCORE_,
+                        help='Minimum possible detection score. Provide this value if you want '
+                       'the metrics to calculate the score for working point based on target metrics')
+    parser.add_argument('--max-det-score', type=float, default=DEFAULT_MAX_DETECTION_SCORE_,
+                        help='Maximum possible detection score. Provide this value if you want '
+                       'the metrics to calculate the score for working point based on target metrics')
+    parser.add_argument('--detection-score-threshold', '-s', type=float, 
+                        default=DEFAULT_MIN_DETECTION_SCORE_,
+                        help='Detection score threshold for working point')
+    # parameters 
+    parser.add_argument('--use-track-fl-dr', action='store_true',
+                        help='Setting up this flag will require the same track_id in detections that ' 
+                        'contribute to the encounter level detection rate calculation')
+    parser.add_argument('--fl-dr-temporal-win', type=int, default=DEFAULT_TEMPORAL_WINDOW_FOR_FL_DR_,  
+                        help='Temporal window for moving frame level detection rate')
+    parser.add_argument('--min-fl-dr', type=float, default=DEFAULT_MIN_FL_DR_,
+                         help='Minimum frame level detection rate within the temporal window')
+
+def _assert_non_negative(value, value_name):
+    """assertion helper"""
+    assert value >= 0.0, ('{} is expected to be non-negative'.format(value_name))
+
+def _assert_strictly_positive(value, value_name):
+    """assertion helper"""
+    assert value > 0.0, ('{} is expected to be strictly positive, but received {}'.format(
+                                                                        value_name, value))
+
+def check_flags(flags):
+    """Utility function to check the input"""
+    _assert_non_negative(flags.min_det_score, 'Minimum detection score')
+    _assert_non_negative(flags.max_det_score, 'Maximum detection score')
+    _assert_non_negative(flags.detection_score_threshold, 'Detection score threshold')
+    assert (flags.target_value is not None or flags.target_metrics is None 
+        or flags.target_metrics == 'far'), (
+                'If target-metrics is specified as fppi or fl_dr, target-value should be provided ')
+    if flags.target_value is not None:
+        _assert_non_negative(flags.target_value, 'Target value')
+        _assert_strictly_positive(flags.target_value_precision, 'Target value precision')
+    _assert_strictly_positive(flags.fl_dr_temporal_win, 
+                                'Temporal window for moving frame level detection rate')
+    assert 0 < flags.min_fl_dr <= 1.0, (
+                        'Minimum frame level detection rate should be in (0,1] range')
+    assert flags.output_dir_path is None or not os.path.isfile(flags.output_dir_path), (
+                                                 'Directory name is expected as output path')
+    assert (flags.groundtruth_results_matches_filename.endswith('.csv') and 
+            flags.encounters_with_groundtruth_filename.endswith('.csv')), (
+            'Unsupported file format, please provide .csv produced by calculate_encounters.py and '
+            'match_groundtruth_results.py')
+
+##########################################################
+# Frame-level FPPI and PD and strict FAR  calculation code
+##########################################################
+
+def _calc_num_no_match_detections(df_matches):
+    """helper to calculate number of not matched detection within the provided data frame of matches"""
+    log.info('Calculating the number of detections that did not match ground truth') 
+    if not is_in_df_columns(df_matches, ['detection_id']):
+        # this is an edge case when there are 0 detections
+        return 0
+    assert is_in_df_columns(df_matches, ['detection_id', 'gt_det_no_match']), (
+        'One or more of detection_id, gt_det_no_match columns is not found, cannot calculate')
+    # Grouping by 'detection_id' yields all the match results between the detection with 
+    # this specific id and a group of ground truth intruders that were evaluated for a match 
+    # (within the same frame).
+    # gt_det_no_match will be 1 if the detection does not match the specific intruder
+    # if minimum of 'gt_det_no_match' over the group of intruders equals 1, it means that the 
+    # detection did not match any of those intruders, hence it is a false detection    
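+    # For illustration (hypothetical values): if detection_id 7 was compared against two intruders
+    # in its frame with gt_det_no_match = [0, 1], the group min is 0, so the detection matched at
+    # least one intruder; with gt_det_no_match = [1, 1] the min is 1 and the detection is false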
+    df_detections = df_matches[['detection_id', 'gt_det_no_match']].groupby(
+                                            'detection_id')['gt_det_no_match'].min().reset_index(0)
+    number_of_detections = df_matches['detection_id'].nunique()
+    assert number_of_detections == len(df_detections), ('something went wrong with grouping detections, '
+                        'expected {}, but got {}'.format(number_of_detections, len(df_detections)))
+
+    num_no_match_detections = len(df_detections.query(FALSE_DETECTION_QUERY)) 
+    log.info('No match calculation: Number of detections without a match = %d out of %d '
+            'unique detections', num_no_match_detections, number_of_detections)
+    return num_no_match_detections
+
+def _calc_num_unique_track_ids_with_no_match_detection(df_matches):
+    """helper to calculate number of unique tracks that correspond to at least one not matched detection"""
+    log.info('Calculating the number of unique track ids that correspond to at least one '
+            'not matched detection') 
+    if not is_in_df_columns(df_matches, ['detection_id']):
+        # this is an edge case when there are 0 detections
+        return 0
+    required_cols = ['flight_id', 'detection_id', 'track_id', 'gt_det_no_match']
+    assert is_in_df_columns(df_matches, required_cols), (
+        'One or more of detection_id, track_id, gt_det_no_match columns is not found, cannot calculate')
+    
+    # Grouping by 'detection_id' yields all the match results between the detection with 
+    # this specific id and a group of ground truth intruders that were evaluated for a match 
+    # (within the same frame).
+    # gt_det_no_match will be 1 if the detection does not match the specific intruder
+    # if minimum of 'gt_det_no_match' over the group of intruders equals 1, it means that the 
+    # detection did not match any of those intruders, hence it is a false detection 
+    # Only unique track_ids that correspond to false detection are counted    
+    df_detections = df_matches[required_cols].groupby('detection_id')[
+                ['flight_id', 'track_id', 'gt_det_no_match']].agg({
+                'flight_id':'first', 'track_id': 'first', 'gt_det_no_match': 'min'}).reset_index(0)
+    num_false_tracks = df_detections.query(FALSE_DETECTION_QUERY).groupby(['flight_id', 'track_id']).ngroups
+    log.info('Number of unique track_ids that correspond to at least one false detection %d', 
+                                                                                    num_false_tracks)
+    return num_false_tracks
+
+def _filter_matches_based_on_detection_score(df_matches, min_score):
+    """helper to filter data frame of matches based on detection score"""
+    min_score = min_score if min_score is not None else 0    
+    if min_score < 0:
+        raise ValueError('min_score should be non-negative or None')
+    elif min_score > 0:
+        log.info('Filtering score threshold = %.3f', min_score) 
+        assert is_in_df_columns(df_matches, ['s']), 's (score) column is not found, cannot filter'
+        df_matches_filtered = df_matches.query('s >= {}'.format(min_score))
+        return df_matches_filtered
+    return df_matches
+
+def compute_false_positives_per_image(df_matches, total_frames_processed=None, min_score=None):
+    """Compute FPPI based on a data frame of matches - useful for frame-level metrics 
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results 
+                (typically this is an artifact of match_groundtruth_results.py)
+        total_frames_processed: int, number of processed flights if different from the number of unique
+                frames ('img_name') in the provided df_matches
+        min_score: float, minimum detection score to consider for evaluation 
+    Returns:
+        float, false positives per image
+    """
+    # determine how many images were processed 
+    if total_frames_processed is not None and total_frames_processed <= 0:
+        raise ValueError('total_frames_processed should be strictly positive')
+    if total_frames_processed is None:      
+        log.info('FPPI calculation: Using unique image names in the provided data frame to calculate '
+                                                                'total number of processed frames')
+        assert is_in_df_columns(df_matches, ['img_name']), 'img_name column is not found, cannot calculate'
+        total_frames_processed = df_matches['img_name'].nunique()
+    log.info('FPPI calculation: Total number of processed frames is %d', total_frames_processed)
+    df_matches = _filter_matches_based_on_detection_score(df_matches, min_score)
+    fppi = _calc_num_no_match_detections(df_matches) / total_frames_processed
+    log.info('FPPI = %.5f', fppi)
+    return fppi
+
+def compute_false_alarms_per_hour(df_matches, total_flights_processed=None, min_score=None):
+    """Compute strict FAR based on a data frame of matches, based on the following definition
+    Overall False Alarm Rate (strict FA) - a number of unique reported track ids, 
+    which correspond to at least one false positive cluster, divided by total number of hours 
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results 
+                (typically this is an artifact of match_groundtruth_results.py)
+        total_flights_processed: int, number of processed flights if different from the number of unique
+                flights ('flight_id') in the provided df_matches
+        min_score: float, minimum detection score to consider for evaluation 
+    Returns:
+        float, false alarms per hour
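+    Example (hypothetical numbers):
+        with 60 flights processed (60 flights * 2 min = 2 hours) and 3 unique false track ids,
+        FAR = 3 / 2 = 1.5 false alarms per hour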
+    """
+    # determine how many images were processed 
+    if total_flights_processed is not None and total_flights_processed <= 0:
+        raise ValueError('total_flights_processed should be strictly positive')
+    if total_flights_processed is None:      
+        log.info('FAR calculation: Using unique flight ids in the provided data frame to calculate '
+                                                                'total number of processed flights')
+        assert is_in_df_columns(df_matches, ['flight_id']), 'flight_id column not found, cannot calculate'
+        total_flights_processed = df_matches['flight_id'].nunique()
+    total_hours_processed = total_flights_processed * MINS_PER_FLIGHT * MINS_TO_HOURS
+    _assert_strictly_positive(total_hours_processed, 'Total processed hours')
+    log.info('FAR calculation: Total number of processed flights is %d', total_flights_processed)    
+    log.info('FAR calculation: Total number of processed hours is %.3f', total_hours_processed)
+   
+    df_matches = _filter_matches_based_on_detection_score(df_matches, min_score)
+
+    num_false_tracks = _calc_num_unique_track_ids_with_no_match_detection(df_matches)
+    far = num_false_tracks / total_hours_processed
+    log.info('FAR = %.5f', far)
+    return far
+
+def _calc_num_detected_intruders(df_matches):
+    """helper to calculate number of detected intruders"""
+    log.info('Calculating the number of intruders that were matched by detections') 
+    if not is_in_df_columns(df_matches, ['detection_id']):
+        # this is an edge case when there are 0 detections
+        return 0
+    assert is_in_df_columns(df_matches, ['detection_id', 'id', 'gt_det_match']), (
+        'One or more of detection_id, id, gt_det_match columns is not found, cannot calculate')
+    
+    # When grouping by 'img_name', 'id' we get all the match results between the specific 
+    # intruder/object (within specific frame) and a group of detections that were evaluated for a match 
+    # "gt_det_match" will be 1 if a detection does match the specific intruder
+    # if maximum of 'gt_det_match' over the group of detections equals 1, it means that at least
+    # one detection matched the intruder and hence the intruder is detected 
+    df_intruders = df_matches[['img_name', 'id', 'gt_det_match']].groupby(
+                                    ['img_name', 'id'])['gt_det_match'].max().reset_index(0)
+    num_detected_intruders = len(df_intruders.query(POSITIVE_DETECTION_QUERY)) 
+    log.info('Detected intruders calculation: Number of detected intruders = %d ', num_detected_intruders)
+    return num_detected_intruders
+
+def compute_probability_of_detection(df_matches, min_score=None):
+    """Compute frame-level PD of valid intruders - useful for frame-level metrics.
+    This function does NOT assume planned intruders 
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results 
+                (typically this is an artifact of match_groundtruth_results.py)
+        min_score: float, minimum detection score to consider for evaluation 
+    Returns:
+        tuple of (float, int, int): frame-level probability of detection, number of 
+                detected intruders, and total number of intruders to detect
+    """
+    tot_objects_to_detect = df_matches.groupby(['img_name', 'id']).ngroups
+    if tot_objects_to_detect == 0:
+        # return the same tuple shape as the regular path so callers can always unpack
+        return 0.0, 0, 0
+    log.info('PD calculation: Number of intruders to detect = %d', tot_objects_to_detect)
+
+    df_matches = _filter_matches_based_on_detection_score(df_matches, min_score)
+
+    # each intruder/object is identified by img_name and id 
+    # gt_det_match will be 1 if there is match between some detection and this object
+    # grouping by 'img_name', 'id' will create a group with all the matches to all the detections
+    # in this frame (img_name) for this specific object 
+    # if there is at least one detection that matches this objects the maximum of gt_det_match equals 1
+    num_matched_objects = _calc_num_detected_intruders(df_matches) 
+    prob_detection = num_matched_objects / tot_objects_to_detect  # not named 'pd' to avoid shadowing pandas
+    log.info('PD = %.3f = %d / %d', prob_detection, num_matched_objects, tot_objects_to_detect)
+    return prob_detection, num_matched_objects, tot_objects_to_detect
+
+def _get_planned_intruders_in_range(df_matches, min_range, max_range=None):
+    """helper to get only planned intruders in range"""
+    if max_range is not None:
+        assert is_in_df_columns(df_matches, ['range_distance_m']), (
+                            'range_distance_m column is not found - cannot filter based on range')
+        df_matches_in_range = df_matches.query(
+                                        '{} <= range_distance_m <= {}'.format(min_range, max_range))
+    else:
+        # evaluate against planned intruders only (i.e., rows where range is a valid number)
+        df_matches_in_range = df_matches.query(PLANNED_INTRUDERS)
+    return df_matches_in_range
+
+def compute_probability_of_detection_of_planned_intruders(df_matches, max_range=None, min_range=0,
+                                                         min_score=None):
+    """Compute frame-level PD of PLANNED intruders - useful for frame-level metrics.
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results 
+                (typically this is an artifact of match_groundtruth_results.py)
+        max_range: float, maximum range of intruder to consider for evaluation, if not provided all
+                planned intruders with valid range are used 
+        min_range: float, minimum range of intruder to consider for evaluation, if not provided all
+                planned intruders with valid range are used 
+        min_score: float, minimum detection score to consider for evaluation 
+    Returns:
+        tuple of (float, int, int), as returned by compute_probability_of_detection
+    """
+    log.info('PD calculation: Intruders Range = [%s, %s]', min_range, max_range)
+    df_matches_in_range = _get_planned_intruders_in_range(df_matches, min_range, max_range)  
+    return compute_probability_of_detection(df_matches_in_range, min_score)
+
+def compute_probability_of_detection_small_objects(df_matches, min_area=DEFAULT_MIN_OBJECT_AREA, 
+                                    min_score=None, obj_query=DEFAULT_OBJECT_OF_INTEREST_QUERY):
+    """Compute frame-level PD of the objects selected by obj_query (by default, non-bird objects 
+    above the minimum area) - useful for frame-level metrics.
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results 
+                (typically this is an artifact of match_groundtruth_results.py)
+        min_area: float, minimum area of intruder to consider for evaluation, 
+                default is DEFAULT_MIN_OBJECT_AREA (200)
+        min_score: float, minimum detection score to consider for evaluation 
+        obj_query: str, pandas query used to select the objects of interest
+    Returns:
+        tuple of (float, int, int), as returned by compute_probability_of_detection
+    """
+    aircraft_area_query = obj_query.format(min_area=min_area)
+    log.info('PD calculation: %s', aircraft_area_query)
+    df_matches_area = df_matches.query(aircraft_area_query)
+    return compute_probability_of_detection(df_matches_area, min_score)
+
+##########################################################
+# Airborne-level PD calculation code
+##########################################################
+
+def get_valid_encounters(df_encounters):
+    return df_encounters.query('is_valid_encounter == True')
+
+def combine_encounter_with_gt_det_matches(df_matches, df_encounters):
+    """Combines two data frames based on encounter identifier
+    Parameters:
+        df_matches: pd.DataFrame, data frame of matches between ground truth and detection results
+        df_encounters: pd.DataFrame, data frame with ground truth and encounter info
+    Returns:
+        pd.DataFrame, combined data frame
+    """
+    cols_to_combine_on = ['flight_id', 'img_name', 'frame', 'id']
+    required_columns_str = ', '.join(cols_to_combine_on) 
+    assert is_in_df_columns(df_matches, cols_to_combine_on), (
+        'One or more out of {} columns is not found in data frame of matches, '
+                                'cannot combine'.format(required_columns_str))
+    assert is_in_df_columns(df_encounters, cols_to_combine_on), (
+        'One or more out of {} columns is not found in data frame of encounters, '
+                                'cannot combine'.format(required_columns_str))
+    df = df_encounters.merge(df_matches, on=cols_to_combine_on, 
+            how='left', suffixes=['_orig', '']).sort_values(by=['encounter_id', 'img_name', 'frame'])
+    return df
+
+def augment_with_moving_frame_level_detection_rate_per_encounter(df_matches, temporal_window):
+    """adds moving frame level detection rate per encounter, based on the provided temporal_window"""
+
+    required_columns_str = ', '.join(['encounter_id', 'gt_det_match']) 
+    assert is_in_df_columns(df_matches, ['encounter_id', 'gt_det_match']), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+    df_matches = df_matches.fillna(value={'gt_det_match': 0})
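+    # rolling(temporal_window).apply(np.mean) yields, for frame k of an encounter, the fraction of
+    # matched frames among the temporal_window rows ending at k (NaN for the first
+    # temporal_window - 1 rows of each encounter)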
+    df_with_moving_fl_dr = df_matches.groupby(['encounter_id'])['gt_det_match'].rolling(
+                                                temporal_window).apply(np.mean).reset_index()
+    df_with_moving_fl_dr = df_with_moving_fl_dr.rename(columns={'gt_det_match': 'fl_dr'})
+    return df_with_moving_fl_dr
+
+def augment_with_moving_most_common_track_id_count_per_encounter(df_matches, temporal_window):
+    """adds moving frame level detection rate per encounter, based on the provided temporal_window"""
+
+    required_columns_str = ', '.join(['encounter_id', 'matched_track_id']) 
+    assert is_in_df_columns(df_matches, ['encounter_id', 'matched_track_id']), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+    
+    def get_most_common_freq(all_track_ids): 
+        all_track_ids_list = []
+        for element in all_track_ids:
+            all_track_ids_list.extend(element)
+        track_id_counter = Counter(all_track_ids_list)
+        most_common = track_id_counter.most_common(2) # taking 2 most common track ids 
+                                        # 2 because one can be -1, and is not counted for detection
+        if most_common[0][0] != -1:
+            most_common_freq = (most_common[0][1] / temporal_window) 
+        else:
+            if len(most_common) > 1:
+                most_common_freq = (most_common[1][1] / temporal_window) 
+            else:
+                most_common_freq = 0
+        return most_common_freq
+
+    def my_rolling_apply_char(frame, window, func):
+        index = frame.index
+        values = [0 if i + 1 - window < 0 else func(frame.iloc[i + 1 - window : i + 1]) for 
+                                                                        i in range(0, len(frame))]
+        return pd.DataFrame(data={'track_fl_dr': values}, index=index).reindex(frame.index)
+
+    df_same_track_id_count = df_matches.groupby(['encounter_id'])
+    
+    df_matched_track_freq = pd.DataFrame(columns=['encounter_id', 'track_fl_dr'])
+    for encounter_name, group in df_same_track_id_count: 
+        df_res = my_rolling_apply_char(group['matched_track_id'], temporal_window, get_most_common_freq)
+        df_res = df_res.assign(encounter_id = encounter_name)
+        df_matched_track_freq = df_matched_track_freq.append(df_res)
+    df_matched_track_freq.index.name = 'frame'
+    return df_matched_track_freq.reset_index()
+
+def augment_with_diff_to_first_frame(df_encounters):
+    """adds difference in frame between each frame in encounter and the first frame of the encounter"""
+    required_columns_str = ', '.join(['frame', 'framemin']) 
+    assert is_in_df_columns(df_encounters, ['frame', 'framemin']), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+
+    diff_to_first_frame = df_encounters['frame'] - df_encounters['framemin']
+    df_encounters = df_encounters.assign(delta_to_min_frame = diff_to_first_frame)
+    return df_encounters
+
+def augment_with_detection_info(df_encounters_info, fl_dr_thresh, use_track_fl_dr=False):
+    """adds maximum moving frame level detection rate to each encounter and if it its above
+    or equal to the provided threshold the detection range and latency are added""" 
+   
+    fl_dr_col = 'fl_dr'
+    if use_track_fl_dr:
+        fl_dr_col = 'track_' + fl_dr_col
+    def calc_detection_info(df):
+        detection_range = np.nan
+        det_latency = np.nan
+        max_fl_dr = df[fl_dr_col].max() 
+        if max_fl_dr >= fl_dr_thresh:
+            first_index_above_thresh = df[df[fl_dr_col].ge(fl_dr_thresh, fill_value=0)].index[0]
+            detection_range = df['range_distance_m'][first_index_above_thresh]
+            det_latency = df['delta_to_min_frame'][first_index_above_thresh]
+        return pd.Series(data=[max_fl_dr, detection_range, det_latency], 
+                        index=['max_fl_dr', 'det_range_m', 'det_latency_frames'])
+    required_cols = ['encounter_id', 'range_distance_m', 'delta_to_min_frame', fl_dr_col]
+    required_columns_str = ', '.join(required_cols) 
+    assert is_in_df_columns(df_encounters_info, required_cols), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+    df_enc_det_info = df_encounters_info.groupby(['encounter_id'])[
+                ['range_distance_m', fl_dr_col, 'delta_to_min_frame']].apply(calc_detection_info)    
+    return df_enc_det_info
+
+def compute_moving_frame_level_detection_rate_per_encounter(df_matches, df_val_encounters, 
+                                                min_score, fl_dr_temporal_win, use_track_fl_dr=False):
+    """Computes moving frame level detection rate per encounter. The detection matches 
+    are counted within the provided fl_dr_temporal_win, which slides across the frames that belong
+    to the encounter. Detections with score less than min_score are filtered out. 
+    The detection rate is calculated only for valid encounters.
+    """
+    required_cols = ['s', 'flight_id', 'img_name', 'frame', 'id', 'gt_det_match']
+    if use_track_fl_dr:
+        required_cols += ['track_id']
+    required_columns_str = ', '.join(required_cols) 
+    assert is_in_df_columns(df_matches, required_cols), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+    assert is_in_df_columns(df_val_encounters, ['encounter_id']), (
+        'encounter_id column is not found, cannot augment')
+    
+    def flatten(list_of_lists):
+        if len(list_of_lists) == 0:
+            return list_of_lists
+        if isinstance(list_of_lists[0], list):
+            return flatten(list_of_lists[0]) + flatten(list_of_lists[1:])
+        return list_of_lists[:1] + flatten(list_of_lists[1:]) 
+    
+    log.info('Thresholding score')
+    below_thresh_det = df_matches['s'] < min_score
+    df_matches.loc[below_thresh_det, 'gt_det_match'] = 0 
+    df_intruders_matches = df_matches.groupby(
+            ['flight_id', 'img_name', 'frame', 'id'], as_index=False)['gt_det_match'].max()
+    
+    num_encounters =  df_val_encounters['encounter_id'].nunique()
+    log.info('Number of encounters to detect %d', num_encounters)
+    df_val_encounters = augment_with_diff_to_first_frame(df_val_encounters)
+    log.info('Combining encounters with results')
+
+    df = combine_encounter_with_gt_det_matches(df_intruders_matches, df_val_encounters)
+    
+    log.info('Grouping data frame with matches to get detection matches per encounter')
+    df_encounters_with_frame_matches = df.groupby(
+                                    ['encounter_id','frame'])['gt_det_match'].max().reset_index(0)
+    log.info('Augmenting with moving frame level detection rate, this might take some time')
+    df_with_moving_fl_dr = augment_with_moving_frame_level_detection_rate_per_encounter(
+                                        df_encounters_with_frame_matches, fl_dr_temporal_win)
+    
+    log.info('Merging frame level detection rate')
+    df = df.merge(df_with_moving_fl_dr, on=['encounter_id','frame'], how='left')    
+
+    if use_track_fl_dr:
+        df_matches = df_matches.assign(matched_track_id = [track_id if is_match else  -1 
+                for track_id, is_match in zip(df_matches['track_id'], df_matches['gt_det_match'])])
+        log.info('Grouping data frame with matches to get matched track_ids per frame and object')
+        df_matched_track_ids = df_matches.groupby(['flight_id', 'img_name', 'frame', 'id'], 
+                                as_index=False)['matched_track_id'].agg(lambda x: flatten(list(x)))
+
+        df2 = combine_encounter_with_gt_det_matches(df_matched_track_ids, df_val_encounters)
+        log.info('Grouping data frame with matches to get matched track_ids per encounter and frame')
+        df_encounters_with_matched_track_ids = df2.groupby(
+            ['encounter_id','frame'])['matched_track_id'].agg(lambda x: flatten(list(x))).reset_index(0)
+        df_with_track_id_count = augment_with_moving_most_common_track_id_count_per_encounter(
+                                            df_encounters_with_matched_track_ids, fl_dr_temporal_win)
+        df2 = df2.merge(df_with_track_id_count, on=['encounter_id','frame'], how='left')
+        df = df.merge(df2[['encounter_id','frame', 'matched_track_id', 'track_fl_dr']], 
+                                            on=['encounter_id','frame'], how='left')
+        # asserting correctness of detection rate calculation based on track_id compared to 
+        # regular fl_dr; track_fl_dr only exists when use_track_fl_dr is set, so the check 
+        # belongs inside this branch
+        assert is_in_df_columns(df, ['fl_dr', 'track_fl_dr']), 'fl_dr or track_fl_dr not found'
+        df_wrong = df.query('fl_dr < track_fl_dr') 
+        assert len(df_wrong) == 0, 'track frame level detection rate is wrong'  
+    return df
+
+def get_encounter_frame_level_info(df):
+    """Provides basic information about encounters:
+    encounter_id - encounter id 
+    flight_id - which flight it belongs to 
+    framemin - first frame of the encounter 
+    framemax - last frame of the encounter 
+    tot_num_frames - total number of frames in encounters (without gaps)
+    num_matched_frames  - number of frames within encounter with matched ground truth
+    is_above_horizon - 1 - above horizon, -1 - below horizon, any value in (-1, 1) is mixed 
+    """
+    required_cols = ['encounter_id', 'flight_id', 'framemin', 'framemax','framecount', 'gt_det_match', 
+                    'frame', 'is_above_horizon']
+    required_columns_str = ', '.join(required_cols) 
+    assert is_in_df_columns(df, required_cols), (
+        'One or more out of {} columns is not found, cannot augment'.format(required_columns_str))
+    
+    df_partial= df[required_cols]
+    df_encounter_frame_level_info = df_partial.groupby('encounter_id').agg(
+                {'flight_id': 'first', 'framemin': 'first', 'framemax': 'first', 'framecount': 'first', 
+                'frame': 'count', 'gt_det_match': 'sum', 'is_above_horizon': 'mean'}).reset_index(0)    
+
+    df_encounter_frame_level_info = df_encounter_frame_level_info.rename(columns={
+                                                            'frame': 'tot_num_frames', 
+                                                            'gt_det_match': 'num_matched_frames'})
+    assert len(df_encounter_frame_level_info.query('tot_num_frames != framecount')) == 0, (
+            'something went wrong: frame counts do not agree')
+    
+    return df_encounter_frame_level_info
+
+def _augment_with_is_encounter_detected_before_range(df_encounter_detections, 
+                                                    temporal_win, ranges_to_detect):
+    """ helper to figure out if encounter is detected based on detection range and latency"""
+    is_detected_fast = df_encounter_detections['det_latency_frames'] < FAST_DETECTION_WINDOW_ #frames
+    for range_to_detect in ranges_to_detect:
+        is_detected_before_this_range = df_encounter_detections['det_range_m'] >= range_to_detect
+        df_encounter_detections = df_encounter_detections.assign(detected_before = 
+                                1 * np.logical_or(is_detected_before_this_range, is_detected_fast))
+        df_encounter_detections = df_encounter_detections.rename(columns={'detected_before':
+                                                    'detected_before_{}'.format(range_to_detect)})
+    return df_encounter_detections
+
+
+def compute_encounter_detections(df_enc_with_fl_dr, min_frame_level_dr, temporal_win, 
+                                    ranges_to_detect=RANGES_TO_DETECT, use_track_fl_dr=False):
+    """Computes if encounter was detected based on provided function is_encounter_detected_func
+    Params:
+        df: pd.DataFrame, data frame with encounters and matches of detection to intruders info
+        min_frame_level_dr: float, minimum frame level rate required for encounter
+        ranges_to_detect: List[float], list of ranges at which to evaluate detection
+    Returns:
+        pd.DataFrame of encounters with information on detection per encounter   
+    """ 
+    assert is_in_df_columns(df_enc_with_fl_dr, ['encounter_id']), ('encounter_id not in data frame, '
+                                                    'cannot calculate encounter detections')
+    log.info('Checking if encounters were detected')
+    df_encounter_detection_info = augment_with_detection_info(df_enc_with_fl_dr, min_frame_level_dr, 
+                                                                                use_track_fl_dr)
+    df_encounter_frame_level_info = get_encounter_frame_level_info(df_enc_with_fl_dr)      
+    df_encounter_info = df_encounter_frame_level_info.merge(df_encounter_detection_info, 
+                                                            on='encounter_id', how='left')
+    df_encounter_detections = _augment_with_is_encounter_detected_before_range(df_encounter_info, 
+                                                                    temporal_win, ranges_to_detect)
+    return df_encounter_detections
+
+def search_score_for_target_func(min_score, max_score, target_func, target_func_val, 
+                                target_func_val_precision):
+    """This function performs a search for a score that receives a certain value of the provided 
+    target function. The search is done using binary search. There is an assumption that the function
+    is monotonical with respect to the scores. The direction is determined based on minimum and middle
+    scores
+    Parameters:
+        min_score: float, minimal score possible
+        max_score: float, maximal score possible
+        target_func: function handler, there is an assumption that the function is monotonical vs. score
+        target_func_val: float, expected value of the function, such that
+             target_func_val @ output_score = target_func_val
+        target_func_val_precision: int, since the search is done over float outputs of target_func_val
+            there is a need to define a precision at which we compare the results
+    Returns:
+        float, output_score such that target_func_val @ output_score = target_func_val 
+        Note that if target_func_val is not reachable the output_score will match the closes function value
+    """
+    min_s = min_score
+    max_s = max_score
+    min_s_func_val = target_func(min_score=min_s)
+    mid_s = min_s + (max_s - min_s) / 2
+    mid_s_func_val = round(target_func(min_score=mid_s), target_func_val_precision)
+    if mid_s_func_val <= min_s_func_val:
+        move_min_to_mid = lambda mid, target: mid > target
+    else:
+        move_min_to_mid = lambda mid, target: mid < target
+    while max_s - min_s > SCORE_PRECISION_:
+        if mid_s_func_val == target_func_val:
+            return mid_s
+        elif move_min_to_mid(mid_s_func_val, target_func_val):
+            min_s = mid_s
+        else:
+            max_s = mid_s
+        mid_s = min_s + (max_s - min_s) / 2
+        mid_s_func_val = round(target_func(min_score=mid_s), target_func_val_precision)
+    return max_s
+    
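+# For illustration, a hypothetical use of the search (mirroring get_working_point_based_on_metrics
+# below): find the score threshold at which FPPI reaches ~0.5, assuming FPPI is monotonic in score:
+#     fppi_func = partial(compute_false_positives_per_image, df_matches)
+#     thresh_score = search_score_for_target_func(0.0, 1.0, fppi_func, 0.5,
+#                                                 DEFAULT_METRICS_VALUE_PRECISION_)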
+def get_working_point_based_on_metrics(df_matches, target_metrics, target_value, target_value_precision,
+                                       min_det_score, max_det_score, max_range):
+    """Determines the score to threshold detections.
+    Parameters:
+        df_matches: pd.DataFrame, data frame with matches between intruders and detections
+        target_metrics: str, what metrics to use to determine the score
+        target_value: float, expected value of the metrics
+        target_value_precision: int, with which precision to calculate the value 
+        min_det_score: float, minimum detection score
+        max_det_score: float, maximum detection score
+    Returns:
+        float, the score to threshold the detections
+    """
+    thresh_score = None 
+    search_score_func = None 
+    if 'far' in target_metrics:
+        search_score_func = partial(compute_false_alarms_per_hour, df_matches)
+    elif 'fppi' in target_metrics:
+        search_score_func = partial(compute_false_positives_per_image, df_matches)
+    elif 'fl_dr' in target_metrics:
+        search_score_func = partial(compute_probability_of_detection_of_planned_intruders, 
+                                    df_matches, max_range=max_range, min_range=0)
+
+    if search_score_func is not None:
+        log.info('%s = %.5f will be used as metrics for score threshold search', target_metrics, 
+                                                                                    target_value)
+        thresh_score = search_score_for_target_func(min_det_score, max_det_score, 
+                                        search_score_func, target_value, target_value_precision)
+    return thresh_score
+
+def get_max_range_based_on_encounters_info(df_encounters):
+    """looks at all the frames with valid encounters and returns their maximum range"""
+    assert is_in_df_columns(df_encounters, ['range_distance_m']), ('range_distance_m not in data frame, '
+                                                                    'cannot calculate maximum range')
+    return get_valid_encounters(df_encounters)['range_distance_m'].max()
+
+def _is_min_score_as_expected(df_matches, expected_min_score):
+    """assert that minimum score in the results is as expected"""
+    assert is_in_df_columns(df_matches, ['s']), ('s not in data frame, cannot check minimum score')
+    min_score_results = df_matches['s'].min()
+    assert min_score_results >= expected_min_score, ('Expected min score = {} is greater than '
+                'minimal score = {} in the results'.format(expected_min_score, min_score_results))
+    return min_score_results
+
+def _summarize_encounter_detection_rate(summary, eval_criteria, num_det_encs, num_total_encs):
+    summary[eval_criteria] = OrderedDict()
+    summary[eval_criteria]['Encounters'] = OrderedDict()
+    for max_range in RANGES_TO_DETECT:
+        summary[eval_criteria]['Encounters'][max_range] = OrderedDict()
+        for num_key, num_value in num_det_encs[max_range].items():
+            if num_total_encs[num_key]:
+                dr_enc = float(num_value / num_total_encs[num_key])
+            else:
+                dr_enc  = 0.0
+            summary[eval_criteria]['Encounters'][max_range][num_key] = {
+                                                        'detected': int(num_value), 
+                                                        'total': int(num_total_encs[num_key]),
+                                                        'dr': dr_enc}
+            log.info('Max. range %d: %s: %d / %d  = %.3f', max_range, num_key, num_value, 
+                                                            num_total_encs[num_key], dr_enc)
+    return summary
+
+####################################################################################################
+# MAIN
+####################################################################################################
+
+def run(flags):
+    log.info('Reading ground truth detection matches from %s', 
+                                    flags.groundtruth_results_matches_filename)
+    df_gt_det_matches = pd.read_csv(flags.groundtruth_results_matches_filename, low_memory=False)
+    
+    min_score_results = _is_min_score_as_expected(df_gt_det_matches, flags.min_det_score)
+    flags.min_det_score = max(min_score_results, flags.min_det_score)
+
+    log.info('Reading ground truth with encounters from %s', 
+                                    flags.encounters_with_groundtruth_filename)
+    df_encounters = pd.read_csv(flags.encounters_with_groundtruth_filename, low_memory=False)
+    df_encounters = exclude_encounters_from_evaluation(df_encounters, flags.min_enc_range_upper_bound,
+                                                      flags.max_enc_range_lower_bound)
+    max_encounter_range = get_max_range_based_on_encounters_info(df_encounters)
+    log.info('Maximum range of encounter is %.2f', round(max_encounter_range, 2))
+    
+    if flags.target_metrics is not None:    
+        log.info('Determining threshold for detection score')
+        if flags.target_value is None:
+            if flags.target_metrics != 'far':
+                raise ValueError('Please provide target value for {}'.format(flags.target_metrics))
+            target_value = TARGET_FAR_PER_HOUR
+        else:
+            target_value = flags.target_value 
+        log.info('Will use {} target value for {} calculation'.format(target_value, flags.target_metrics))
+        thresh_score = get_working_point_based_on_metrics(df_gt_det_matches, flags.target_metrics, 
+            target_value, flags.target_value_precision, flags.min_det_score, flags.max_det_score, 
+                                                                            max_encounter_range)
+    else:
+        log.info('The provided minimum detection score %.5f will be used', flags.detection_score_threshold)           
+        thresh_score = max(flags.min_det_score, flags.detection_score_threshold)
+        
+    log.info('Frame level metrics calculation for score threshold = {}'.format(thresh_score))
+    df_no_dupl_objs = df_gt_det_matches.drop_duplicates(['img_name', 'id'])
+    num_planned = len(df_no_dupl_objs.query(PLANNED_INTRUDERS))
+    num_non_planned = len(df_no_dupl_objs.query(NON_PLANNED_INTRUDERS))
+    num_non_planned_aircraft = len(df_no_dupl_objs.query(NON_PLANNED_AIRCRAFT))
+    far = compute_false_alarms_per_hour(df_gt_det_matches, min_score=thresh_score) 
+    fppi = compute_false_positives_per_image(df_gt_det_matches, min_score=thresh_score)
+    fl_dr_range, num_det_range, num_tot_range = compute_probability_of_detection_of_planned_intruders(
+                df_gt_det_matches, min_score=thresh_score, max_range=max_encounter_range, min_range=0)
+    fl_dr_above_area, num_det_above_area, num_tot_above_area = compute_probability_of_detection_small_objects(
+                        df_gt_det_matches, min_area=flags.min_object_area, min_score=thresh_score, 
+                        obj_query= GT_ABOVE_AREA_QUERY + ' and ' + NOT_BIRD_QUERY)
+    fl_dr_below_area, num_det_below_area, num_tot_below_area = compute_probability_of_detection_small_objects(
+                        df_gt_det_matches, min_area=flags.min_object_area, min_score=thresh_score,
+                        obj_query= GT_BELOW_AREA_QUERY + ' and ' + NOT_BIRD_QUERY)
+
+    df_val_encounters = get_valid_encounters(df_encounters)
+    df_val_encounters_with_fl_dr  = compute_moving_frame_level_detection_rate_per_encounter(df_gt_det_matches, 
+        df_val_encounters, thresh_score, flags.fl_dr_temporal_win, use_track_fl_dr=flags.use_track_fl_dr)
+    df_final_results = compute_encounter_detections(df_val_encounters_with_fl_dr, 
+                    flags.min_fl_dr, flags.fl_dr_temporal_win, use_track_fl_dr=False)
+    if flags.use_track_fl_dr:
+        df_final_results_track_fl_dr = compute_encounter_detections(df_val_encounters_with_fl_dr, 
+                    flags.min_fl_dr, flags.fl_dr_temporal_win, use_track_fl_dr=True)
+
+    # saving results
+    log.info('Saving results')
+    if flags.output_dir_path is None:
+        output_dir = os.path.dirname(flags.groundtruth_results_matches_filename)
+    else:
+        output_dir = flags.output_dir_path
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+   
+    working_point_far_str = str(round(far, DEFAULT_METRICS_VALUE_PRECISION_)).replace('.', '_')
+    if flags.save_intermediate_results:
+        log.info('Saving intermediate results')
+        results_filename = os.path.join(output_dir, 
+            flags.results_name_prefix + '_moving_{}_fl_dr_far_{}.csv'.format(
+                                                flags.fl_dr_temporal_win, working_point_far_str))
+        log.info('Data frame with moving frame level detection rate is saved to %s', results_filename)
+        df_val_encounters_with_fl_dr.to_csv(results_filename)
+
+    results_filename = os.path.join(output_dir, 
+        flags.results_name_prefix + '_moving_{}_fl_dr_{}_encounter_detections_far_{}'.format(
+            flags.fl_dr_temporal_win, str(flags.min_fl_dr).replace('.','p'), working_point_far_str))
+    df_final_results.to_csv(results_filename + '.csv')
+    log.info('Data frame with information on encounter detection is saved to %s.csv', results_filename)
+    df_final_results.to_json(results_filename + '.json', orient='records', lines=True, indent=4)
+    log.info('Data frame with information on encounter detection is saved to %s.json', results_filename)
+    if flags.use_track_fl_dr:
+        # df_final_results_track_fl_dr only exists when --use-track-fl-dr is specified
+        df_final_results_track_fl_dr.to_csv(results_filename + '_tracking.csv')
+        log.info('Data frame with tracking-based encounter detection is saved to %s_tracking.csv',
+                                                                                results_filename)
+    
+    # calculation of metrics
+    log.info('Calculating final summary')
+    num_total_encounters = {}
+    for query_key, query_value in HORIZON_QUERIES.items():
+        num_total_encounters[query_key] = len(df_final_results.query(query_value))
+    num_det_encounters = {}
+    if flags.use_track_fl_dr:
+        num_det_encounters_tracking = {}
+    for det_range in RANGES_TO_DETECT: 
+        num_det_encounters[det_range] = {}
+        if flags.use_track_fl_dr:
+            num_det_encounters_tracking[det_range] = {}
+        for query_key, query_value in HORIZON_QUERIES.items():
+            num_det_encounters[det_range][query_key] = df_final_results.query(query_value)[
+                                                        'detected_before_{}'.format(det_range)].sum()
+            if flags.use_track_fl_dr:
+                num_det_encounters_tracking[det_range][query_key] = df_final_results_track_fl_dr.query(
+                                            query_value)['detected_before_{}'.format(det_range)].sum()
+
+    summary = {} 
+    log.info('Summary')
+    summary['gt_encounters'] = flags.encounters_with_groundtruth_filename
+    summary['gt_det_matches'] = flags.groundtruth_results_matches_filename
+    summary['target_metrics'] = flags.target_metrics
+    summary['target_value'] = flags.target_value
+    summary['min_det_score'] = float(thresh_score)
+    log.info('The minimum detection score is %.3f', thresh_score)
+    summary['fppi'] = float(fppi)
+    log.info('FPPI: %.5f', fppi)
+    summary['far'] = float(far)
+    log.info('HFAR: %.5f', far)
+    summary['num_planned_intruders'] = int(num_planned)
+    summary['num_non_planned_intruders'] = int(num_non_planned)
+    summary['num_non_planned_aircraft'] = int(num_non_planned_aircraft)
+    log.info('Planned Aircraft: %d', num_planned)
+    log.info('Non-Planned Airborne: %d', num_non_planned)
+    log.info('Non-Planned Aircraft: %d', num_non_planned_aircraft)
+    tot_aircraft = num_non_planned_aircraft + num_planned
+    log.info('All Aircraft: %d', tot_aircraft)
+    summary['max_range'] = float(max_encounter_range)
+    summary['tot_aircraft_in_range'] = int(num_tot_range)
+    summary['det_aircraft_in_range'] = int(num_det_range)
+    summary['fl_dr_in_range'] = float(fl_dr_range)
+    log.info('AFDR, aircraft with range <= %.2f: %.5f = %d / %d', 
+                                     max_encounter_range, fl_dr_range, num_det_range, num_tot_range)
+    tot_aircraft_included_in_fl_dr_area = num_tot_above_area + num_tot_below_area
+    assert tot_aircraft == tot_aircraft_included_in_fl_dr_area, (
+     'Expected number of aircraft is {}, but got {} '.format(tot_aircraft, 
+                                                            tot_aircraft_included_in_fl_dr_area))
+    summary['thresh_area'] = float(flags.min_object_area)
+    summary['tot_aircraft_above_area'] = int(num_tot_above_area)
+    summary['det_aircraft_above_area'] = int(num_det_above_area)
+    summary['fl_dr_above_area'] = float(fl_dr_above_area)
+    log.info('AFDR, aircraft with area > %d: %.5f = %d / %d', 
+                    flags.min_object_area, fl_dr_above_area, num_det_above_area, num_tot_above_area)
+    summary['tot_aircraft_below_area'] = int(num_tot_below_area)
+    summary['det_aircraft_below_area'] = int(num_det_below_area)
+    summary['fl_dr_below_area'] = float(fl_dr_below_area)
+    log.info('AFDR, aircraft with area <= %d: %.5f = %d / %d', 
+                    flags.min_object_area, fl_dr_below_area, num_det_below_area, num_tot_below_area)
+    log.info('Detected Encounters based on Detections: ')
+    summary = _summarize_encounter_detection_rate(summary, 'Detection', num_det_encounters, 
+                                                                        num_total_encounters)
+    if flags.use_track_fl_dr:
+        log.info('Detected Encounters based on Tracking: ')
+        summary = _summarize_encounter_detection_rate(summary, 'Tracking', num_det_encounters_tracking, 
+                                                                            num_total_encounters)
+    summary_json = os.path.join(output_dir, 'summary_far_{}_min_intruder_fl_dr_{}_in_win_{}.json'.format(
+            working_point_far_str, str(flags.min_fl_dr).replace('.','p'), flags.fl_dr_temporal_win))
+    log.info('Saving summary to %s', summary_json)
+    with open(summary_json, 'w') as fj:
+        json.dump(summary, fj, indent=4)
+
+    return far, summary_json
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Calculates airborne metrics given encounters')
+    add_flags(parser)
+    parser.add_argument('--log-level', default=logging.getLevelName(logging.INFO), 
+                                                    help='Logging verbosity level')
+    args = parser.parse_args()
+    setup_logging(args.log_level)    
+    check_flags(args)
+    run(args)
+
diff --git a/core/metrics/airborne_metrics/calculate_encounters.py b/core/metrics/airborne_metrics/calculate_encounters.py
new file mode 100644
index 0000000000000000000000000000000000000000..d57cedc1cd7bdba235c514f246b82574cfbeb020
--- /dev/null
+++ b/core/metrics/airborne_metrics/calculate_encounters.py
@@ -0,0 +1,323 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+""" Encounter calculation script
+INPUT: ground truth json or csv (preferred) file. 
+IMPLEMENTED ALGORITHM:
+The algorithm for calculating the encounters is based on a data frame representation: 
+1. Sort all the frames by 'flight_id', 'id', 'time', 'frame'
+2. Find difference between consecutive frames
+3. Find change in flights (if the data frame contains more than one flight)
+4. Label all frames with encounter id, switch encounter id if:
+    4.1 Frame difference is above the maximum gap permitted
+    OR
+    4.2 Flight changed (note 4.1 might cover this as well, but we would like to avoid edge cases in which
+    by chance one flight ends with frame = n and the next flight starts with frame = n+1)
+5. Find the length of each encounter and filter out those below the minimum valid length; the rest are the valid encounters
+Note: this implementation keeps things simple by allowing gaps at the beginning of an encounter.
+We allow only small gaps of at most 3 consecutive frames, so that should not be an issue.
+
+OUTPUT: 
+Data frame (saved as .csv) with original ground truth information and added encounter information. 
+Data frame (saved as .csv) with only VALID encounters' information (Valid encounters have pre-defined length)
+JSON file with only valid encounters' information  - records of encounters. 
+
+Encounter information includes:
+encounter ID, flight ID, list of image names that correspond to this encounter,
+minimum (first) frame, maximum (last) frame, count of valid frames, length of the encounter with gaps
+"""
+
+import argparse
+from collections import OrderedDict
+from functools import partial
+import json
+import logging
+import numpy as np
+import os
+import pandas as pd
+
+from airborne_metrics.pandas_utils import is_in_df_columns, get_deeplearning_groundtruth_as_data_frame 
+from airborne_metrics.script_utils import setup_logging, assert_file_format
+#############################################
+# Defaults
+###############################################
+DEFAULT_MAX_GAP_ = 3
+DEFAULT_MIN_LENGTH_ = 30
+DEFAULT_MAX_RANGE_M = 700 # encounters will only contain intruders with maximal range of 700 m.
+DEFAULT_MIN_DETECTION_RANGE_M = 300 
+ENC_END_RANGE_SCALE = 1.1
+
+RANGE_INFO_FUNCS_ = ['min', 'max', 'median', 'mean'] # these functions will be used to describe 
+                                                      # encounters based on the intruder's range
+ENC_RANGE_DESCRIPTORS = ['{}_enc_range'.format(func) for func in RANGE_INFO_FUNCS_]
+ENC_FRAME_DESCRIPTORS = ['flight_id', 'framemin', 'framemax', 'framecount', 'enc_len_with_gaps']
+ENC_OTHER_DESCRIPTORS = ['is_above_horizon']    
+ENC_DESCRIPTORS = ENC_RANGE_DESCRIPTORS + ENC_FRAME_DESCRIPTORS + ENC_OTHER_DESCRIPTORS     
+ENC_RANGE_INFO_REDUCER = 'first'
+ENC_FRAME_INFO_REDUCER = 'first'
+ENC_OTHER_INFO_REDUCER = np.mean                       
+##############################################
+# Script related code
+##############################################
+log = logging.getLogger(__name__)
+
+def add_flags(parser):
+    """Utility function adding command line arguments to the parser"""
+    parser.add_argument('--deeplearning-groundtruth', '-g', required=True, 
+                        help='Path to the ground truth file '
+                        '(consider providing the groundtruth in .csv format)')
+    parser.add_argument('--output-dir-path', '-o', required=True,
+                        help='Desired path for the output data frame with encounters')
+    parser.add_argument('--max-range', '-r', type=int, default=DEFAULT_MAX_RANGE_M, 
+                        help='Maximum range of an intruder in valid encounter')
+    parser.add_argument('--min-valid-encounter-length', type=int, default=DEFAULT_MIN_LENGTH_,
+                        help='Minimum number of frames with valid ground truth in valid encounter '
+                        '(default: 30)')
+    parser.add_argument('--max-gap-allowed', type=int, default=DEFAULT_MAX_GAP_,
+                        help='Maximum size of a gap in frames with valid ground truth allowed '
+                        'in a valid encounter (default: 3)')
+    parser.add_argument('--min-enc-range-upper-bound', type=float,  
+                        default=ENC_END_RANGE_SCALE * DEFAULT_MIN_DETECTION_RANGE_M,
+                        help='The minimum range of the encounter should not be greater than this value')
+  
+def check_flags(flags):
+    """Utility function to check the input"""
+    log.info('Asserting %s format', flags.deeplearning_groundtruth)
+    assert_file_format(flags.deeplearning_groundtruth)
+    assert not os.path.isfile(flags.output_dir_path), ('Directory name is expected as output path, '
+                                                      'received {}'.format(flags.output_dir_path))
+##################################################
+# Encounter finder + information calculation code 
+##################################################
+def augment_with_frame_difference(df):
+    """add a column to the data frame df with frame differences
+    this function assumes that df is sorted with respect to frames"""
+    assert is_in_df_columns(df, ['frame']), (
+                'frame column is missing, cannot augment with frame difference')
+    frame_diff = df['frame'].diff().fillna(0)
+    df = df.assign(frame_diff = frame_diff) 
+    return df 
+
+def augment_with_flight_change(df):
+    """add a column to the data frame df with 1 one flight changes and 0 otherwise
+    this function assumes that df is sorted with respect to flights""" 
+    assert is_in_df_columns(df, ['flight_id']), (
+                'flight_id column is missing, cannot augment with flight change')
+    # enumerate different flights
+    df = df.assign(flights_code = pd.Categorical(df['flight_id']).codes)
+    # calculate differences between subsequent flight codes
+    # for the first flight we will get NaN, which is set to 0 with fillna 
+    flight_diff = df['flights_code'].diff().fillna(0) 
+    df = df.assign(flight_changed = flight_diff)
+    return df
+
+def augment_with_encounter_switch(max_gap):
+    def augment_with_encounter_switch_with_max_gap(df):
+        """add a column to the data frame df with 1 when encounter index 
+        should be switched and 0 otherwise
+        """
+        assert is_in_df_columns(df, ['frame_diff', 'flight_changed']), (
+          'frame_diff and  flight_changed columns are missing, cannot augment with encounter switch')
+        switch_encounter_index = [(frame_diff < 0) or (frame_diff > max_gap) or (flight_changed == 1) 
+                      for frame_diff, flight_changed in zip(df['frame_diff'], df['flight_changed'])]
+        df = df.assign(switch_enc_index = switch_encounter_index) 
+        return df
+    return augment_with_encounter_switch_with_max_gap
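+# Usage sketch (illustration only, values are hypothetical): with max_gap = 3,
+#   df = pd.DataFrame({'frame_diff': [0, 1, 7, 1], 'flight_changed': [0, 0, 0, 1]})
+#   augment_with_encounter_switch(3)(df)['switch_enc_index']
+# yields [False, False, True, True]: row 2 exceeds the gap, row 3 changes flight.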
+
+def augment_encounters_with_frame_info(df_encounters):
+    """add 4 columns to a dataframe of encounters with: 1)the first frame, 2) the last frame,
+    3) the count of frames with valid ground truth, 4) the total length/ duration of encounters with 
+    gaps 
+    """
+    assert is_in_df_columns(df_encounters, ['encounter_id', 'frame']), ('encounter_id and frame '
+                  'columns are missing, cannot augment with encounter minimum and maximum frame')
+    # Next we group all the rows that correspond to the same encounter and calculate the minimum 
+    # and maximum frames
+    # those are the first and last frames of each encounter
+    # we also calculate the length in frames - only valid frames and all frames
+    df_agg_by_enc_len = df_encounters[['encounter_id', 'frame']].groupby('encounter_id').agg(
+                                                  {'frame': ['min', 'max' ,'count']}).reset_index(0)
+    # the data frame will have hierarchical headers, so we concatenate them for convenience. 
+    df_agg_by_enc_len.columns = list(map(''.join, df_agg_by_enc_len.columns.values))
+    # we also calculate the actual length of the encounter with gaps (that is including frames with 
+    # missing ground truth)
+    df_agg_by_enc_len = df_agg_by_enc_len.assign(enc_len_with_gaps = 
+                                  df_agg_by_enc_len['framemax'] - df_agg_by_enc_len['framemin'] + 1)
+    df_encounters = df_encounters.merge(df_agg_by_enc_len, on=['encounter_id'], how='left')
+    return df_encounters
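+# Illustration (hypothetical values): an encounter covering frames [5, 6, 9]
+# aggregates to framemin = 5, framemax = 9, framecount = 3 and
+# enc_len_with_gaps = 9 - 5 + 1 = 5, i.e. two frames inside the span lack
+# valid ground truth.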
+
+def augment_encounters_with_range_info(df_encounters):
+    """add aggregated range per encounter, where aggregation is done
+    based on the provided aggregation_func_name"""
+    assert is_in_df_columns(df_encounters, ['encounter_id', 'range_distance_m']), ('encounter_id '
+        'and frame columns are missing, cannot augment with encounter minimum and maximum frame')
+    df_agg_by_enc_range = df_encounters[['encounter_id','range_distance_m']].groupby(
+                'encounter_id').agg({'range_distance_m': RANGE_INFO_FUNCS_}).reset_index(0)
+    # arrange proper name for the column with aggregated range
+    df_agg_by_enc_range.columns = list(map(''.join, df_agg_by_enc_range.columns.values))
+    for func in RANGE_INFO_FUNCS_:
+      df_agg_by_enc_range = df_agg_by_enc_range.rename(columns={
+          'range_distance_m{}'.format(func): '{}_enc_range'.format(func)})
+    df_encounters = df_encounters.merge(df_agg_by_enc_range, on=['encounter_id'], how='outer')
+    return df_encounters
+
+def get_valid_encounters_info(df_encounters):
+    """This function returns information for valid encounters including:
+    df_encounter_info: DataFrame, encounter information with respect to descriptors
+    df_encounter_images: DataFrame, all the images that belong to a specific encounter
+    df_encounter_stats: DataFrame, statistics of the encounter descriptors 
+    """
+    assert is_in_df_columns(df_encounters, ['is_valid_encounter']), (
+        'is_valid_encounter column is missing, cannot provide valid encounter information')
+    # filter only valid encounters 
+    df_valid_encounters = df_encounters.query('is_valid_encounter == True')
+    # group by encounter id and calculate for each encounter its descriptors.
+    agg_funcs = OrderedDict() 
+    for frame_col in ENC_FRAME_DESCRIPTORS:
+      agg_funcs.update({frame_col: ENC_FRAME_INFO_REDUCER})
+    for frame_col in ENC_OTHER_DESCRIPTORS:
+      agg_funcs.update({frame_col: ENC_OTHER_INFO_REDUCER})
+    for range_col in ENC_RANGE_DESCRIPTORS:
+      agg_funcs.update({range_col: ENC_RANGE_INFO_REDUCER})
+    df_encounter_info = df_valid_encounters.groupby(['encounter_id'])[ENC_DESCRIPTORS].agg(
+                                                                          agg_funcs).reset_index(0)
+    df_encounter_images = df_valid_encounters.groupby(['encounter_id'])[['img_name']].agg(
+                                                                  lambda x: list(x)).reset_index(0)
+    df_encounter_stats = df_encounter_info[ENC_FRAME_DESCRIPTORS + ENC_RANGE_DESCRIPTORS].describe()
+    return df_encounter_info, df_encounter_images, df_encounter_stats
+
+def augment_with_encounters(df_planned_intruders, min_valid_encounter_length, 
+                            max_gap_allowed, encounters_augmentations):
+    """add encounters' information to a data frame df_intruders 
+    Note: assumption is that df_planned_intruders contains only frames with planned intruders in the 
+    relevant range
+    """    
+    # preprocessing to figure out when encounters happen 
+    df_intruders_sorted = df_planned_intruders.sort_values(by=
+                                                  ['flight_id', 'id','time', 'frame'])
+    pre_processing_augmentations=[augment_with_frame_difference, augment_with_flight_change, 
+                                  augment_with_encounter_switch(max_gap_allowed)]
+    for augmentation in pre_processing_augmentations:
+        df_intruders_sorted = augmentation(df_intruders_sorted)
+    
+    # rolling over each row in the data frame and enumerating encounters
+    def enumerate_encounters_starting_from(ind_to_assign):
+        index_to_assign = ind_to_assign
+        def label_encounter(x):    
+            nonlocal index_to_assign
+            index_to_assign += (1 if x.values[0] else 0)
+            return index_to_assign
+        return label_encounter
+
+    encounter_ids = df_intruders_sorted['switch_enc_index'].rolling(window=1).apply(
+                                                            enumerate_encounters_starting_from(0))
+    df_intruders_with_encounters = df_intruders_sorted.assign(encounter_id = encounter_ids)
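+    # e.g. switch_enc_index = [False, False, True, False, True] enumerates to
+    # encounter_id = [0, 0, 1, 1, 2]: the counter increments on every switch row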
+    
+    # adding additional information to encounters like length of the encounters, first and last frame etc.
+    for augmentation in encounters_augmentations:
+        df_intruders_with_encounters = augmentation(df_intruders_with_encounters)
+
+    # valid encounters are those whose number of frames with ground truth is at least the minimum pre-defined length
+    df_intruders_with_encounters = df_intruders_with_encounters.assign(
+      is_valid_encounter = df_intruders_with_encounters['framecount'] >= min_valid_encounter_length)
+    
+    # final sorting
+    df_intruders_with_encounters = df_intruders_with_encounters.sort_values(
+                                by=['flight_id', 'encounter_id','time', 'frame']).reset_index(0)
+    
+    return df_intruders_with_encounters
+
+def exclude_encounters_from_evaluation(df_encounters, min_enc_range_upper_bound, 
+                                                      max_enc_range_lower_bound = None):
+    """This function excludes encounters from evaluation based on provided range for detection. 
+    Encounters that do not comply with the conditions below are marked as invalid.
+    An encounter is considered invalid for evaluation given a specific range_for_detection if:
+    1) Encounter does not get close enough to the camera: 
+    min_enc_range > min_enc_range_upper_bound 
+    2) Encounter does not start at range that allows detection. The assumption here is that 
+    an airborne alert requires a detection within a temporal segment of 3 secs 
+    and given intruders with velocity = 60 m/s that will require the intruder to appear at least 180m 
+    before the range at which the detection is required.
+    max_enc_range < max_enc_range_lower_bound (by default we do not apply this condition)
+    """ 
+    assert is_in_df_columns(df_encounters, ['min_enc_range', 'max_enc_range']), ('min_enc_range or '
+        'max_enc_range columns are missing, cannot exclude encounters based on range')
+    
+    min_range_too_far_away = df_encounters['min_enc_range'] > min_enc_range_upper_bound
+    df_encounters.loc[min_range_too_far_away, 'is_valid_encounter'] = False
+    
+    if max_enc_range_lower_bound is not None:
+      max_range_too_close = df_encounters['max_enc_range'] < max_enc_range_lower_bound
+      df_encounters.loc[max_range_too_close, 'is_valid_encounter'] = False
+    return df_encounters
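+# Example (hypothetical numbers): with the default min_enc_range_upper_bound of
+# 1.1 * 300 = 330 m, an encounter whose min_enc_range is 450 m never comes close
+# enough to the camera and is marked with is_valid_encounter = False.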
+
+####################################################################################################
+# MAIN
+####################################################################################################
+def run(flags):    
+    if not os.path.isdir(flags.output_dir_path):
+      os.makedirs(flags.output_dir_path)
+    # read the ground truth                         
+    df_gt = get_deeplearning_groundtruth_as_data_frame(flags.deeplearning_groundtruth)
+    flight_id = os.getenv('FLIGHT_ID')
+    if flight_id is not None:
+      df_gt = df_gt.query('flight_id == "{}"'.format(flight_id))
+      groundtruth_csv = os.path.join(flags.output_dir_path, 'groundtruth.csv')
+      log.info('Save groundtruth in .csv format to %s', groundtruth_csv) 
+      df_gt.to_csv(groundtruth_csv)
+    elif (flags.deeplearning_groundtruth.endswith('.json') 
+        or flags.deeplearning_groundtruth.endswith('.json.gz')):
+      log.info('Saving groundtruth in .csv format, please use .csv in the future') 
+      df_gt.to_csv(flags.deeplearning_groundtruth.replace('.json', '.csv').replace('.gz', ''))
+
+    # filter to get only ground truth with intruders in the specific range
+    log.info('Filtering ground truth to get intruders in the specified range <= %.2fm.', 
+                                                                                    flags.max_range)
+    df_gt_in_range = df_gt.query('range_distance_m <= {}'.format(flags.max_range))
+
+    # add encounters to the ground truth
+    log.info('Finding encounters and adding their information to the ground truth')
+    encounters_augmentations = [augment_encounters_with_frame_info, 
+                                augment_encounters_with_range_info]    
+
+    df_gt_with_encounters_in_range = augment_with_encounters(df_gt_in_range, 
+                                        min_valid_encounter_length=flags.min_valid_encounter_length,
+                                        max_gap_allowed=flags.max_gap_allowed,
+                                        encounters_augmentations=encounters_augmentations)
+    
+    # save provided ground truth with the added encounters as data frame in .csv format
+    groundtruth_encounters_df_filename = (os.path.join(flags.output_dir_path, 
+          'groundtruth_with_encounters_maxRange{}_maxGap{}_minEncLen{}.csv'.format(flags.max_range,
+                                          flags.max_gap_allowed, flags.min_valid_encounter_length)))
+    log.info('Saving ground truth + encounters dataframe to %s', groundtruth_encounters_df_filename)
+    df_gt_with_encounters_in_range.to_csv(groundtruth_encounters_df_filename) 
+
+    df_gt_with_encounters_in_range = exclude_encounters_from_evaluation(df_gt_with_encounters_in_range, 
+                                                                    flags.min_enc_range_upper_bound)
+    # save encounters' information as data frame in .csv format
+    df_encounters_info, df_encounters_images, df_encounter_stats = get_valid_encounters_info(
+                                                                      df_gt_with_encounters_in_range)
+    encounters_info_filename = os.path.join(flags.output_dir_path, 
+                          'valid_encounters_maxRange{}_maxGap{}_minEncLen{}'.format(flags.max_range,
+                                                                    flags.max_gap_allowed,
+                                                                    flags.min_valid_encounter_length))
+    log.info('Saving only valid encounters info dataframe to %s', encounters_info_filename + '.csv')
+    df_encounters_info.to_csv(encounters_info_filename + '.csv') 
+    df_encounter_stats.to_csv(encounters_info_filename + '_stats.csv')
+    # save encounters' information in .json format
+    log.info('Saving only valid encounters info in json format to %s', 
+                                                                  encounters_info_filename + '.json')
+    df_encounters_info = df_encounters_info.merge(df_encounters_images, on='encounter_id', how='left')
+    df_encounters_info.to_json(encounters_info_filename + '.json', 
+                               orient='records', lines=True, indent=4)
+    return groundtruth_encounters_df_filename
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Calculates encounters from the groundtruth')
+    add_flags(parser)
+    parser.add_argument('--log-level', 
+                        default=logging.getLevelName(logging.INFO), help='Logging verbosity level')
+
+    args = parser.parse_args()
+
+    setup_logging(args.log_level)
+    check_flags(args)
+    run(args)
diff --git a/core/metrics/airborne_metrics/match_groundtruth_results.py b/core/metrics/airborne_metrics/match_groundtruth_results.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eeae72d1c619e4d66598dd7ba9654df9e332790
--- /dev/null
+++ b/core/metrics/airborne_metrics/match_groundtruth_results.py
@@ -0,0 +1,578 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+""" This module performs matching between detection results and ground truth
+using a pandas outer merge between the ground truth and the detection results 
+data frames.
+All ground truth entries with a given image name are merged with all the 
+detection results for the same image name, creating all the possible 
+combinations. Matching is based on extended IoU, which alleviates the extra 
+sensitivity of small objects to IoU by extending all small objects and the 
+evaluated detection results to have a minimum area specified in pixels 
+(while maintaining the original aspect ratio).
+For comparison, the original IoU is calculated and saved as well.
+
+INPUT:
+ground truth
+results 
+NOTE: the matching will not be performed if there are images in results that
+do not appear in the ground truth
+
+OUTPUT:
+Data frame - outer join of groundtruth and detection results with 
+match/no match between those based 
+on the chosen matching function and thresholds  
+"""
+import argparse
+from functools import partial
+import json
+import logging
+import numpy as np
+import os
+import pandas as pd
+
+from airborne_metrics.pandas_utils import is_in_df_columns, get_deeplearning_groundtruth_as_data_frame, get_results_as_data_frame
+from airborne_metrics.script_utils import setup_logging, assert_file_format
+
+#############################################
+# Defaults
+##############################################
+TWO_LINES_TOL_ = 10 # pixels
+IS_MATCH_MAX_DISTANCE_ = 10
+LARGE_DIST_ = np.iinfo(np.int32(10)).max
+EPSILON_ = 1e-6
+MIN_OBJECT_AREA_ = 100 # pixels
+DEFAULT_IOU_IS_MATCH_ = 0.2
+DEFAULT_IOU_IS_NO_MATCH_ = 0.02
+MAX_FRAMES_PER_FLIGHT = 1200
+MIN_TRACK_LEN_ = 0
+
+# Columns 
+DETECTION_BBOX_COLS_ = ['det_right', 'det_left', 'det_bottom', 'det_top']  
+GROUNDTRUTH_BBOX_COLS_ = ['gt_right', 'gt_left', 'gt_bottom', 'gt_top'] 
+RESULTS_NAME_PREFIX_ = 'gt_det'
+##############################################
+# Script related code
+##############################################
+log = logging.getLogger(__name__)
+
+def add_flags(parser):
+    """Utility function adding command line arguments to the parser"""
+    # input files 
+    parser.add_argument('--deeplearning-groundtruth', '-g', required=True, 
+                        help='Path to the ground truth .json or .csv file' 
+                        ' (consider providing the groundtruth in .csv format)')
+    parser.add_argument('--airborne-classifier-results', '-r', required=True, 
+                        help='Path to the detection results .json or .csv file')
+    # output  
+    parser.add_argument('--output-dir-path', '-o', 
+                        help='Desired folder to save the output data frame with '
+                        'match/no match between groundtruth and detections')
+    parser.add_argument('--results-name-prefix', type=str, default=RESULTS_NAME_PREFIX_,
+                        help='Prefix for results filename')
+    # matching algorithm and its parameters
+    parser.add_argument('--extend-small-detections', '-e', action='store_true', 
+                        help='Specify if the airborne classifier detection results '
+                        'should be extended to minimum area')
+    parser.add_argument('--minimum-object-area', '-a', default=MIN_OBJECT_AREA_, type=int,
+                        help='Minimum object area, specify if ground truth '
+                        'and detections need to be extended to have this minimum area')
+    parser.add_argument('--is-match-threshold', '-p', type=float, default=DEFAULT_IOU_IS_MATCH_,
+                        help='Threshold for ground truth and detection '
+                        'to be considered a "match"')
+    parser.add_argument('--is-no-match-threshold', '-n', type=float, default=DEFAULT_IOU_IS_NO_MATCH_,
+                        help='Threshold for ground truth and detection '
+                        'to be considered a "no match"')
+    # detection filtering
+    parser.add_argument('--detection-score-threshold', '-t', type=float, default=0.0,
+                        help='Threshold for filtering detections before matching')
+    parser.add_argument('--min-track-len', type=int, default=MIN_TRACK_LEN_,
+                         help='Minimum length of track to include in results')
+
+def _assert_non_negative_threshold(threshold):
+    """assertion helper"""
+    assert threshold >= 0.0, 'Threshold for matching algorithm is expected to be non-negative'
+
+def check_flags(flags):
+    """Utility function to check the input"""
+    assert_file_format(flags.deeplearning_groundtruth)
+    assert_file_format(flags.airborne_classifier_results)
+    _assert_non_negative_threshold(flags.is_match_threshold)
+    if flags.is_no_match_threshold is not None: 
+        _assert_non_negative_threshold(flags.is_no_match_threshold)
+    assert flags.output_dir_path is None or not os.path.isfile(flags.output_dir_path), (
+                                                        'Directory name is expected as output path')
+
+##################################################
+# Groundtruth - detections matching code
+##################################################
+def _limit_bbox_to_image_size(df, prefix):
+    # """helper function to bring bounding box values within image size limits)"""
+    df['{}_top'.format(prefix)].clip(0, df['size_height'] - 1, inplace=True)
+    df['{}_bottom'.format(prefix)].clip(0, df['size_height'] - 1, inplace=True)
+    df['{}_left'.format(prefix)].clip(0, df['size_width'] - 1, inplace=True)
+    df['{}_right'.format(prefix)].clip(0, df['size_width'] - 1, inplace=True)
+    return df
+
+def augment_with_detection_top_bottom_left_right(df):
+    """ Add 4 columns to the data frame that correspond to 
+    top (y1), bottom(y2), left(x1), right(x2) of the detection 
+    bounding box
+    """
+    required_columns = ['x', 'y', 'h', 'w']
+    required_columns_str = ', '.join(required_columns) 
+    assert is_in_df_columns(df, required_columns), (
+        'One or more out of {} columns is not found, '
+        'cannot perform augmentation with bounding box'.format(required_columns_str))
+    half_height = df['h'] / 2
+    half_width = df['w'] / 2
+    df = df.assign(det_top = df['y'] - half_height)
+    df = df.assign(det_bottom = df['y'] + half_height)
+    df = df.assign(det_left = df['x'] - half_width)
+    df = df.assign(det_right = df['x'] + half_width)
+    return df
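+# Illustration (hypothetical box): a detection with center x = 50, y = 40 and
+# w = 20, h = 10 becomes det_left = 40, det_right = 60, det_top = 35,
+# det_bottom = 45.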
+
+def _calc_bbox_area(df, prefix):
+    return ((df['{}_right'.format(prefix)] - df['{}_left'.format(prefix)]) 
+           * (df['{}_bottom'.format(prefix)] - df['{}_top'.format(prefix)]))
+      
+def augment_with_detection_area(df):
+    """Augments data frame with detection area"""
+    required_columns_str = ', '.join(DETECTION_BBOX_COLS_) 
+    assert is_in_df_columns(df, DETECTION_BBOX_COLS_), (
+        'One or more out of {} columns is not found, '
+                        'cannot calculate area'.format(required_columns_str))
+    df = df.assign(det_area = _calc_bbox_area(df, 'det'))
+    return df
+
+def augment_with_groundtruth_area(df):
+    """Augments data frame with ground truth area"""
+    required_columns_str = ', '.join(GROUNDTRUTH_BBOX_COLS_) 
+    assert is_in_df_columns(df, GROUNDTRUTH_BBOX_COLS_), (
+        'One or more out of {} columns is not found, '
+                        'cannot calculate area'.format(required_columns_str))
+
+    df = df.assign(gt_area = _calc_bbox_area(df, 'gt'))
+    return df
+
+def augment_with_iou(df):
+    """Augments data frame with iou between the detection and groundtruth"""
+    required_columns = GROUNDTRUTH_BBOX_COLS_ + DETECTION_BBOX_COLS_ + ['det_area', 'gt_area'] 
+    required_columns_str = ', '.join(required_columns) 
+    assert is_in_df_columns(df, required_columns), (
+        'One or more out of {} columns is not found, '
+                     'cannot calculate iou'.format(required_columns_str))
+    df['iou'] = 0
+    ix_min = df[['det_left', 'gt_left']].max(axis=1)
+    iy_min = df[['det_top', 'gt_top']].max(axis=1)
+    ix_max = df[['det_right', 'gt_right']].min(axis=1)
+    iy_max = df[['det_bottom', 'gt_bottom']].min(axis=1)
+
+    iw = np.maximum(ix_max - ix_min, 0.)
+    ih = np.maximum(iy_max - iy_min, 0.)
+ 
+    intersections = iw * ih
+    unions = (df['det_area'] + df['gt_area'] - intersections)
+
+    ious = intersections / unions
+    ious[unions < 1e-12] = 0
+    # the iou is set to zero for frames where there is no ground truth (ground truth area is NaN)
+    # and there is a detection (detection area is not NaN)
+    # if there is no detection in a frame the iou will be NaN
+    ious[df['gt_area'].isnull() & df['det_area'].notnull()] = 0
+    df = df.assign(iou = ious)
+    return df
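+# Worked example (hypothetical boxes): a 10x10 detection at (0, 0)-(10, 10) and
+# a 10x10 ground truth at (5, 5)-(15, 15) intersect in a 5x5 = 25 px area;
+# the union is 100 + 100 - 25 = 175, so iou = 25 / 175 ~= 0.143.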
+
+def _augment_with_match_no_match(df, is_match, is_no_match):
+    assert not np.any(is_match & is_no_match), (
+        'the same combination of ground truth and detection cannot be both match and no match')
+    df['gt_det_match'] = 0
+    df['gt_det_no_match'] = 0
+    df.loc[is_match, 'gt_det_match'] = 1 
+    df.loc[is_no_match, 'gt_det_no_match'] = 1
+    return df 
+
+def augment_with_iou_match(df, is_match_min_iou, is_no_match_max_iou=None):
+    """Augments the data frame with match/ no match based on the iou"""
+    assert is_in_df_columns(df, ['iou', 'Exists']), (
+        'One or more out of iou, Exists columns is not found, ' 
+                                'cannot perform assignment of match/ no match')
+    if is_no_match_max_iou is None:
+        is_no_match_max_iou = is_match_min_iou
+    log.info('IoU matching: match minimum iou = %.2f, and no match '
+            'maximum iou = %.2f ', is_match_min_iou, is_no_match_max_iou)
+    
+    # the detection and ground truth are matched if the iou is above a certain threshold
+    is_match = df['iou'] >= is_match_min_iou
+    # the detection and ground truth are NOT matched, if there is a detection 
+    # (the condition df['Exists'] != 'left_only' checks that there is a detection in a frame) 
+    # and its IOU to ground truth is below a certain threshold
+    is_no_match = (df['Exists'] != 'left_only') & (df['iou'] < is_no_match_max_iou)
+    df = _augment_with_match_no_match(df, is_match, is_no_match)
+    return df
+   
+def augment_with_detection_id(df_results):
+    """Enumerates the data frame of results with detection id""" 
+    df_results = df_results.assign(detection_id = list(range(len(df_results))))
+    return df_results
+
+def _extend_bounding_boxes(orig_box_width, orig_box_height, min_box_area):
+    """Helper function: extends small bounding boxes to have the specified minimum object area, 
+    while maintaining original aspect ratio.
+    Note this function assumes all the provided boxes have area less than the minimum area
+    Formula: 
+    1) new_area = width * height
+    2) aspect_ratio = width / height (also aspect_ratio = orig_width / orig_height)
+    ==> height  = width / aspect_ratio
+    ==> new_area = width * (width / aspect_ratio)
+    ==> sqrt(new_area * aspect_ratio) = width 
+    Params:
+        Original bounding box widths and heights and minimum bounding box area to get after extension
+    Throws:
+        ValueError if any of the provided bounding boxes has greater area than minimum box area
+        or has widths or heights equal to zero
+    Returns: 
+        Extended widths and heights and the corresponding deltas with respect to the original widths
+        and heights
+    """ 
+    if not np.all(orig_box_width * orig_box_height < min_box_area):
+        raise ValueError('This function expects all the original areas to be '
+                                                    'less than the minimum area')
+    if not np.all(orig_box_width > 0):
+        raise ValueError('This function expects non-zero width of bounding boxes')
+    if not np.all(orig_box_height > 0):
+        raise ValueError('This function expects non-zero height of bounding boxes')
+    orig_aspect_ratio = (orig_box_width / orig_box_height).astype('float')
+    extended_width = np.sqrt(min_box_area * orig_aspect_ratio)
+    extended_height = min_box_area / extended_width
+    delta_width = extended_width - orig_box_width
+    delta_height = extended_height - orig_box_height    
+    assert np.all(delta_width >= 0), 'extension should yield bigger or equal width'
+    assert np.all(delta_height >= 0), 'extension should yield bigger or equal height'
+    return delta_width, delta_height, extended_width, extended_height
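+# Worked example (hypothetical box): a 5x5 box (area 25) extended to
+# min_box_area = 100 keeps aspect_ratio = 1, so extended_width =
+# sqrt(100 * 1) = 10, extended_height = 100 / 10 = 10 and both deltas are 5.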
+
+def _extend_bounding_boxes_to_have_minimum_object_area(df, prefix, 
+                                            bboxs_to_extend, min_object_area):
+    """Helper function: extends specified bounding boxes of a data frame to have
+    minimum object area
+    The specification is done based on provided parameters
+        bboxs_to_extend: indexes in data frame 
+        prefix: 'gt' for ground truth and 'det' for detection
+    """
+    bbox_width = df['{}_right'.format(prefix)] - df['{}_left'.format(prefix)]
+    bbox_height = df['{}_bottom'.format(prefix)] - df['{}_top'.format(prefix)] 
+    orig_width = bbox_width[bboxs_to_extend]
+    orig_height = bbox_height[bboxs_to_extend]
+    delta_width, delta_height, extended_width, extended_height = _extend_bounding_boxes(
+                                        orig_width, orig_height, min_object_area)
+    df.loc[bboxs_to_extend, '{}_left'.format(prefix)] -= delta_width / 2
+    df.loc[bboxs_to_extend, '{}_right'.format(prefix)] = (
+            df.loc[bboxs_to_extend, '{}_left'.format(prefix)] + extended_width)
+    df.loc[bboxs_to_extend, '{}_top'.format(prefix)] -=  delta_height / 2
+    df.loc[bboxs_to_extend, '{}_bottom'.format(prefix)] = (
+            df.loc[bboxs_to_extend, '{}_top'.format(prefix)] + extended_height)
+    df.loc[bboxs_to_extend, '{}_area'.format(prefix)] = extended_width * extended_height
+    return df
+
+def extend_detections_for_orig_ufo_based_on_area(df_results_orig_ufo, minimum_object_area):
+    """This function extends detections of the original ufo algorithm to have 
+    specified minimum area for all detections"""
+    log.info('Extending small detections')
+    is_small_det_area = df_results_orig_ufo['det_area'] < minimum_object_area 
+    if len(df_results_orig_ufo[is_small_det_area]) == 0:
+        log.info('There are no detections with area below %d', minimum_object_area)
+        return df_results_orig_ufo
+    log.info('Extending %d detections', len(df_results_orig_ufo[is_small_det_area]))
+    df_results_orig_ufo = _extend_bounding_boxes_to_have_minimum_object_area(
+            df_results_orig_ufo, 'det', is_small_det_area, minimum_object_area)
+    min_det_area = df_results_orig_ufo[is_small_det_area]['det_area'].min()
+    assert min_det_area > minimum_object_area - 1, ('Something went wrong, '
+                            'minimum detection area is still less than expected')
+    return df_results_orig_ufo
+
+def extend_bounding_boxes_based_on_gt_area(df_comb, minimum_object_area):
+    """Extends ground truth and result bounding boxes based on the area of the ground truth: 
+    If the area of ground truth bounding box is less than the minimum object area
+    both ground truth and detection bounding boxes are extended to reach minimum object area, 
+    while maintaining original aspect ratio 
+    """
+    log.info('Extending bounding boxes based on ground truth area')
+    required_columns = DETECTION_BBOX_COLS_ + GROUNDTRUTH_BBOX_COLS_ + ['gt_area', 'det_area'] 
+    required_columns_str = ', '.join(required_columns) 
+    assert is_in_df_columns(df_comb, required_columns), (
+        'One or more out of {} columns is not found, '
+        'cannot perform bounding box extension'.format(required_columns_str))
+    is_small_gt_area = (df_comb['gt_area'] > 0) & (df_comb['gt_area'] < minimum_object_area) 
+    if len(df_comb[is_small_gt_area]) == 0:
+        log.info('There are no objects with area below %d', minimum_object_area)
+        return df_comb
+    log.info('Number of objects with ground truth area less than %d is %d', 
+                            minimum_object_area, len(df_comb[is_small_gt_area]))
+    # extending ground truth bounding box for objects with small ground truth area
+    df_comb = _extend_bounding_boxes_to_have_minimum_object_area(df_comb, 'gt', 
+                                        is_small_gt_area, minimum_object_area) 
+    # verify that all the boxes now have area no less than minimum object area
+    assert df_comb['gt_area'].min() >= minimum_object_area - 1, (
+                'Something went wrong, minimum ground truth area is still less '
+                                                'than the minimum expected area')
+
+    # extending detection bounding box for objects with small ground truth area 
+    # if the detection area is also small
+    is_small_gt_and_det_area = np.logical_and(is_small_gt_area, 
+                                (df_comb['det_area'] < (minimum_object_area - EPSILON_)))
+    if len(df_comb[is_small_gt_and_det_area]) == 0:
+        log.info('There are no detections with area below %d that are being matched '
+                    'to extended ground truth', minimum_object_area)
+        return df_comb
+
+    log.info('Number of cases with ground truth and detection areas less '
+        'than %d is %d', minimum_object_area, len(df_comb[is_small_gt_and_det_area]))
+
+    df_comb = _extend_bounding_boxes_to_have_minimum_object_area(df_comb, 'det', 
+                                is_small_gt_and_det_area, minimum_object_area) 
+    # verify that all the detection boxes that are compared to extended ground 
+    # truth boxes now have area no less than minimum object area
+    assert df_comb[is_small_gt_area]['det_area'].min() > minimum_object_area - 1, (
+        'Something went wrong, minimum detection area is still less than expected')
+    return df_comb
+
+def preprocess_results(df_res, minimum_object_area=None):
+    """Add to the results a bounding box in top, left, bottom, right format and its area;
+    if minimum_object_area is provided, small detections are extended to reach it
+    """
+    df_res = augment_with_detection_top_bottom_left_right(df_res)
+    df_res = augment_with_detection_area(df_res)
+    # extension of bounding boxes if necessary (typically done for original UFO only)
+    if minimum_object_area is not None:
+        df_res = extend_detections_for_orig_ufo_based_on_area(df_res, minimum_object_area)
+    return df_res
+
+def _remove_invalid_groundtruth(df_gt):
+    non_valid = df_gt['gt_area'] == 0
+    cols = GROUNDTRUTH_BBOX_COLS_ 
+    for col in cols + ['gt_area']:
+        df_gt.loc[non_valid, col] = np.nan 
+    return df_gt
+    
+def preprocess_groundtruth(df_gt):
+    """Adds an area to ground truth bounding boxes
+    """
+    df_gt = augment_with_groundtruth_area(df_gt)
+    df_gt = _remove_invalid_groundtruth(df_gt)
+    return df_gt
+
+def threshold_results_based_on_score(df_results, score_thresh):
+    """Thresholds df_results based on the score"""
+    assert is_in_df_columns(df_results, ['s']), (
+        's (score) column is not found - cannot perform thresholding')
+    df_results = df_results.query('s >= {}'.format(score_thresh))
+    return df_results
+
+def threshold_results_based_on_track_id_len(df_results, min_track_len):
+    """helper to filter data frame of matches based on track id length so far"""
+    min_track_len = min_track_len if min_track_len is not None else 0  
+    if min_track_len < 0:
+        raise ValueError('min_track_len should be non-negative or None')
+    elif min_track_len > 0:
+        log.info('Filtering length of tracks so far = %.3f', min_track_len) 
+        assert is_in_df_columns(df_results, ['track_id_len_so_far']), (
+            'track_id_len_so_far column is not found, cannot filter')
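+        # the self-comparison 'track_id_len_so_far == track_id_len_so_far' drops
+        # rows where the value is NaN, since NaN != NaN in pandas query semantics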
+        df_results = df_results.query('track_id_len_so_far == track_id_len_so_far and track_id_len_so_far >= {}'.format(min_track_len)) 
+        return df_results
+    return df_results
+
+####################################################################################################
+# MAIN
+####################################################################################################
+def get_matching_params(flags):
+    is_match_thresh = flags.is_match_threshold
+    is_no_match_thresh = flags.is_no_match_threshold
+    if is_no_match_thresh is None:
+        return is_match_thresh, is_match_thresh
+    if is_match_thresh < is_no_match_thresh:
+        raise ValueError('iou threshold for groundtruth and detection to be '
+                            'declared as "no match" cannot be more than '
+                            'iou threshold for a match')
+    if is_no_match_thresh <= 0:
+        raise ValueError('iou threshold for groundtruth and detection to be '
+                            'declared as "no match" must be strictly positive')
+    return is_match_thresh, is_no_match_thresh
+
+def _assert_no_matches_if_not_both_gt_det_exist(df):
+    """helper to check that matches is always 0 if detection or groundtruth do not exist 
+    in the frame and no matches are always 1
+    """ 
+    required_columns = ['Exists', 'gt_det_match', 'gt_det_no_match']
+    required_columns_str = ', '.join(required_columns) 
+    assert is_in_df_columns(df, required_columns), (
+        'One or more of {} is not found '.format(required_columns_str))
+    df_match_error = df.query('Exists == "right_only" and gt_det_match != 0')
+    df_no_match_error = df.query('Exists == "right_only" and gt_det_no_match != 1')
+    assert len(df_match_error) == 0, 'gt_det_match must be 0 when only a detection exists'
+    assert len(df_no_match_error) == 0, 'gt_det_no_match must be 1 when only a detection exists'
+
+def augment_with_track_len(df):
+    """calculates length of track and length of track so far"""
+    df_unique_tracks_per_frame = df.groupby(['flight_id','track_id'])['frame'].agg(
+                                             ['min', 'max', 'count']).add_prefix('track_frame_')
+    df_track_id_len = df_unique_tracks_per_frame.reset_index(0).reset_index(0)
+    df_track_id_len = df_track_id_len.assign(track_id_len = 
+                     1 +  df_track_id_len['track_frame_max'] - df_track_id_len['track_frame_min'])
+    # Sanity check for length of tracks
+    min_track_id_len = df_track_id_len['track_id_len'].min()
+    max_track_id_len = df_track_id_len['track_id_len'].max()
+    assert min_track_id_len >= 0, 'Minimum track length: expected {}, got {}'.format(0, min_track_id_len)
+    assert max_track_id_len <= MAX_FRAMES_PER_FLIGHT, (
+            'Maximum track length: expected {}, got {}'.format(MAX_FRAMES_PER_FLIGHT, max_track_id_len))
+    
+    df = df.merge(df_track_id_len, on=['flight_id', 'track_id'], how='left')
+    df = df.assign(track_id_len_so_far = 1 +  df['frame'] - df['track_frame_min'])
+    assert len(df.query('track_id_len_so_far > track_id_len')) == 0, ('Track id len so far'
+                                                    ' should not exceed total track id length ') 
+    
+    return df
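+# Illustration (hypothetical track): a track_id seen at frames [3, 4, 7] gets
+# track_frame_min = 3, track_frame_max = 7, track_frame_count = 3 and
+# track_id_len = 1 + 7 - 3 = 5; at frame 4 its track_id_len_so_far is
+# 1 + 4 - 3 = 2, which is what the on-line track-length filtering uses.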
+
+def augment_with_zero_match_no_match(df):
+    """this is a special case handling when no results are found"""
+    df['gt_det_match'] = 0
+    df['gt_det_no_match'] = 0
+    return df
+
+def compute_groundtruth_detections_matches(df_gt, df_results, extend_small_detections,
+                     is_match_thresh, is_no_match_thresh=None,
+                     minimum_object_area=0):
+    """This function computes the matches between the ground truth and the detections 
+    at the same frame
+    Input:
+        df_gt: pd.DataFrame - ground truth
+        df_results: pd.Dataframe - detection results 
+        extend_small_detections: Boolean - True if the detection results are derived from 
+                        original UFO algorithm 
+        is_match_thresh: float - threshold for matching function to determine correct 
+                                match between ground truth and detection
+        is_no_match_thresh: float - threshold that defines no match between 
+                                    ground truth and detection
+    Returns:
+        df_comb_gt_results_outer_with_matches: pd.DataFrame - 
+                        combined ground truth and detections with match / no match
+    """    
+    # pre-calculation of bounding boxes if necessary for matching
+    df_gt = preprocess_groundtruth(df_gt)
+    if extend_small_detections:
+        # add detection bounding boxes and extend if necessary 
+        df_results = preprocess_results(df_results, minimum_object_area)
+    else:
+        # add detection bounding boxes
+        df_results = preprocess_results(df_results)
+
+    # combine all ground truth intruders with all the detections within the same img_name 
+    # this pairs each ground truth intruder with each detection and vice versa
+    log.info('Pairing each ground truth intruder with each detection in the respective frame')
+    df_comb_gt_results = df_gt.merge(df_results, on =['flight_id', 'frame', 'img_name'], how='outer', 
+                                    indicator='Exists', suffixes=['_gt', '_det'])
+    
+    assert len(df_comb_gt_results.query('Exists == "right_only"')) == 0, (
+            'there are missing images in ground truth, that appear in detection file')
+       
+    log.info('Augmenting with original iou for comparison')
+    df_comb_gt_results = augment_with_iou(df_comb_gt_results)        
+    if minimum_object_area != 0: 
+        # save the original iou to compare 
+        df_comb_gt_results = df_comb_gt_results.rename(columns={'iou': 'iou_orig'})
+        # extend bounding boxes to have all ground truth area equal to at least minimum area
+        log.info('Extending bounding boxes based on groundtruth area')
+        df_comb_gt_results = extend_bounding_boxes_based_on_gt_area(
+                                df_comb_gt_results, minimum_object_area)
+        log.info('Augmenting with extended iou with minimum object area of %d', minimum_object_area)
+        df_comb_gt_results = augment_with_iou(df_comb_gt_results)
+        iou_diff = df_comb_gt_results['iou'] - df_comb_gt_results['iou_orig']
+        # NaN differences (frames without detections) compare False and are ignored
+        assert not np.any(iou_diff < 0), (
+                                        'extended iou should be higher or equal to original')
+    df_comb_gt_results_with_matches = augment_with_iou_match(
+                        df_comb_gt_results, is_match_thresh, is_no_match_thresh)
+    _assert_no_matches_if_not_both_gt_det_exist(df_comb_gt_results_with_matches)
+    log.info('Matching done')
+    return df_comb_gt_results_with_matches
+
+def run(flags):  
+    # preparing path for saving results 
+    if flags.output_dir_path is None:
+        # create a directory with the same name as airborne_classifier_result omitting the extension 
+        output_dir = flags.airborne_classifier_results
+        for extension in ['.json', '.csv', '.gz']:
+            output_dir = output_dir.replace(extension, '')
+        output_dir += '_metrics_min_track_len_{}'.format(flags.min_track_len)
+    else:
+        output_dir = flags.output_dir_path
+
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
+    
+    is_match_threshold, is_no_match_threshold = get_matching_params(flags)
+    matching_alg_str = 'extended_iou'
+    prefix = flags.results_name_prefix
+    matching_alg_str += '_minObjArea_{}'.format(flags.minimum_object_area)
+    if flags.extend_small_detections:
+        prefix = prefix.replace('det', 'ext_det')
+    output_filename = ('{}_matches_{}_matchThresh_{}_noMatchThresh_{}.csv'.format(prefix,
+                      matching_alg_str, str(is_match_threshold).replace('.','_'),
+                                    str(is_no_match_threshold).replace('.','_')))
+    full_output_path = os.path.join(output_dir, output_filename)
+    
+    # Starting processing
+    log.info('Reading input ground truth and results')                    
+    df_gt = get_deeplearning_groundtruth_as_data_frame(flags.deeplearning_groundtruth)
+    if (flags.deeplearning_groundtruth.endswith('.json') 
+        or flags.deeplearning_groundtruth.endswith('.json.gz')):
+        log.info('Saving groundtruth in .csv format, please use .csv in the future') 
+        df_gt.to_csv(flags.deeplearning_groundtruth.replace('.json', '.csv').replace('.gz', ''))
+    log.info('Number of evaluated images is %d', df_gt['img_name'].nunique())
+    df_results = get_results_as_data_frame(flags.airborne_classifier_results)
+ 
+    if (flags.airborne_classifier_results.endswith('.json') 
+        or flags.airborne_classifier_results.endswith('.json.gz')):
+        log.info('Saving airborne classifier results in .csv format, please use .csv in the future') 
+        df_results.to_csv(flags.airborne_classifier_results.replace('.json', '.csv').replace('.gz', ''))
+    log.info('Number of evaluated unique detections is %d', len(df_results))
+    log.info('Filtering results based on results score %.2f', flags.detection_score_threshold)  
+    df_results = threshold_results_based_on_score(df_results, flags.detection_score_threshold)
+    # enumerate detections with unique ids 
+    df_results = df_results.sort_values('img_name').reset_index(0)
+
+    if 'detection_id' not in df_results.columns:
+        log.info('Enumerating detections with detection_id')
+        df_results = augment_with_detection_id(df_results)
+    # add track_id/ object_id
+    if 'track_id' not in df_results.columns:
+        if 'object_id' in df_results.columns:
+            df_results = df_results.assign(track_id = df_results['object_id'])
+            log.info('Using object_id as track_id')
+        else:
+            df_results = df_results.assign(track_id = df_results['detection_id'])
+            log.info('Using detection_id as track_id')
+    else:
+        log.info('Using track_id as track_id')
+    df_results = df_results.merge(df_gt[['flight_id','frame','img_name']].drop_duplicates(), 
+                                                                on='img_name', how='left')
+    # TODO: remove below when Sleipnir dataset is fixed to have 306 flights
+    df_results = df_results.dropna(subset=['flight_id']) 
+    log.info('Augmenting with track length')
+    df_results = augment_with_track_len(df_results)
+    log.info('Filtering results with track length below %d', flags.min_track_len)
+    df_results = threshold_results_based_on_track_id_len(df_results, flags.min_track_len)
+
+    log.info('Computing ground truth and detection match based on %s', matching_alg_str)
+    df_comb_gt_results_with_matches = compute_groundtruth_detections_matches(
+        df_gt, df_results, flags.extend_small_detections,  
+        is_match_threshold, is_no_match_threshold, flags.minimum_object_area)
+
+    # save provided ground truth with the added encounters as data frame in .csv format
+    log.info('Saving ground truth and detection match results to %s', full_output_path)
+    df_comb_gt_results_with_matches.to_csv(full_output_path) 
+    return full_output_path
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Calculates matches '
+                                         'between the ground truth and results')
+    add_flags(parser)
+    parser.add_argument('--log-level', default=logging.getLevelName(logging.INFO), 
+                                                help='Logging verbosity level')
+    args = parser.parse_args()
+    setup_logging(args.log_level)
+    check_flags(args)
+    run(args)    
+
+
diff --git a/core/metrics/airborne_metrics/pandas_utils.py b/core/metrics/airborne_metrics/pandas_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d05e4a381a7e17f63e16399787e473d56f179238
--- /dev/null
+++ b/core/metrics/airborne_metrics/pandas_utils.py
@@ -0,0 +1,99 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+import gzip
+import json
+import logging
+import numpy as np
+import pandas as pd
+
+log = logging.getLogger(__name__)
+
+def is_in_df_columns(df, col_names):
+    """check if columns with col_names exist in data_frame df"""
+    all_cols = df.columns.to_list()
+    return np.all([col_name in all_cols for col_name in col_names])
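+# Usage sketch (hypothetical frame):
+#   df = pd.DataFrame({'frame': [1], 'flight_id': ['f0']})
+#   is_in_df_columns(df, ['frame', 'flight_id'])  # -> True
+#   is_in_df_columns(df, ['frame', 'track_id'])   # -> False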
+
+def _fix_empty_records_json_dict(json_data, record_key):
+    """adds empty dictionary for each empty list in record_key 
+    of each element of json_data"""
+    for element in json_data:
+        if not(element[record_key]):
+            element[record_key] = [{}]
+    return json_data
+
+def normalize_json_deeplearning_groundtruth_to_dataframe(json_data):
+    """Custom function that normalizes json into dataframe"""
+    # it is important to keep all the images in the data frame to know how many 
+    # images were evaluated, hence we fix such images to have 'gt'=[{}]
+    df = pd.json_normalize(json_data['samples'].values(), record_path='entities', meta=[
+                ['metadata', 'resolution', 'width'] , ['metadata', 'resolution', 'height']],
+                                                         sep='_', errors='ignore')
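+    # bb == bb is a NaN check: NaN != NaN, so entities without a bounding box
+    # keep their NaN value instead of raising on indexing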
+    df = df.assign(gt_left = [bb[0] if bb==bb else bb for bb in df['bb']])
+    df = df.assign(gt_top = [bb[1] if bb==bb else bb for bb in df['bb']])
+    df = df.assign(gt_right = [bb[0] + bb[2] if bb==bb else bb for bb in df['bb']])
+    df = df.assign(gt_bottom = [bb[1] + bb[3] if bb==bb else bb for bb in df['bb']])
+    df = df.drop(columns=['bb'])
+    df = df.rename(columns={'labels_is_above_horizon': 'is_above_horizon', 
+        'blob_range_distance_m': 'range_distance_m', 'blob_frame': 'frame', 
+        'metadata_resolution_width': 'size_width', 'metadata_resolution_height': 'size_height',
+        })
+    return df
+
+def normalize_json_result_to_dataframe(json_data):
+    """Custom function that normalizes json into dataframe"""
+    if len(json_data) == 0:
+        return None
+    df = pd.json_normalize(json_data, record_path='detections',
+                            meta=['img_name'],
+                            sep='_', errors='ignore')
+    df_columns = df.columns.to_list()
+    columns_to_return = ['img_name', 'n', 'x', 'y', 'w', 'h', 's']
+    if 'track_id' in df_columns:
+        columns_to_return.append('track_id')
+    if 'object_id' in df_columns:
+        columns_to_return.append('object_id')
+    return df[columns_to_return]
+
+def _get_as_dataframe(filename, normalization_func=None):
+    """Reads the provided .json/.csv filename
+    if needed normalizes json into csv using the provided normalization_func
+    returns data frame representation
+    """  
+    log.info('Reading provided %s', filename)
+    if filename.endswith('.csv'):
+        df = pd.read_csv(filename)
+    elif filename.endswith('.json') or filename.endswith('.json.gz'): 
+        if normalization_func is None:
+            raise ValueError('Please provide a normalization function for your json schema')
+        if filename.endswith('.json'):
+            log.info('Loading .json')
+            with open(filename, 'r') as json_data:
+                json_gt = json.load(json_data)
+        else:
+            log.info('Loading .json.gz')
+            with gzip.open(filename, 'rt', encoding='UTF-8') as json_data:
+                json_gt = json.load(json_data)
+        log.info('Normalizing json. This operation is time consuming and the resulting .csv '
+                 'will be saved. Please consider providing a .csv file next time')
+        df = normalization_func(json_gt)
+    else:
+        raise ValueError('Only .csv, .json or .json.gz are supported')
+    return df   
+
+def get_deeplearning_groundtruth_as_data_frame(deeplearning_groundtruth_filename):
+    """Reads the deep learning ground truth as provided .json/.csv
+    if needed normalizes json into csv
+    returns data frame representation of the deep learning ground truth
+    """  
+    log.info('Reading ground truth')
+    return _get_as_dataframe(deeplearning_groundtruth_filename, 
+                           normalize_json_deeplearning_groundtruth_to_dataframe)
+
+def get_results_as_data_frame(detection_results_filename):
+    """Reads detection results as provided .json/.csv
+    if needed normalizes json into csv
+    returns data frame representation of the detection results
+    """  
+    log.info('Reading detection results')
+    return _get_as_dataframe(detection_results_filename, 
+                             normalize_json_result_to_dataframe)
diff --git a/core/metrics/airborne_metrics/script_utils.py b/core/metrics/airborne_metrics/script_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f69fad06e8eb5182e6edc9b0ef2fb237e9d291c8
--- /dev/null
+++ b/core/metrics/airborne_metrics/script_utils.py
@@ -0,0 +1,22 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+import logging
+import sys
+
+def setup_logging(log_level):
+    """Utility function setting up the logger"""
+    logging.basicConfig(format='%(asctime)-15s:%(levelname)s:%(filename)s:%(lineno)d %(message)s',
+                        level=logging.getLevelName(log_level),
+                        stream=sys.stderr)
+
+def assert_file_format(filename, allowed_format=None):
+    """Asserts that the provided filename ends with an allowed format; if None is provided,
+    the default formats are: .csv, .json, .json.gz
+    """
+    if allowed_format is None:
+        assert filename.endswith('.csv') or filename.endswith('.json') or filename.endswith('.json.gz'), (
+            'Unsupported file format, please provide .csv (preferred), .json or .json.gz')
+    else:
+        assert filename.endswith(allowed_format), '{} format expected'.format(allowed_format)
+
+def remove_extension(filename):
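+    """Strips the .json, .csv and .gz extensions from the provided filename"""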
+    return filename.replace('.json', '').replace('.csv', '').replace('.gz', '')
diff --git a/core/metrics/run_airborne_metrics.py b/core/metrics/run_airborne_metrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..5730ccfeaeaf41fcbc8cc1ed8998a350d6d87202
--- /dev/null
+++ b/core/metrics/run_airborne_metrics.py
@@ -0,0 +1,227 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# This module binds together the different modules of airborne metrics for local evaluation
+
+import argparse
+from collections import defaultdict
+from glob import glob
+import json
+import logging
+import os
+import shutil
+
+import pandas as pd 
+
+import airborne_metrics.calculate_airborne_metrics as calculate_airborne_metrics
+import airborne_metrics.calculate_encounters as calculate_encounters
+import airborne_metrics.match_groundtruth_results as match_groundtruth_results
+from airborne_metrics.script_utils import setup_logging, remove_extension
+from airborne_metrics.calculate_airborne_metrics import RANGES_TO_DETECT, HORIZON_QUERIES
+from airborne_metrics.calculate_encounters import DEFAULT_MAX_RANGE_M
+
+MAX_FAR_TO_BE_RANKED = 0.5 
+MAX_FPPI_TO_BE_RANKED = 0.0005
+DEFAULT_MIN_TRACK_LEN_ = 0
+DEFAULT_MIN_SCORE_ = 0
+DEEP_LEARNING_GROUNDTRUTH = 'groundtruth.json'
+ENCOUNTERS_GROUNDTRUTH = 'groundtruth_with_encounters_maxRange{max_range}_maxGap3_minEncLen30.csv'
+
+log = logging.getLogger(__name__)
+
+def add_flags(parser):
+    """Utility function adding command line arguments to the parser"""
+    parser.add_argument('--dataset-folder', '-d', required=True,
+                        help='Name of the folder with dataset ground truth')
+    parser.add_argument('--results-folder', '-r', required=True,
+                        help='Name of the folder with results')
+    parser.add_argument('--summaries-folder', '-s',
+                        help='Name of the folders to hold summary files')
+    parser.add_argument('--min-score', type=float, default=DEFAULT_MIN_SCORE_,
+                        help='Minimum score to threshold the results if it cannot be derived from the name')
+    parser.add_argument('--min-track-len', '-l', type=int, default=DEFAULT_MIN_TRACK_LEN_,
+                        help='Minimum track length - the results will be evaluated for all tracks '
+                        'of at least this length')
+    parser.add_argument('--baseline-far', type=float,
+                        help='Baseline far to be used as the target metrics value, if provided')
+    parser.add_argument('--baseline-fppi', type=float,
+                        help='Baseline fppi to be used as the target metrics value, if provided')
+    parser.add_argument('--enc-max-range', '-f', default=DEFAULT_MAX_RANGE_M,
+                        help='Max range of intruder in the encounters')
+ 
+def call_script(script_handler, args_list):
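+    """Runs one of the metrics modules programmatically, reusing its command line interface"""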
+    parser = argparse.ArgumentParser()
+    script_handler.add_flags(parser)
+    args = parser.parse_args(args_list)
+    script_handler.check_flags(args)
+    return script_handler.run(args)
+
+def call_encounter_calculation(args_list):
+    return call_script(calculate_encounters, args_list)
+
+def call_metrics_calculation(args_list):
+    return call_script(calculate_airborne_metrics, args_list)
+
+def call_match_calculation(groundtruth, result_file, min_track_len, min_score=None):
+    args_list = ['--deeplearning-groundtruth', groundtruth, 
+                 '--airborne-classifier-results', result_file, 
+                 '--min-track-len', str(min_track_len)]
+    if min_score is not None:
+        args_list += ['--detection-score-threshold', str(min_score)]
+    return call_script(match_groundtruth_results, args_list)
+
+def call_metrics_with_target_far(gt_det_match_result_path, encounters,
+                                 far=None, min_score=0):
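+    """Runs metrics calculation with 'far' as the target metric;
+    the target value is only passed when far is given"""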
+    args_list = ['--encounters-with-groundtruth-filename', encounters, 
+                 '--groundtruth-results-matches-filename', gt_det_match_result_path, 
+                 '--target-metrics', 'far', 
+                 '--min-det-score', str(min_score),
+                 '--use-track-fl-dr']
+    if far is not None:
+        args_list += ['--target-value', str(far)]
+    return call_metrics_calculation(args_list)
+
+def call_metrics_with_target_fppi(gt_det_match_result_path, encounters, fppi, min_score=0):
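+    """Runs metrics calculation with 'fppi' as the target metric at the given target value"""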
+    return call_metrics_calculation(['--encounters-with-groundtruth-filename', encounters, 
+                                     '--groundtruth-results-matches-filename', gt_det_match_result_path,
+                                     '--target-metrics', 'fppi', 
+                                     '--min-det-score', str(min_score),
+                                     '--target-value', str(fppi),
+                                     '--use-track-fl-dr'])
+
+def call_metrics_with_score(gt_det_match_result_path, encounters, score=None):
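+    """Runs metrics calculation at a fixed detection score threshold, if one is given"""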
+    args_list = ['--encounters-with-groundtruth-filename', encounters, 
+                 '--groundtruth-results-matches-filename', gt_det_match_result_path,
+                 '--use-track-fl-dr']
+    if score is not None:
+        args_list += ['--detection-score-threshold', str(score)]
+    return call_metrics_calculation(args_list)
+
+def calculate_airborne_encounters(groundtruth, output_dir):
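+    """Computes the encounters ground truth from the deep learning ground truth"""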
+    return call_script(calculate_encounters, ['--deeplearning-groundtruth', groundtruth,
+                                              '--output-dir-path', output_dir])
+
+def get_min_score_from_name(result_name):
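+    """Extracts a score threshold encoded in the result name, e.g. 't0p5' yields 0.5;
+    returns None if no such token is present"""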
+    def replace_decimal(value):
+        return str(value).replace('.', 'p')
+    scores = [val / 10 for val in range(10)]
+    for score in scores:
+        if 't' + replace_decimal(score) in result_name:
+            return score
+    return None 
+
+def _change_to_csv(filename):
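+    """Returns the .csv counterpart of a .json or .json.gz filename"""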
+    if filename.endswith('.csv'):
+        return filename
+    return filename.replace('.json', '.csv').replace('.gz','')
+
+####################################################################################################
+# MAIN
+####################################################################################################
+def summarize(summaries_dir):
+    """Gathers all summaries into one table"""
+    all_summaries = glob(os.path.join(summaries_dir, '*.json'))
+    summaries = []
+    for i, summary_f in enumerate(all_summaries):
+        with open(summary_f, 'r') as jf:
+            summaries.append(json.load(jf))
+
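+    # build one summary table per detection range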
+    for range_ in RANGES_TO_DETECT:
+        results = defaultdict(list)
+        for summary in summaries:
+            results_name = os.path.basename(os.path.dirname(summary['gt_det_matches']))
+            results['Algorithm'].append(results_name)
+            results['Score'].append(summary['min_det_score'])
+            results['FPPI'].append(summary['fppi'])
+            results['AFDR'].append(summary['fl_dr_in_range'])
+            results['Detected planned aircraft'].append(summary['det_aircraft_in_range'])
+            results['Total planned aircraft'].append(summary['tot_aircraft_in_range'])
+            results['HFAR'].append(summary['far'])
+            for dr_criteria in ['Tracking']:
+                for scenario in list(HORIZON_QUERIES.keys()):
+                    results['EDR {}, {}'.format(scenario, dr_criteria)].append(
+                            round(summary[dr_criteria]['Encounters'][str(range_)][scenario]['dr'], 5))
+                    results['Detected {}, {}'.format(scenario, dr_criteria)].append(
+                            round(summary[dr_criteria]['Encounters'][str(range_)][scenario]['detected'], 5))
+                    results['Total {}, {}'.format(scenario, dr_criteria)].append(
+                            round(summary[dr_criteria]['Encounters'][str(range_)][scenario]['total'], 5))
+        df_to_save = pd.DataFrame.from_dict(results)
+        df_to_save = df_to_save.sort_values(
+            ['EDR All, Tracking', 'EDR Below Horizon, Tracking', 'EDR Mixed, Tracking', 
+             'EDR Above Horizon, Tracking'], ascending=False).reset_index(drop=True).rename_axis('#')
+        df_to_save.to_csv(os.path.join(summaries_dir, 'dr_encounters_detected_before_{}.csv'.format(range_)))
+        df_benchmark_1 = df_to_save[['Algorithm', 'Score', 'HFAR', 'EDR All, Tracking']]
+        df_benchmark_1.to_csv(os.path.join(summaries_dir, 'detection_tracking_benchmark_results_for_ranking.csv'))
+        df_benchmark_2 = df_to_save[['Algorithm', 'Score', 'FPPI', 'AFDR']]
+        df_benchmark_2.to_csv(os.path.join(summaries_dir, 'detection_only_benchmark_results_for_ranking.csv')) 
+        
+
+def run(flags):
+    encounters_gt_path = os.path.join(flags.dataset_folder, ENCOUNTERS_GROUNDTRUTH.format(
+                                                                max_range=flags.enc_max_range))
+    log.info('Encounter ground truth: %s', encounters_gt_path)
+    groundtruth_path = os.path.join(flags.dataset_folder, DEEP_LEARNING_GROUNDTRUTH)
+    groundtruth_path_csv = _change_to_csv(groundtruth_path)
+    if os.path.isfile(groundtruth_path_csv):
+        groundtruth_path = groundtruth_path_csv
+    if flags.summaries_folder is not None:
+        summaries_dir = flags.summaries_folder
+    else:
+        summaries_dir = os.path.join(flags.results_folder, 'summaries')
+    if not os.path.isdir(summaries_dir):
+        os.makedirs(summaries_dir)
+    if not os.path.exists(encounters_gt_path):
+        encounters_gt_path = calculate_airborne_encounters(groundtruth_path, flags.dataset_folder)
+        groundtruth_path = _change_to_csv(groundtruth_path) # for acceleration of other evaluations
+    
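+    # every .json/.json.gz file in the results folder is treated as a result to evaluate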
+    result_files = glob(os.path.join(flags.results_folder, '*.json*')) 
+
+    for result_file in result_files:
+        result_file_base = remove_extension(result_file)
+        gt_det_matches_filename = None
+        min_score = get_min_score_from_name(result_file)
+        if min_score is None:
+            min_score = flags.min_score
+        min_track_len = flags.min_track_len 
+        results_dir = result_file_base + '_metrics' + '_min_track_len_{}'.format(min_track_len)
+        
+        if os.path.isfile(result_file_base + '.csv'):
+            result_file = result_file_base + '.csv'
+        # calculates matches for detections that belong to tracks with min_track_len (in online fashion) 
+        # and have at least min_score 
+        gt_det_matches_filename = call_match_calculation(groundtruth_path, result_file, 
+                                                         min_track_len, min_score)  
+        groundtruth_path = _change_to_csv(groundtruth_path) # for acceleration of other evaluations
+        exp_name = os.path.basename(os.path.dirname(gt_det_matches_filename))
+        # calculate metrics for all the matches with min_track_len and min_score
+        
+        # now perform metrics calculation for baseline far
+        if flags.baseline_far:
+            _, summary_json = call_metrics_with_target_far(gt_det_matches_filename, encounters_gt_path, 
+                                                        min_score=min_score, far=flags.baseline_far)
+        else:
+            _, summary_json = call_metrics_with_score(gt_det_matches_filename, encounters_gt_path,
+                                                                                score=min_score)
+        # copy the summary to the summaries directory
+        shutil.copyfile(summary_json, os.path.join(summaries_dir, 
+                os.path.basename(summary_json.replace('summary', '{}_summary'.format(exp_name)))))  
+
+        # now perform metrics calculation for baseline fppi
+        if flags.baseline_fppi:
+            _, summary_json = call_metrics_with_target_fppi(gt_det_matches_filename, encounters_gt_path, 
+                                                       fppi=flags.baseline_fppi, min_score=min_score)
+            shutil.copyfile(summary_json, os.path.join(summaries_dir, 
+                os.path.basename(summary_json.replace('summary', '{}_summary'.format(exp_name)))))
+
+    summarize(summaries_dir)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Evaluate airborne detection results with airborne metrics')
+    add_flags(parser)
+    parser.add_argument('--log-level', default=logging.getLevelName(logging.INFO), 
+                        help='Logging verbosity level')
+    args = parser.parse_args()
+    setup_logging(args.log_level)
+    run(args)
diff --git a/core/metrics/setup.py b/core/metrics/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..3656fa1a8b5229c76caac110d42ac54921dead78
--- /dev/null
+++ b/core/metrics/setup.py
@@ -0,0 +1,13 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+from setuptools import setup, find_packages
+
+setup(
+    name='airbornemetrics',
+    description='Metrics for the PrimeAir airborne detection challenge',
+    version='1.0',
+    license='Apache-2.0',
+    install_requires=['pandas>=1.0.1',
+                      'numpy',
+                      ],
+    packages=find_packages(),  # include/exclude arguments take * as wildcard, . for any sub-package names
+)