Mentions légales du service

Skip to content
Snippets Groups Projects
Commit eaa6fc36 authored by rickymwalsh's avatar rickymwalsh
Browse files

add avg precision and auc precision recall curve

parent 5691eb81
No related branches found
No related tags found
1 merge request: !14 "add avg precision and auc precision recall curve"
...@@ -5,9 +5,61 @@ import numpy as np ...@@ -5,9 +5,61 @@ import numpy as np
import pandas as pd import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, average_precision_score, auc
from .binary_search import single_binary_search, filter_table_and_get_metrics, load_data from .binary_search import single_binary_search, filter_table_and_get_metrics, load_data
def prepare_pr_data(gt_df, pred_df):
    """Flatten the GT and prediction tables into class/score lists for sklearn PR metrics.

    Args:
        gt_df: GT table containing at least the following columns:
            image_name, reference_instance_id, predicted_proba, is_true_positive
        pred_df: predictions table containing at least the following columns:
            predicted_proba, is_false_positive

    Returns:
        gt_classes, pred_scores: two lists of equal length
        (# unique GT lesions + # FP predictions).
            gt_classes holds 1 for every unique GT lesion followed by 0 for every FP prediction.
            pred_scores holds, for each GT lesion, the highest predicted_proba among its
            true-positive rows (0 when the lesion has no true-positive row), followed by
            the predicted_proba of each FP prediction.
    """
    lesion_keys = ['image_name', 'reference_instance_id']

    # Collapse the TP rows to a single row per GT lesion, keeping the best score.
    tp_rows = gt_df.loc[gt_df['is_true_positive'] == 1]
    best_tp_scores = tp_rows.groupby(lesion_keys)[['predicted_proba']].max().reset_index()

    # Left-join onto the full set of GT lesions so that lesions without any TP row
    # are kept; they end up with a missing score, which is replaced by 0.
    all_lesions = gt_df[lesion_keys].drop_duplicates()
    scored_lesions = all_lesions.merge(best_tp_scores, on=lesion_keys, how='left')
    scored_lesions = scored_lesions.fillna(value={'predicted_proba': 0})
    lesion_scores = scored_lesions['predicted_proba'].values.tolist()

    # Scores of the predictions flagged as false positives.
    fp_mask = pred_df['is_false_positive'] == 1
    fp_scores = pred_df.loc[fp_mask, 'predicted_proba'].values.tolist()

    # GT lesions first (class 1), then the FPs (class 0).
    gt_classes = [1] * len(lesion_scores) + [0] * len(fp_scores)
    pred_scores = lesion_scores + fp_scores
    return gt_classes, pred_scores
def compute_pr_metrics(gt_classes, pred_scores):
    """Return the average precision and the area under the precision-recall curve.

    Both values come from sklearn.metrics: `average_precision_score` for the first,
    and `auc` applied to the output of `precision_recall_curve` for the second.

    Args:
        gt_classes (array-like): Class for each lesion, either 1 for TP, or 0 for FP
            (len = # TP + # FP lesions)
        pred_scores (array-like): Predicted lesion scores. (len = # TP + # FP lesions)

    Returns:
        avg_precision (float): Average precision.
        auc_pr (float): Area under the precision recall curve.
    """
    avg_precision = average_precision_score(gt_classes, pred_scores)

    # precision_recall_curve also returns the thresholds, which are not needed here.
    pr_curve = precision_recall_curve(gt_classes, pred_scores)
    precision_values, recall_values = pr_curve[0], pr_curve[1]

    # Recall is the x-axis of the curve and precision the y-axis.
    return avg_precision, auc(recall_values, precision_values)
def calculate_froc(args, gt_path, pred_path, target_metric_list, metric_type, output_path, iou_threshold=0.2): def calculate_froc(args, gt_path, pred_path, target_metric_list, metric_type, output_path, iou_threshold=0.2):
mean_sensitivities = [] mean_sensitivities = []
results = {} results = {}
...@@ -44,8 +96,15 @@ def calculate_froc(args, gt_path, pred_path, target_metric_list, metric_type, ou ...@@ -44,8 +96,15 @@ def calculate_froc(args, gt_path, pred_path, target_metric_list, metric_type, ou
'fp_per_image': fp_per_image, 'fp_per_image': fp_per_image,
} }
# Calculate the average precision and AUC of the precision-recall curve
gt_classes, pred_scores = prepare_pr_data(tableGT, tablePred)
avg_precision, auc_pr = compute_pr_metrics(gt_classes, pred_scores)
# Writing results into a JSON file # Writing results into a JSON file
froc_dict = {"FROC Metric": froc_metric} froc_dict = {"FROC Metric": froc_metric,
"avg_precision": avg_precision,
"auc_pr": auc_pr,
}
results.update(froc_dict) results.update(froc_dict)
results_json_path = os.path.join(output_path, 'results.json') results_json_path = os.path.join(output_path, 'results.json')
with open(results_json_path, "w") as json_file: with open(results_json_path, "w") as json_file:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment