init
finetune/mmseg/evaluation/metrics/__init__.py  (new file, 6 lines)
@@ -0,0 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .citys_metric import CityscapesMetric
from .depth_metric import DepthMetric
from .iou_metric import IoUMetric

__all__ = ['IoUMetric', 'CityscapesMetric', 'DepthMetric']
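The three metric classes above are registered in mmseg's `METRICS` registry (via the `@METRICS.register_module()` decorators in the files below), so in practice they are instantiated from config dicts rather than imported directly. A minimal sketch, assuming a standard mmengine/mmseg config layout:

# Hypothetical config fragment: the evaluator is built from the METRICS
# registry by its registered type name.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator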
finetune/mmseg/evaluation/metrics/citys_metric.py  (new file, 158 lines)
@@ -0,0 +1,158 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import shutil
from collections import OrderedDict
from typing import Dict, Optional, Sequence

try:
    import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval  # noqa
    import cityscapesscripts.helpers.labels as CSLabels
except ImportError:
    CSLabels = None
    CSEval = None

import numpy as np
from mmengine.dist import is_main_process, master_only
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger, print_log
from mmengine.utils import mkdir_or_exist
from PIL import Image

from mmseg.registry import METRICS


@METRICS.register_module()
class CityscapesMetric(BaseMetric):
    """Cityscapes evaluation metric.

    Args:
        output_dir (str): The directory for output prediction.
        ignore_index (int): Index that will be ignored in evaluation.
            Default: 255.
        format_only (bool): Only format results for submission without
            performing evaluation. It is useful when you want to format the
            results to a specific format and submit them to the test server.
            Defaults to False.
        keep_results (bool): Whether to keep the results. When ``format_only``
            is True, ``keep_results`` must be True. Defaults to False.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
    """

    def __init__(self,
                 output_dir: str,
                 ignore_index: int = 255,
                 format_only: bool = False,
                 keep_results: bool = False,
                 collect_device: str = 'cpu',
                 prefix: Optional[str] = None,
                 **kwargs) -> None:
        super().__init__(collect_device=collect_device, prefix=prefix)
        if CSEval is None:
            raise ImportError('Please run "pip install cityscapesscripts" to '
                              'install cityscapesscripts first.')
        self.output_dir = output_dir
        self.ignore_index = ignore_index

        self.format_only = format_only
        if format_only:
            assert keep_results, (
                'When format_only is True, the results must be kept, please '
                f'set keep_results as True, but got {keep_results}')
        self.keep_results = keep_results
        self.prefix = prefix
        if is_main_process():
            mkdir_or_exist(self.output_dir)

    @master_only
    def __del__(self) -> None:
        """Clean up."""
        if not self.keep_results:
            shutil.rmtree(self.output_dir)

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data and data_samples.

        The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of outputs from the model.
        """
        mkdir_or_exist(self.output_dir)

        for data_sample in data_samples:
            pred_label = data_sample['pred_sem_seg']['data'][0].cpu().numpy()
            # when evaluating with official cityscapesscripts,
            # labelIds should be used
            pred_label = self._convert_to_label_id(pred_label)
            basename = osp.splitext(osp.basename(data_sample['img_path']))[0]
            png_filename = osp.abspath(
                osp.join(self.output_dir, f'{basename}.png'))
            output = Image.fromarray(pred_label.astype(np.uint8)).convert('P')
            output.save(png_filename)
            if self.format_only:
                # format_only always for test dataset without ground truth
                gt_filename = ''
            else:
                # when evaluating with official cityscapesscripts,
                # **_gtFine_labelIds.png is used
                gt_filename = data_sample['seg_map_path'].replace(
                    'labelTrainIds.png', 'labelIds.png')
            self.results.append((png_filename, gt_filename))

    def compute_metrics(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): Testing results of the dataset.

        Returns:
            dict[str: float]: Cityscapes evaluation results.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        if self.format_only:
            logger.info(f'results are saved to {osp.dirname(self.output_dir)}')
            return OrderedDict()

        msg = 'Evaluating in Cityscapes style'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        eval_results = dict()
        print_log(
            f'Evaluating results under {self.output_dir} ...', logger=logger)

        CSEval.args.evalInstLevelScore = True
        CSEval.args.predictionPath = osp.abspath(self.output_dir)
        CSEval.args.evalPixelAccuracy = True
        CSEval.args.JSONOutput = False

        pred_list, gt_list = zip(*results)
        metric = dict()
        eval_results.update(
            CSEval.evaluateImgLists(pred_list, gt_list, CSEval.args))
        metric['averageScoreCategories'] = eval_results[
            'averageScoreCategories']
        metric['averageScoreInstCategories'] = eval_results[
            'averageScoreInstCategories']
        return metric

    @staticmethod
    def _convert_to_label_id(result):
        """Convert trainId to id for cityscapes."""
        if isinstance(result, str):
            result = np.load(result)
        result_copy = result.copy()
        for trainId, label in CSLabels.trainId2label.items():
            result_copy[result == trainId] = label.id

        return result_copy
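A minimal sketch of how `CityscapesMetric` is typically wired up for test-server submission (the output path is a placeholder). Because `__del__` removes `output_dir` unless `keep_results` is True, `format_only=True` requires `keep_results=True`, as enforced in `__init__`:

# Hypothetical evaluator config for formatting Cityscapes test predictions.
test_evaluator = dict(
    type='CityscapesMetric',
    output_dir='work_dirs/format_results',  # placeholder path
    format_only=True,
    keep_results=True)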
finetune/mmseg/evaluation/metrics/depth_metric.py  (new file, 212 lines)
@@ -0,0 +1,212 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from collections import OrderedDict, defaultdict
from typing import Dict, List, Optional, Sequence

import cv2
import numpy as np
import torch
from mmengine.dist import is_main_process
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger, print_log
from mmengine.utils import mkdir_or_exist
from prettytable import PrettyTable
from torch import Tensor

from mmseg.registry import METRICS


@METRICS.register_module()
class DepthMetric(BaseMetric):
    """Depth estimation evaluation metric.

    Args:
        depth_metrics (List[str], optional): List of metrics to compute. If
            not specified, defaults to all metrics in self.METRICS.
        min_depth_eval (float): Minimum depth value for evaluation.
            Defaults to 0.0.
        max_depth_eval (float): Maximum depth value for evaluation.
            Defaults to infinity.
        crop_type (str, optional): Specifies the type of cropping to be used
            during evaluation. This option can affect how the evaluation mask
            is generated. Currently, 'nyu_crop' is supported, but other
            types can be added in the future. Defaults to None, meaning no
            cropping is applied.
        depth_scale_factor (float): Factor to scale the depth values.
            Defaults to 1.0.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        output_dir (str): The directory for output prediction. Defaults to
            None.
        format_only (bool): Only format results for submission without
            performing evaluation. It is useful when you want to save the
            results in a specific format and submit them to the test server.
            Defaults to False.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
    """
    METRICS = ('d1', 'd2', 'd3', 'abs_rel', 'sq_rel', 'rmse', 'rmse_log',
               'log10', 'silog')

    def __init__(self,
                 depth_metrics: Optional[List[str]] = None,
                 min_depth_eval: float = 0.0,
                 max_depth_eval: float = float('inf'),
                 crop_type: Optional[str] = None,
                 depth_scale_factor: float = 1.0,
                 collect_device: str = 'cpu',
                 output_dir: Optional[str] = None,
                 format_only: bool = False,
                 prefix: Optional[str] = None,
                 **kwargs) -> None:
        super().__init__(collect_device=collect_device, prefix=prefix)

        if depth_metrics is None:
            self.metrics = self.METRICS
        elif isinstance(depth_metrics, (tuple, list)):
            for metric in depth_metrics:
                assert metric in self.METRICS, f'the metric {metric} is not ' \
                    f'supported. Please use metrics in {self.METRICS}'
            self.metrics = depth_metrics

        # Validate crop_type, if provided
        assert crop_type in [
            None, 'nyu_crop'
        ], (f'Invalid value for crop_type: {crop_type}. Supported values are '
            "None or 'nyu_crop'.")
        self.crop_type = crop_type
        self.min_depth_eval = min_depth_eval
        self.max_depth_eval = max_depth_eval
        self.output_dir = output_dir
        if self.output_dir and is_main_process():
            mkdir_or_exist(self.output_dir)
        self.format_only = format_only
        self.depth_scale_factor = depth_scale_factor

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data and data_samples.

        The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of outputs from the model.
        """
        for data_sample in data_samples:
            pred_label = data_sample['pred_depth_map']['data'].squeeze()
            # format_only always for test dataset without ground truth
            if not self.format_only:
                gt_depth = data_sample['gt_depth_map']['data'].squeeze().to(
                    pred_label)

                eval_mask = self._get_eval_mask(gt_depth)
                self.results.append(
                    (gt_depth[eval_mask], pred_label[eval_mask]))
            # format_result
            if self.output_dir is not None:
                basename = osp.splitext(osp.basename(
                    data_sample['img_path']))[0]
                png_filename = osp.abspath(
                    osp.join(self.output_dir, f'{basename}.png'))
                output_mask = pred_label.cpu().numpy(
                ) * self.depth_scale_factor

                cv2.imwrite(png_filename, output_mask.astype(np.uint16),
                            [cv2.IMWRITE_PNG_COMPRESSION, 0])

    def _get_eval_mask(self, gt_depth: Tensor):
        """Generates an evaluation mask based on ground truth depth and
        cropping.

        Args:
            gt_depth (Tensor): Ground truth depth map.

        Returns:
            Tensor: Boolean mask where evaluation should be performed.
        """
        valid_mask = torch.logical_and(gt_depth > self.min_depth_eval,
                                       gt_depth < self.max_depth_eval)

        if self.crop_type == 'nyu_crop':
            # this implementation is adapted from
            # https://github.com/zhyever/Monocular-Depth-Estimation-Toolbox/blob/main/depth/datasets/nyu.py # noqa
            crop_mask = torch.zeros_like(valid_mask)
            crop_mask[45:471, 41:601] = 1
        else:
            crop_mask = torch.ones_like(valid_mask)

        eval_mask = torch.logical_and(valid_mask, crop_mask)
        return eval_mask

    @staticmethod
    def _calc_all_metrics(gt_depth, pred_depth):
        """Computes final evaluation metrics based on accumulated results."""
        assert gt_depth.shape == pred_depth.shape

        thresh = torch.max((gt_depth / pred_depth), (pred_depth / gt_depth))
        diff = pred_depth - gt_depth
        diff_log = torch.log(pred_depth) - torch.log(gt_depth)

        d1 = torch.sum(thresh < 1.25).float() / len(thresh)
        d2 = torch.sum(thresh < 1.25**2).float() / len(thresh)
        d3 = torch.sum(thresh < 1.25**3).float() / len(thresh)

        abs_rel = torch.mean(torch.abs(diff) / gt_depth)
        sq_rel = torch.mean(torch.pow(diff, 2) / gt_depth)

        rmse = torch.sqrt(torch.mean(torch.pow(diff, 2)))
        rmse_log = torch.sqrt(torch.mean(torch.pow(diff_log, 2)))

        log10 = torch.mean(
            torch.abs(torch.log10(pred_depth) - torch.log10(gt_depth)))
        silog = torch.sqrt(
            torch.pow(diff_log, 2).mean() -
            0.5 * torch.pow(diff_log.mean(), 2))

        return {
            'd1': d1.item(),
            'd2': d2.item(),
            'd3': d3.item(),
            'abs_rel': abs_rel.item(),
            'sq_rel': sq_rel.item(),
            'rmse': rmse.item(),
            'rmse_log': rmse_log.item(),
            'log10': log10.item(),
            'silog': silog.item()
        }

    def compute_metrics(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are corresponding results. The keys
            are identical with self.metrics.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        if self.format_only:
            logger.info(f'results are saved to {osp.dirname(self.output_dir)}')
            return OrderedDict()

        metrics = defaultdict(list)
        for gt_depth, pred_depth in results:
            for key, value in self._calc_all_metrics(gt_depth,
                                                     pred_depth).items():
                metrics[key].append(value)
        metrics = {k: sum(metrics[k]) / len(metrics[k]) for k in self.metrics}

        table_data = PrettyTable()
        for key, val in metrics.items():
            table_data.add_column(key, [round(val, 5)])

        print_log('results:', logger)
        print_log('\n' + table_data.get_string(), logger=logger)

        return metrics
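As a quick sanity check (illustrative only, not part of the file), `_calc_all_metrics` can be exercised on toy tensors; with a perfect prediction the error metrics come out at ~0 and the delta thresholds at 1:

import torch

from mmseg.evaluation.metrics import DepthMetric  # assumes this package layout

# Identical ground truth and prediction: diff and diff_log are zero, so
# abs_rel, sq_rel, rmse, rmse_log, log10 and silog evaluate to ~0, while
# thresh == 1 < 1.25 gives d1 == d2 == d3 == 1.
gt = torch.tensor([1.0, 2.0, 4.0])
pred = gt.clone()
print(DepthMetric._calc_all_metrics(gt, pred))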
finetune/mmseg/evaluation/metrics/iou_metric.py  (new file, 286 lines)
@@ -0,0 +1,286 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from collections import OrderedDict
from typing import Dict, List, Optional, Sequence

import numpy as np
import torch
from mmengine.dist import is_main_process
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger, print_log
from mmengine.utils import mkdir_or_exist
from PIL import Image
from prettytable import PrettyTable

from mmseg.registry import METRICS


@METRICS.register_module()
class IoUMetric(BaseMetric):
    """IoU evaluation metric.

    Args:
        ignore_index (int): Index that will be ignored in evaluation.
            Default: 255.
        iou_metrics (list[str] | str): Metrics to be calculated, the options
            include 'mIoU', 'mDice' and 'mFscore'.
        nan_to_num (int, optional): If specified, NaN values will be replaced
            by the numbers defined by the user. Default: None.
        beta (int): Determines the weight of recall in the combined score.
            Default: 1.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        output_dir (str): The directory for output prediction. Defaults to
            None.
        format_only (bool): Only format results for submission without
            performing evaluation. It is useful when you want to save the
            results in a specific format and submit them to the test server.
            Defaults to False.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
    """

    def __init__(self,
                 ignore_index: int = 255,
                 iou_metrics: List[str] = ['mIoU'],
                 nan_to_num: Optional[int] = None,
                 beta: int = 1,
                 collect_device: str = 'cpu',
                 output_dir: Optional[str] = None,
                 format_only: bool = False,
                 prefix: Optional[str] = None,
                 **kwargs) -> None:
        super().__init__(collect_device=collect_device, prefix=prefix)

        self.ignore_index = ignore_index
        self.metrics = iou_metrics
        self.nan_to_num = nan_to_num
        self.beta = beta
        self.output_dir = output_dir
        if self.output_dir and is_main_process():
            mkdir_or_exist(self.output_dir)
        self.format_only = format_only

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data and data_samples.

        The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of outputs from the model.
        """
        num_classes = len(self.dataset_meta['classes'])
        for data_sample in data_samples:
            pred_label = data_sample['pred_sem_seg']['data'].squeeze()
            # format_only always for test dataset without ground truth
            if not self.format_only:
                label = data_sample['gt_sem_seg']['data'].squeeze().to(
                    pred_label)
                self.results.append(
                    self.intersect_and_union(pred_label, label, num_classes,
                                             self.ignore_index))
            # format_result
            if self.output_dir is not None:
                basename = osp.splitext(osp.basename(
                    data_sample['img_path']))[0]
                png_filename = osp.abspath(
                    osp.join(self.output_dir, f'{basename}.png'))
                output_mask = pred_label.cpu().numpy()
                # The index range of the official ADE20K dataset is 0 to 150,
                # but the index range of the output is 0 to 149 because
                # reduce_zero_label=True is set.
                if data_sample.get('reduce_zero_label', False):
                    output_mask = output_mask + 1
                output = Image.fromarray(output_mask.astype(np.uint8))
                output.save(png_filename)

    def compute_metrics(self, results: list) -> Dict[str, float]:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            Dict[str, float]: The computed metrics. The keys are the names of
            the metrics, and the values are corresponding results. The keys
            mainly include aAcc, mIoU, mAcc, mDice, mFscore, mPrecision,
            mRecall.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        if self.format_only:
            logger.info(f'results are saved to {osp.dirname(self.output_dir)}')
            return OrderedDict()
        # convert list of tuples to tuple of lists, e.g.
        # [(A_1, B_1, C_1, D_1), ..., (A_n, B_n, C_n, D_n)] to
        # ([A_1, ..., A_n], ..., [D_1, ..., D_n])
        results = tuple(zip(*results))
        assert len(results) == 4

        total_area_intersect = sum(results[0])
        total_area_union = sum(results[1])
        total_area_pred_label = sum(results[2])
        total_area_label = sum(results[3])
        ret_metrics = self.total_area_to_metrics(
            total_area_intersect, total_area_union, total_area_pred_label,
            total_area_label, self.metrics, self.nan_to_num, self.beta)

        class_names = self.dataset_meta['classes']

        # summary table
        ret_metrics_summary = OrderedDict({
            ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
            for ret_metric, ret_metric_value in ret_metrics.items()
        })
        metrics = dict()
        for key, val in ret_metrics_summary.items():
            if key == 'aAcc':
                metrics[key] = val
            else:
                metrics['m' + key] = val

        # each class table
        ret_metrics.pop('aAcc', None)
        ret_metrics_class = OrderedDict({
            ret_metric: np.round(ret_metric_value * 100, 2)
            for ret_metric, ret_metric_value in ret_metrics.items()
        })
        ret_metrics_class.update({'Class': class_names})
        ret_metrics_class.move_to_end('Class', last=False)
        class_table_data = PrettyTable()
        for key, val in ret_metrics_class.items():
            class_table_data.add_column(key, val)

        print_log('per class results:', logger)
        print_log('\n' + class_table_data.get_string(), logger=logger)

        return metrics

    @staticmethod
    def intersect_and_union(pred_label: torch.tensor, label: torch.tensor,
                            num_classes: int, ignore_index: int):
        """Calculate Intersection and Union.

        Args:
            pred_label (torch.tensor): Prediction segmentation map
                or predict result filename. The shape is (H, W).
            label (torch.tensor): Ground truth segmentation map
                or label filename. The shape is (H, W).
            num_classes (int): Number of categories.
            ignore_index (int): Index that will be ignored in evaluation.

        Returns:
            torch.Tensor: The intersection of prediction and ground truth
                histogram on all classes.
            torch.Tensor: The union of prediction and ground truth histogram
                on all classes.
            torch.Tensor: The prediction histogram on all classes.
            torch.Tensor: The ground truth histogram on all classes.
        """

        mask = (label != ignore_index)
        pred_label = pred_label[mask]
        label = label[mask]

        intersect = pred_label[pred_label == label]
        area_intersect = torch.histc(
            intersect.float(), bins=(num_classes), min=0,
            max=num_classes - 1).cpu()
        area_pred_label = torch.histc(
            pred_label.float(), bins=(num_classes), min=0,
            max=num_classes - 1).cpu()
        area_label = torch.histc(
            label.float(), bins=(num_classes), min=0,
            max=num_classes - 1).cpu()
        area_union = area_pred_label + area_label - area_intersect
        return area_intersect, area_union, area_pred_label, area_label

    @staticmethod
    def total_area_to_metrics(total_area_intersect: np.ndarray,
                              total_area_union: np.ndarray,
                              total_area_pred_label: np.ndarray,
                              total_area_label: np.ndarray,
                              metrics: List[str] = ['mIoU'],
                              nan_to_num: Optional[int] = None,
                              beta: int = 1):
        """Calculate evaluation metrics.

        Args:
            total_area_intersect (np.ndarray): The intersection of prediction
                and ground truth histogram on all classes.
            total_area_union (np.ndarray): The union of prediction and ground
                truth histogram on all classes.
            total_area_pred_label (np.ndarray): The prediction histogram on
                all classes.
            total_area_label (np.ndarray): The ground truth histogram on
                all classes.
            metrics (List[str] | str): Metrics to be evaluated, 'mIoU' and
                'mDice'.
            nan_to_num (int, optional): If specified, NaN values will be
                replaced by the numbers defined by the user. Default: None.
            beta (int): Determines the weight of recall in the combined score.
                Default: 1.

        Returns:
            Dict[str, np.ndarray]: per category evaluation metrics,
                shape (num_classes, ).
        """

        def f_score(precision, recall, beta=1):
            """Calculate the f-score value.

            Args:
                precision (float | torch.Tensor): The precision value.
                recall (float | torch.Tensor): The recall value.
                beta (int): Determines the weight of recall in the combined
                    score. Default: 1.

            Returns:
                [torch.tensor]: The f-score value.
            """
            score = (1 + beta**2) * (precision * recall) / (
                (beta**2 * precision) + recall)
            return score

        if isinstance(metrics, str):
            metrics = [metrics]
        allowed_metrics = ['mIoU', 'mDice', 'mFscore']
        if not set(metrics).issubset(set(allowed_metrics)):
            raise KeyError(f'metrics {metrics} is not supported')

        all_acc = total_area_intersect.sum() / total_area_label.sum()
        ret_metrics = OrderedDict({'aAcc': all_acc})
        for metric in metrics:
            if metric == 'mIoU':
                iou = total_area_intersect / total_area_union
                acc = total_area_intersect / total_area_label
                ret_metrics['IoU'] = iou
                ret_metrics['Acc'] = acc
            elif metric == 'mDice':
                dice = 2 * total_area_intersect / (
                    total_area_pred_label + total_area_label)
                acc = total_area_intersect / total_area_label
                ret_metrics['Dice'] = dice
                ret_metrics['Acc'] = acc
            elif metric == 'mFscore':
                precision = total_area_intersect / total_area_pred_label
                recall = total_area_intersect / total_area_label
                f_value = torch.tensor([
                    f_score(x[0], x[1], beta) for x in zip(precision, recall)
                ])
                ret_metrics['Fscore'] = f_value
                ret_metrics['Precision'] = precision
                ret_metrics['Recall'] = recall

        ret_metrics = {
            metric: value.numpy()
            for metric, value in ret_metrics.items()
        }
        if nan_to_num is not None:
            ret_metrics = OrderedDict({
                metric: np.nan_to_num(metric_value, nan=nan_to_num)
                for metric, metric_value in ret_metrics.items()
            })
        return ret_metrics