Skip to content

Metrics

npfl138.metrics.BIOEncodingF1Score

Bases: Module

Metric for evaluating F1 score of BIO-encoded spans.

The metric employs a simple heuristic to handle invalid sequences of BIO tags. Notably:

  • If there is an I tag without preceding B/I tag, it is considered a B tag.
  • If the type of an I tag does not match the type of the preceding tag, the type of this I tag is ignored (i.e., considered the same as the preceding tag type).
Source code in npfl138/metrics.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class BIOEncodingF1Score(torch.nn.Module):
    """Metric for evaluating F1 score of BIO-encoded spans.

    The metric employs a simple heuristic to handle invalid sequences of BIO tags.
    Notably:

    - If there is an `I` tag without preceding `B/I` tag, it is considered a `B` tag.
    - If the type of an `I` tag does not match the type of the preceding tag, the type
      of this `I` tag is ignored (i.e., considered the same as the preceding tag type).

    Positions whose gold tag equals `ignore_index` are skipped in both the gold and
    the predicted sequence, so span offsets of the two sequences stay aligned.
    """
    def __init__(self, labels: list[str], ignore_index: int) -> None:
        """Construct a new BIOEncodingF1Score metric.

        Parameters:
          labels: The list of BIO-encoded labels.
          ignore_index: The gold index to ignore when computing the F1 score.
        """
        super().__init__()
        # The counters are non-persistent buffers, so they are not stored in `state_dict`.
        self.register_buffer("tp", torch.tensor(0, dtype=torch.int64), persistent=False)
        self.register_buffer("fp", torch.tensor(0, dtype=torch.int64), persistent=False)
        self.register_buffer("fn", torch.tensor(0, dtype=torch.int64), persistent=False)
        self._labels = labels
        self._ignore_index = ignore_index

    def reset(self) -> Self:
        """Reset the metric to its initial state.

        Returns:
          self
        """
        self.tp.zero_()
        self.fp.zero_()
        self.fn.zero_()
        return self

    def update(self, pred: torch.Tensor, true: torch.Tensor) -> Self:
        """Update the metric with new predictions and targets.

        Parameters:
          pred: The predicted label indices.
          true: The gold label indices; positions equal to `ignore_index` are skipped.

        Returns:
          self
        """
        ignore = self._ignore_index
        # Append one ignored position to the last dimension so that a span reaching
        # the end of a sequence is closed and cannot continue into the next sequence.
        # Converting to Python lists once avoids creating a tensor per visited tag.
        true_tags = torch.nn.functional.pad(true, (0, 1), value=ignore).view(-1).tolist()
        pred_tags = torch.nn.functional.pad(pred, (0, 1), value=ignore).view(-1).tolist()
        if len(pred_tags) == len(true_tags):
            # Ignore predictions on gold-ignored positions; otherwise the offsets of
            # predicted spans would drift away from the offsets of gold spans.
            pred_tags = [ignore if gold == ignore else tag for tag, gold in zip(pred_tags, true_tags)]

        spans_pred, spans_true = set(), set()
        for spans, tags in ((spans_true, true_tags), (spans_pred, pred_tags)):
            span, start, offset = None, 0, 0
            for tag in tags:
                label = "O" if tag == ignore else self._labels[tag]
                # The span type is an empty string for untyped labels (plain `B`/`I`),
                # so an open span must be detected with `is None`, not by truthiness.
                if span is not None and label.startswith(("O", "B")):
                    spans.add((start, offset, span))
                    span = None
                if span is None and label.startswith(("B", "I")):
                    span, start = label[1:], offset
                if tag != ignore:
                    offset += 1
        self.tp.add_(len(spans_pred & spans_true))
        self.fp.add_(len(spans_pred - spans_true))
        self.fn.add_(len(spans_true - spans_pred))
        return self

    def compute(self) -> torch.Tensor:
        """Compute the F1 score.

        Returns:
          The F1 score `2 * tp / (2 * tp + fp + fn)`; the denominator is clamped
          to at least one, so the result is zero when no spans were seen.
        """
        return 2 * self.tp / torch.max(2 * self.tp + self.fp + self.fn, torch.ones_like(self.tp))

__init__

__init__(labels: list[str], ignore_index: int) -> None

Construct a new BIOEncodingF1Score metric.

Parameters:

  • labels (list[str]) –

    The list of BIO-encoded labels.

  • ignore_index (int) –

    The gold index to ignore when computing the F1 score.

Source code in npfl138/metrics.py
23
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, labels: list[str], ignore_index: int) -> None:
    """Create the metric with zeroed span counters.

    Parameters:
      labels: The list of BIO-encoded labels.
      ignore_index: The gold index to ignore when computing the F1 score.
    """
    super().__init__()
    # True positives, false positives and false negatives are kept as scalar
    # non-persistent buffers, so they are left out of the module `state_dict`.
    for counter_name in ("tp", "fp", "fn"):
        self.register_buffer(counter_name, torch.zeros((), dtype=torch.int64), persistent=False)
    self._labels, self._ignore_index = labels, ignore_index

reset

reset() -> Self

Reset the metric to its initial state.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
37
38
39
40
41
42
43
44
45
46
def reset(self) -> Self:
    """Zero all accumulated span counters in place.

    Returns:
      self
    """
    for counter in (self.tp, self.fp, self.fn):
        counter.zero_()
    return self

update

update(pred: Tensor, true: Tensor) -> Self

Update the metric with new predictions and targets.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def update(self, pred: torch.Tensor, true: torch.Tensor) -> Self:
    """Update the metric with new predictions and targets.

    Parameters:
      pred: The predicted label indices.
      true: The gold label indices; positions equal to `ignore_index` are skipped.

    Returns:
      self
    """
    ignore = self._ignore_index
    # Append one ignored position to the last dimension so that a span reaching
    # the end of a sequence is closed and cannot continue into the next sequence.
    # Converting to Python lists once avoids creating a tensor per visited tag.
    true_tags = torch.nn.functional.pad(true, (0, 1), value=ignore).view(-1).tolist()
    pred_tags = torch.nn.functional.pad(pred, (0, 1), value=ignore).view(-1).tolist()
    if len(pred_tags) == len(true_tags):
        # Ignore predictions on gold-ignored positions; otherwise the offsets of
        # predicted spans would drift away from the offsets of gold spans.
        pred_tags = [ignore if gold == ignore else tag for tag, gold in zip(pred_tags, true_tags)]

    spans_pred, spans_true = set(), set()
    for spans, tags in ((spans_true, true_tags), (spans_pred, pred_tags)):
        span, start, offset = None, 0, 0
        for tag in tags:
            label = "O" if tag == ignore else self._labels[tag]
            # The span type is an empty string for untyped labels (plain `B`/`I`),
            # so an open span must be detected with `is None`, not by truthiness.
            if span is not None and label.startswith(("O", "B")):
                spans.add((start, offset, span))
                span = None
            if span is None and label.startswith(("B", "I")):
                span, start = label[1:], offset
            if tag != ignore:
                offset += 1
    self.tp.add_(len(spans_pred & spans_true))
    self.fp.add_(len(spans_pred - spans_true))
    self.fn.add_(len(spans_true - spans_pred))
    return self

compute

compute() -> Tensor

Compute the F1 score.

Source code in npfl138/metrics.py
73
74
75
def compute(self) -> torch.Tensor:
    """Return the F1 score computed from the accumulated span counters."""
    doubled_tp = 2 * self.tp
    # The denominator is clamped to at least one, which yields zero for an empty metric.
    denominator = torch.max(doubled_tp + self.fp + self.fn, torch.ones_like(self.tp))
    return doubled_tp / denominator

npfl138.metrics.EditDistance

Bases: Module

An implementation of mean edit distance metric.

Source code in npfl138/metrics.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
class EditDistance(torch.nn.Module):
    """An implementation of mean edit distance metric."""

    def __init__(self, ignore_index: int | None = None) -> None:
        """Construct a new EditDistance metric.

        Parameters:
          ignore_index: If not None, the gold index to ignore when computing the edit distance.
            The default is None, which means no index is ignored.
        """
        super().__init__()
        self._ignore_index = ignore_index
        self.register_buffer("edit_distances", torch.tensor(0.0, dtype=torch.float32), persistent=False)
        self.register_buffer("count", torch.tensor(0, dtype=torch.int64), persistent=False)

    def reset(self) -> Self:
        """Reset the metric to its initial state.

        Returns:
          self
        """
        self.edit_distances.zero_()
        self.count.zero_()
        return self

    def update(self, y_preds: Sequence[Sequence[Any]], y_trues: Sequence[Sequence[Any]]) -> Self:
        """Update the metric with new predictions and targets.

        Returns:
          self
        """
        import torchaudio

        for y_pred, y_true in zip(y_preds, y_trues):
            if self._ignore_index is not None:
                y_true = [y for y in y_true if y != self._ignore_index]
                y_pred = [y for y in y_pred if y != self._ignore_index]
            self.edit_distances += torchaudio.functional.edit_distance(y_pred, y_true) / (len(y_true) or 1)
            self.count += 1
        return self

    def compute(self) -> torch.Tensor:
        """Compute the mean edit distance."""
        return self.edit_distances / self.count

__init__

__init__(ignore_index: int | None = None) -> None

Construct a new EditDistance metric.

Parameters:

  • ignore_index (int | None, default: None ) –

    If not None, the gold index to ignore when computing the edit distance. The default is None, which means no index is ignored.

Source code in npfl138/metrics.py
81
82
83
84
85
86
87
88
89
90
91
def __init__(self, ignore_index: int | None = None) -> None:
    """Create the metric with zeroed accumulators.

    Parameters:
      ignore_index: If not None, the gold index to ignore when computing the edit distance.
        The default is None, which means no index is ignored.
    """
    super().__init__()
    self._ignore_index = ignore_index
    # Both accumulators are scalar non-persistent buffers.
    self.register_buffer("edit_distances", torch.zeros((), dtype=torch.float32), persistent=False)
    self.register_buffer("count", torch.zeros((), dtype=torch.int64), persistent=False)

reset

reset() -> Self

Reset the metric to its initial state.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
 93
 94
 95
 96
 97
 98
 99
100
101
def reset(self) -> Self:
    """Zero the accumulated distances and the sequence count in place.

    Returns:
      self
    """
    for accumulator in (self.edit_distances, self.count):
        accumulator.zero_()
    return self

update

update(
    y_preds: Sequence[Sequence[Any]], y_trues: Sequence[Sequence[Any]]
) -> Self

Update the metric with new predictions and targets.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def update(self, y_preds: Sequence[Sequence[Any]], y_trues: Sequence[Sequence[Any]]) -> Self:
    """Accumulate normalized edit distances of the given sequence pairs.

    Parameters:
      y_preds: The predicted sequences.
      y_trues: The gold sequences, paired with `y_preds` by position.

    Returns:
      self
    """
    # Imported lazily, so torchaudio is required only when the metric is updated.
    import torchaudio

    skipped = self._ignore_index
    for hypothesis, reference in zip(y_preds, y_trues):
        if skipped is not None:
            # The ignored index is removed from both the gold and the predicted sequence.
            reference = [item for item in reference if item != skipped]
            hypothesis = [item for item in hypothesis if item != skipped]
        distance = torchaudio.functional.edit_distance(hypothesis, reference)
        # An empty gold sequence is normalized by one instead of by zero.
        self.edit_distances += distance / (len(reference) or 1)
        self.count += 1
    return self

compute

compute() -> Tensor

Compute the mean edit distance.

Source code in npfl138/metrics.py
119
120
121
def compute(self) -> torch.Tensor:
    """Return the accumulated normalized distances divided by the pair count."""
    total, pairs = self.edit_distances, self.count
    return total / pairs

npfl138.metrics.MaskIoU

Bases: Module

An implementation of mean IoU metric computed on binary masks.

Source code in npfl138/metrics.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class MaskIoU(torch.nn.Module):
    """Mean intersection-over-union metric computed on binary masks."""
    def __init__(self, mask_shape: Sequence[int], from_logits: bool = False) -> None:
        """Create the metric with zeroed accumulators.

        Parameters:
          mask_shape: The shape of the input masks as (H, W).
          from_logits: If `True`, the predictions are expected to be logits; otherwise, they
            are probabilities (the default). However, the target masks must always be probabilities.
        """
        super().__init__()
        # Both accumulators are scalar non-persistent buffers.
        self.register_buffer("iou", torch.zeros((), dtype=torch.float32), persistent=False)
        self.register_buffer("count", torch.zeros((), dtype=torch.int64), persistent=False)
        # Number of elements of a single mask, used to flatten the inputs.
        self._mask_size = math.prod(mask_shape)
        # Logits are thresholded at 0.0, probabilities at 0.5.
        if from_logits:
            self._prediction_threshold = 0.0
        else:
            self._prediction_threshold = 0.5

    def reset(self) -> Self:
        """Zero the accumulated IoU sum and the mask count in place.

        Returns:
          self
        """
        for accumulator in (self.iou, self.count):
            accumulator.zero_()
        return self

    def update(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> Self:
        """Accumulate the IoU of every predicted/gold mask pair.

        Parameters:
          y_pred: The predicted masks, thresholded by the prediction threshold.
          y_true: The gold masks, thresholded at 0.5.

        Returns:
          self
        """
        flat_shape = [-1, self._mask_size]
        predicted = (y_pred.detach() >= self._prediction_threshold).reshape(flat_shape)
        target = (y_true.detach() >= 0.5).reshape(flat_shape)

        overlap = (predicted & target).float().sum(dim=1)
        covered = (predicted | target).float().sum(dim=1)
        # Two empty masks are considered a perfect match, giving an IoU of one.
        per_mask_iou = torch.where(covered == 0, 1., overlap / covered)

        self.iou += per_mask_iou.sum()
        self.count += per_mask_iou.shape[0]
        return self

    def compute(self) -> torch.Tensor:
        """Return the accumulated IoU sum divided by the number of masks."""
        total, masks = self.iou, self.count
        return total / masks

__init__

__init__(mask_shape: Sequence[int], from_logits: bool = False) -> None

Construct a new MaskIoU metric.

Parameters:

  • mask_shape (Sequence[int]) –

    The shape of the input masks as (H, W).

  • from_logits (bool, default: False ) –

    If True, the predictions are expected to be logits; otherwise, they are probabilities (the default). However, the target masks must always be probabilities.

Source code in npfl138/metrics.py
126
127
128
129
130
131
132
133
134
135
136
137
138
def __init__(self, mask_shape: Sequence[int], from_logits: bool = False) -> None:
    """Create the metric with zeroed accumulators.

    Parameters:
      mask_shape: The shape of the input masks as (H, W).
      from_logits: If `True`, the predictions are expected to be logits; otherwise, they
        are probabilities (the default). However, the target masks must always be probabilities.
    """
    super().__init__()
    # Both accumulators are scalar non-persistent buffers.
    self.register_buffer("iou", torch.zeros((), dtype=torch.float32), persistent=False)
    self.register_buffer("count", torch.zeros((), dtype=torch.int64), persistent=False)
    # Number of elements of a single mask, used to flatten the inputs.
    self._mask_size = math.prod(mask_shape)
    # Logits are thresholded at 0.0, probabilities at 0.5.
    if from_logits:
        self._prediction_threshold = 0.0
    else:
        self._prediction_threshold = 0.5

reset

reset() -> Self

Reset the metric to its initial state.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
140
141
142
143
144
145
146
147
148
def reset(self) -> Self:
    """Zero the accumulated IoU sum and the mask count in place.

    Returns:
      self
    """
    for accumulator in (self.iou, self.count):
        accumulator.zero_()
    return self

update

update(y_pred: Tensor, y_true: Tensor) -> Self

Update the metric with new predictions and targets.

Returns:

  • Self

    self

Source code in npfl138/metrics.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def update(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> Self:
    """Accumulate the IoU of every predicted/gold mask pair.

    Parameters:
      y_pred: The predicted masks, thresholded by the prediction threshold.
      y_true: The gold masks, thresholded at 0.5.

    Returns:
      self
    """
    flat_shape = [-1, self._mask_size]
    predicted = (y_pred.detach() >= self._prediction_threshold).reshape(flat_shape)
    target = (y_true.detach() >= 0.5).reshape(flat_shape)

    overlap = (predicted & target).float().sum(dim=1)
    covered = (predicted | target).float().sum(dim=1)
    # Two empty masks are considered a perfect match, giving an IoU of one.
    per_mask_iou = torch.where(covered == 0, 1., overlap / covered)

    self.iou += per_mask_iou.sum()
    self.count += per_mask_iou.shape[0]
    return self

compute

compute() -> Tensor

Compute the mean IoU.

Source code in npfl138/metrics.py
167
168
169
def compute(self) -> torch.Tensor:
    """Return the accumulated IoU sum divided by the number of masks."""
    total, masks = self.iou, self.count
    return total / masks