diff --git a/segeval/agreement/pi.py b/segeval/agreement/pi.py index 791b31b..a72062a 100644 --- a/segeval/agreement/pi.py +++ b/segeval/agreement/pi.py @@ -6,6 +6,7 @@ from __future__ import absolute_import, division from decimal import Decimal from segeval.agreement import __fnc_metric__, __actual_agreement_linear__ +from itertools import chain def __fleiss_pi_linear__(dataset, **kwargs): @@ -29,13 +30,10 @@ def __fleiss_pi_linear__(dataset, **kwargs): A_a = Decimal(sum(all_numerators)) / sum(all_denominators) # Calculate Ae p_e_segs = list() - for boundaries_info in coders_boundaries.values(): - for item in boundaries_info: - boundaries, total_boundaries = item - p_e_seg = Decimal(boundaries) / total_boundaries - p_e_segs.append(p_e_seg) + boundary_ratios = chain.from_iterable(coders_boundaries.values()) + b_placed, b_possible = map(sum, zip(*boundary_ratios)) # Calculate P_e_seg - P_e_seg = Decimal(sum(p_e_segs)) / len(p_e_segs) + P_e_seg = Decimal(b_placed) / b_possible A_e = (P_e_seg ** 2) # Calculate pi pi = (A_a - A_e) / (Decimal('1') - A_e) diff --git a/segeval/similarity/__init__.py b/segeval/similarity/__init__.py index 220811b..185accc 100644 --- a/segeval/similarity/__init__.py +++ b/segeval/similarity/__init__.py @@ -103,7 +103,7 @@ def __boundary_confusion_matrix__(*args, **kwargs): # Add weighted near misses for transposition in statistics['transpositions']: match = transposition[2] - matrix[match][match] += fnc_weight_t([transposition], n_t) + matrix[match][match] += (1 - fnc_weight_t([transposition], n_t)) # Add confusion errors for substitution in statistics['substitutions']: hyp, ref = substitution @@ -113,11 +113,11 @@ def __boundary_confusion_matrix__(*args, **kwargs): hyp, ref = None, None boundary_type, side = addition if side == 'a': - hyp = None - ref = boundary_type - else: # side == 'b' hyp = boundary_type ref = None + else: # side == 'b' + hyp = None + ref = boundary_type assert side == 'a' or side == 'b' matrix[hyp][ref] += 1 return matrix