# Source code for lm_polygraph.generation_metrics.bleu

import numpy as np
from sacrebleu.metrics import BLEU

from typing import List, Dict
from .generation_metric import GenerationMetric


class BLEUMetric(GenerationMetric):
    """
    Calculates BLEU metric between model-generated texts and ground truth texts.

    Every generated text is scored on its own against exactly one reference
    text using sacrebleu's sentence-level scorer.
    """

    def __init__(self) -> None:
        """Register the metric with its base class and build the BLEU scorer."""
        # NOTE(review): the two arguments are presumably the list of required
        # statistics and the metric level -- confirm against GenerationMetric.
        super().__init__(["greedy_texts"], "sequence")
        # One shared scorer instance is reused for every pair of texts.
        self.scorer = BLEU(effective_order=True, lowercase=True)

    def __str__(self) -> str:
        """Return the display name of the metric."""
        return "BLEU"

    def _score_single(self, t1: str, t2: str) -> float:
        """
        Scores a single hypothesis against a single reference.

        Surrounding whitespace is stripped from both texts first, and then any
        trailing periods are removed, so a final "." does not affect the score.

        Parameters:
            t1 (str): model-generated text (hypothesis)
            t2 (str): ground-truth text (reference)
        Returns:
            float: the `score` attribute of sacrebleu's sentence-level result.
        """
        hypothesis = t1.strip().rstrip(".")
        reference = t2.strip().rstrip(".")
        # sentence_score takes a list of references; exactly one is supplied.
        return self.scorer.sentence_score(hypothesis, [reference]).score

    def __call__(
        self,
        stats: Dict[str, np.ndarray],
        target_texts: List[str],
    ) -> np.ndarray:
        """
        Calculates BLEU score between stats['greedy_texts'] and target_texts.

        Texts are paired positionally with `zip`, so if the two inputs differ
        in length the extra entries of the longer one are ignored.

        Parameters:
            stats (Dict[str, np.ndarray]): input statistics, which for multiple samples includes:
                * model-generated texts in 'greedy_texts'
            target_texts (List[str]): ground-truth texts
        Returns:
            np.ndarray: list of BLEU Scores for each sample in input.
        """
        hypotheses = stats["greedy_texts"]
        return np.array(
            [
                self._score_single(hyp, ref)
                for hyp, ref in zip(hypotheses, target_texts)
            ]
        )