# Source code for lm_polygraph.estimators.boostedprob_score

import numpy as np
import torch
from typing import Dict
from .estimator import Estimator
from boostedprob import calculate_boostedprob


class BoostedProbSequence(Estimator):
    """
    Estimates the sequence-level uncertainty of a language model by taking the
    average of the token-level scores obtained with BoostedProb
    (https://aclanthology.org/2025.emnlp-main.166.pdf)
    and negating it, so that larger values mean higher uncertainty.
    """

    def __init__(self):
        # Both statistics are read in __call__, so both must be declared as
        # dependencies; declaring only "greedy_log_probs" leaves
        # "greedy_tokens" to be present by accident.
        super().__init__(["greedy_log_probs", "greedy_tokens"], "sequence")

    def __str__(self):
        return "BoostedProbSequence"

    def __call__(self, stats: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Estimates the negated average of boosted model probabilities over
        each sequence.

        Parameters:
            stats (Dict[str, np.ndarray]): input statistics, which for
                multiple samples includes:
                * Full distribution of log p(y_i | y_<i, x) in
                  'greedy_log_probs' (one nr_tokens x vocab_size entry per
                  sample)
                * The generated tokens of each sample in 'greedy_tokens'
                  (presumably token ids aligned with the rows of
                  'greedy_log_probs' -- confirm against the stat calculator)
        Returns:
            np.ndarray: one value per sample, equal to minus the mean of the
                scores returned by `calculate_boostedprob` for that sample.
                Higher values indicate more uncertain samples.
        """
        lprob_distributions = stats["greedy_log_probs"]
        output_tokens = stats["greedy_tokens"]

        # One tensor of scores per sample; sequences may differ in length, so
        # samples are processed one at a time rather than as a single batch.
        token_scores = [
            calculate_boostedprob(torch.tensor(lprobs), torch.tensor(tokens))
            for lprobs, tokens in zip(lprob_distributions, output_tokens)
        ]

        # Negate the mean so that a high boosted probability (a confident
        # model) maps to a low uncertainty value.
        return np.array([-np.mean(scores.numpy()) for scores in token_scores])