Source code for lm_polygraph.estimators.linguistic_1s

import numpy as np

from typing import Dict

from .estimator import Estimator


class Linguistic1S(Estimator):
    """
    Estimates sequence-level uncertainty of a language model by extracting
    the verbalized confidence expression from the model's answer.

    The model is expected to state its confidence using one of the provided
    expressions, e.g. "Highly Likely", "Probable" etc. Each answer is searched
    for these expressions with a case-sensitive substring match, and the
    uncertainty is ``1 - confidence`` of the matched expression. Answers that
    contain none of the expressions get ``nan``.

    A mapping between expressions of confidence and their corresponding
    numerical values has to be provided. To use this estimator, the model has
    to be correctly prompted to output its confidence in the answer.

    Adapted from the original implementation in the paper
    https://arxiv.org/abs/2305.14975
    """

    def __init__(self, expressions: Dict[str, float], name_postfix: str = ""):
        """
        Parameters:
            expressions (Dict[str, float]): maps each confidence expression
                to its numerical confidence value. Iteration order of the
                mapping decides which expression wins when several unrelated
                expressions occur in the same answer.
            name_postfix (str): suffix appended to the estimator name
                returned by ``__str__``. Default: "".
        """
        self.expressions = expressions
        self.postfix = name_postfix
        # NOTE(review): presumably (required statistics, estimation level)
        # as expected by the Estimator base class -- confirm against it.
        super().__init__(["greedy_texts"], "sequence")

    def __str__(self):
        return f"Linguistic1S{self.postfix}"

    def _uncertainty(self, answer: str) -> float:
        """Return ``1 - confidence`` for the expression found in ``answer``, or ``nan``."""
        matched = [expr for expr in self.expressions if expr in answer]
        if not matched:
            return np.nan
        first = matched[0]
        # A short expression such as "Likely" is also a substring of
        # "Highly Likely"; if it comes first in the mapping it would make the
        # longer entry unreachable. Prefer the longest matched expression
        # that contains the first match, leaving the precedence between
        # unrelated expressions (mapping order) untouched.
        best = max((expr for expr in matched if first in expr), key=len)
        return 1 - self.expressions[best]

    def __call__(self, stats: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Estimates the uncertainty of each answer in ``stats["greedy_texts"]``.

        Parameters:
            stats (Dict[str, np.ndarray]): input statistics; only the
                "greedy_texts" entry (an iterable of answer strings) is read.
        Returns:
            np.ndarray: one value per answer, ``1 - confidence`` of the
                matched expression, or ``nan`` when no expression is found.
        """
        return np.array([self._uncertainty(answer) for answer in stats["greedy_texts"]])