Source code for lm_polygraph.estimators.verbalized_2s

import numpy as np
import re

from typing import Dict

from .estimator import Estimator


[docs]class Verbalized2S(Estimator): """ Asks model to output it's confidence in a provided follow-up prompt and extracts the confidence estimate from the model's answer using a provided regex. Only usabe for instruct-finetuned models with chat template support. Adapted from the original implementation in the paper https://arxiv.org/abs/2305.14975 """ def __init__( self, confidence_prompt: str, confidence_regex: str = "", max_new_tokens: int = 10, name_postfix="", ): self.max_new_tokens = max_new_tokens self.confidence_prompt = confidence_prompt self.confidence_regex = confidence_regex self.postfix = name_postfix super().__init__(["input_texts", "greedy_texts"], "sequence") def __str__(self): return f"Verbalized2S{self.postfix}" def __call__(self, stats: Dict[str, np.ndarray]) -> np.ndarray: model = stats["model"] chats = [] prompts = stats["input_texts"] guesses = stats["greedy_texts"] for prompt, guess in zip(prompts, guesses): chats.append( [ {"role": "user", "content": prompt}, {"role": "assistant", "content": guess}, {"role": "user", "content": self.confidence_prompt}, ] ) out = model.generate_texts( chats, min_new_tokens=2, max_new_tokens=self.max_new_tokens, num_return_sequences=1, ) ues = [] conf_re = re.compile(self.confidence_regex) for answer in out: match = re.search(conf_re, answer) try: ue = 1 - float(match.groups()[0]) except AttributeError: ue = np.nan ues.append(ue) return np.array(ues)