Source code for lm_polygraph.estimators.semantic_entropy

import numpy as np

from typing import List, Dict, Optional

from .estimator import Estimator


class SemanticEntropy(Estimator):
    """
    Estimates the sequence-level uncertainty of a language model following the method of
    "Semantic entropy" as provided in the paper https://arxiv.org/abs/2302.09664.
    Works only with whitebox models (initialized using lm_polygraph.utils.model.WhiteboxModel).

    This method calculates the generation entropy estimations merged by semantic classes using Monte-Carlo.
    The number of samples is controlled by lm_polygraph.stat_calculators.sample.SamplingGenerationCalculator
    'samples_n' parameter.

    The class_probability_estimation parameter supports two methods:
    - "sum": the log-probability of a semantic class is the log-sum-exp of the
      log-probabilities of the samples assigned to it (no normalization is applied here).
    - "frequency": the probability of a semantic class is the fraction of samples assigned to it.

    The entropy_estimation parameter supports two methods:
    - "mean": Mean entropy estimation from the original paper (https://arxiv.org/abs/2302.09664).
    - "direct": Direct entropy estimation from the proper estimator in the SDLG paper
      (https://arxiv.org/pdf/2406.04306).
    """

    def __init__(
        self,
        verbose: bool = False,
        class_probability_estimation: str = "sum",
        entropy_estimation: str = "mean",
        use_unique_responses: bool = False,
    ):
        """
        Parameters:
            verbose (bool): stored on the instance; not otherwise used by this class.
            class_probability_estimation (str): "sum" or "frequency" (see class docstring).
            entropy_estimation (str): "mean" or "direct" (see class docstring). The value
                is only checked when the estimator is called.
            use_unique_responses (bool): in "sum" mode, count each distinct sample text
                only once inside a semantic class. Ignored in "frequency" mode.

        Raises:
            ValueError: if class_probability_estimation is neither "sum" nor "frequency".
        """
        self.class_probability_estimation = class_probability_estimation
        self.entropy_estimation = entropy_estimation
        self.use_unique_responses = use_unique_responses
        if self.class_probability_estimation == "sum":
            deps = ["sample_log_probs", "sample_texts", "semantic_classes_entail"]
        elif self.class_probability_estimation == "frequency":
            # Class probabilities come from counts only, so log-probs are not required.
            deps = ["sample_texts", "semantic_classes_entail"]
        else:
            raise ValueError(
                f"Unknown class_probability_estimation: {self.class_probability_estimation}. Use 'sum' or 'frequency'."
            )

        super().__init__(deps, "sequence")
        self.verbose = verbose

    def __str__(self):
        """
        Returns the display name of the estimator, which encodes its configuration.

        Raises:
            ValueError: if class_probability_estimation was changed to an unsupported value
                after construction (previously this silently returned None).
        """
        suffix = " (Direct)" if self.entropy_estimation == "direct" else ""
        if self.class_probability_estimation == "sum":
            unique = "Unique" if self.use_unique_responses else ""
            return f"SemanticEntropy{unique}{suffix}"
        if self.class_probability_estimation == "frequency":
            return f"SemanticEntropyEmpirical{suffix}"
        raise ValueError(
            f"Unknown class_probability_estimation: {self.class_probability_estimation}. Use 'sum' or 'frequency'."
        )

    def __call__(self, stats: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Estimates the semantic entropy for each sample in the input statistics.

        Parameters:
            stats (Dict[str, np.ndarray]): input statistics, which for multiple samples includes:
                * generated samples in 'sample_texts',
                * corresponding log probabilities in 'sample_log_probs'
                  (read only when class_probability_estimation is "sum"),
                * semantic classes in 'semantic_classes_entail', a mapping with the keys
                  'class_to_sample' and 'sample_to_class'.
        Returns:
            np.ndarray: float semantic entropy for each sample in input statistics.
                Higher values indicate more uncertain samples.
        """
        hyps_list = stats["sample_texts"]
        if self.class_probability_estimation == "sum":
            loglikelihoods_list = stats["sample_log_probs"]
        else:
            loglikelihoods_list = None

        # Kept on the instance because batched_call and get_unique_hypos_by_class read them.
        semantic_classes = stats["semantic_classes_entail"]
        self._class_to_sample = semantic_classes["class_to_sample"]
        self._sample_to_class = semantic_classes["sample_to_class"]

        return self.batched_call(hyps_list, loglikelihoods_list)

    def batched_call(
        self,
        hyps_list: List[List[str]],
        loglikelihoods_list: Optional[List[List[float]]],
        log_weights: Optional[List[List[float]]] = None,
    ) -> np.ndarray:
        """
        Computes the semantic entropy for every element of the batch.

        Reads self._class_to_sample and self._sample_to_class, which are set by __call__,
        so they must be populated before this method is used.

        Parameters:
            hyps_list (List[List[str]]): sampled texts for each batch element.
            loglikelihoods_list (Optional[List[List[float]]]): log-probabilities of the
                sampled texts; only read when class_probability_estimation is "sum".
            log_weights (Optional[List[List[float]]]): per-sample log-weights used by the
                "mean" estimator. A missing list (or a None entry) means zero log-weight
                for every sample. The argument is never modified.
        Returns:
            np.ndarray: one entropy value per batch element.
        Raises:
            ValueError: on an unsupported class_probability_estimation or entropy_estimation.
        """
        entropies = []
        # Iteration over batch
        for i, hyps in enumerate(hyps_list):
            num_samples = len(hyps)

            if self.class_probability_estimation == "sum":
                # Convert once per batch element instead of once per class.
                sample_lls = np.array(loglikelihoods_list[i])
                class_likelihoods = [
                    sample_lls[np.array(class_idx)]
                    for class_idx in self._class_to_sample[i]
                ]
                if self.use_unique_responses:
                    unique_hyps_ids = self.get_unique_hypos_by_class(i, hyps_list)
                    # Keep one log-probability per distinct text within each class.
                    class_likelihoods = [
                        likelihoods[ids]
                        for ids, likelihoods in zip(unique_hyps_ids, class_likelihoods)
                    ]
                # log(sum(exp(.))) over the members of each class.
                class_lp = [
                    np.logaddexp.reduce(likelihoods)
                    for likelihoods in class_likelihoods
                ]
            elif self.class_probability_estimation == "frequency":
                class_lp = np.log(
                    [
                        len(class_idx) / num_samples
                        for class_idx in self._class_to_sample[i]
                    ]
                )
            else:
                raise ValueError(
                    f"Unknown class_probability_estimation: {self.class_probability_estimation}. Use 'sum' or 'frequency'."
                )

            # Resolve the weights into a local so the caller's list is left untouched.
            sample_log_weights = None if log_weights is None else log_weights[i]
            if sample_log_weights is None:
                sample_log_weights = [0] * num_samples

            sample_to_class = self._sample_to_class[i]
            if self.entropy_estimation == "mean":
                entropy = -np.mean(
                    [
                        class_lp[sample_to_class[j]] * np.exp(sample_log_weights[j])
                        for j in range(num_samples)
                    ]
                )
            elif self.entropy_estimation == "direct":
                # NOTE(review): this sums over samples, not over distinct classes, so a
                # class holding k samples contributes its p * log(p) term k times, and
                # log_weights are not used here. Confirm this matches the intended estimator.
                entropy = -np.sum(
                    [
                        class_lp[sample_to_class[j]]
                        * np.exp(class_lp[sample_to_class[j]])
                        for j in range(num_samples)
                    ]
                )
            else:
                raise ValueError(
                    f"Unknown entropy_estimation: {self.entropy_estimation}"
                )
            entropies.append(entropy)

        return np.array(entropies)

    def get_unique_hypos_by_class(
        self, batch_i: int, hyps_list: List[List[str]]
    ) -> List[np.ndarray]:
        """
        For each semantic class of one batch element, finds the positions (relative to
        the class's own sample list) of the first occurrence of every distinct text.

        Reads self._class_to_sample, which is set by __call__.

        Parameters:
            batch_i (int): index of the batch element.
            hyps_list (List[List[str]]): sampled texts for each batch element.
        Returns:
            List[np.ndarray]: one index array per semantic class, as returned by
                np.unique(..., return_index=True).
        """
        # Convert once instead of once per class.
        hyps = np.array(hyps_list[batch_i])
        return [
            np.unique(hyps[np.array(class_idx)], return_index=True)[1]
            for class_idx in self._class_to_sample[batch_i]
        ]