Source code for lm_polygraph.estimators.kernel_language_entropy

import numpy as np
import scipy.linalg

from typing import Dict

from .estimator import Estimator


def laplacian_matrix(weighted_graph: np.ndarray) -> np.ndarray:
    """
    Build the graph Laplacian L = D - W of a weighted adjacency matrix.

    Parameters:
        weighted_graph (np.ndarray): square matrix W of edge weights.

    Returns:
        np.ndarray: D - W, where D is the diagonal matrix holding the column
            sums of W (these equal the row sums when W is symmetric).
    """
    # Degrees are accumulated along axis 0, i.e. one total per column.
    column_totals = np.sum(weighted_graph, axis=0)
    degree_matrix = np.diag(column_totals)
    return degree_matrix - weighted_graph
def heat_kernel(laplacian: np.ndarray, t: float) -> np.ndarray:
    """
    Compute the heat kernel expm(-t * L) of a graph Laplacian.

    Parameters:
        laplacian (np.ndarray): square Laplacian matrix L.
        t (float): multiplier applied (negated) to L before exponentiation.

    Returns:
        np.ndarray: the matrix exponential of -t * L.
    """
    exponent = -t * laplacian
    return scipy.linalg.expm(exponent)
def normalize_kernel(K: np.ndarray) -> np.ndarray:
    """
    Rescale a kernel matrix by the square roots of its diagonal entries.

    Each entry K[i, j] is divided by d[i] * d[j], where
    d = sqrt(diag(K)) + 1e-12; the small offset keeps the divisor non-zero
    when a diagonal entry is zero.

    Parameters:
        K (np.ndarray): square kernel matrix.

    Returns:
        np.ndarray: the rescaled kernel, same shape as K.
    """
    scale = np.sqrt(np.diag(K)) + 1e-12
    # Broadcasting a column against a row yields the outer product d d^T.
    return K / (scale[:, np.newaxis] * scale[np.newaxis, :])
def scale_entropy(entropy: np.ndarray, n_classes: int) -> np.ndarray:
    """
    Scale an entropy value by the maximum entropy of a discrete distribution.

    The maximum entropy of a discrete distribution over ``n_classes``
    outcomes is -log(1 / n_classes); the input is divided by that value.

    Parameters:
        entropy (np.ndarray): entropy value(s) to scale.
        n_classes (int): number of outcomes of the discrete distribution.

    Returns:
        np.ndarray: entropy / max_entropy. When ``n_classes`` is 1 the
            maximum entropy is zero, so the input is returned unchanged
            instead of dividing by zero.
    """
    if n_classes == 1:
        # -log(1) == 0: dividing would turn the result into NaN/inf.
        return entropy
    max_entropy = -np.log(1.0 / n_classes)
    return entropy / max_entropy
def vn_entropy(
    K: np.ndarray, normalize: bool, scale: bool, jitter: float
) -> np.float64:
    """
    Compute the von Neumann entropy VNE(K) = -Tr(K log K) of a kernel matrix.

    The entropy is evaluated on the spectrum of ``K + jitter * I`` as
    -sum(e * log(e)) over its eigenvalues e. Eigenvalues that are not
    larger than 1e-8 (zero, or negative through round-off) contribute
    nothing, following the convention 0 * log(0) = 0; this also keeps
    log() away from negative inputs, which would make the result NaN.

    Parameters:
        K (np.ndarray): square kernel matrix.
        normalize (bool): if True, K is first passed through
            normalize_kernel and divided by its number of rows.
        scale (bool): if True, the entropy is passed through scale_entropy
            with the number of rows of K as the number of classes.
        jitter (float): multiple of the identity added to K before the
            eigenvalues are computed.

    Returns:
        np.float64: the (optionally scaled) entropy.
    """
    eigenvalue_floor = 1e-8
    n_samples = K.shape[0]

    if normalize:
        K = normalize_kernel(K) / n_samples

    # eigvals returns just the eigenvalues on every NumPy version, so no
    # fallback for the eig() return-type change is needed and eigenvectors
    # are never computed.
    eigvs = np.linalg.eigvals(K + jitter * np.eye(n_samples))
    # Keep only the real part explicitly (a complex -> float cast would do
    # the same but emit a warning).
    eigvs = np.real(eigvs).astype(np.float64)

    positive = eigvs[eigvs > eigenvalue_floor]
    # "0.0 - x" rather than "-x" so an empty spectrum yields +0.0, not -0.0.
    result = 0.0 - np.sum(positive * np.log(positive))

    if scale:
        result = scale_entropy(result, n_samples)
    return np.float64(result)
class KernelLanguageEntropy(Estimator):
    """
    Sequence-level uncertainty estimator implementing "Kernel Language Entropy"
    (paper: https://arxiv.org/pdf/2405.20003).

    Works with both whitebox and blackbox models (initialized using
    lm_polygraph.utils.model.BlackboxModel/WhiteboxModel).

    The score is KLE(Kheat) = VNE(Kheat): the von Neumann entropy of the heat
    kernel of a semantic graph built over the language model's outputs.
    """

    def __init__(
        self,
        t: float = 0.3,
        normalize: bool = True,
        scale: bool = True,
        jitter: float = 0,
    ):
        """
        Parameters:
            t (float): temperature of the heat kernel; default is taken from the paper
            normalize (bool): whether VNE is computed on the normalized kernel
            scale (bool): whether VNE is scaled by the amount of samples
            jitter (float): compute VNE on kernel + jitter * I instead of kernel
        """
        dependencies = ["semantic_matrix_entail", "semantic_matrix_contra"]
        super().__init__(dependencies, "sequence")
        self.t = t
        self.normalize = normalize
        self.scale = scale
        self.jitter = jitter

    def __str__(self):
        return "KernelLanguageEntropy"

    def _sample_entropy(self, entail: np.ndarray, contra: np.ndarray) -> np.float64:
        """Compute KLE(Kheat) for one pair of entailment/contradiction matrices."""
        # Symmetrize both relations by adding each matrix to its transpose.
        sym_entail = entail + entail.T
        sym_contra = contra + contra.T
        # Whatever is neither entailment nor contradiction counts as neutral;
        # the constant is 2 because each symmetrized entry sums two directions.
        sym_neutral = np.full(sym_entail.shape, 2.0) - sym_entail - sym_contra

        # Edge weights: 1 for entailment, 0.5 for neutral, 0 for contradiction.
        graph = sym_entail + 0.5 * sym_neutral
        kernel = heat_kernel(laplacian_matrix(graph), self.t)
        return vn_entropy(kernel, self.normalize, self.scale, self.jitter)

    def __call__(self, stats: Dict[str, np.ndarray]) -> np.ndarray:
        """
        Calculates KLE(Kheat) uncertainty of a language model.

        1. Let S1, ..., Sn be a set of LLM generations.
        2. Let NLI'(Si, Sj) = one-hot prediction over
           (entailment, neutral class, contradiction).
           Note that NLI'(Si, Sj) is calculated in stats.
        3. Let W be a matrix, such that Wij = wNLI'(Si, Sj), where w = (1, 0.5, 0).
        4. Let L be a laplacian matrix of W, i.e. L = D - W.
        5. Let Kheat = heat kernel of W, i.e. Kheat = expm(-t * L),
           where t is a hyperparameter.
        6. Finally, KLE(x) = VNE(Kheat), where VNE(A) = -Tr(A log A).

        Parameters:
            stats (Dict[str, np.ndarray]): must provide
                "semantic_matrix_entail" and "semantic_matrix_contra", each an
                iterable of per-input matrices.

        Returns:
            A list with one entropy value per pair of matrices.
        """
        entail_batch = stats["semantic_matrix_entail"]
        contra_batch = stats["semantic_matrix_contra"]
        return [
            self._sample_entropy(entail, contra)
            for entail, contra in zip(entail_batch, contra_batch)
        ]