Source code for lm_polygraph.stat_calculators.vllm_logprobs_extraction
"""
vLLM logprobs extraction calculator for lm-polygraph.
Extracts greedy_log_likelihoods and greedy_log_probs from vLLM output
or from token_ids/logprobs directly (for truncated scoring).
Unlike greedy_probs.py which both generates and extracts probabilities,
this calculator only extracts probabilities from existing vLLM output.
"""
from typing import Dict
import numpy as np
from lm_polygraph.stat_calculators import StatCalculator
[docs]class VLLMLogprobsExtractionCalculator(StatCalculator):
"""
Extracts greedy_log_likelihoods and greedy_log_probs from vLLM output
or from token_ids/logprobs directly.
Args:
output_matrix: If True, output greedy_log_probs as 2D matrix [T, K]
for PDGap estimator. If False (default), output as
list of 1D arrays for EntropyCalculator.
Usage:
# From vLLM output (original way)
deps = {"vllm_output": output}
result = calculator(deps)
# From token_ids/logprobs directly (for truncated scoring)
deps = {"token_ids": truncated_ids, "logprobs": truncated_logprobs}
result = calculator(deps)
"""
def __init__(self, output_matrix: bool = False):
super().__init__()
self.output_matrix = output_matrix
[docs] @staticmethod
def meta_info():
return (
["greedy_log_likelihoods", "greedy_log_probs", "greedy_tokens"],
["vllm_output"], # Optional dependency - can also use token_ids/logprobs
)
def __call__(self, dependencies: Dict, **kwargs) -> Dict[str, np.ndarray]:
"""
Extract logprobs from vLLM output or from token_ids/logprobs directly.
Args:
dependencies: Dict containing either:
- 'vllm_output': vLLM CompletionOutput object
OR
- 'token_ids': List[int] - token IDs
- 'logprobs': List[Dict] - logprob dicts from vLLM
Returns:
Dict with:
- greedy_log_likelihoods: [[log_likelihood per token]]
- greedy_log_probs: format depends on output_matrix:
- False: [[[log_probs per position]]] for EntropyCalculator
- True: [2D array of shape [T, K]] for PDGap
- greedy_tokens: [[token_ids]] - generated token IDs
"""
# Get token_ids and logprobs from either vllm_output or directly
if "vllm_output" in dependencies:
output = dependencies["vllm_output"]
token_ids = output.token_ids
logprobs = output.logprobs
elif "token_ids" in dependencies and "logprobs" in dependencies:
token_ids = dependencies["token_ids"]
logprobs = dependencies["logprobs"]
else:
raise ValueError(
"VLLMLogprobsExtractionCalculator requires either 'vllm_output' or "
"both 'token_ids' and 'logprobs' in dependencies"
)
if not logprobs or not token_ids:
return {
"greedy_log_likelihoods": [[]],
"greedy_log_probs": [np.array([[]]) if self.output_matrix else []],
"greedy_tokens": [[]],
}
# Extract log-likelihood for each chosen token
log_likelihoods = []
for token_id, logprob_dict in zip(token_ids, logprobs):
if logprob_dict is None:
log_likelihoods.append(-100.0)
elif token_id in logprob_dict:
log_likelihoods.append(logprob_dict[token_id].logprob)
else:
log_likelihoods.append(-100.0)
if self.output_matrix:
# Output as 2D matrix [T, K] for PDGap
# K = max number of logprobs per position (usually top_k, but vLLM
# may return top_k+1 when the greedy token isn't in top_k)
k = max((len(d) for d in logprobs if d is not None), default=0)
matrix = np.full((len(logprobs), k), -np.inf)
for t, logprob_dict in enumerate(logprobs):
if logprob_dict is not None:
for i, info in enumerate(logprob_dict.values()):
matrix[t, i] = info.logprob
greedy_log_probs = [matrix]
else:
# Output as list of 1D arrays for EntropyCalculator
greedy_log_probs = []
for logprob_dict in logprobs:
if logprob_dict is not None:
position_logprobs = np.array(
[info.logprob for info in logprob_dict.values()]
)
else:
position_logprobs = np.array([])
greedy_log_probs.append(position_logprobs)
greedy_log_probs = [greedy_log_probs]
return {
"greedy_log_likelihoods": [log_likelihoods],
"greedy_log_probs": greedy_log_probs,
"greedy_tokens": [list(token_ids)],
}