Source code for lm_polygraph.utils.ensemble_utils.ensemble_greedy

import warnings
from dataclasses import dataclass
from typing import Optional, Union, Dict, List, Tuple
from scipy.stats import entropy

import torch
import torch.distributed as dist
from torch import nn
from transformers import GenerationMixin
from transformers.generation.logits_process import (
    LogitsProcessorList,
)
from transformers.generation.stopping_criteria import (
    StoppingCriteriaList,
    validate_stopping_criteria,
)
from transformers.generation.streamers import BaseStreamer
from transformers.generation.utils import ModelOutput

try:
    from transformers.generation.utils import (
        GreedySearchOutput,
        GreedySearchDecoderOnlyOutput,
    )
except ImportError:
    # transformers >= 5.0 renamed these classes
    from transformers.generation.utils import (
        GenerateNonBeamOutput as GreedySearchOutput,
        GenerateDecoderOnlyOutput as GreedySearchDecoderOnlyOutput,
    )


[docs]class EnsembleGreedyMixin(GenerationMixin):
# NOTE(review): the `class EnsembleGreedyMixin(GenerationMixin):` header sits directly above
# this definition, so this dataclass is part of that class body; indent it one level once the
# surrounding file's formatting is restored.
@dataclass
class GreedySearchEncoderDecoderOutput(ModelOutput):
    """
    Base class for outputs of encoder-decoder generation models using greedy search.

    Hidden states and attention weights of the decoder (respectively the encoder) can be
    accessed via the encoder_attentions and the encoder_hidden_states attributes
    (respectively the decoder_attentions and the decoder_hidden_states attributes).

    Every field defaults to `None`. The field order below is part of the interface
    (positional construction), so new fields must not be inserted in the middle.

    Args:
        sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            The generated sequences. The second dimension (sequence_length) is either equal
            to `max_length` or shorter if all batches finished early due to the
            `eos_token_id`.
        sequences_scores (`torch.FloatTensor`, *optional*):
            NOTE(review): not described by the original documentation; presumably one
            aggregate score per generated sequence -- confirm against the code that fills it.
        scores (`tuple(torch.FloatTensor)` *optional*, returned when `output_scores=True` is
            passed or when `config.output_scores=True`):
            Processed prediction scores of the language modeling head (scores for each
            vocabulary token before SoftMax) at each generation step. Tuple of
            `torch.FloatTensor` with up to `max_new_tokens` elements (one element for each
            generated token), with each tensor of shape `(batch_size, config.vocab_size)`.
        models_scores (`tuple(list(torch.FloatTensor))`, *optional*):
            NOTE(review): not described by the original documentation; presumably, for each
            generation step, the list of scores produced by the individual ensemble
            members -- confirm against the code that fills it.
        models_hypo_next_token_logits (`tuple(torch.FloatTensor)`, *optional*):
            NOTE(review): not described by the original documentation; presumably the
            per-model next-token logits for the current hypothesis at each step -- confirm.
        pe_uncertainties (`dict(str, list(torch.FloatTensor))`, *optional*):
            Mapping from a string key to a list of tensors. NOTE(review): the meaning of
            "pe" and of the keys is not visible here -- confirm against the producer.
        ep_uncertainties (`dict(str, list(torch.FloatTensor))`, *optional*):
            Mapping from a string key to a list of tensors. NOTE(review): the meaning of
            "ep" and of the keys is not visible here -- confirm against the producer.
        encoder_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when
            `output_attentions=True` is passed or `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer of the decoder) of shape
            `(batch_size, num_heads, sequence_length, sequence_length)`.
        encoder_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when
            `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the
            output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
        decoder_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when
            `output_attentions=True` is passed or `config.output_attentions=True`):
            Tuple (one element for each generated token) of tuples (one element for each
            layer of the decoder) of `torch.FloatTensor` of shape
            `(batch_size, num_heads, generated_length, sequence_length)`.
        cross_attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when
            `output_attentions=True` is passed or `config.output_attentions=True`):
            Tuple (one element for each generated token) of tuples (one element for each
            layer of the decoder) of `torch.FloatTensor` of shape
            `(batch_size, num_heads, generated_length, sequence_length)`.
        decoder_hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when
            `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple (one element for each generated token) of tuples (one element for each
            layer of the decoder) of `torch.FloatTensor` of shape
            `(batch_size, generated_length, hidden_size)`.
    """

    # Generated token ids and their scores.
    sequences: torch.LongTensor = None
    sequences_scores: Optional[torch.FloatTensor] = None
    scores: Optional[Tuple[torch.FloatTensor]] = None

    # Additional outputs beyond the standard greedy-search fields (see the hedged
    # notes in the docstring; their exact contents are not visible in this block).
    models_scores: Optional[Tuple[List[torch.FloatTensor]]] = None
    models_hypo_next_token_logits: Optional[Tuple[torch.FloatTensor]] = None
    pe_uncertainties: Optional[Dict[str, List[torch.FloatTensor]]] = None
    ep_uncertainties: Optional[Dict[str, List[torch.FloatTensor]]] = None

    # Encoder-side attention weights and hidden states.
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None

    # Decoder-side attention weights and hidden states, one entry per generated token.
    decoder_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
    cross_attentions: Optional[Tuple[Tuple[torch.FloatTensor]]] = None
    decoder_hidden_states: Optional[Tuple[Tuple[torch.FloatTensor]]] = None