Skip to content

Utils

Shared functions related to benchmarks.

compare_outputs(pytorch_output, engine_output) #

Compare two model outputs by computing the mean absolute difference between them.

Parameters:

Name Type Description Default
pytorch_output ndarray

reference output

required
engine_output ndarray

other engine output

required

Returns:

Type Description
float

difference between outputs as a single float

Source code in src/transformer_deploy/benchmarks/utils.py
def compare_outputs(pytorch_output: np.ndarray, engine_output: np.ndarray) -> float:
    """
    Measure how far an engine output is from the reference output.

    The score is the mean of the element-wise absolute differences; it is 0 when both outputs are equal.

    :param pytorch_output: reference output
    :param engine_output: other engine output
    :return: difference between outputs as a single float
    """
    absolute_error = np.abs(pytorch_output - engine_output)
    return np.mean(absolute_error)

generate_input(seq_len, batch_size, input_names, device='cuda') #

Generate dummy inputs.

Parameters:

Name Type Description Default
seq_len int

number of tokens per input.

required
batch_size int

first dimension of the tensor

required
input_names List[str]

tensor input names to generate

required
device str

where to store tensors (Pytorch only). One of [cpu, cuda]

'cuda'

Returns:

Type Description
Tuple[Dict[str, torch.Tensor], Dict[str, numpy.ndarray]]

a tuple of two dicts of tensors keyed by input name: Pytorch tensors first, then numpy arrays

Source code in src/transformer_deploy/benchmarks/utils.py
def generate_input(
    seq_len: int, batch_size: int, input_names: List[str], device: str = "cuda"
) -> Tuple[Dict[str, torch.Tensor], Dict[str, np.ndarray]]:
    """
    Build dummy model inputs: one int32 tensor filled with ones per input name.

    :param seq_len: number of tokens per input.
    :param batch_size: first dimension of the tensor
    :param input_names: tensor input names to generate
    :param device: where to store tensors (Pytorch only). One of [cpu, cuda]
    :return: a tuple of 2 dicts keyed by input name: Pytorch tensors, then their numpy counterparts
    """
    assert device in ("cpu", "cuda")
    inputs_pytorch: Dict[str, torch.Tensor] = {
        name: torch.ones(size=(batch_size, seq_len), dtype=torch.int32, device=device) for name in input_names
    }
    # the numpy version is always built from a host (CPU) view of each Pytorch tensor
    inputs_onnx: Dict[str, np.ndarray] = dict()
    for name, tensor in inputs_pytorch.items():
        inputs_onnx[name] = np.ascontiguousarray(tensor.detach().cpu().numpy())
    return inputs_pytorch, inputs_onnx

generate_multiple_inputs(seq_len, batch_size, input_names, nb_inputs_to_gen, device) #

Generate multiple batches of dummy inputs.

Parameters:

Name Type Description Default
seq_len int

sequence length to generate

required
batch_size int

number of sequences per batch to generate

required
input_names List[str]

tensor input names to generate

required
nb_inputs_to_gen int

number of batches of sequences to generate

required
device str

one of [cpu, cuda]

required

Returns:

Type Description
Tuple[List[Dict[str, torch.Tensor]], List[Dict[str, numpy.ndarray]]]

generated sequences

Source code in src/transformer_deploy/benchmarks/utils.py
def generate_multiple_inputs(
    seq_len: int, batch_size: int, input_names: List[str], nb_inputs_to_gen: int, device: str
) -> Tuple[List[Dict[str, torch.Tensor]], List[Dict[str, np.ndarray]]]:
    """
    Generate several batches of inputs by calling `generate_input` repeatedly.

    :param seq_len: sequence length to generate
    :param batch_size: number of sequences per batch to generate
    :param input_names: tensor input names to generate
    :param nb_inputs_to_gen: number of batches of sequences to generate
    :param device: one of [cpu, cuda]
    :return: generated sequences, as a list of Pytorch inputs and a list of the matching numpy inputs
    """
    # each item is a (Pytorch inputs, numpy inputs) pair for one batch
    batches = [
        generate_input(seq_len=seq_len, batch_size=batch_size, input_names=input_names, device=device)
        for _ in range(nb_inputs_to_gen)
    ]
    all_inputs_pytorch: List[Dict[str, torch.Tensor]] = [pytorch_batch for pytorch_batch, _ in batches]
    all_inputs_onnx: List[Dict[str, np.ndarray]] = [onnx_batch for _, onnx_batch in batches]
    return all_inputs_pytorch, all_inputs_onnx

print_timings(name, timings) #

Format and print inference latencies.

Parameters:

Name Type Description Default
name str

inference engine name

required
timings List[float]

latencies measured during the inference

required
Source code in src/transformer_deploy/benchmarks/utils.py
def print_timings(name: str, timings: List[float]) -> None:
    """
    Print a one-line summary of inference latencies.

    Each statistic is multiplied by 1e3 and printed with an "ms" suffix and 2 decimals.

    :param name: inference engine name
    :param timings: latencies measured during the inference
    """
    to_ms = 1e3
    average = to_ms * np.mean(timings)
    deviation = to_ms * np.std(timings)
    fastest = to_ms * np.min(timings)
    slowest = to_ms * np.max(timings)
    p50, p95, p99 = to_ms * np.percentile(timings, [50, 95, 99])
    # (label, value) pairs in the order they appear on the printed line
    stats = [
        ("mean", average),
        ("sd", deviation),
        ("min", fastest),
        ("max", slowest),
        ("median", p50),
        ("95p", p95),
        ("99p", p99),
    ]
    summary = ", ".join(f"{label}={value:.2f}ms" for label, value in stats)
    print(f"[{name}] {summary}")

setup_logging(level=20) #

Configure the root Python logger.

Parameters:

Name Type Description Default
level int

logger level

20
Source code in src/transformer_deploy/benchmarks/utils.py
def setup_logging(level: int = logging.INFO) -> None:
    """
    Configure the root Python logger with a timestamped message format.
    :param level: logger level
    """
    message_format = "%(asctime)s %(levelname)-8s %(message)s"
    date_format = "%m/%d/%Y %H:%M:%S"
    logging.basicConfig(level=level, format=message_format, datefmt=date_format)

to_numpy(tensors) #

Convert list of torch / numpy tensors to a numpy tensor

Parameters:

Name Type Description Default
tensors List[Union[numpy.ndarray, torch.Tensor]]

list of torch / numpy tensors

required

Returns:

Type Description
ndarray

numpy tensor

Source code in src/transformer_deploy/benchmarks/utils.py
def to_numpy(tensors: List[Union[np.ndarray, torch.Tensor]]) -> np.ndarray:
    """
    Convert list of torch / numpy tensors to a numpy tensor.

    Each item is converted on its own, so a list mixing torch tensors and numpy arrays is supported.
    Torch tensors are detached and moved to CPU before conversion. An empty list gives an empty array.

    :param tensors: list of torch / numpy tensors
    :return: numpy tensor
    :raises TypeError: if an item is neither a torch tensor nor a numpy array
    """
    arrays: List[np.ndarray] = list()
    for tensor in tensors:
        if isinstance(tensor, torch.Tensor):
            arrays.append(tensor.detach().cpu().numpy())
        elif isinstance(tensor, np.ndarray):
            arrays.append(tensor)
        else:
            # TypeError is an Exception subclass, so callers catching Exception still work
            raise TypeError(f"unknown tensor type: {type(tensor)}")
    return np.asarray(arrays)

track_infer_time(buffer) #

A context manager to perform latency measures

Parameters:

Name Type Description Default
buffer List[int]

a list in which the latency of each input is saved

required
Source code in src/transformer_deploy/benchmarks/utils.py
@contextmanager
def track_infer_time(buffer: List[float]) -> None:
    """
    A context manager to perform latency measures.

    The duration of the `with` body, measured with `time.perf_counter` (fractional seconds), is appended
    to `buffer` once the body has completed. If the body raises, nothing is appended.

    :param buffer: a list where to save latencies (float, in seconds) for each input
    """
    start = time.perf_counter()
    yield
    # only reached when the body finished without raising
    buffer.append(time.perf_counter() - start)