Skip to content

Convert

This module contains code related to the client interface.

check_accuracy(engine_name, pytorch_output, engine_output, tolerance) #

Compare engine predictions with a reference. Assert that the difference is under a threshold.

Parameters:

Name Type Description Default
engine_name str

string used in error message, if any

required
pytorch_output List[torch.Tensor]

reference output used for the comparison

required
engine_output List[Union[numpy.ndarray, torch.Tensor]]

output from the engine

required
tolerance float

if difference in outputs is above threshold, an error will be raised

required
Source code in src/transformer_deploy/convert.py
def check_accuracy(
    engine_name: str,
    pytorch_output: List[torch.Tensor],
    engine_output: List[Union[np.ndarray, torch.Tensor]],
    tolerance: float,
) -> None:
    """
    Compare engine predictions with a reference.
    Raise an error when the difference is above a threshold.

    Both outputs are passed through `to_numpy` and the discrepancy is
    whatever `compare_outputs` returns for them.

    :param engine_name: string used in error message, if any
    :param pytorch_output: reference output used for the comparison
    :param engine_output: output from the engine
    :param tolerance: if difference in outputs is above threshold, an error will be raised
    :raises AssertionError: if the measured discrepancy is greater than `tolerance`
    """
    pytorch_output = to_numpy(pytorch_output)
    engine_output = to_numpy(engine_output)
    discrepancy = compare_outputs(pytorch_output=pytorch_output, engine_output=engine_output)
    if discrepancy <= tolerance:
        return
    # Raised explicitly rather than with an `assert` statement: asserts are
    # removed when Python runs with -O, which would silently disable this check.
    # AssertionError is kept so existing callers observe the same exception type.
    # The (potentially large) message is only built on the failure path.
    raise AssertionError(
        f"{engine_name} discrepency is too high ({discrepancy:.2f} > {tolerance}):\n"
        f"Pythorch:\n{pytorch_output}\n"
        f"VS\n"
        f"Engine:\n{engine_output}\n"
        f"Diff:\n"
        f"{torch.asarray(pytorch_output) - torch.asarray(engine_output)}\n"
        "Tolerance can be increased with --atol parameter."
    )

launch_inference(infer, inputs, nb_measures) #

Perform inference and measure latency.

Parameters:

Name Type Description Default
infer Callable

a lambda which will perform the inference

required
inputs List[Dict[str, Union[numpy.ndarray, torch.Tensor]]]

tensor compatible with the lambda (Torch tensor for Pytorch, or numpy otherwise)

required
nb_measures int

number of measurements to take when measuring the latency

required

Returns:

Type Description
Tuple[List[Union[numpy.ndarray, torch.Tensor]], List[float]]

a tuple of model output and inference latencies

Source code in src/transformer_deploy/convert.py
def launch_inference(
    infer: Callable, inputs: List[Dict[str, Union[np.ndarray, torch.Tensor]]], nb_measures: int
) -> Tuple[List[Union[np.ndarray, torch.Tensor]], List[float]]:
    """
    Perform inference and measure latency.

    Every item of `inputs` is run once through `infer` to collect the outputs,
    then `infer` is called `nb_measures` more times on the first input only,
    each call wrapped in `track_infer_time` to record its latency.

    :param infer: a lambda which will perform the inference
    :param inputs: tensor compatible with the lambda (Torch tensor for Pytorch, or numpy otherwise)
    :param nb_measures: number of measures to perform for the latency measure
    :return: a tuple of model output and inference latencies
    :raises AssertionError: if `inputs` is not a list or is empty
    """
    # Raised explicitly (not via `assert`) so the validation survives `python -O`;
    # AssertionError is kept for backward compatibility with existing callers.
    # isinstance also accepts list subclasses, which the previous exact-type check rejected.
    if not isinstance(inputs, list):
        raise AssertionError(f"inputs must be a list, got {type(inputs).__name__}")
    if len(inputs) == 0:
        raise AssertionError("inputs must contain at least one batch")
    outputs = [infer(batch_input) for batch_input in inputs]
    time_buffer: List[float] = []
    for _ in range(nb_measures):
        # only the first batch is used for the latency measurements
        with track_infer_time(time_buffer):
            infer(inputs[0])
    return outputs, time_buffer