Convert
This module contains code related to client interface.
check_accuracy(engine_name, pytorch_output, engine_output, tolerance)
#
Compare engine predictions with a reference. Assert that the difference is under a threshold.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
engine_name |
str |
string used in error message, if any |
required |
pytorch_output |
List[torch.Tensor] |
reference output used for the comparaison |
required |
engine_output |
List[Union[numpy.ndarray, torch.Tensor]] |
output from the engine |
required |
tolerance |
float |
if difference in outputs is above threshold, an error will be raised |
required |
Source code in src/transformer_deploy/convert.py
def check_accuracy(
engine_name: str,
pytorch_output: List[torch.Tensor],
engine_output: List[Union[np.ndarray, torch.Tensor]],
tolerance: float,
) -> None:
"""
Compare engine predictions with a reference.
Assert that the difference is under a threshold.
:param engine_name: string used in error message, if any
:param pytorch_output: reference output used for the comparaison
:param engine_output: output from the engine
:param tolerance: if difference in outputs is above threshold, an error will be raised
"""
pytorch_output = to_numpy(pytorch_output)
engine_output = to_numpy(engine_output)
discrepency = compare_outputs(pytorch_output=pytorch_output, engine_output=engine_output)
assert discrepency <= tolerance, (
f"{engine_name} discrepency is too high ({discrepency:.2f} >= {tolerance}):\n"
f"Pythorch:\n{pytorch_output}\n"
f"VS\n"
f"Engine:\n{engine_output}\n"
f"Diff:\n"
f"{torch.asarray(pytorch_output) - torch.asarray(engine_output)}\n"
"Tolerance can be increased with --atol parameter."
)
launch_inference(infer, inputs, nb_measures)
#
Perform inference and measure latency.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
infer |
Callable |
a lambda which will perform the inference |
required |
inputs |
List[Dict[str, Union[numpy.ndarray, torch.Tensor]]] |
tensor compatible with the lambda (Torch tensor for Pytorch, or numpy otherwise) |
required |
nb_measures |
int |
number of measures to perform for the latency measure |
required |
Returns:
Type | Description |
---|---|
Tuple[List[Union[numpy.ndarray, torch.Tensor]], List[float]] |
a tuple of model output and inference latencies |
Source code in src/transformer_deploy/convert.py
def launch_inference(
infer: Callable, inputs: List[Dict[str, Union[np.ndarray, torch.Tensor]]], nb_measures: int
) -> Tuple[List[Union[np.ndarray, torch.Tensor]], List[float]]:
"""
Perform inference and measure latency.
:param infer: a lambda which will perform the inference
:param inputs: tensor compatible with the lambda (Torch tensor for Pytorch, or numpy otherwise)
:param nb_measures: number of measures to perform for the latency measure
:return: a tuple of model output and inference latencies
"""
assert type(inputs) == list
assert len(inputs) > 0
outputs = list()
for batch_input in inputs:
output = infer(batch_input)
outputs.append(output)
time_buffer: List[int] = list()
for _ in range(nb_measures):
with track_infer_time(time_buffer):
_ = infer(inputs[0])
return outputs, time_buffer