Utils
Shared functions related to benchmarks.
compare_outputs(pytorch_output, engine_output)
Compare two model outputs by computing the mean absolute difference between them.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| pytorch_output | ndarray | reference output | required |
| engine_output | ndarray | other engine output | required |
Returns:
| Type | Description |
|---|---|
| float | difference between outputs as a single float |
Source code in src/transformer_deploy/benchmarks/utils.py
```python
def compare_outputs(pytorch_output: np.ndarray, engine_output: np.ndarray) -> float:
    """
    Compare 2 model outputs by computing the mean of absolute value difference between them.
    :param pytorch_output: reference output
    :param engine_output: other engine output
    :return: difference between outputs as a single float
    """
    return np.mean(np.abs(pytorch_output - engine_output))
```
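A minimal usage sketch; the arrays are made-up stand-ins for real model outputs, and the import path is inferred from the source location above:

```python
import numpy as np

from transformer_deploy.benchmarks.utils import compare_outputs

# Made-up outputs standing in for a PyTorch model and an inference engine.
reference = np.array([0.10, 0.20, 0.30], dtype=np.float32)
candidate = np.array([0.11, 0.19, 0.30], dtype=np.float32)

diff = compare_outputs(pytorch_output=reference, engine_output=candidate)
print(f"mean absolute difference: {diff:.4f}")  # ≈ 0.0067
```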
generate_input(seq_len, batch_size, input_names, device='cuda')
Generate dummy inputs.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| seq_len | int | number of tokens per input | required |
| batch_size | int | first dimension of the tensor | required |
| input_names | List[str] | tensor input names to generate | required |
| device | str | where to store tensors (PyTorch only). One of [cpu, cuda] | 'cuda' |
Returns:
| Type | Description |
|---|---|
| Tuple[Dict[str, torch.Tensor], Dict[str, numpy.ndarray]] | a tuple of input dicts, one of PyTorch tensors and one of numpy arrays |
Source code in src/transformer_deploy/benchmarks/utils.py
```python
def generate_input(
    seq_len: int, batch_size: int, input_names: List[str], device: str = "cuda"
) -> Tuple[Dict[str, torch.Tensor], Dict[str, np.ndarray]]:
    """
    Generate dummy inputs.
    :param seq_len: number of tokens per input
    :param batch_size: first dimension of the tensor
    :param input_names: tensor input names to generate
    :param device: where to store tensors (PyTorch only). One of [cpu, cuda]
    :return: a tuple of input dicts, PyTorch and numpy
    """
    assert device in ["cpu", "cuda"]
    shape = (batch_size, seq_len)
    inputs_pytorch: Dict[str, torch.Tensor] = dict()
    for name in input_names:
        inputs_pytorch[name] = torch.ones(size=shape, dtype=torch.int32, device=device)
    inputs_onnx: Dict[str, np.ndarray] = {
        k: np.ascontiguousarray(v.detach().cpu().numpy()) for k, v in inputs_pytorch.items()
    }
    return inputs_pytorch, inputs_onnx
```
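A minimal usage sketch; the input names below are typical for BERT-like models but are an assumption, not something the function requires:

```python
from transformer_deploy.benchmarks.utils import generate_input

inputs_pytorch, inputs_onnx = generate_input(
    seq_len=16,
    batch_size=4,
    input_names=["input_ids", "attention_mask", "token_type_ids"],
    device="cpu",  # switch to "cuda" when benchmarking on GPU
)
print(inputs_pytorch["input_ids"].shape)  # torch.Size([4, 16])
print(inputs_onnx["input_ids"].dtype)     # int32
```

Note that the tensors are filled with ones, so these inputs exercise shapes and dtypes rather than realistic token distributions.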
generate_multiple_inputs(seq_len, batch_size, input_names, nb_inputs_to_gen, device)
Generate multiple dummy inputs (each batch is produced by generate_input above).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| seq_len | int | sequence length to generate | required |
| batch_size | int | number of sequences per batch to generate | required |
| input_names | List[str] | tensor input names to generate | required |
| nb_inputs_to_gen | int | number of batches of sequences to generate | required |
| device | str | one of [cpu, cuda] | required |
Returns:
| Type | Description |
|---|---|
| Tuple[List[Dict[str, torch.Tensor]], List[Dict[str, numpy.ndarray]]] | generated sequences |
Source code in src/transformer_deploy/benchmarks/utils.py
```python
def generate_multiple_inputs(
    seq_len: int, batch_size: int, input_names: List[str], nb_inputs_to_gen: int, device: str
) -> Tuple[List[Dict[str, torch.Tensor]], List[Dict[str, np.ndarray]]]:
    """
    Generate multiple dummy inputs.
    :param seq_len: sequence length to generate
    :param batch_size: number of sequences per batch to generate
    :param input_names: tensor input names to generate
    :param nb_inputs_to_gen: number of batches of sequences to generate
    :param device: one of [cpu, cuda]
    :return: generated sequences
    """
    all_inputs_pytorch: List[Dict[str, torch.Tensor]] = list()
    all_inputs_onnx: List[Dict[str, np.ndarray]] = list()
    for _ in range(nb_inputs_to_gen):
        inputs_pytorch, inputs_onnx = generate_input(
            seq_len=seq_len, batch_size=batch_size, input_names=input_names, device=device
        )
        all_inputs_pytorch.append(inputs_pytorch)
        all_inputs_onnx.append(inputs_onnx)
    return all_inputs_pytorch, all_inputs_onnx
```
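Building on generate_input, a short sketch that prepares several identical batches for a benchmarking loop (names and sizes are illustrative):

```python
from transformer_deploy.benchmarks.utils import generate_multiple_inputs

all_inputs_pytorch, all_inputs_onnx = generate_multiple_inputs(
    seq_len=16,
    batch_size=4,
    input_names=["input_ids", "attention_mask"],
    nb_inputs_to_gen=10,
    device="cpu",
)
assert len(all_inputs_pytorch) == len(all_inputs_onnx) == 10
```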
print_timings(name, timings)
Format and print inference latencies.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| name | str | inference engine name | required |
| timings | List[float] | latencies measured during inference | required |
Source code in src/transformer_deploy/benchmarks/utils.py
```python
def print_timings(name: str, timings: List[float]) -> None:
    """
    Format and print inference latencies.
    :param name: inference engine name
    :param timings: latencies measured during inference
    """
    mean_time = 1e3 * np.mean(timings)
    std_time = 1e3 * np.std(timings)
    min_time = 1e3 * np.min(timings)
    max_time = 1e3 * np.max(timings)
    median, percent_95_time, percent_99_time = 1e3 * np.percentile(timings, [50, 95, 99])
    print(
        f"[{name}] "
        f"mean={mean_time:.2f}ms, "
        f"sd={std_time:.2f}ms, "
        f"min={min_time:.2f}ms, "
        f"max={max_time:.2f}ms, "
        f"median={median:.2f}ms, "
        f"95p={percent_95_time:.2f}ms, "
        f"99p={percent_99_time:.2f}ms"
    )
```
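An illustrative call with made-up latencies; timings are expected in seconds, since the function multiplies by 1e3 to report milliseconds:

```python
from transformer_deploy.benchmarks.utils import print_timings

fake_latencies = [0.012, 0.011, 0.013, 0.012, 0.025]  # seconds per inference
print_timings(name="some engine", timings=fake_latencies)
# -> [some engine] mean=14.60ms, sd=5.24ms, min=11.00ms, max=25.00ms, ...
```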
setup_logging(level=20)
Set up the generic Python logger.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| level | int | logger level | 20 |
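The source listing for this helper is not reproduced here; note that the default level of 20 corresponds to logging.INFO. A minimal usage sketch:

```python
import logging

from transformer_deploy.benchmarks.utils import setup_logging

setup_logging(level=logging.DEBUG)  # 10; the default (20) is logging.INFO
```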
to_numpy(tensors)
Convert a list of torch / numpy tensors to a single numpy tensor.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| tensors | List[Union[numpy.ndarray, torch.Tensor]] | list of torch / numpy tensors | required |
Returns:
| Type | Description |
|---|---|
| ndarray | numpy tensor |
Source code in src/transformer_deploy/benchmarks/utils.py
```python
def to_numpy(tensors: List[Union[np.ndarray, torch.Tensor]]) -> np.ndarray:
    """
    Convert a list of torch / numpy tensors to a numpy tensor
    :param tensors: list of torch / numpy tensors
    :return: numpy tensor
    """
    if isinstance(tensors[0], torch.Tensor):
        pytorch_output = [t.detach().cpu().numpy() for t in tensors]
    elif isinstance(tensors[0], np.ndarray):
        pytorch_output = tensors
    else:
        raise Exception(f"unknown tensor type: {type(tensors[0])}")
    return np.asarray(pytorch_output)
```
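A quick sketch showing how a list of same-shape torch tensors is stacked into a single numpy array (shapes are illustrative):

```python
import torch

from transformer_deploy.benchmarks.utils import to_numpy

outputs = [torch.ones(2, 3), torch.zeros(2, 3)]
stacked = to_numpy(outputs)
print(stacked.shape)  # (2, 2, 3)
```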
track_infer_time(buffer)
A context manager to perform latency measurements.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| buffer | List[int] | a list in which to save the latency of each input | required |
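The source listing is not reproduced here; assuming the context manager appends the elapsed time of its block (in seconds) to buffer, as the description suggests, a typical benchmarking loop looks like:

```python
import time

from transformer_deploy.benchmarks.utils import print_timings, track_infer_time

timings = []
for _ in range(5):
    with track_infer_time(buffer=timings):
        time.sleep(0.01)  # stand-in for a real model inference call
print_timings(name="dummy model", timings=timings)
```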