Args
Command line args parser
parse_args(commands=None)
Parse command line arguments
Parameters:

Name | Type | Description | Default |
---|---|---|---|
commands | List[str] | list of strings used to provide the command line programmatically; when None, arguments are read from sys.argv | None |
Returns:

Type | Description |
---|---|
Namespace | parsed command line |
Source code in src/transformer_deploy/utils/args.py
```python
import argparse
from typing import List, Optional


def parse_args(commands: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse command line arguments
    :param commands: list of strings used to provide the command line programmatically
    :return: parsed command line
    """
    parser = argparse.ArgumentParser(
        description="optimize and deploy transformers", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("-m", "--model", required=True, help="path to model or URL to Hugging Face hub")
    parser.add_argument("-t", "--tokenizer", help="path to tokenizer or URL to Hugging Face hub")
    parser.add_argument(
        "--task",
        default="classification",
        choices=["classification", "embedding", "text-generation", "token-classification"],
        help="task to manage. `embedding` is for sentence-transformers models",
    )
    parser.add_argument(
        "--auth-token",
        default=None,
        help=(
            "Hugging Face Hub auth token. Set to `None` (default) for public models. "
            "For private models, use `True` to use local cached token, or a string of your HF API token"
        ),
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        default=[1, 1, 1],
        help="batch sizes to optimize for (min, optimal, max). Used by TensorRT and benchmarks.",
        type=int,
        nargs=3,
    )
    parser.add_argument(
        "-s",
        "--seq-len",
        default=[16, 16, 16],
        help="sequence lengths to optimize for (min, optimal, max). Used by TensorRT and benchmarks.",
        type=int,
        nargs=3,
    )
    parser.add_argument("-q", "--quantization", action="store_true", help="INT-8 GPU quantization support")
    parser.add_argument("-w", "--workspace-size", default=10000, help="workspace size in MiB (TensorRT)", type=int)
    parser.add_argument("-o", "--output", default="triton_models", help="name to be used for the output folder")
    parser.add_argument("-n", "--name", default="transformer", help="model name to be used in triton server")
    parser.add_argument("-v", "--verbose", action="store_true", help="display detailed information")
    parser.add_argument(
        "--backend",
        default=["onnx"],
        help="backend to use. multiple args accepted.",
        nargs="*",
        choices=["onnx", "tensorrt"],
    )
    parser.add_argument(
        "-d",
        "--device",
        default=None,
        help="device to use. If not set, will be cuda if available.",
        choices=["cpu", "cuda"],
    )
    parser.add_argument("--nb-threads", default=1, help="# of CPU threads to use for inference", type=int)
    parser.add_argument(
        "--nb-instances", default=1, help="# of model instances, may improve throughput (Triton)", type=int
    )
    parser.add_argument("--warmup", default=10, help="# of inferences to warm each model", type=int)
    parser.add_argument("--nb-measures", default=1000, help="# of inferences for benchmarks", type=int)
    parser.add_argument("--seed", default=123, help="seed for random inputs, etc.", type=int)
    parser.add_argument("--atol", default=3e-1, help="tolerance when comparing outputs to PyTorch ones", type=float)
    args, _ = parser.parse_known_args(args=commands)
    return args
```
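
Because `commands` bypasses `sys.argv`, the parser can be exercised directly from Python, e.g. in unit tests or notebooks. A minimal sketch, where the model name and shape values are arbitrary examples rather than library defaults:

```python
from transformer_deploy.utils.args import parse_args

# Build the command line programmatically instead of reading sys.argv.
args = parse_args(
    commands=[
        "--model", "roberta-base",        # any local path or Hugging Face hub name
        "--backend", "onnx", "tensorrt",  # nargs="*" accepts several backends
        "--batch-size", "1", "8", "16",   # min, optimal, max
        "--seq-len", "16", "64", "128",   # min, optimal, max
    ]
)
print(args.model, args.backend, args.batch_size)  # roberta-base ['onnx', 'tensorrt'] [1, 8, 16]
```

Note that the function relies on `parse_known_args` rather than `parse_args`, so unrecognized flags are silently ignored instead of raising an error.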