Configuration token classifier
Generate NVIDIA Triton Inference Server configuration files for encoder-based models (BERT, RoBERTa, ELECTRA, etc.).
ConfigurationTokenClassifier (Configuration)
Source code in src/transformer_deploy/triton/configuration_token_classifier.py
```python
class ConfigurationTokenClassifier(Configuration):
    @property
    def python_code(self):
        return inspect.getsource(token_classifier)

    @property
    def python_folder_name(self) -> str:
        return f"{self.model_name}_inference"

    def get_inference_conf(self) -> str:
        """
        Generate inference step configuration.
        :return: inference step configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}
input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
{self._instance_group()}
parameters: {{
    key: "FORCE_CPU_ONLY_INPUT_TENSORS"
    value: {{
        string_value:"no"
    }}
}}
""".strip()

    def create_configs(
        self,
        tokenizer: PreTrainedTokenizer,
        config: PretrainedConfig,
        model_path: str,
        engine_type: EngineType,
    ) -> None:
        super().create_configs(
            tokenizer=tokenizer,
            config=config,
            model_path=model_path,
            engine_type=engine_type,
        )
        wd_path = Path(self.working_dir)
        for path, conf_content in [
            (
                wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"),
                self.get_model_conf(),
            ),
            (
                wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"),
                self.get_inference_conf(),
            ),
        ]:
            path.parent.mkdir(parents=True, exist_ok=True)
            path.parent.joinpath("1").mkdir(exist_ok=True)
            path.write_text(conf_content)
```
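For orientation, here is a minimal sketch (not part of the library) that lists the files `create_configs` lays out for Triton. It assumes an already constructed `ConfigurationTokenClassifier` instance named `conf` on which `create_configs` has been called, and only uses attributes visible in the source above.

```python
from pathlib import Path

# Assumption: `conf` is an existing ConfigurationTokenClassifier on which create_configs() has run.
wd_path = Path(conf.working_dir)
for folder in (conf.model_folder_name, conf.python_folder_name):
    print(wd_path / folder / "config.pbtxt")  # generated Triton model configuration
    print(wd_path / folder / "1")             # version folder Triton expects next to config.pbtxt
```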
create_configs(self, tokenizer, config, model_path, engine_type)
Create the Triton configuration folder layout, generate the configuration files, and generate/move artefacts; a usage sketch follows the parameter table below.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| tokenizer | PreTrainedTokenizer | tokenizer to use | required |
| config | PretrainedConfig | transformer model config to use | required |
| model_path | str | main folder where to save configurations and artefacts | required |
| engine_type | EngineType | type of inference engine (ONNX or TensorRT) | required |
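A hedged usage sketch: tokenizer and config loading relies on Hugging Face `transformers`; the checkpoint name, the ONNX path, the exact import location and member name of `EngineType`, and the pre-built `conf` instance are all assumptions, not library documentation.

```python
from transformers import AutoConfig, AutoTokenizer

# Assumptions: `conf` is an already built ConfigurationTokenClassifier and EngineType
# has been imported from transformer_deploy; checkpoint and paths below are only examples.
model_id = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)

conf.create_configs(
    tokenizer=tokenizer,
    config=config,
    model_path="triton_models/model.onnx",
    engine_type=EngineType.ONNX,
)
# Writes <working_dir>/<model_folder_name>/config.pbtxt and
# <working_dir>/<model_name>_inference/config.pbtxt, each next to an empty "1" version folder.
```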
Source code in src/transformer_deploy/triton/configuration_token_classifier.py
```python
def create_configs(
    self,
    tokenizer: PreTrainedTokenizer,
    config: PretrainedConfig,
    model_path: str,
    engine_type: EngineType,
) -> None:
    super().create_configs(
        tokenizer=tokenizer,
        config=config,
        model_path=model_path,
        engine_type=engine_type,
    )
    wd_path = Path(self.working_dir)
    for path, conf_content in [
        (
            wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"),
            self.get_model_conf(),
        ),
        (
            wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"),
            self.get_inference_conf(),
        ),
    ]:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)
```
get_inference_conf(self)
Generate the inference step configuration; see the inspection sketch after the table below.
Returns:
| Type | Description |
|---|---|
| str | inference step configuration |
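A quick inspection sketch (assuming `conf` is an already constructed `ConfigurationTokenClassifier` instance):

```python
# Assumption: `conf` is an existing ConfigurationTokenClassifier instance.
pbtxt: str = conf.get_inference_conf()
print(pbtxt)
# The returned string is a Triton config for the Python backend: a "TEXT" string input,
# an "output" string output, the instance group section, and FORCE_CPU_ONLY_INPUT_TENSORS set to "no".
```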
Source code in src/transformer_deploy/triton/configuration_token_classifier.py
```python
def get_inference_conf(self) -> str:
    """
    Generate inference step configuration.
    :return: inference step configuration
    """
    return f"""
{self._get_header(name=self.python_folder_name, backend="python")}
input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
{self._instance_group()}
parameters: {{
    key: "FORCE_CPU_ONLY_INPUT_TENSORS"
    value: {{
        string_value:"no"
    }}
}}
""".strip()
```