# Configuration encoder

Generate NVIDIA Triton server configuration files for encoder-based models (BERT, RoBERTa, ELECTRA, etc.). Each model is deployed as three Triton entries: a Python tokenization step, the inference engine itself, and an ensemble that chains the two.

## ConfigurationEnc (Configuration)
Source code in `src/transformer_deploy/triton/configuration_encoder.py`

```python
class ConfigurationEnc(Configuration):
    @property
    def python_code(self):
        return inspect.getsource(python_tokenizer)

    @property
    def python_folder_name(self) -> str:
        return f"{self.model_name}_tokenize"

    def get_tokenize_conf(self) -> str:
        """
        Generate tokenization step configuration.
        :return: tokenization step configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
{self._get_tokens()}
]

{self._instance_group()}
""".strip()

    def get_inference_conf(self) -> str:
        """
        Generate inference step configuration.
        :return: inference step configuration
        """
        output_map_blocks = list()
        for input_name in self.tensor_input_names:
            output_map_text = f"""
{{
    key: "{input_name}"
    value: "{input_name}"
}}
""".strip()
            output_map_blocks.append(output_map_text)
        mapping_keys = ",\n".join(output_map_blocks)
        return f"""
{self._get_header(name=self.inference_folder_name, platform="ensemble")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output {{
    name: "output"
    data_type: TYPE_FP32
    dims: {str(self.dim_output)}
}}

ensemble_scheduling {{
    step [
        {{
            model_name: "{self.python_folder_name}"
            model_version: -1
            input_map {{
                key: "TEXT"
                value: "TEXT"
            }}
            output_map [
{mapping_keys}
            ]
        }},
        {{
            model_name: "{self.model_folder_name}"
            model_version: -1
            input_map [
{mapping_keys}
            ]
            output_map {{
                key: "output"
                value: "output"
            }}
        }}
    ]
}}
""".strip()

    def create_configs(
        self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
    ) -> None:
        super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)
        for path, conf_content in [
            (self.working_dir.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
            (self.working_dir.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_tokenize_conf()),
            (self.working_dir.joinpath(self.inference_folder_name).joinpath("config.pbtxt"), self.get_inference_conf()),
        ]:  # type: Path, str
            path.parent.mkdir(parents=True, exist_ok=True)
            path.parent.joinpath("1").mkdir(exist_ok=True)
            path.write_text(conf_content)
```
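For orientation, here is a minimal usage sketch. It assumes the `ConfigurationEnc` instance has already been constructed (the constructor is inherited from `Configuration` and not documented on this page) and that `EngineType` can be imported from `transformer_deploy.triton.configuration`; treat both as assumptions to check against the source.

```python
# Minimal usage sketch; `conf` construction is elided because the
# constructor belongs to the Configuration base class (not shown here).
# The EngineType import path below is an assumption.
from transformers import AutoConfig, AutoTokenizer

from transformer_deploy.triton.configuration import EngineType

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
config = AutoConfig.from_pretrained("bert-base-uncased")

# conf: ConfigurationEnc = ...  # typically built by the transformer-deploy convert tool

conf.create_configs(
    tokenizer=tokenizer,
    config=config,
    model_path="triton_models",  # main folder for configurations and artefacts
    engine_type=EngineType.ONNX,
)
```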
### create_configs(self, tokenizer, config, model_path, engine_type)
Create Triton configuration folder layout, generate configuration files, generate/move artefacts, etc.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| `tokenizer` | `PreTrainedTokenizer` | tokenizer to use | *required* |
| `config` | `PretrainedConfig` | transformer model config to use | *required* |
| `model_path` | `str` | main folder where to save configurations and artefacts | *required* |
| `engine_type` | `EngineType` | type of inference engine (ONNX or TensorRT) | *required* |
Source code in `src/transformer_deploy/triton/configuration_encoder.py`

```python
def create_configs(
    self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
) -> None:
    super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)
    for path, conf_content in [
        (self.working_dir.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
        (self.working_dir.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_tokenize_conf()),
        (self.working_dir.joinpath(self.inference_folder_name).joinpath("config.pbtxt"), self.get_inference_conf()),
    ]:  # type: Path, str
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)
```
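The loop above writes one `config.pbtxt` per Triton model and creates an empty `1/` version folder next to each. For a model named `bert_base`, the resulting layout would look roughly as follows; `bert_base_tokenize` follows the `python_folder_name` property shown above, while the `_model` and `_inference` suffixes come from base-class properties and are only illustrative guesses here:

```
<working_dir>/
├── bert_base_model/          # model_folder_name (suffix assumed)
│   ├── 1/                    # empty version folder
│   └── config.pbtxt          # get_model_conf()
├── bert_base_tokenize/       # python_folder_name
│   ├── 1/
│   └── config.pbtxt          # get_tokenize_conf()
└── bert_base_inference/      # inference_folder_name (suffix assumed)
    ├── 1/
    └── config.pbtxt          # get_inference_conf()
```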
### get_inference_conf(self)
Generate inference step configuration.
Returns:

| Type | Description |
|---|---|
| `str` | inference step configuration |
Source code in `src/transformer_deploy/triton/configuration_encoder.py`

```python
def get_inference_conf(self) -> str:
    """
    Generate inference step configuration.
    :return: inference step configuration
    """
    output_map_blocks = list()
    for input_name in self.tensor_input_names:
        output_map_text = f"""
{{
    key: "{input_name}"
    value: "{input_name}"
}}
""".strip()
        output_map_blocks.append(output_map_text)
    mapping_keys = ",\n".join(output_map_blocks)
    return f"""
{self._get_header(name=self.inference_folder_name, platform="ensemble")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output {{
    name: "output"
    data_type: TYPE_FP32
    dims: {str(self.dim_output)}
}}

ensemble_scheduling {{
    step [
        {{
            model_name: "{self.python_folder_name}"
            model_version: -1
            input_map {{
                key: "TEXT"
                value: "TEXT"
            }}
            output_map [
{mapping_keys}
            ]
        }},
        {{
            model_name: "{self.model_folder_name}"
            model_version: -1
            input_map [
{mapping_keys}
            ]
            output_map {{
                key: "output"
                value: "output"
            }}
        }}
    ]
}}
""".strip()
```
### get_tokenize_conf(self)
Generate tokenization step configuration.
Returns:

| Type | Description |
|---|---|
| `str` | tokenization step configuration |
Source code in `src/transformer_deploy/triton/configuration_encoder.py`

```python
def get_tokenize_conf(self) -> str:
    """
    Generate tokenization step configuration.
    :return: tokenization step configuration
    """
    return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
{self._get_tokens()}
]

{self._instance_group()}
""".strip()
```