Configuration decoder
Generates Nvidia Triton inference server configuration files for decoder-based models (e.g., GPT-2).
ConfigurationDec (Configuration)
Source code in src/transformer_deploy/triton/configuration_decoder.py
class ConfigurationDec(Configuration):
    @property
    def python_code(self):
        # source of the generative model module, shipped as the Triton Python backend model
        return inspect.getsource(generative_model)

    @property
    def python_folder_name(self) -> str:
        # Triton model folder hosting the generation logic, e.g. "gpt2_generate"
        return f"{self.model_name}_generate"

    def get_generation_conf(self) -> str:
        """
        Generate sequence configuration.
        :return: Generate sequence configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}
input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
{self._instance_group()}
parameters: {{
    key: "FORCE_CPU_ONLY_INPUT_TENSORS"
    value: {{
        string_value:"no"
    }}
}}
""".strip()

    def create_configs(
        self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
    ) -> None:
        super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)
        wd_path = Path(self.working_dir)
        for path, conf_content in [
            (wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
            (wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_generation_conf()),
        ]:  # type: Path, str
            path.parent.mkdir(parents=True, exist_ok=True)
            path.parent.joinpath("1").mkdir(exist_ok=True)
            path.write_text(conf_content)
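For orientation, a minimal usage sketch. The `ConfigurationDec` constructor arguments are inherited from the base `Configuration` class and are not shown on this page, so they are elided below; `EngineType.ONNX` is an assumption based on the engine types listed in the parameter table ("ONNX or TensorRT").

```python
from transformers import AutoConfig, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
config = AutoConfig.from_pretrained("gpt2")

# constructor signature lives on the base Configuration class (not shown here)
conf = ConfigurationDec(...)
conf.create_configs(
    tokenizer=tokenizer,
    config=config,
    model_path="triton_models",  # hypothetical output folder
    engine_type=EngineType.ONNX,  # assumed enum member; docs list ONNX or TensorRT
)
```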
create_configs(self, tokenizer, config, model_path, engine_type)
Create the Triton configuration folder layout, generate the configuration files, and generate or move the artefacts.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
tokenizer | PreTrainedTokenizer | tokenizer to use | required |
config | PretrainedConfig | transformer model config to use | required |
model_path | str | main folder where to save configurations and artefacts | required |
engine_type | EngineType | type of inference engine (ONNX or TensorRT) | required |
Source code in src/transformer_deploy/triton/configuration_decoder.py
def create_configs(
    self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
) -> None:
    super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)
    wd_path = Path(self.working_dir)
    for path, conf_content in [
        (wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
        (wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_generation_conf()),
    ]:  # type: Path, str
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)
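After `create_configs` runs, the working directory follows Triton's model-repository layout: one folder per model, each holding a `config.pbtxt` and a version directory `1/` (created empty here). A hedged sketch of the result, assuming `model_name` is `"gpt2"`; `model_folder_name` is defined on the base `Configuration` class, so its exact value is an assumption:

```
<working_dir>/
├── gpt2_model/          # model_folder_name (assumed value, from base class)
│   ├── 1/               # version folder, created empty by this method
│   └── config.pbtxt     # self.get_model_conf()
└── gpt2_generate/       # python_folder_name = f"{model_name}_generate"
    ├── 1/
    └── config.pbtxt     # self.get_generation_conf()
```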
get_generation_conf(self)
Generate sequence configuration.
Returns:
Type | Description |
---|---|
str | Generate sequence configuration |
Source code in src/transformer_deploy/triton/configuration_decoder.py
def get_generation_conf(self) -> str:
    """
    Generate sequence configuration.
    :return: Generate sequence configuration
    """
    return f"""
{self._get_header(name=self.python_folder_name, backend="python")}
input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]
{self._instance_group()}
parameters: {{
    key: "FORCE_CPU_ONLY_INPUT_TENSORS"
    value: {{
        string_value:"no"
    }}
}}
""".strip()