
Configuration decoder

Generate Nvidia Triton inference server configuration files for decoder-based models (e.g. GPT-2).
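A minimal usage sketch follows. The EngineType import path is an assumption, and the instance construction is elided because ConfigurationDec inherits its constructor from Configuration, whose signature is not shown on this page:

from transformers import AutoConfig, AutoTokenizer

from transformer_deploy.triton.configuration import EngineType
from transformer_deploy.triton.configuration_decoder import ConfigurationDec

tokenizer = AutoTokenizer.from_pretrained("gpt2")
config = AutoConfig.from_pretrained("gpt2")

# Instance construction elided: the constructor is inherited from
# Configuration and its signature is not documented here.
conf: ConfigurationDec = ...

conf.create_configs(
    tokenizer=tokenizer,
    config=config,
    model_path="triton_models/model.onnx",  # hypothetical artefact path
    engine_type=EngineType.ONNX,
)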

ConfigurationDec (Configuration)

Source code in src/transformer_deploy/triton/configuration_decoder.py
class ConfigurationDec(Configuration):
    @property
    def python_code(self):
        return inspect.getsource(generative_model)

    @property
    def python_folder_name(self) -> str:
        return f"{self.model_name}_generate"

    def get_generation_conf(self) -> str:
        """
        Generate sequence configuration.
        :return: Generate sequence configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

{self._instance_group()}

parameters: {{
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {{
    string_value:"no"
  }}
}}
""".strip()

    def create_configs(
        self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
    ) -> None:
        super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)

        wd_path = Path(self.working_dir)
        for path, conf_content in [
            (wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
            (wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_generation_conf()),
        ]:  # type: Path, str
            path.parent.mkdir(parents=True, exist_ok=True)
            path.parent.joinpath("1").mkdir(exist_ok=True)
            path.write_text(conf_content)
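The create_configs loop above yields a standard Triton model repository: one folder per model, each holding a config.pbtxt and an (initially empty) 1/ version folder. Assuming a model named gpt2 (model_folder_name is defined in the parent class and not shown here), the layout looks roughly like:

<working_dir>/
├── <model_folder_name>/    # inference engine model, config from get_model_conf()
│   ├── 1/
│   └── config.pbtxt
└── gpt2_generate/          # Python backend wrapper, config from get_generation_conf()
    ├── 1/
    └── config.pbtxt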

create_configs(self, tokenizer, config, model_path, engine_type)

Create the Triton configuration folder layout, generate configuration files, and generate/move artefacts.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| tokenizer | PreTrainedTokenizer | tokenizer to use | required |
| config | PretrainedConfig | transformer model config to use | required |
| model_path | str | main folder where to save configurations and artefacts | required |
| engine_type | EngineType | type of inference engine (ONNX or TensorRT) | required |
Source code in src/transformer_deploy/triton/configuration_decoder.py
def create_configs(
    self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
) -> None:
    super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)

    wd_path = Path(self.working_dir)
    for path, conf_content in [
        (wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
        (wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_generation_conf()),
    ]:  # type: Path, str
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)
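The 1/ version folder created above is where Triton expects the model artefact; for the {model_name}_generate entry that artefact is the Python module returned by the python_code property. As an illustration only (this is not the generative_model module shipped with the library), a minimal Triton Python-backend model matching the TEXT/output contract could look like:

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Read the TEXT input declared in the generated config.pbtxt.
            text_tensor = pb_utils.get_input_tensor_by_name(request, "TEXT")
            prompts = [t.decode("utf-8") for t in text_tensor.as_numpy().tolist()]
            # A real implementation would run tokenization and model.generate() here;
            # this sketch just echoes the prompts back.
            generated = np.array([p.encode("utf-8") for p in prompts], dtype=object)
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[pb_utils.Tensor("output", generated)]
                )
            )
        return responses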

get_generation_conf(self)

Generate the configuration for the sequence-generation (Python backend) model.

Returns:

| Type | Description |
| --- | --- |
| str | Generate sequence configuration |

Source code in src/transformer_deploy/triton/configuration_decoder.py
    def get_generation_conf(self) -> str:
        """
        Generate sequence configuration.
        :return: Generate sequence configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

{self._instance_group()}

parameters: {{
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {{
    string_value:"no"
  }}
}}
""".strip()