Skip to content

Configuration encoder

Generate Nvidia Triton server configuration files for encoder based models (Bert, Roberta, Electra, etc.).

ConfigurationEnc (Configuration) #

Source code in src/transformer_deploy/triton/configuration_encoder.py
class ConfigurationEnc(Configuration):
    """
    Build the Nvidia Triton configuration files for encoder based models
    (Bert, Roberta, Electra, etc.): a Python-backend tokenization model,
    the inference model itself, and an ensemble chaining the two steps.
    """

    @property
    def python_code(self) -> str:
        # The tokenizer implementation source is shipped verbatim as the
        # Triton Python-backend model file.
        return inspect.getsource(python_tokenizer)

    @property
    def python_folder_name(self) -> str:
        # Model-repository folder holding the tokenization step.
        return f"{self.model_name}_tokenize"

    def get_tokenize_conf(self) -> str:
        """
        Generate tokenization step configuration.
        :return: tokenization step configuration
        """
        header = self._get_header(name=self.python_folder_name, backend="python")
        return f"""
{header}

input [
{{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}}
]

output [
{self._get_tokens()}
]

{self._instance_group()}
""".strip()

    def get_inference_conf(self) -> str:
        """
        Generate inference step configuration.
        :return: inference step configuration
        """
        # One key/value mapping entry per tensor exchanged between the
        # tokenization step and the model step (same name on both sides).
        mapping_keys = ",\n".join(
            f'{{\n    key: "{tensor_name}"\n    value: "{tensor_name}"\n}}'
            for tensor_name in self.tensor_input_names
        )

        return f"""
{self._get_header(name=self.inference_folder_name, platform="ensemble")}

input [
{{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}}
]

output {{
    name: "output"
    data_type: TYPE_FP32
    dims: {str(self.dim_output)}
}}

ensemble_scheduling {{
    step [
        {{
            model_name: "{self.python_folder_name}"
            model_version: -1
            input_map {{
            key: "TEXT"
            value: "TEXT"
        }}
        output_map [
{mapping_keys}
        ]
        }},
        {{
            model_name: "{self.model_folder_name}"
            model_version: -1
            input_map [
{mapping_keys}
            ]
        output_map {{
                key: "output"
                value: "output"
            }}
        }}
    ]
}}
""".strip()

    def create_configs(
        self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
    ) -> None:
        """
        Create Triton configuration folder layout, generate configuration files,
        generate/move artefacts, etc.
        :param tokenizer: tokenizer to use
        :param config: transformer model config to use
        :param model_path: main folder where to save configurations and artefacts
        :param engine_type: type of inference engine (ONNX or TensorRT)
        """
        super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)

        # Generate every configuration first, then lay out one folder per step:
        # <folder>/config.pbtxt plus an (empty) version directory <folder>/1/.
        targets = [
            (self.model_folder_name, self.get_model_conf()),
            (self.python_folder_name, self.get_tokenize_conf()),
            (self.inference_folder_name, self.get_inference_conf()),
        ]
        for folder_name, conf_content in targets:
            model_dir = self.working_dir.joinpath(folder_name)
            model_dir.mkdir(parents=True, exist_ok=True)
            model_dir.joinpath("1").mkdir(exist_ok=True)
            model_dir.joinpath("config.pbtxt").write_text(conf_content)

create_configs(self, tokenizer, config, model_path, engine_type) #

Create Triton configuration folder layout, generate configuration files, generate/move artefacts, etc.

Parameters:

Name Type Description Default
tokenizer PreTrainedTokenizer

tokenizer to use

required
config PretrainedConfig

transformer model config to use

required
model_path str

main folder where to save configurations and artefacts

required
engine_type EngineType

type of inference engine (ONNX or TensorRT)

required
Source code in src/transformer_deploy/triton/configuration_encoder.py
def create_configs(
    self, tokenizer: PreTrainedTokenizer, config: PretrainedConfig, model_path: str, engine_type: EngineType
) -> None:
    super().create_configs(tokenizer=tokenizer, config=config, model_path=model_path, engine_type=engine_type)

    for path, conf_content in [
        (self.working_dir.joinpath(self.model_folder_name).joinpath("config.pbtxt"), self.get_model_conf()),
        (self.working_dir.joinpath(self.python_folder_name).joinpath("config.pbtxt"), self.get_tokenize_conf()),
        (self.working_dir.joinpath(self.inference_folder_name).joinpath("config.pbtxt"), self.get_inference_conf()),
    ]:  # type: Path, str
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)

get_inference_conf(self) #

Generate inference step configuration.

Returns:

Type Description
str

inference step configuration

Source code in src/transformer_deploy/triton/configuration_encoder.py
    def get_inference_conf(self) -> str:
        """
        Generate inference step configuration.
        :return: inference step configuration
        """
        output_map_blocks = list()
        for input_name in self.tensor_input_names:
            output_map_text = f"""
{{
    key: "{input_name}"
    value: "{input_name}"
}}
""".strip()
            output_map_blocks.append(output_map_text)

        mapping_keys = ",\n".join(output_map_blocks)

        return f"""
{self._get_header(name=self.inference_folder_name, platform="ensemble")}

input [
{{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}}
]

output {{
    name: "output"
    data_type: TYPE_FP32
    dims: {str(self.dim_output)}
}}

ensemble_scheduling {{
    step [
        {{
            model_name: "{self.python_folder_name}"
            model_version: -1
            input_map {{
            key: "TEXT"
            value: "TEXT"
        }}
        output_map [
{mapping_keys}
        ]
        }},
        {{
            model_name: "{self.model_folder_name}"
            model_version: -1
            input_map [
{mapping_keys}
            ]
        output_map {{
                key: "output"
                value: "output"
            }}
        }}
    ]
}}
""".strip()

get_tokenize_conf(self) #

Generate tokenization step configuration.

Returns:

Type Description
str

tokenization step configuration

Source code in src/transformer_deploy/triton/configuration_encoder.py
    def get_tokenize_conf(self) -> str:
        """
        Generate tokenization step configuration.
        :return: tokenization step configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
{{
    name: "TEXT"
    data_type: TYPE_STRING
    dims: [ -1 ]
}}
]

output [
{self._get_tokens()}
]

{self._instance_group()}
""".strip()