Configuration token classifier

Generate NVIDIA Triton Inference Server configuration files for encoder-based models (BERT, RoBERTa, ELECTRA, etc.).

ConfigurationTokenClassifier (Configuration) #

Source code in src/transformer_deploy/triton/configuration_token_classifier.py
class ConfigurationTokenClassifier(Configuration):
    @property
    def python_code(self) -> str:
        # source text of the token_classifier script run by the Triton Python backend
        return inspect.getsource(token_classifier)

    @property
    def python_folder_name(self) -> str:
        return f"{self.model_name}_inference"

    def get_inference_conf(self) -> str:
        """
        Generate inference step configuration.
        :return: inference step configuration
        """
        return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

{self._instance_group()}


parameters: {{
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {{
    string_value:"no"
  }}
}}
""".strip()

    def create_configs(
        self,
        tokenizer: PreTrainedTokenizer,
        config: PretrainedConfig,
        model_path: str,
        engine_type: EngineType,
    ) -> None:
        super().create_configs(
            tokenizer=tokenizer,
            config=config,
            model_path=model_path,
            engine_type=engine_type,
        )

        wd_path = Path(self.working_dir)
        for path, conf_content in [
            (
                wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"),
                self.get_model_conf(),
            ),
            (
                wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"),
                self.get_inference_conf(),
            ),
        ]:
            path.parent.mkdir(parents=True, exist_ok=True)
            path.parent.joinpath("1").mkdir(exist_ok=True)
            path.write_text(conf_content)
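
A minimal usage sketch follows, to show how this class is typically driven. The constructor arguments are defined on the Configuration base class and are not shown in this listing, so the instantiation below is a placeholder; the checkpoint name and the EngineType import path are likewise assumptions to check against your version of the library.

from transformers import AutoConfig, AutoTokenizer

# import paths are assumptions; verify against your transformer_deploy version
from transformer_deploy.triton.configuration import EngineType
from transformer_deploy.triton.configuration_token_classifier import ConfigurationTokenClassifier

# any token-classification checkpoint works; this one is just an example
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
config = AutoConfig.from_pretrained("dslim/bert-base-NER")

# hypothetical instantiation: the real signature lives on the Configuration
# base class (working directory, instance count, device, ...) and is not shown here
configuration = ConfigurationTokenClassifier(...)

configuration.create_configs(
    tokenizer=tokenizer,
    config=config,
    model_path="./triton_models",
    engine_type=EngineType.ONNX,  # or the TensorRT member
)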

create_configs(self, tokenizer, config, model_path, engine_type) #

Create the Triton configuration folder layout, generate the configuration files, and generate/move artefacts.

Parameters:

tokenizer (PreTrainedTokenizer, required): tokenizer to use
config (PretrainedConfig, required): transformer model config to use
model_path (str, required): main folder where to save configurations and artefacts
engine_type (EngineType, required): type of inference engine (ONNX or TensorRT)
Source code in src/transformer_deploy/triton/configuration_token_classifier.py
def create_configs(
    self,
    tokenizer: PreTrainedTokenizer,
    config: PretrainedConfig,
    model_path: str,
    engine_type: EngineType,
) -> None:
    super().create_configs(
        tokenizer=tokenizer,
        config=config,
        model_path=model_path,
        engine_type=engine_type,
    )

    wd_path = Path(self.working_dir)
    for path, conf_content in [
        (
            wd_path.joinpath(self.model_folder_name).joinpath("config.pbtxt"),
            self.get_model_conf(),
        ),
        (
            wd_path.joinpath(self.python_folder_name).joinpath("config.pbtxt"),
            self.get_inference_conf(),
        ),
    ]:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.parent.joinpath("1").mkdir(exist_ok=True)
        path.write_text(conf_content)
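
The loop above produces the standard Triton model-repository layout: each step gets a config.pbtxt next to an empty "1" version folder. For a model named ner, the result looks roughly like this (model_folder_name is defined on the base class, so it is shown as a placeholder):

working_dir/
├── <model_folder_name>/     # encoder model step (ONNX Runtime or TensorRT)
│   ├── config.pbtxt
│   └── 1/                   # version folder; engine artefacts go here
└── ner_inference/           # Python backend step (python_folder_name)
    ├── config.pbtxt
    └── 1/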

get_inference_conf(self) #

Generate inference step configuration.

Returns:

str: inference step configuration

Source code in src/transformer_deploy/triton/configuration_token_classifier.py
def get_inference_conf(self) -> str:
    """
    Generate inference step configuration.
    :return: inference step configuration
    """
    return f"""
{self._get_header(name=self.python_folder_name, backend="python")}

input [
    {{
        name: "TEXT"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

output [
    {{
        name: "output"
        data_type: TYPE_STRING
        dims: [ -1 ]
    }}
]

{self._instance_group()}


parameters: {{
  key: "FORCE_CPU_ONLY_INPUT_TENSORS"
  value: {{
    string_value:"no"
  }}
}}
""".strip()