vllm.config.renderer

TokenizerMode module-attribute

TokenizerMode = Literal[
    "auto", "hf", "slow", "mistral", "deepseek_v32"
]
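
As a hedged illustration (the helper below is hypothetical, not part of vllm), the Literal type distinguishes the built-in modes from plugin-provided custom values:

from typing import Literal, get_args

TokenizerMode = Literal["auto", "hf", "slow", "mistral", "deepseek_v32"]

# Hypothetical helper: check whether a user-supplied string is one of the
# built-in modes; any other value would need plugin support.
def is_builtin_mode(mode: str) -> bool:
    return mode in get_args(TokenizerMode)

assert is_builtin_mode("mistral")
assert not is_builtin_mode("my_custom_mode")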

RendererConfig

Configuration for the renderer.

Source code in vllm/config/renderer.py
@config
@dataclass
class RendererConfig:
    """Configuration for the renderer."""

    # NOTE: In reality, this is a required argument.
    # We provide a dummy default value here to generate the CLI args.
    model_config: SkipValidation[ModelConfig] = None  # type: ignore
    """Provides model context to the renderer."""

    tokenizer: str = ""
    """Name or path of the Hugging Face tokenizer to use. If unspecified, model
    name or path will be used."""
    tokenizer_mode: TokenizerMode | str = "auto"
    """Tokenizer mode:\n
    - "auto" will use the tokenizer from `mistral_common` for Mistral models
    if available, otherwise it will use the "hf" tokenizer.\n
    - "hf" will use the fast tokenizer if available.\n
    - "slow" will always use the slow tokenizer.\n
    - "mistral" will always use the tokenizer from `mistral_common`.\n
    - "deepseek_v32" will always use the tokenizer from `deepseek_v32`.\n
    - Other custom values can be supported via plugins."""
    tokenizer_revision: str | None = None
    """The specific revision to use for the tokenizer on the Hugging Face Hub.
    It can be a branch name, a tag name, or a commit id. If unspecified, will
    use the default version."""
    skip_tokenizer_init: bool = False
    """Skip initialization of tokenizer and detokenizer. Expects valid
    `prompt_token_ids` and `None` for prompt from the input. The generated
    output will contain token ids."""

    io_processor_plugin: str | None = None
    """IOProcessor plugin name to load at model startup."""

    media_io_kwargs: dict[str, dict[str, Any]] = Field(default_factory=dict)
    """Additional args passed to process media inputs, keyed by modalities.
    For example, to set num_frames for video, set
    `--media-io-kwargs '{"video": {"num_frames": 40} }'`"""
    allowed_local_media_path: str = ""
    """Allowing API requests to read local images or videos from directories
    specified by the server file system. This is a security risk. Should only
    be enabled in trusted environments."""
    allowed_media_domains: list[str] | None = None
    """If set, only media URLs that belong to this domain can be used for
    multi-modal inputs. """

    @property
    def trust_remote_code(self) -> bool:
        return self.model_config.trust_remote_code

    def __post_init__(self) -> None:
        model_config = self.model_config

        # The tokenizer is consistent with the model by default.
        if not self.tokenizer:
            self.tokenizer = (
                ModelConfig.model
                if model_config is None
                else model_config.original_model
            )
        if not self.tokenizer_revision:
            self.tokenizer_revision = (
                ModelConfig.revision if model_config is None else model_config.revision
            )

        self.original_tokenizer = self.tokenizer
        self.tokenizer = maybe_model_redirect(self.original_tokenizer)
        self.maybe_pull_tokenizer_for_runai(self.tokenizer)

        # Multimodal GGUF models must use original repo for mm processing
        is_multimodal_model = (
            ModelConfig.is_multimodal_model
            if model_config is None
            else model_config.is_multimodal_model
        )
        if is_gguf(self.tokenizer) and is_multimodal_model:
            raise ValueError(
                "Loading a multimodal GGUF model needs to use original "
                "tokenizer. Please specify the unquantized hf model's "
                "repo name or path using the --tokenizer argument."
            )

    def maybe_pull_tokenizer_for_runai(self, tokenizer: str) -> None:
        """Pull tokenizer from Object Storage to temporary directory when needed."""
        if not is_runai_obj_uri(tokenizer):
            return

        object_storage_tokenizer = ObjectStorageModel(url=tokenizer)
        object_storage_tokenizer.pull_files(
            tokenizer,
            ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors", "*.pth"],
        )
        self.tokenizer = object_storage_tokenizer.dir

allowed_local_media_path class-attribute instance-attribute

allowed_local_media_path: str = ''

Allow API requests to read local images or videos from directories specified by the server file system. This is a security risk and should only be enabled in trusted environments.

allowed_media_domains class-attribute instance-attribute

allowed_media_domains: list[str] | None = None

If set, only media URLs that belong to these domains can be used for multi-modal inputs.
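
As a hedged sketch (model name, path, and domain are illustrative), both security-related options can be set through the Python entrypoint, which forwards them to this config via the engine arguments:

from vllm import LLM

# Hypothetical values: restrict media inputs to one local directory and
# one remote domain.
llm = LLM(
    model="Qwen/Qwen2-VL-2B-Instruct",
    allowed_local_media_path="/data/images",
    allowed_media_domains=["example.com"],
)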

io_processor_plugin class-attribute instance-attribute

io_processor_plugin: str | None = None

IOProcessor plugin name to load at model startup.

media_io_kwargs class-attribute instance-attribute

media_io_kwargs: dict[str, dict[str, Any]] = Field(
    default_factory=dict
)

Additional args passed when processing media inputs, keyed by modality. For example, to set num_frames for video, set --media-io-kwargs '{"video": {"num_frames": 40}}'
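
A hedged sketch of the equivalent Python usage (model name is illustrative; the dict is forwarded to this config through the engine arguments):

from vllm import LLM

# Hypothetical example: decode 40 frames from each video input.
llm = LLM(
    model="Qwen/Qwen2-VL-2B-Instruct",
    media_io_kwargs={"video": {"num_frames": 40}},
)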

model_config class-attribute instance-attribute

model_config: SkipValidation[ModelConfig] = None

Provides model context to the renderer.

skip_tokenizer_init class-attribute instance-attribute

skip_tokenizer_init: bool = False

Skip initialization of the tokenizer and detokenizer. Expects valid prompt_token_ids (and None for the text prompt) in the input. The generated output will contain token ids only.
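
A hedged sketch of what this implies for the offline API (model name and token ids are illustrative): inputs must already be tokenized, and outputs carry ids rather than text.

from vllm import LLM, SamplingParams
from vllm.inputs import TokensPrompt

llm = LLM(model="facebook/opt-125m", skip_tokenizer_init=True)
outputs = llm.generate(
    TokensPrompt(prompt_token_ids=[2, 100, 200]),  # pre-tokenized prompt
    SamplingParams(max_tokens=8),
)
# Only token ids are available; no detokenized text is produced.
print(outputs[0].outputs[0].token_ids)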

tokenizer class-attribute instance-attribute

tokenizer: str = ''

Name or path of the Hugging Face tokenizer to use. If unspecified, the model name or path will be used.

tokenizer_mode class-attribute instance-attribute

tokenizer_mode: TokenizerMode | str = 'auto'

Tokenizer mode:

  • "auto" will use the tokenizer from mistral_common for Mistral models if available, otherwise it will use the "hf" tokenizer.

  • "hf" will use the fast tokenizer if available.

  • "slow" will always use the slow tokenizer.

  • "mistral" will always use the tokenizer from mistral_common.

  • "deepseek_v32" will always use the tokenizer from deepseek_v32.

  • Other custom values can be supported via plugins.
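
As a hedged example (model name is illustrative), a Mistral model can be pinned to the mistral_common tokenizer instead of relying on "auto" detection:

from vllm import LLM

llm = LLM(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    tokenizer_mode="mistral",  # force mistral_common instead of "auto"
)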

tokenizer_revision class-attribute instance-attribute

tokenizer_revision: str | None = None

The specific revision to use for the tokenizer on the Hugging Face Hub. It can be a branch name, a tag name, or a commit id. If unspecified, the default version will be used.
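
A hedged sketch of pinning both a separate tokenizer and a specific revision (all names are illustrative):

from vllm import LLM

llm = LLM(
    model="my-org/fine-tuned-model",       # hypothetical fine-tune
    tokenizer="meta-llama/Llama-2-7b-hf",  # borrow the base tokenizer
    tokenizer_revision="main",             # branch name, tag, or commit id
)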

trust_remote_code property

trust_remote_code: bool

__post_init__

__post_init__() -> None
Source code in vllm/config/renderer.py
def __post_init__(self) -> None:
    model_config = self.model_config

    # The tokenizer is consistent with the model by default.
    if not self.tokenizer:
        self.tokenizer = (
            ModelConfig.model
            if model_config is None
            else model_config.original_model
        )
    if not self.tokenizer_revision:
        self.tokenizer_revision = (
            ModelConfig.revision if model_config is None else model_config.revision
        )

    self.original_tokenizer = self.tokenizer
    self.tokenizer = maybe_model_redirect(self.original_tokenizer)
    self.maybe_pull_tokenizer_for_runai(self.tokenizer)

    # Multimodal GGUF models must use original repo for mm processing
    is_multimodal_model = (
        ModelConfig.is_multimodal_model
        if model_config is None
        else model_config.is_multimodal_model
    )
    if is_gguf(self.tokenizer) and is_multimodal_model:
        raise ValueError(
            "Loading a multimodal GGUF model needs to use original "
            "tokenizer. Please specify the unquantized hf model's "
            "repo name or path using the --tokenizer argument."
        )
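
The GGUF check above means a quantized multimodal checkpoint cannot reuse the tokenizer bundled in the GGUF file. A hedged sketch of the workaround the error message asks for (both paths are illustrative):

from vllm import LLM

# Hypothetical names: load GGUF weights, but take the tokenizer from the
# original unquantized repository so multimodal processing works.
llm = LLM(
    model="/models/model.Q4_K_M.gguf",
    tokenizer="org/original-unquantized-model",
)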

maybe_pull_tokenizer_for_runai

maybe_pull_tokenizer_for_runai(tokenizer: str) -> None

Pull tokenizer from Object Storage to temporary directory when needed.

Source code in vllm/config/renderer.py
def maybe_pull_tokenizer_for_runai(self, tokenizer: str) -> None:
    """Pull tokenizer from Object Storage to temporary directory when needed."""
    if not is_runai_obj_uri(tokenizer):
        return

    object_storage_tokenizer = ObjectStorageModel(url=tokenizer)
    object_storage_tokenizer.pull_files(
        tokenizer,
        ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors", "*.pth"],
    )
    self.tokenizer = object_storage_tokenizer.dir
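
A hedged sketch of the effect (the bucket URI is hypothetical; whether it qualifies is decided by is_runai_obj_uri): a tokenizer URI pointing at object storage is pulled into a temporary directory, weight files are skipped via ignore_pattern, and self.tokenizer is rewritten to the local path.

from vllm.config import ModelConfig
from vllm.config.renderer import RendererConfig

model_config = ModelConfig(model="facebook/opt-125m")  # illustrative
config = RendererConfig(
    model_config=model_config,
    tokenizer="s3://my-bucket/tokenizer/",  # hypothetical object-storage URI
)
# __post_init__ has run: pull_files copied the tokenizer files (but not
# *.safetensors / *.bin weights) into a temp dir, and config.tokenizer now
# points at that local directory.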