Version: 2.21

Utils

Module azure

default_azure_ad_token_provider

python

def default_azure_ad_token_provider() -> str

Get an Azure AD token using the DefaultAzureCredential and the "https://cognitiveservices.azure.com/.default" scope.

Module jupyter

is_in_jupyter

python

def is_in_jupyter() -> bool

Returns True if in Jupyter or Google Colab, False otherwise.

Module url_validation

is_valid_http_url

python

def is_valid_http_url(url: str) -> bool

Check if a URL is a valid HTTP/HTTPS URL.

Module auth

SecretType

SecretType.from_str

python

@staticmethod
def from_str(string: str) -> "SecretType"

Convert a string to a SecretType.

Arguments:

string: The string to convert.

Secret

Encapsulates a secret used for authentication.

Usage example:

python

from haystack.components.generators import OpenAIGenerator
from haystack.utils import Secret

generator = OpenAIGenerator(api_key=Secret.from_token("<here_goes_your_token>"))

Secret.from_token

python

@staticmethod
def from_token(token: str) -> "Secret"

Create a token-based secret. Cannot be serialized.

Arguments:

token: The token to use for authentication.

Secret.from_env_var

python

@staticmethod
def from_env_var(env_vars: Union[str, list[str]],
                 *,
                 strict: bool = True) -> "Secret"

Create an environment variable-based secret. Accepts one or more environment variables.

Upon resolution, it returns a string token from the first environment variable that is set.

Arguments:

env_vars: A single environment variable or an ordered list of candidate environment variables.
strict: Whether to raise an exception if none of the environment variables are set.

Secret.to_dict

python

def to_dict() -> dict[str, Any]

Convert the secret to a JSON-serializable dictionary.

Some secrets may not be serializable.

Returns:

The serialized secret.

Secret.from_dict

python

@staticmethod
def from_dict(dict: dict[str, Any]) -> "Secret"

Create a secret from a JSON-serializable dictionary.

Arguments:

dict: The dictionary with the serialized data.

Returns:

The deserialized secret.

Secret.resolve_value

python

@abstractmethod
def resolve_value() -> Optional[Any]

Resolve the secret to an atomic value. The semantics of the value are secret-dependent.

Returns:

The value of the secret, if any.

Secret.type

python

@property
@abstractmethod
def type() -> SecretType

The type of the secret.

deserialize_secrets_inplace

python

def deserialize_secrets_inplace(data: dict[str, Any],
                                keys: Iterable[str],
                                *,
                                recursive: bool = False) -> None

Deserialize secrets in a dictionary inplace.

Arguments:

data: The dictionary with the serialized data.
keys: The keys of the secrets to deserialize.
recursive: Whether to recursively deserialize nested dictionaries.

Module callable_serialization

serialize_callable

python

def serialize_callable(callable_handle: Callable) -> str

Serializes a callable to its full path.

Arguments:

callable_handle: The callable to serialize

Returns:

The full path of the callable

deserialize_callable

python

def deserialize_callable(callable_handle: str) -> Callable

Deserializes a callable given its full import path as a string.

Arguments:

callable_handle: The full import path of the callable

Raises:

DeserializationError: If the callable cannot be found

Returns:

The callable

Module asynchronous

is_callable_async_compatible

python

def is_callable_async_compatible(func: Callable) -> bool

Returns whether the given callable is usable inside a component's run_async method.

Arguments:

func: The callable to check.

Returns:

True if the callable is compatible, False otherwise.

Module requests_utils

request_with_retry

python

def request_with_retry(attempts: int = 3,
                       status_codes_to_retry: Optional[list[int]] = None,
                       **kwargs: Any) -> requests.Response

Executes an HTTP request with a configurable exponential backoff retry on failures.

Usage example:

python

from haystack.utils import request_with_retry

# Sending an HTTP request with default retry configs
res = request_with_retry(method="GET", url="https://example.com")

# Sending an HTTP request with custom number of attempts
res = request_with_retry(method="GET", url="https://example.com", attempts=10)

# Sending an HTTP request with custom HTTP codes to retry
res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=[408, 503])

# Sending an HTTP request with custom timeout in seconds
res = request_with_retry(method="GET", url="https://example.com", timeout=5)

# Sending an HTTP request with custom authorization handling
class CustomAuth(requests.auth.AuthBase):
    def __call__(self, r):
        r.headers["authorization"] = "Basic <my_token_here>"
        return r

res = request_with_retry(method="GET", url="https://example.com", auth=CustomAuth())

# All of the above combined
res = request_with_retry(
    method="GET",
    url="https://example.com",
    auth=CustomAuth(),
    attempts=10,
    status_codes_to_retry=[408, 503],
    timeout=5
)

# Sending a POST request
res = request_with_retry(method="POST", url="https://example.com", data={"key": "value"}, attempts=10)

# Retry all 5xx status codes
res = request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=list(range(500, 600)))

Arguments:

attempts: Maximum number of attempts to retry the request.
status_codes_to_retry: List of HTTP status codes that will trigger a retry. When param is None, HTTP 408, 418, 429 and 503 will be retried.
kwargs: Optional arguments that request accepts.

Returns:

The Response object.

async_request_with_retry

python

async def async_request_with_retry(attempts: int = 3,
                                   status_codes_to_retry: Optional[
                                       list[int]] = None,
                                   **kwargs: Any) -> httpx.Response

Executes an asynchronous HTTP request with a configurable exponential backoff retry on failures.

Usage example:

python

import asyncio
from haystack.utils import async_request_with_retry

# Sending an async HTTP request with default retry configs
async def example():
    res = await async_request_with_retry(method="GET", url="https://example.com")
    return res

# Sending an async HTTP request with custom number of attempts
async def example_with_attempts():
    res = await async_request_with_retry(method="GET", url="https://example.com", attempts=10)
    return res

# Sending an async HTTP request with custom HTTP codes to retry
async def example_with_status_codes():
    res = await async_request_with_retry(method="GET", url="https://example.com", status_codes_to_retry=[408, 503])
    return res

# Sending an async HTTP request with custom timeout in seconds
async def example_with_timeout():
    res = await async_request_with_retry(method="GET", url="https://example.com", timeout=5)
    return res

# Sending an async HTTP request with custom headers
async def example_with_headers():
    headers = {"Authorization": "Bearer <my_token_here>"}
    res = await async_request_with_retry(method="GET", url="https://example.com", headers=headers)
    return res

# All of the above combined
async def example_combined():
    headers = {"Authorization": "Bearer <my_token_here>"}
    res = await async_request_with_retry(
        method="GET",
        url="https://example.com",
        headers=headers,
        attempts=10,
        status_codes_to_retry=[408, 503],
        timeout=5
    )
    return res

# Sending an async POST request
async def example_post():
    res = await async_request_with_retry(
        method="POST",
        url="https://example.com",
        json={"key": "value"},
        attempts=10
    )
    return res

# Retry all 5xx status codes
async def example_5xx():
    res = await async_request_with_retry(
        method="GET",
        url="https://example.com",
        status_codes_to_retry=list(range(500, 600))
    )
    return res

Arguments:

attempts: Maximum number of attempts to retry the request.
status_codes_to_retry: List of HTTP status codes that will trigger a retry. When param is None, HTTP 408, 418, 429 and 503 will be retried.
kwargs: Optional arguments that httpx.AsyncClient.request accepts.

Returns:

The httpx.Response object.

Module filters

raise_on_invalid_filter_syntax

python

def raise_on_invalid_filter_syntax(
        filters: Optional[dict[str, Any]] = None) -> None

Raise an error if the filter syntax is invalid.

document_matches_filter

python

def document_matches_filter(filters: dict[str, Any],
                            document: Union[Document, ByteStream]) -> bool

Return whether filters match the Document or the ByteStream.

For a detailed specification of the filters, refer to the DocumentStore.filter_documents() protocol documentation.

Module misc

expand_page_range

python

def expand_page_range(page_range: list[Union[str, int]]) -> list[int]

Takes a list of page numbers and ranges and expands them into a list of page numbers.

For example, given a page_range=['1-3', '5', '8', '10-12'] the function will return [1, 2, 3, 5, 8, 10, 11, 12]

Arguments:

page_range: List of page numbers and ranges

Returns:

An expanded list of page integers

expit

python

def expit(
        x: Union[float, ndarray[Any, Any]]) -> Union[float, ndarray[Any, Any]]

Compute the logistic sigmoid function. Maps input values to a range between 0 and 1.

Arguments:

x: input value. Can be a scalar or a numpy array.

Module device

DeviceType

Represents device types supported by Haystack.

This also includes devices that are not directly used by models - for example, the disk device is exclusively used in device maps for frameworks that support offloading model weights to disk.

DeviceType.from_str

python

@staticmethod
def from_str(string: str) -> "DeviceType"

Create a device type from a string.

Arguments:

string: The string to convert.

Returns:

The device type.

Device

A generic representation of a device.

Arguments:

type: The device type.
id: The optional device id.

Device.__init__

python

def __init__(type: DeviceType, id: Optional[int] = None)

Create a generic device.

Arguments:

type: The device type.
id: The device id.

Device.cpu

python

@staticmethod
def cpu() -> "Device"

Create a generic CPU device.

Returns:

The CPU device.

Device.gpu

python

@staticmethod
def gpu(id: int = 0) -> "Device"

Create a generic GPU device.

Arguments:

id: The GPU id.

Returns:

The GPU device.

Device.disk

python

@staticmethod
def disk() -> "Device"

Create a generic disk device.

Returns:

The disk device.

Device.mps

python

@staticmethod
def mps() -> "Device"

Create a generic Apple Metal Performance Shader device.

Returns:

The MPS device.

Device.xpu

python

@staticmethod
def xpu() -> "Device"

Create a generic Intel GPU Optimization device.

Returns:

The XPU device.

Device.from_str

python

@staticmethod
def from_str(string: str) -> "Device"

Create a generic device from a string.

Returns:

The device.

DeviceMap

A generic mapping from strings to devices.

The semantics of the strings are dependent on target framework. Primarily used to deploy HuggingFace models to multiple devices.

Arguments:

mapping: Dictionary mapping strings to devices.

DeviceMap.to_dict

python

def to_dict() -> dict[str, str]

Serialize the mapping to a JSON-serializable dictionary.

Returns:

The serialized mapping.

DeviceMap.first_device

python

@property
def first_device() -> Optional[Device]

Return the first device in the mapping, if any.

Returns:

The first device.

DeviceMap.from_dict

python

@staticmethod
def from_dict(dict: dict[str, str]) -> "DeviceMap"

Create a generic device map from a JSON-serialized dictionary.

Arguments:

dict: The serialized mapping.

Returns:

The generic device map.

DeviceMap.from_hf

python

@staticmethod
def from_hf(
        hf_device_map: dict[str, Union[int, str,
                                       "torch.device"]]) -> "DeviceMap"

Create a generic device map from a HuggingFace device map.

Arguments:

hf_device_map: The HuggingFace device map.

Returns:

The deserialized device map.

ComponentDevice

A representation of a device for a component.

This can be either a single device or a device map.

ComponentDevice.from_str

python

@classmethod
def from_str(cls, device_str: str) -> "ComponentDevice"

Create a component device representation from a device string.

The device string can only represent a single device.

Arguments:

device_str: The device string.

Returns:

The component device representation.

ComponentDevice.from_single

python

@classmethod
def from_single(cls, device: Device) -> "ComponentDevice"

Create a component device representation from a single device.

Disks cannot be used as single devices.

Arguments:

device: The device.

Returns:

The component device representation.

ComponentDevice.from_multiple

python

@classmethod
def from_multiple(cls, device_map: DeviceMap) -> "ComponentDevice"

Create a component device representation from a device map.

Arguments:

device_map: The device map.

Returns:

The component device representation.

ComponentDevice.to_torch

python

def to_torch() -> "torch.device"

Convert the component device representation to PyTorch format.

Device maps are not supported.

Returns:

The PyTorch device representation.

ComponentDevice.to_torch_str

python

def to_torch_str() -> str

Convert the component device representation to PyTorch string format.

Device maps are not supported.

Returns:

The PyTorch device string representation.

ComponentDevice.to_spacy

python

def to_spacy() -> int

Convert the component device representation to spaCy format.

Device maps are not supported.

Returns:

The spaCy device representation.

ComponentDevice.to_hf

python

def to_hf() -> Union[Union[int, str], dict[str, Union[int, str]]]

Convert the component device representation to HuggingFace format.

Returns:

The HuggingFace device representation.

ComponentDevice.update_hf_kwargs

python

def update_hf_kwargs(hf_kwargs: dict[str, Any], *,
                     overwrite: bool) -> dict[str, Any]

Convert the component device representation to HuggingFace format.

Add them as canonical keyword arguments to the keyword arguments dictionary.

Arguments:

hf_kwargs: The HuggingFace keyword arguments dictionary.
overwrite: Whether to overwrite existing device arguments.

Returns:

The HuggingFace keyword arguments dictionary.

ComponentDevice.has_multiple_devices

python

@property
def has_multiple_devices() -> bool

Whether this component device representation contains multiple devices.

ComponentDevice.first_device

python

@property
def first_device() -> Optional["ComponentDevice"]

Return either the single device or the first device in the device map, if any.

Returns:

The first device.

ComponentDevice.resolve_device

python

@staticmethod
def resolve_device(
        device: Optional["ComponentDevice"] = None) -> "ComponentDevice"

Select a device for a component. If a device is specified, it's used. Otherwise, the default device is used.

Arguments:

device: The provided device, if any.

Returns:

The resolved device.

ComponentDevice.to_dict

python

def to_dict() -> dict[str, Any]

Convert the component device representation to a JSON-serializable dictionary.

Returns:

The dictionary representation.

ComponentDevice.from_dict

python

@classmethod
def from_dict(cls, dict: dict[str, Any]) -> "ComponentDevice"

Create a component device representation from a JSON-serialized dictionary.

Arguments:

dict: The serialized representation.

Returns:

The deserialized component device.

Module http_client

init_http_client

python

def init_http_client(
    http_client_kwargs: Optional[dict[str, Any]] = None,
    async_client: bool = False
) -> Union[httpx.Client, httpx.AsyncClient, None]

Initialize an httpx client based on the http_client_kwargs.

Arguments:

http_client_kwargs: The kwargs to pass to the httpx client.
async_client: Whether to initialize an async client.

Returns:

An httpx client or an async httpx client.

Module type_serialization

serialize_type

python

def serialize_type(target: Any) -> str

Serializes a type or an instance to its string representation, including the module name.

This function handles types, instances of types, and special typing objects. It assumes that non-typing objects will have a '__name__' attribute.

Arguments:

target: The object to serialize, can be an instance or a type.

Returns:

The string representation of the type.

deserialize_type

python

def deserialize_type(type_str: str) -> Any

Deserializes a type given its full import path as a string, including nested generic types.

This function will dynamically import the module if it's not already imported and then retrieve the type object from it. It also handles nested generic types like list[dict[int, str]].

Arguments:

type_str: The string representation of the type's full import path.

Raises:

DeserializationError: If the type cannot be deserialized due to missing module or type.

Returns:

The deserialized type object.

thread_safe_import

python

def thread_safe_import(module_name: str) -> ModuleType

Import a module in a thread-safe manner.

Importing modules in a multi-threaded environment can lead to race conditions. This function ensures that the module is imported in a thread-safe manner without having impact on the performance of the import for single-threaded environments.

Arguments:

module_name: the module to import

Module jinja2_chat_extension

ChatMessageExtension

A Jinja2 extension for creating structured chat messages with mixed content types.

This extension provides a custom {% message %} tag that allows creating chat messages with different attributes (role, name, meta) and mixed content types (text, images, etc.).

Inspired by Banks.

Example:

{% message role="system" %}
You are a helpful assistant. You like to talk with {{user_name}}.
{% endmessage %}

{% message role="user" %}
Hello! I am {{user_name}}. Please describe the images.
{% for image in images %}
{{ image | templatize_part }}
{% endfor %}
{% endmessage %}

How it works

The {% message %} tag is used to define a chat message.
The message can contain text and other structured content parts.
To include a structured content part in the message, the | templatize_part filter is used. The filter serializes the content part into a JSON string and wraps it in a <haystack_content_part> tag.
The _build_chat_message_json method of the extension parses the message content parts, converts them into a ChatMessage object and serializes it to a JSON string.
The obtained JSON string is usable in the ChatPromptBuilder component, where templates are rendered to actual ChatMessage objects.

ChatMessageExtension.parse

python

def parse(parser: Any) -> Union[nodes.Node, list[nodes.Node]]

Parse the message tag and its attributes in the Jinja2 template.

This method handles the parsing of role (mandatory), name (optional), meta (optional) and message body content.

Arguments:

parser: The Jinja2 parser instance

Raises:

TemplateSyntaxError: If an invalid role is provided

Returns:

A CallBlock node containing the parsed message configuration

templatize_part

python

def templatize_part(value: ChatMessageContentT) -> str

Jinja filter to convert a ChatMessageContentT object into a JSON string wrapped in special XML content tags.

Arguments:

value: The ChatMessageContentT object to convert

Raises:

ValueError: If the value is not an instance of ChatMessageContentT

Returns:

A JSON string wrapped in special XML content tags

Module jinja2_extensions

Jinja2TimeExtension

Jinja2TimeExtension.__init__

python

def __init__(environment: Environment)

Initializes the Jinja2TimeExtension object.

Arguments:

environment: The Jinja2 environment to initialize the extension with. It provides the context where the extension will operate.

Jinja2TimeExtension.parse

python

def parse(parser: Any) -> Union[nodes.Node, list[nodes.Node]]

Parse the template expression to determine how to handle the datetime formatting.

Arguments:

parser: The parser object that processes the template expressions and manages the syntax tree. It's used to interpret the template's structure.

Module deserialization

deserialize_document_store_in_init_params_inplace

python

def deserialize_document_store_in_init_params_inplace(
        data: dict[str, Any], key: str = "document_store") -> None

Deserializes a generic document store from the init_parameters of a serialized component in place.

Arguments:

data: The dictionary to deserialize from.
key: The key in the data["init_parameters"] dictionary where the document store is specified.

Raises:

DeserializationError: If the document store is not properly specified in the serialization data or its type cannot be imported.

Returns:

None. The dictionary is modified in place, with the document store deserialized.

deserialize_chatgenerator_inplace

python

def deserialize_chatgenerator_inplace(data: dict[str, Any],
                                      key: str = "chat_generator") -> None

Deserialize a ChatGenerator in a dictionary inplace.

Arguments:

data: The dictionary with the serialized data.
key: The key in the dictionary where the ChatGenerator is stored.

Raises:

DeserializationError: If the key is missing in the serialized data, the value is not a dictionary, the type key is missing, the class cannot be imported, or the class lacks a 'from_dict' method.

deserialize_component_inplace

python

def deserialize_component_inplace(data: dict[str, Any],
                                  key: str = "chat_generator") -> None

Deserialize a Component in a dictionary inplace.

Arguments:

data: The dictionary with the serialized data.
key: The key in the dictionary where the Component is stored. Default is "chat_generator".

Raises:

DeserializationError: If the key is missing in the serialized data, the value is not a dictionary, the type key is missing, the class cannot be imported, or the class lacks a 'from_dict' method.

Module base_serialization

serialize_class_instance

python

def serialize_class_instance(obj: Any) -> dict[str, Any]

Serializes an object that has a to_dict method into a dictionary.

Arguments:

obj: The object to be serialized.

Raises:

SerializationError: If the object does not have a to_dict method.

Returns:

A dictionary representation of the object.

deserialize_class_instance

python

def deserialize_class_instance(data: dict[str, Any]) -> Any

Deserializes an object from a dictionary representation generated by serialize_class_instance.

Arguments:

data: The dictionary to deserialize from.

Raises:

DeserializationError: If the serialization data is malformed, the class type cannot be imported, or the class does not have a from_dict method.

Returns:

The deserialized object.

Module azure​

default_azure_ad_token_provider​

Module jupyter​

is_in_jupyter​

Module url_validation​

is_valid_http_url​

Module auth​

SecretType​

SecretType.from_str​

Secret​

Secret.from_token​

Secret.from_env_var​

Secret.to_dict​

Secret.from_dict​

Secret.resolve_value​

Secret.type​

deserialize_secrets_inplace​

Module callable_serialization​

serialize_callable​

deserialize_callable​

Module asynchronous​

is_callable_async_compatible​

Module requests_utils​

request_with_retry​

async_request_with_retry​

Module filters​

raise_on_invalid_filter_syntax​

document_matches_filter​

Module misc​

expand_page_range​

expit​

Module device​

DeviceType​

DeviceType.from_str​

Device​

Device.__init__​

Device.cpu​

Device.gpu​

Device.disk​

Device.mps​

Device.xpu​

Device.from_str​

DeviceMap​

DeviceMap.to_dict​

DeviceMap.first_device​

DeviceMap.from_dict​

DeviceMap.from_hf​

ComponentDevice​

ComponentDevice.from_str​

ComponentDevice.from_single​

ComponentDevice.from_multiple​

ComponentDevice.to_torch​

ComponentDevice.to_torch_str​

ComponentDevice.to_spacy​

ComponentDevice.to_hf​

ComponentDevice.update_hf_kwargs​

ComponentDevice.has_multiple_devices​

ComponentDevice.first_device​

ComponentDevice.resolve_device​

ComponentDevice.to_dict​

ComponentDevice.from_dict​

Module http_client​

init_http_client​

Module type_serialization​

serialize_type​

deserialize_type​

thread_safe_import​

Module jinja2_chat_extension​

ChatMessageExtension​

How it works​

ChatMessageExtension.parse​

templatize_part​

Module jinja2_extensions​

Jinja2TimeExtension​

Jinja2TimeExtension.__init__​

Jinja2TimeExtension.parse​

Module deserialization​

deserialize_document_store_in_init_params_inplace​

deserialize_chatgenerator_inplace​

deserialize_component_inplace​

Module azure

default_azure_ad_token_provider

Module jupyter

is_in_jupyter

Module url_validation

is_valid_http_url

Module auth

SecretType

SecretType.from_str

Secret

Secret.from_token

Secret.from_env_var

Secret.to_dict

Secret.from_dict

Secret.resolve_value

Secret.type

deserialize_secrets_inplace

Module callable_serialization

serialize_callable

deserialize_callable

Module asynchronous

is_callable_async_compatible

Module requests_utils

request_with_retry

async_request_with_retry

Module filters

raise_on_invalid_filter_syntax

document_matches_filter

Module misc

expand_page_range

expit

Module device

DeviceType

DeviceType.from_str

Device

Device.init

Device.cpu

Device.gpu

Device.disk

Device.mps

Device.xpu

Device.from_str

DeviceMap

DeviceMap.to_dict

DeviceMap.first_device

DeviceMap.from_dict

DeviceMap.from_hf

ComponentDevice

ComponentDevice.from_str

ComponentDevice.from_single

ComponentDevice.from_multiple

ComponentDevice.to_torch

ComponentDevice.to_torch_str

ComponentDevice.to_spacy

ComponentDevice.to_hf

ComponentDevice.update_hf_kwargs

ComponentDevice.has_multiple_devices

ComponentDevice.first_device

ComponentDevice.resolve_device

ComponentDevice.to_dict

ComponentDevice.from_dict

Module http_client

init_http_client

Module type_serialization

serialize_type

deserialize_type

thread_safe_import

Module jinja2_chat_extension

ChatMessageExtension

How it works

ChatMessageExtension.parse

templatize_part

Module jinja2_extensions

Jinja2TimeExtension

Jinja2TimeExtension.init

Jinja2TimeExtension.parse

Module deserialization

deserialize_document_store_in_init_params_inplace

deserialize_chatgenerator_inplace

deserialize_component_inplace