Documentation | API Reference | 📓 Tutorials | 🧑‍🍳 Cookbook | 🤝 Integrations | 💜 Discord

Gradient integration for Haystack

Module haystack_integrations.components.embedders.gradient.gradient_document_embedder

GradientDocumentEmbedder

A component for computing Document embeddings using Gradient AI API.

The embedding of each Document is stored in the embedding field of the Document.

Usage example:

from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack import Document

from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder

documents = [
    Document(content="My name is Jean and I live in Paris."),
    Document(content="My name is Mark and I live in Berlin."),
    Document(content="My name is Giorgio and I live in Rome."),
]

indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
indexing_pipeline.add_component(
    instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer"
)
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})
>>> {'document_writer': {'documents_written': 3}}

GradientDocumentEmbedder.__init__

def __init__(
        *,
        model: str = "bge-large",
        batch_size: int = 32_768,
        access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
        workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID"),
        host: Optional[str] = None,
        progress_bar: bool = True) -> None

Create a GradientDocumentEmbedder component.

Arguments:

  • model: The name of the model to use.
  • batch_size: Update cycle for tqdm progress bar, default is to update every 32_768 docs.
  • access_token: The Gradient access token.
  • workspace_id: The Gradient workspace ID.
  • host: The Gradient host. By default, it uses Gradient AI.
  • progress_bar: Whether to show a progress bar while embedding the documents.

GradientDocumentEmbedder.to_dict

def to_dict() -> dict

Serialize this component to a dictionary.

Returns:

The serialized component as a dictionary.

GradientDocumentEmbedder.from_dict

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder"

Deserialize this component from a dictionary.

Arguments:

  • data: The dictionary representation of this component.

Returns:

The deserialized component instance.

GradientDocumentEmbedder.warm_up

def warm_up() -> None

Initializes the component.

GradientDocumentEmbedder.run

@component.output_types(documents=List[Document])
def run(documents: List[Document])

Embed a list of Documents.

The embedding of each Document is stored in the embedding field of the Document.

Arguments:

  • documents: A list of Documents to embed.

Returns:

A dictionary with the following keys:

  • documents: The embedded Documents.

Module haystack_integrations.components.embedders.gradient.gradient_text_embedder

GradientTextEmbedder

A component for embedding strings using models hosted on Gradient AI.

Usage example:

from haystack_integrations.components.embedders.gradient import GradientTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Pipeline

p = Pipeline()
p.add_component("text_embedder", GradientTextEmbedder(model="bge-large"))
p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()))
p.connect("text_embedder", "retriever")
p.run(data={"text_embedder": {"text": "You can embed me but I'll return no matching documents"}})
>>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder.
>>> {'retriever': {'documents': []}}

GradientTextEmbedder.__init__

def __init__(
        *,
        model: str = "bge-large",
        access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
        workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID"),
        host: Optional[str] = None) -> None

Create a GradientTextEmbedder component.

Arguments:

  • model: The name of the model to use.
  • access_token: The Gradient access token.
  • workspace_id: The Gradient workspace ID.
  • host: The Gradient host. By default, it uses Gradient AI.

GradientTextEmbedder.to_dict

def to_dict() -> dict

Serialize this component to a dictionary.

Returns:

The serialized component as a dictionary.

GradientTextEmbedder.from_dict

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder"

Deserialize this component from a dictionary.

Arguments:

  • data: The dictionary representation of this component.

Returns:

The deserialized component instance.

GradientTextEmbedder.warm_up

def warm_up() -> None

Initializes the component.

GradientTextEmbedder.run

@component.output_types(embedding=List[float])
def run(text: str)

Generates an embedding for a single text.

Module haystack_integrations.components.generators.gradient.base

GradientGenerator

LLM Generator interfacing Gradient AI.

Queries the LLM using Gradient AI's SDK ('gradientai' package). See Gradient AI API for more details.

Usage example:

from haystack_integrations.components.generators.gradient import GradientGenerator

llm = GradientGenerator(base_model_slug="llama2-7b-chat")
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
# Output: {'replies': ['42']}

GradientGenerator.__init__

def __init__(
    *,
    access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
    base_model_slug: Optional[str] = None,
    host: Optional[str] = None,
    max_generated_token_count: Optional[int] = None,
    model_adapter_id: Optional[str] = None,
    temperature: Optional[float] = None,
    top_k: Optional[int] = None,
    top_p: Optional[float] = None,
    workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID")
) -> None

Create a GradientGenerator component.

Arguments:

  • access_token: The Gradient access token as a Secret. If not provided, it is read from the environment variable GRADIENT_ACCESS_TOKEN.
  • base_model_slug: The base model slug to use.
  • host: The Gradient host. By default, it uses Gradient AI.
  • max_generated_token_count: The maximum number of tokens to generate.
  • model_adapter_id: The model adapter ID to use.
  • temperature: The temperature to use.
  • top_k: The top k to use.
  • top_p: The top p to use.
  • workspace_id: The Gradient workspace ID as a Secret. If not provided, it is read from the environment variable GRADIENT_WORKSPACE_ID.

GradientGenerator.to_dict

def to_dict() -> Dict[str, Any]

Serialize this component to a dictionary.

Returns:

The serialized component as a dictionary.

GradientGenerator.from_dict

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator"

Deserialize this component from a dictionary.

Arguments:

  • data: The dictionary representation of this component.

Returns:

The deserialized component instance.

GradientGenerator.warm_up

def warm_up()

Initializes the LLM model instance if it doesn't exist.

GradientGenerator.run

@component.output_types(replies=List[str])
def run(prompt: str)

Queries the LLM with the prompt to produce replies.

Arguments:

  • prompt: The prompt to be sent to the generative model.