Gradient integration for Haystack
Module haystack_integrations.components.embedders.gradient.gradient_document_embedder
GradientDocumentEmbedder
@component
class GradientDocumentEmbedder()
A component for computing Document embeddings using the Gradient AI API.
The embedding of each Document is stored in the embedding
field of the Document.
Usage example:
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack import Document
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder
documents = [
Document(content="My name is Jean and I live in Paris."),
Document(content="My name is Mark and I live in Berlin."),
Document(content="My name is Giorgio and I live in Rome."),
]
indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
indexing_pipeline.add_component(
    instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer"
)
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})
>>> {'document_writer': {'documents_written': 3}}
GradientDocumentEmbedder.__init__
def __init__(
*,
model: str = "bge-large",
batch_size: int = 32_768,
access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID"),
host: Optional[str] = None,
progress_bar: bool = True) -> None
Create a GradientDocumentEmbedder component.
Arguments:
model
: The name of the model to use.
batch_size
: Update cycle for the tqdm progress bar; by default, it updates every 32_768 documents.
access_token
: The Gradient access token.
workspace_id
: The Gradient workspace ID.
host
: The Gradient host. By default, it uses Gradient AI.
progress_bar
: Whether to show a progress bar while embedding the documents.
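A minimal construction sketch. By default, the credentials are read from the GRADIENT_ACCESS_TOKEN and GRADIENT_WORKSPACE_ID environment variables; the explicit token values below are hypothetical placeholders:
from haystack.utils import Secret
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder

# Reads GRADIENT_ACCESS_TOKEN and GRADIENT_WORKSPACE_ID from the environment
embedder = GradientDocumentEmbedder(model="bge-large", batch_size=32_768)

# Or pass the credentials explicitly ("<token>" and "<workspace>" are placeholders)
embedder = GradientDocumentEmbedder(
    access_token=Secret.from_token("<token>"),
    workspace_id=Secret.from_token("<workspace>"),
)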
GradientDocumentEmbedder.to_dict
def to_dict() -> dict
Serialize this component to a dictionary.
Returns:
The serialized component as a dictionary.
GradientDocumentEmbedder.from_dict
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder"
Deserialize this component from a dictionary.
Arguments:
data
: The dictionary representation of this component.
Returns:
The deserialized component instance.
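A sketch of a serialization round trip, assuming the default environment variables are set; to_dict captures the init parameters and from_dict rebuilds an equivalent component:
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder

embedder = GradientDocumentEmbedder(model="bge-large")
data = embedder.to_dict()  # dictionary with the component type and its init parameters
restored = GradientDocumentEmbedder.from_dict(data)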
GradientDocumentEmbedder.warm_up
def warm_up() -> None
Initializes the component.
GradientDocumentEmbedder.run
@component.output_types(documents=List[Document])
def run(documents: List[Document])
Embed a list of Documents.
The embedding of each Document is stored in the embedding
field of the Document.
Arguments:
documents
: A list of Documents to embed.
Returns:
A dictionary with the following keys:
documents
: The embedded Documents.
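A standalone sketch of run outside a pipeline, assuming the credentials are set in the environment; call warm_up before run so the component is initialized:
from haystack import Document
from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder

embedder = GradientDocumentEmbedder()
embedder.warm_up()  # initialize the component before running it
result = embedder.run(documents=[Document(content="My name is Jean and I live in Paris.")])
assert result["documents"][0].embedding is not None  # the embedding is stored on each Document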
Module haystack_integrations.components.embedders.gradient.gradient_text_embedder
GradientTextEmbedder
@component
class GradientTextEmbedder()
A component for embedding strings using models hosted on Gradient AI.
Usage example:
from haystack_integrations.components.embedders.gradient import GradientTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Pipeline
embedder = GradientTextEmbedder(model="bge-large")
p = Pipeline()
p.add_component("text_embedder", embedder)
p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()))
p.connect("text_embedder", "retriever")
p.run(data={"text_embedder": {"text": "You can embed me but I'll return no matching documents"}})
>>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder.
>>> {'retriever': {'documents': []}}
GradientTextEmbedder.__init__
def __init__(
*,
model: str = "bge-large",
access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID"),
host: Optional[str] = None) -> None
Create a GradientTextEmbedder component.
Arguments:
model
: The name of the model to use.
access_token
: The Gradient access token.
workspace_id
: The Gradient workspace ID.
host
: The Gradient host. By default, it uses Gradient AI.
GradientTextEmbedder.to_dict
def to_dict() -> dict
Serialize this component to a dictionary.
Returns:
The serialized component as a dictionary.
GradientTextEmbedder.from_dict
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder"
Deserialize this component from a dictionary.
Arguments:
data
: The dictionary representation of this component.
Returns:
The deserialized component instance.
GradientTextEmbedder.warm_up
def warm_up() -> None
Initializes the component.
GradientTextEmbedder.run
@component.output_types(embedding=List[float])
def run(text: str)
Generates an embedding for a single text.
Arguments:
text
: The text to embed.
Returns:
A dictionary with the following keys:
embedding
: The embedding of the text.
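A standalone sketch, assuming the credentials are set in the environment; per the output types above, the result holds the embedding as a list of floats:
from haystack_integrations.components.embedders.gradient import GradientTextEmbedder

embedder = GradientTextEmbedder(model="bge-large")
embedder.warm_up()  # initialize the component before running it
result = embedder.run(text="What is the capital of France?")
print(len(result["embedding"]))  # the embedding is a list of floats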
Module haystack_integrations.components.generators.gradient.base
GradientGenerator
@component
class GradientGenerator()
LLM Generator interfacing with Gradient AI.
Queries the LLM using Gradient AI's SDK (the 'gradientai' package). See the Gradient AI API for more details.
Usage example:
from haystack_integrations.components.generators.gradient import GradientGenerator
llm = GradientGenerator(base_model_slug="llama2-7b-chat")
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
# Output: {'replies': ['42']}
GradientGenerator.__init__
def __init__(
*,
access_token: Secret = Secret.from_env_var("GRADIENT_ACCESS_TOKEN"),
base_model_slug: Optional[str] = None,
host: Optional[str] = None,
max_generated_token_count: Optional[int] = None,
model_adapter_id: Optional[str] = None,
temperature: Optional[float] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
workspace_id: Secret = Secret.from_env_var("GRADIENT_WORKSPACE_ID")
) -> None
Create a GradientGenerator component.
Arguments:
access_token
: The Gradient access token as a Secret. If not provided, it's read from the environment variable GRADIENT_ACCESS_TOKEN.
base_model_slug
: The base model slug to use.
host
: The Gradient host. By default, it uses Gradient AI.
max_generated_token_count
: The maximum number of tokens to generate.
model_adapter_id
: The model adapter ID to use.
temperature
: The sampling temperature to use.
top_k
: The top-k value to use.
top_p
: The top-p value to use.
workspace_id
: The Gradient workspace ID as a Secret. If not provided, it's read from the environment variable GRADIENT_WORKSPACE_ID.
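A construction sketch. Presumably either a base model (base_model_slug) or a fine-tuned adapter (model_adapter_id) is configured; "my-adapter-id" below is a hypothetical placeholder:
from haystack_integrations.components.generators.gradient import GradientGenerator

# Query a base model hosted on Gradient
llm = GradientGenerator(
    base_model_slug="llama2-7b-chat",
    max_generated_token_count=128,
    temperature=0.7,
)

# Or target a fine-tuned model adapter ("my-adapter-id" is a placeholder)
llm = GradientGenerator(model_adapter_id="my-adapter-id")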
GradientGenerator.to_dict
def to_dict() -> Dict[str, Any]
Serialize this component to a dictionary.
Returns:
The serialized component as a dictionary.
GradientGenerator.from_dict
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator"
Deserialize this component from a dictionary.
Arguments:
data
: The dictionary representation of this component.
Returns:
The deserialized component instance.
GradientGenerator.warm_up
def warm_up()
Initializes the LLM model instance if it doesn't exist.
GradientGenerator.run
@component.output_types(replies=List[str])
def run(prompt: str)
Queries the LLM with the prompt to produce replies.
Arguments:
prompt
: The prompt to be sent to the generative model.
Returns:
A dictionary with the following keys:
replies
: The replies generated by the model.