Collection

Collection(
    client: Any | None = None,
    collection_name: str = 'ag2-docs',
    embedding_function: Callable[..., Any] | None = None,
    metadata: Any | None = None,
    get_or_create: Any | None = None
)

A Collection object for PGVector.
Attributes:
client: The PGVector client.
collection_name (str): The name of the collection. Default is “ag2-docs”.
embedding_function (Callable): The embedding function used to generate the vector representation.
Default is None. SentenceTransformer(“all-MiniLM-L6-v2”).encode will be used when None.
Models can be chosen from:
https://huggingface.co/models?library=sentence-transformers
metadata (Optional[dict[str, Any]]): The metadata of the collection.
get_or_create (Optional): The flag indicating whether to get or create the collection.
Initialize the Collection object.

Parameters:
NameDescription
clientType: Any | None

Default: None
collection_nameType: str

Default: ‘ag2-docs’
embedding_functionType: Callable[…, Any] | None

Default: None
metadataType: Any | None

Default: None
get_or_createType: Any | None

Default: None

Instance Methods

add

add(
    self,
    ids: list[str | int],
    documents: list[Document] | None,
    embeddings: list[Any] | None = None,
    metadatas: list[Any] | None = None
) -> None

Add documents to the collection.

Parameters:
NameDescription
idsA list of document IDs.

Type: list[str | int]
documentsA list of documents.

Type: list[Document] | None
embeddingsA list of document embeddings.

Optional

Type: list[typing.Any] | None

Default: None
metadatasA list of document metadatas.

Optional

Type: list[typing.Any] | None

Default: None
Returns:
TypeDescription
NoneNone

convert_string_to_array

convert_string_to_array(array_string: str) -> list[float]

Convert a string representation of an array to a list of floats.
Parameters:
- array_string (str): The string representation of the array.
Returns:
- list: A list of floats parsed from the input string. If the input is not a string, it returns the input itself.

Parameters:
NameDescription
array_stringType: str
Returns:
TypeDescription
list[float]- list: A list of floats parsed from the input string. If the input is not a string, it returns the input itself.

cosine_distance

cosine_distance(arr1: list[float], arr2: list[float]) -> float

Calculate the cosine distance between two vectors.
Parameters:
- arr1 (List[float]): The first vector.
- arr2 (List[float]): The second vector.
Returns:
- float: The cosine distance between arr1 and arr2.

Parameters:
NameDescription
arr1Type: list[float]
arr2Type: list[float]
Returns:
TypeDescription
float- float: The cosine distance between arr1 and arr2.

count

count(self) -> int

Get the total number of documents in the collection.
Returns:
int: The total number of documents.

Returns:
TypeDescription
intint: The total number of documents.

create_collection

create_collection(
    self,
    collection_name: str | None = None,
    dimension: int | str | None = None
) -> None

Create a new collection.

Parameters:
NameDescription
collection_nameThe name of the new collection.

Type: str | None

Default: None
dimensionThe dimension size of the sentence embedding model

Type: int | str | None

Default: None
Returns:
TypeDescription
NoneNone

delete

delete(
    self,
    ids: list[str | int],
    collection_name: str | None = None
) -> None

Delete documents from the collection.

Parameters:
NameDescription
idsA list of document IDs to delete.

Type: list[str | int]
collection_nameThe name of the collection to delete the documents from.

Type: str | None

Default: None
Returns:
TypeDescription
NoneNone

delete_collection

delete_collection(self, collection_name: str | None = None) -> None

Delete the entire collection.

Parameters:
NameDescription
collection_nameThe name of the collection to delete.

Type: str | None

Default: None
Returns:
TypeDescription
NoneNone

euclidean_distance

euclidean_distance(arr1: list[float], arr2: list[float]) -> float

Calculate the Euclidean distance between two vectors.
Parameters:
- arr1 (List[float]): The first vector.
- arr2 (List[float]): The second vector.
Returns:
- float: The Euclidean distance between arr1 and arr2.

Parameters:
NameDescription
arr1Type: list[float]
arr2Type: list[float]
Returns:
TypeDescription
float- float: The Euclidean distance between arr1 and arr2.

get

get(
    self,
    ids: str | None = None,
    include: str | None = None,
    where: str | None = None,
    limit: int | str | None = None,
    offset: int | str | None = None
) -> list[Document]

Retrieve documents from the collection.

Parameters:
NameDescription
idsA list of document IDs.

Type: str | None

Default: None
includeThe fields to include.

Type: str | None

Default: None
whereAdditional filtering criteria.

Type: str | None

Default: None
limitThe maximum number of documents to retrieve.

Type: int | str | None

Default: None
offsetThe offset for pagination.

Type: int | str | None

Default: None
Returns:
TypeDescription
list[Document]List: The retrieved documents.

inner_product_distance

inner_product_distance(arr1: list[float], arr2: list[float]) -> float

Calculate the Euclidean distance between two vectors.
Parameters:
- arr1 (List[float]): The first vector.
- arr2 (List[float]): The second vector.
Returns:
- float: The Euclidean distance between arr1 and arr2.

Parameters:
NameDescription
arr1Type: list[float]
arr2Type: list[float]
Returns:
TypeDescription
float- float: The Euclidean distance between arr1 and arr2.

modify

modify(
    self,
    metadata,
    collection_name: str | None = None
) -> None

Modify metadata for the collection.

Parameters:
NameDescription
metadataThe new metadata.

collection_nameThe name of the collection.

Type: str | None

Default: None
Returns:
TypeDescription
NoneNone

query

query(
    self,
    query_texts: list[str],
    collection_name: str | None = None,
    n_results: int | None = 10,
    distance_type: str | None = 'euclidean',
    distance_threshold: float | None = -1,
    include_embedding: bool | None = False
) -> list[list[tuple[Document, float]]]

Query documents in the collection.

Parameters:
NameDescription
query_textsA list of query texts.

Type: list[str]
collection_nameThe name of the collection.

Type: str | None

Default: None
n_resultsThe maximum number of results to return.

Type: int | None

Default: 10
distance_typeDistance search type - euclidean or cosine

Type: str | None

Default: ‘euclidean’
distance_thresholdDistance threshold to limit searches

Type: float | None

Default: -1
include_embeddingInclude embedding values in QueryResults

Type: bool | None

Default: False
Returns:
TypeDescription
list[list[tuple[Document, float]]]QueryResults: The query results.

set_collection_name

set_collection_name(self, collection_name) -> str
Parameters:
NameDescription
collection_name

table_exists

table_exists(self, table_name: str) -> bool

Check if a table exists in the PostgreSQL database.

Parameters:
NameDescription
table_nameThe name of the table to check.

Type: str
Returns:
TypeDescription
boolbool: True if the table exists, False otherwise.

update

update(
    self,
    ids: list[str],
    embeddings: list[Any],
    metadatas: list[Any],
    documents: list[Document]
) -> None

Update documents in the collection.

Parameters:
NameDescription
idsA list of document IDs.

Type: list[str]
embeddingsA list of document embeddings.

Type: list[typing.Any]
metadatasA list of document metadatas.

Type: list[typing.Any]
documentsA list of documents.

Type: list[Document]
Returns:
TypeDescription
NoneNone

upsert

upsert(
    self,
    ids: list[str | int],
    documents: list[Document],
    embeddings: list[Any] | None = None,
    metadatas: list[Any] | None = None
) -> None

Upsert documents into the collection.

Parameters:
NameDescription
idsA list of document IDs.

Type: list[str | int]
documentsA list of documents.

Type: list[Document]
embeddingsA list of document embeddings.

Type: list[typing.Any] | None

Default: None
metadatasA list of document metadatas.

Type: list[typing.Any] | None

Default: None
Returns:
TypeDescription
NoneNone