PGVectorDB

PGVectorDB(
    *,
    conn: ForwardRef('Connection') | None = None,
    connection_string: str | None = None,
    host: str | None = None,
    port: int | str | None = None,
    dbname: str | None = None,
    username: str | None = None,
    password: str | None = None,
    connect_timeout: int | None = 10,
    embedding_function: Callable = None,
    metadata: dict[str, Any] | None = None
)

A vector database that uses PGVector as the backend.
Initialize the vector database.
Note: an existing conn, a connection_string, or host + port + dbname must be specified.

Parameters:
NameDescription
connType: ForwardRef('psycopg.Connection') | None

Default: None
connection_stringType: str | None

Default: None
hostType: str | None

Default: None
portType: int | str | None

Default: None
dbnameType: str | None

Default: None
usernameType: str | None

Default: None
passwordType: str | None

Default: None
connect_timeoutType: int | None

Default: 10
embedding_functionType: Callable

Default: None
metadataType: dict[str, typing.Any] | None

Default: None

Class Attributes

active_collection



embedding_function



type



Instance Methods

create_collection

create_collection(
    self,
    collection_name: str,
    overwrite: bool = False,
    get_or_create: bool = True
) -> Collection

Create a collection in the vector database.
Case 1. If the collection does not exist, create the collection.
Case 2. If the collection exists and overwrite is True, overwrite the collection.
Case 3. If the collection exists and overwrite is False: if get_or_create is True, return the existing collection; otherwise raise a ValueError.

Parameters:
NameDescription
collection_namestrThe name of the collection.

Type: str
overwriteboolWhether to overwrite the collection if it exists.

Default is False.

Type: bool

Default: False
get_or_createboolWhether to get the collection if it exists.

Default is True.

Type: bool

Default: True
Returns:
TypeDescription
CollectionCollection | The collection object.

delete_collection

delete_collection(self, collection_name: str) -> None

Delete the collection from the vector database.

Parameters:
NameDescription
collection_namestrThe name of the collection.

Type: str
Returns:
TypeDescription
NoneNone

delete_docs

delete_docs(
    self,
    ids: list[str | int],
    collection_name: str = None
) -> None

Delete documents from the collection of the vector database.

Parameters:
NameDescription
idsList[ItemID]A list of document ids.

Each id is a typed ItemID.

Type: list[str | int]
collection_namestrThe name of the collection.

Default is None.

Type: str

Default: None
Returns:
TypeDescription
NoneNone

establish_connection

establish_connection(
    self,
    conn: ForwardRef('Connection') | None = None,
    connection_string: str | None = None,
    host: str | None = None,
    port: int | str | None = None,
    dbname: str | None = None,
    username: str | None = None,
    password: str | None = None,
    connect_timeout: int | None = 10
) -> Connection

Establishes a connection to a PostgreSQL database using psycopg.

Parameters:
NameDescription
connAn existing psycopg connection object.

If provided, this connection will be used.

Type: ForwardRef('psycopg.Connection') | None

Default: None
connection_stringA string containing the connection information.

If provided, a new connection will be established using this string.

Type: str | None

Default: None
hostThe hostname of the PostgreSQL server.

Used if connection_string is not provided.

Type: str | None

Default: None
portThe port number to connect to at the server host.

Used if connection_string is not provided.

Type: int | str | None

Default: None
dbnameThe database name.

Used if connection_string is not provided.

Type: str | None

Default: None
usernameThe username to connect as.

Used if connection_string is not provided.

Type: str | None

Default: None
passwordThe user’s password.

Used if connection_string is not provided.

Type: str | None

Default: None
connect_timeoutMaximum wait for connection, in seconds.

The default is 10 seconds.

Type: int | None

Default: 10
Returns:
TypeDescription
psycopg.ConnectionA psycopg.Connection object representing the established connection.

get_collection

get_collection(self, collection_name: str = None) -> Collection

Get the collection from the vector database.

Parameters:
NameDescription
collection_namestrThe name of the collection.

Default is None.

If None, return the current active collection.

Type: str

Default: None
Returns:
TypeDescription
CollectionCollection | The collection object.

get_docs_by_ids

get_docs_by_ids(
    self,
    ids: list[str | int] = None,
    collection_name: str = None,
    include=None,
    **kwargs
) -> list[Document]

Retrieve documents from the collection of the vector database based on the ids.

Parameters:
NameDescription
idsList[ItemID]A list of document ids.

If None, will return all the documents.

Default is None.

Type: list[str | int]

Default: None
collection_namestrThe name of the collection.

Default is None.

Type: str

Default: None
include=None
**kwargs
Returns:
TypeDescription
list[Document]List[Document] | The results.

insert_docs

insert_docs(
    self,
    docs: list[Document],
    collection_name: str = None,
    upsert: bool = False
) -> None

Insert documents into the collection of the vector database.

Parameters:
NameDescription
docsList[Document]A list of documents.

Each document is a TypedDict Document.

Type: list[Document]
collection_namestrThe name of the collection.

Default is None.

Type: str

Default: None
upsertboolWhether to update the document if it exists.

Default is False.

Type: bool

Default: False
Returns:
TypeDescription
NoneNone

retrieve_docs

retrieve_docs(
    self,
    queries: list[str],
    collection_name: str = None,
    n_results: int = 10,
    distance_threshold: float = -1
) -> list[list[tuple[Document, float]]]

Retrieve documents from the collection of the vector database based on the queries.

Parameters:
NameDescription
queriesList[str]A list of queries.

Each query is a string.

Type: list[str]
collection_namestrThe name of the collection.

Default is None.

Type: str

Default: None
n_resultsintThe number of relevant documents to return.

Default is 10.

Type: int

Default: 10
distance_thresholdfloatThe threshold for the distance score; only documents with a distance smaller than it will be returned.

Don’t filter with it if < 0.

Default is -1.

Type: float

Default: -1
Returns:
TypeDescription
list[list[tuple[Document, float]]]QueryResults | The query results. Each query result is a list of tuples containing the document and the distance.

update_docs

update_docs(
    self,
    docs: list[Document],
    collection_name: str = None
) -> None

Update documents in the collection of the vector database.

Parameters:
NameDescription
docsList[Document]A list of documents.

Type: list[Document]
collection_namestrThe name of the collection.

Default is None.

Type: str

Default: None
Returns:
TypeDescription
NoneNone