Skip to content

chromadb

langroid/vector_store/chromadb.py

ChromaDB(config=ChromaDBConfig())

Bases: VectorStore

Source code in langroid/vector_store/chromadb.py
def __init__(self, config: ChromaDBConfig = ChromaDBConfig()):
    """
    Set up the embedding function and ChromaDB client described by `config`.

    Args:
        config (ChromaDBConfig): Settings for this store. This method reads
            `embedding`, `storage_path`, `collection_name` and
            `replace_collection` from it.

    Raises:
        ImportError: If the `chromadb` package cannot be imported.
    """
    # NOTE(review): the default `ChromaDBConfig()` is evaluated once, when the
    # function is defined, so all calls that rely on the default share a single
    # config object -- confirm that this sharing is intended.
    super().__init__(config)

    # chromadb is an optional extra, so it is imported lazily here.
    try:
        import chromadb
    except ImportError:
        raise ImportError(
            """
            ChromaDB is not installed by default with Langroid.
            If you want to use it, please install it with the `chromadb` extra, e.g.
            pip install "langroid[chromadb]"
            or an equivalent command.
            """
        )

    self.config = config
    self.embedding_fn = EmbeddingModel.create(config.embedding).embedding_fn()

    client_settings = chromadb.config.Settings(
        persist_directory=config.storage_path,
    )
    self.client = chromadb.Client(client_settings)

    # Only open/create a collection up front when one is named in the config.
    initial_collection = self.config.collection_name
    if initial_collection is None:
        return
    self.create_collection(
        initial_collection,
        replace=self.config.replace_collection,
    )

clear_all_collections(really=False, prefix='')

Delete all collections in the vector store whose names start with the given prefix. Nothing is deleted unless really=True.

Source code in langroid/vector_store/chromadb.py
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
    """
    Delete every collection whose name starts with `prefix`.

    Args:
        really (bool, optional): Safety switch; nothing is deleted unless this
            is True. Defaults to False.
        prefix (str, optional): Only collections whose name starts with this
            string are deleted. The default "" matches every collection.

    Returns:
        int: Number of collections deleted (0 if `really` is False or no
            collection name matches `prefix`).
    """

    if not really:
        logger.warning("Not deleting all collections, set really=True to confirm")
        return 0
    coll = [c for c in self.client.list_collections() if c.name.startswith(prefix)]
    if not coll:
        logger.warning(f"No collections found with prefix {prefix}")
        return 0
    n_empty_deletes = 0
    n_non_empty_deletes = 0
    for c in coll:
        # Ask for the size once per collection: a second count() call would
        # repeat the backend query and could disagree with the first.
        if c.count() == 0:
            n_empty_deletes += 1
        else:
            n_non_empty_deletes += 1
        self.client.delete_collection(name=c.name)
    logger.warning(
        f"""
        Deleted {n_empty_deletes} empty collections and 
        {n_non_empty_deletes} non-empty collections.
        """
    )
    return n_empty_deletes + n_non_empty_deletes

list_collections(empty=False)

List collections in the vector store, skipping empty ones unless empty is True. Args: empty (bool, optional): Whether to include empty collections. Defaults to False. Returns: List[str]: Names of the matching collections.

Source code in langroid/vector_store/chromadb.py
def list_collections(self, empty: bool = False) -> List[str]:
    """
    Return the names of collections held by the client.

    Args:
        empty (bool, optional): When True, every collection is listed. When
            False (the default), only collections whose `count()` is greater
            than zero are listed.
    Returns:
        List[str]: Collection names, in the order the client reports them.
    """
    # `empty or ...` short-circuits, so count() is never queried when every
    # collection is wanted anyway.
    return [
        collection.name
        for collection in self.client.list_collections()
        if empty or collection.count() > 0
    ]

create_collection(collection_name, replace=False)

Create a collection in the vector store, optionally replacing an existing collection if replace is True. Args: collection_name (str): Name of the collection to create or replace. replace (bool, optional): Whether to replace an existing collection. Defaults to False.

Source code in langroid/vector_store/chromadb.py
def create_collection(self, collection_name: str, replace: bool = False) -> None:
    """
    Create a collection in the vector store, optionally replacing an existing
        collection if `replace` is True.

    The name is recorded in `self.config.collection_name` and the resulting
    collection object is stored in `self.collection`.

    Args:
        collection_name (str): Name of the collection to create or replace.
        replace (bool, optional): Whether to replace an existing collection.
            When True, an existing collection with this name is deleted first
            and the new one is created with `get_or_create=False`. When False,
            the collection is created with `get_or_create=True`.
            Defaults to False.

    """
    self.config.collection_name = collection_name
    # Check `replace` first so the collection listing is only fetched when a
    # deletion could actually happen.
    if replace and collection_name in self.list_collections(empty=True):
        logger.warning(f"Replacing existing collection {collection_name}")
        self.client.delete_collection(collection_name)
    self.collection = self.client.create_collection(
        name=self.config.collection_name,
        embedding_function=self.embedding_fn,
        get_or_create=not replace,
    )