Skip to content

web_search

langroid/parsing/web_search.py

Utilities for web search.

NOTE: Using Google Search requires setting the GOOGLE_API_KEY and GOOGLE_CSE_ID environment variables in your .env file, as explained in the README.

WebSearchResult(title, link, max_content_length=3500, max_summary_length=300)

Class representing a Web Search result, containing the title, link, summary and full content of the result.

title (str): The title of the search result.
link (str): The link to the search result.
max_content_length (int): The maximum length of the full content.
max_summary_length (int): The maximum length of the summary.
Source code in langroid/parsing/web_search.py
def __init__(
    self,
    title: str,
    link: str,
    max_content_length: int = 3500,
    max_summary_length: int = 300,
) -> None:
    """
    Store the result's metadata and eagerly populate its content fields.

    Args:
        title (str): The title of the search result.
        link (str): The link to the search result.
        max_content_length (int): The maximum length of the full content.
        max_summary_length (int): The maximum length of the summary.
    """
    self.title = title
    self.link = link
    self.max_content_length = max_content_length
    self.max_summary_length = max_summary_length
    # Both helpers run at construction time, after the attributes above are
    # set. They are defined outside this view; presumably get_full_content()
    # reads `link`/`max_content_length` and get_summary() depends on
    # `full_content` -- TODO confirm, and keep this ordering if so.
    self.full_content = self.get_full_content()
    self.summary = self.get_summary()

Makes an API call through the Metaphor client to fetch the top num_results links that match the query. Returns a list of WebSearchResult objects.

Parameters:

Name Type Description Default
query str

The query that the user wants to run.

required
num_results int

Number of top matching results that we want to grab

5
Source code in langroid/parsing/web_search.py
def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Metaphor client for the top `num_results` links that match
    the query, and wrap each one as a WebSearchResult.

    Args:
        query (str): The search query to send.
        num_results (int): Number of top matching results to request.

    Returns:
        List[WebSearchResult]: One entry per result returned by the client,
            built with max_content_length=3500 and max_summary_length=300.

    Raises:
        ValueError: If neither METAPHOR_API_KEY nor EXA_API_KEY is set.
        LangroidImportError: If the `metaphor_python` package cannot be
            imported.
    """

    load_dotenv()

    # Either key is accepted; METAPHOR_API_KEY takes precedence when both
    # are set.
    api_key = os.getenv("METAPHOR_API_KEY") or os.getenv("EXA_API_KEY")
    if not api_key:
        raise ValueError(
            """
            Neither METAPHOR_API_KEY nor EXA_API_KEY environment variables are set. 
            Please set one of them to your API key, and try again.
            """
        )

    # Imported inside the function, so the package is only needed when this
    # search backend is actually called.
    try:
        from metaphor_python import Metaphor
    except ImportError as err:
        raise LangroidImportError("metaphor-python", "metaphor") from err

    client = Metaphor(api_key=api_key)
    response = client.search(query=query, num_results=num_results)

    return [
        WebSearchResult(
            # Fall back to an empty title, as exa_search does, so a result
            # without a title still yields a str.
            title=result.title or "",
            link=result.url,
            max_content_length=3500,
            max_summary_length=300,
        )
        for result in response.results
    ]

Makes an API call through the Exa client to fetch the top num_results links that match the query. Returns a list of WebSearchResult objects.

Parameters:

Name Type Description Default
query str

The query that the user wants to run.

required
num_results int

Number of top matching results that we want to grab

5
Source code in langroid/parsing/web_search.py
def exa_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Exa client for the top `num_results` links that match the
    query, and wrap each one as a WebSearchResult.

    Args:
        query (str): The search query to send.
        num_results (int): Number of top matching results to request.

    Returns:
        List[WebSearchResult]: One entry per result returned by the client,
            built with max_content_length=3500 and max_summary_length=300.

    Raises:
        ValueError: If the EXA_API_KEY environment variable is not set.
        LangroidImportError: If the `exa_py` package cannot be imported.
    """

    load_dotenv()

    api_key = os.getenv("EXA_API_KEY")
    if not api_key:
        # Only one variable is consulted here, so the message names exactly
        # that variable.
        raise ValueError(
            "EXA_API_KEY environment variable is not set. "
            "Please set it to your API key and try again."
        )

    # Imported inside the function, so the package is only needed when this
    # search backend is actually called.
    try:
        from exa_py import Exa
    except ImportError as err:
        raise LangroidImportError("exa-py", "exa") from err

    client = Exa(api_key=api_key)
    response = client.search(query=query, num_results=num_results)

    return [
        WebSearchResult(
            # A result may come back without a title; use "" in that case.
            title=result.title or "",
            link=result.url,
            max_content_length=3500,
            max_summary_length=300,
        )
        for result in response.results
    ]

Makes an API call through the DuckDuckGo client to fetch the top num_results links that match the query. Returns a list of WebSearchResult objects.

Parameters:

Name Type Description Default
query str

The query that the user wants to run.

required
num_results int

Number of top matching results that we want to grab

5
Source code in langroid/parsing/web_search.py
def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Run a DuckDuckGo text search for the top `num_results` links that match
    the query, and wrap each hit as a WebSearchResult.

    Args:
        query (str): The search query to send.
        num_results (int): Number of top matching results to request.

    Returns:
        List[WebSearchResult]: One entry per hit, built from its "title" and
            "href" fields with max_content_length=3500 and
            max_summary_length=300.
    """

    # Collect every hit while the DDGS session is still open; the wrappers
    # are built only after the session has been closed.
    with DDGS() as session:
        hits = list(session.text(query, max_results=num_results))

    wrapped: List[WebSearchResult] = []
    for hit in hits:
        wrapped.append(
            WebSearchResult(
                title=hit["title"],
                link=hit["href"],
                max_content_length=3500,
                max_summary_length=300,
            )
        )
    return wrapped

Makes an API call to the Tavily API to fetch the top num_results links that match the query. Returns a list of WebSearchResult objects.

Parameters:

Name Type Description Default
query str

The query that the user wants to run.

required
num_results int

Number of top matching results that we want to grab

5
Source code in langroid/parsing/web_search.py
def tavily_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Tavily API for the top `num_results` links that match the
    query, and wrap each hit as a WebSearchResult.

    Args:
        query (str): The search query to send.
        num_results (int): Number of top matching results to request.

    Returns:
        List[WebSearchResult]: One entry per item in the response's
            "results" list, built from its "title" and "url" fields with
            max_content_length=3500 and max_summary_length=300.

    Raises:
        ValueError: If the TAVILY_API_KEY environment variable is not set.
        LangroidImportError: If the `tavily` package cannot be imported.
    """

    load_dotenv()

    tavily_key = os.getenv("TAVILY_API_KEY")
    if not tavily_key:
        missing_key_msg = (
            "TAVILY_API_KEY environment variable is not set. "
            "Please set it to your API key and try again."
        )
        raise ValueError(missing_key_msg)

    # Imported inside the function, so the package is only needed when this
    # search backend is actually called.
    try:
        from tavily import TavilyClient
    except ImportError:
        raise LangroidImportError("tavily-python", "tavily")

    tavily = TavilyClient(api_key=tavily_key)
    payload = tavily.search(query=query, max_results=num_results)

    wrapped: List[WebSearchResult] = []
    for hit in payload["results"]:
        wrapped.append(
            WebSearchResult(
                title=hit["title"],
                link=hit["url"],
                max_content_length=3500,
                max_summary_length=300,
            )
        )
    return wrapped