Skip to content

parse_json

langroid/parsing/parse_json.py

is_valid_json(json_str)

Check if the input string is a valid JSON.

Parameters:

Name Type Description Default
json_str str

The input string to check.

required

Returns:

Name Type Description
bool bool

True if the input string is a valid JSON, False otherwise.

Source code in langroid/parsing/parse_json.py
def is_valid_json(json_str: str) -> bool:
    """Check if the input string is a valid JSON.

    Args:
        json_str (str): The input string to check.

    Returns:
        bool: True if the input string is a valid JSON, False otherwise.
            Inputs that are not str/bytes/bytearray (e.g. ``None``) are
            reported as invalid rather than raising.
    """
    try:
        json.loads(json_str)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (malformed text);
        # TypeError is what json.loads raises for non-string input such as
        # None, which a boolean predicate should report as "not valid".
        return False
    return True

flatten(nested_list)

Flatten a nested list into a single list of strings

Source code in langroid/parsing/parse_json.py
def flatten(nested_list) -> Iterator[str]:  # type: ignore
    """Lazily yield the leaf items of an arbitrarily nested list/tuple.

    Only ``list`` and ``tuple`` instances are descended into; every other
    item (including strings) is yielded unchanged, in depth-first order.
    """
    for element in nested_list:
        if not isinstance(element, (list, tuple)):
            yield element
            continue
        # Nested container: delegate to a recursive generator.
        yield from flatten(element)

get_json_candidates(s)

Get top-level JSON candidates, i.e. strings between curly braces.

Source code in langroid/parsing/parse_json.py
def get_json_candidates(s: str) -> List[str]:
    """Return the top-level brace-delimited substrings found in ``s``.

    Args:
        s (str): Text that may contain one or more ``{...}`` regions.

    Returns:
        List[str]: The original text of each outermost ``{...}`` match, in
            the order found. An empty list is returned if scanning raises.
    """
    # Grammar: a (possibly nested) {...} group, reported as the raw source
    # text it spans rather than as a nested token list.
    brace_group = originalTextFor(nestedExpr("{", "}"))

    try:
        matches = brace_group.searchString(s)
        # Each match holds a single token: the matched original text.
        return [match[0] for match in matches]
    except Exception:
        # Best-effort: any scanning failure means "no candidates".
        return []

try_repair_json_yaml(s)

Attempt to load as json, and if it fails, try repairing the JSON. If that fails, replace any \n with space as a last resort. NOTE - replacing \n with space will result in format loss, which may matter in generated code (e.g. python, toml, etc)

Source code in langroid/parsing/parse_json.py
def try_repair_json_yaml(s: str) -> str | None:
    """
    Turn a possibly malformed JSON/YAML snippet into a JSON string.

    Strategies are tried in this order:
      1. ``repair_json(s, return_objects=True)``; if it yields a list, only
         its first element is kept (an empty list counts as a failure).
      2. ``yaml.safe_load(s)``, accepted only when it produces a dict.
      3. Replace any \\n with space and accept the result if it is valid JSON.

    NOTE - replacing \\n with space will result in format loss,
    which may matter in generated code (e.g. python, toml, etc)

    Args:
        s (str): The candidate text.

    Returns:
        str | None: A JSON string, or None if every strategy failed.
    """
    repaired = repair_json(s, return_objects=True)
    if isinstance(repaired, list):
        # Keep just the first element; nothing usable in an empty list.
        repaired = repaired[0] if len(repaired) > 0 else None
    if repaired is not None:
        return json.dumps(repaired)  # type: ignore

    # Repair produced nothing: see whether the text is a YAML mapping.
    try:
        parsed_yaml = yaml.safe_load(s)
        if isinstance(parsed_yaml, dict):
            return json.dumps(parsed_yaml)
    except yaml.YAMLError:
        pass

    # If it still fails, replace any \n with space as a last resort
    single_line = s.replace("\n", " ")
    return single_line if is_valid_json(single_line) else None  # None: all failed

extract_top_level_json(s)

Extract all top-level JSON-formatted substrings from a given string.

Parameters:

Name Type Description Default
s str

The input string to search for JSON substrings.

required

Returns:

Type Description
List[str]

List[str]: A list of top-level JSON-formatted substrings.

Source code in langroid/parsing/parse_json.py
def extract_top_level_json(s: str) -> List[str]:
    """Extract all top-level JSON-formatted substrings from a given string.

    Each candidate returned by ``get_json_candidates`` is passed through
    ``try_repair_json_yaml``; candidates for which that returns None are
    dropped.

    Args:
        s (str): The input string to search for JSON substrings.

    Returns:
        List[str]: The JSON strings obtained from the candidates, in order.
    """
    repaired_candidates = (
        try_repair_json_yaml(candidate) for candidate in get_json_candidates(s)
    )
    return [repaired for repaired in repaired_candidates if repaired is not None]

top_level_json_field(s, f)

Extract the value of a field f from a top-level JSON object. If there are multiple, just return the first.

Parameters:

Name Type Description Default
s str

The input string to search for JSON substrings.

required
f str

The field to extract from the JSON object.

required

Returns:

Name Type Description
str Any

The value of the field f in the top-level JSON object, if any. Otherwise, return an empty string.

Note

This function is designed to never crash. If any exception occurs during JSON parsing or field extraction, it gracefully returns an empty string.

Source code in langroid/parsing/parse_json.py
def top_level_json_field(s: str, f: str) -> Any:
    """
    Look up field ``f`` in the top-level JSON found in ``s``.

    Candidates from ``extract_top_level_json`` are examined in order. A
    candidate that parses to a dict is checked directly; one that parses to
    a list has its dict elements checked in order. The first hit wins.

    Args:
        s (str): The input string to search for JSON substrings.
        f (str): The field to extract from the JSON object.

    Returns:
        Any: The value of the field f in the first JSON object containing
            it, if any. Otherwise, an empty string.

    Note:
        This function is designed to never crash. If any exception occurs during
        JSON parsing or field extraction, it gracefully returns an empty string.
    """
    try:
        for candidate in extract_top_level_json(s):
            try:
                parsed = json.loads(candidate)
                if isinstance(parsed, dict):
                    records = [parsed]
                elif isinstance(parsed, list):
                    # Some responses wrap candidate JSON objects in a list.
                    records = parsed
                else:
                    records = []
                for record in records:
                    if isinstance(record, dict) and f in record:
                        return record[f]
            except (json.JSONDecodeError, TypeError, KeyError):
                # This candidate is unusable; move on to the next one.
                continue
    except Exception:
        # Catch any unexpected errors to ensure we never crash
        pass

    return ""