repo_loader

langroid/parsing/repo_loader.py

RepoLoaderConfig

Bases: BaseSettings

Configuration for RepoLoader.

RepoLoader(url, config=RepoLoaderConfig())

Class for recursively getting all file content in a repo.

url: full GitHub URL of the repo, or just "owner/repo"

config: configuration for RepoLoader
Source code in langroid/parsing/repo_loader.py
def __init__(
    self,
    url: str,
    config: RepoLoaderConfig = RepoLoaderConfig(),
):
    """
    Args:
        url: full github url of repo, or just "owner/repo"
        config: configuration for RepoLoader
    """
    self.url = url
    self.config = config
    self.clone_path: Optional[str] = None
    self.log_file = ".logs/repo_loader/download_log.json"
    os.makedirs(os.path.dirname(self.log_file), exist_ok=True)
    if not os.path.exists(self.log_file):
        with open(self.log_file, "w") as f:
            json.dump({"junk": "ignore"}, f)
    with open(self.log_file, "r") as f:
        log = json.load(f)
    if self.url in log and os.path.exists(log[self.url]):
        logger.info(f"Repo Already downloaded in {log[self.url]}")
        self.clone_path = log[self.url]

    if "github.com" in self.url:
        repo_name = self.url.split("github.com/")[1]
    else:
        repo_name = self.url
    load_dotenv()
    # authenticated calls to github api have higher rate limit
    token = os.getenv("GITHUB_ACCESS_TOKEN")
    g = Github(token)
    self.repo = self._get_repo_with_retry(g, repo_name)
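
A minimal usage sketch (the repo name is illustrative, and the import path assumes the module location shown above; a GITHUB_ACCESS_TOKEN in the environment raises the GitHub API rate limit, per the constructor):

from langroid.parsing.repo_loader import RepoLoader, RepoLoaderConfig

# Either a full GitHub URL or the "owner/repo" shorthand works
loader = RepoLoader("langroid/langroid", config=RepoLoaderConfig())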

get_issues(k=100)

Get up to k issues from the GitHub repo.

Source code in langroid/parsing/repo_loader.py
def get_issues(self, k: int | None = 100) -> List[IssueData]:
    """Get up to k issues from the GitHub repo."""
    if k is None:
        issues = self.repo.get_issues(state="all")
    else:
        issues = self.repo.get_issues(state="all")[:k]
    issue_data_list = []
    for issue in issues:
        issue_data = IssueData(
            state=issue.state,
            year=issue.created_at.year,
            month=issue.created_at.month,
            day=issue.created_at.day,
            assignee=issue.assignee.login if issue.assignee else None,
            size=get_issue_size(issue.labels),
            text=issue.body or "No issue description body.",
        )
        issue_data_list.append(issue_data)

    return issue_data_list
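
For example, a quick tally of open issues (a sketch, assuming `loader` was constructed as above):

issues = loader.get_issues(k=50)
n_open = sum(1 for iss in issues if iss.state == "open")
print(f"{n_open} of {len(issues)} issues are open")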

clone(path=None)

Clone a GitHub repository to a local directory specified by path, if it has not already been cloned.

Parameters:

path (str, optional): The local directory where the repository should be cloned. If not specified, a temporary directory will be created. Default: None.

Returns:

Optional[str]: The path to the local directory where the repository was cloned.

Source code in langroid/parsing/repo_loader.py
def clone(self, path: Optional[str] = None) -> Optional[str]:
    """
    Clone a GitHub repository to a local directory specified by `path`,
    if it has not already been cloned.

    Args:
        path (str): The local directory where the repository should be cloned.
            If not specified, a temporary directory will be created.

    Returns:
        str: The path to the local directory where the repository was cloned.
    """
    with open(self.log_file, "r") as f:
        log: Dict[str, str] = json.load(f)

    if (
        self.url in log
        and os.path.exists(log[self.url])
        and _has_files(log[self.url])
    ):
        logger.warning(f"Repo Already downloaded in {log[self.url]}")
        self.clone_path = log[self.url]
        return self.clone_path

    self.clone_path = path
    if path is None:
        path = self.default_clone_path()
        self.clone_path = path

    try:
        subprocess.run(["git", "clone", self.url, path], check=True)
        log[self.url] = path
        with open(self.log_file, "w") as f:
            json.dump(log, f)
        return self.clone_path
    except subprocess.CalledProcessError as e:
        logger.error(f"Git clone failed: {e}")
    except Exception as e:
        logger.error(f"An error occurred while trying to clone the repository:{e}")

    return self.clone_path
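
A sketch of cloning into an auto-chosen directory; a second call reuses the logged clone path:

local_path = loader.clone()   # path=None => default clone path
assert local_path == loader.clone_path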

load_tree_from_github(depth, lines=0)

Get a nested dictionary of GitHub repository file and directory names up to a certain depth, with file contents.

Parameters:

depth (int): The depth level. Required.

lines (int, optional): The number of lines of file contents to include. Default: 0.

Returns:

Dict[str, Union[str, List[Dict[str, Any]]]]: A dictionary containing file and directory names, with file contents.

Source code in langroid/parsing/repo_loader.py
def load_tree_from_github(
    self, depth: int, lines: int = 0
) -> Dict[str, Union[str, List[Dict[str, Any]]]]:
    """
    Get a nested dictionary of GitHub repository file and directory names
    up to a certain depth, with file contents.

    Args:
        depth (int): The depth level.
        lines (int): The number of lines of file contents to include.

    Returns:
        Dict[str, Union[str, List[Dict]]]:
        A dictionary containing file and directory names, with file contents.
    """
    root_contents = self.repo.get_contents("")
    if not isinstance(root_contents, list):
        root_contents = [root_contents]
    repo_structure = {
        "type": "dir",
        "name": "",
        "dirs": [],
        "files": [],
        "path": "",
    }

    # A queue of tuples (current_node, current_depth, parent_structure)
    queue = deque([(root_contents, 0, repo_structure)])

    while queue:
        current_node, current_depth, parent_structure = queue.popleft()

        for content in current_node:
            if not self._is_allowed(content):
                continue
            if content.type == "dir" and current_depth < depth:
                # Create a new sub-dictionary for this directory
                new_dir = {
                    "type": "dir",
                    "name": content.name,
                    "dirs": [],
                    "files": [],
                    "path": content.path,
                }
                parent_structure["dirs"].append(new_dir)
                contents = self.repo.get_contents(content.path)
                if not isinstance(contents, list):
                    contents = [contents]
                queue.append(
                    (
                        contents,
                        current_depth + 1,
                        new_dir,
                    )
                )
            elif content.type == "file":
                file_content = "\n".join(
                    _get_decoded_content(content).splitlines()[:lines]
                )
                file_dict = {
                    "type": "file",
                    "name": content.name,
                    "content": file_content,
                    "path": content.path,
                }
                parent_structure["files"].append(file_dict)

    return repo_structure
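
For instance, a shallow tree with the first 5 lines of each file (a sketch):

tree = loader.load_tree_from_github(depth=1, lines=5)
top_dirs = [d["name"] for d in tree["dirs"]]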

load(path=None, depth=3, lines=0)

From a local folder path (if None, the repo clone path), get a nested dictionary (tree) of dirs, files and contents, and a list of Document objects for each file.

Parameters:

path (str, optional): The local folder path; if None, use self.clone_path. Default: None.

depth (int, optional): The depth level. Default: 3.

lines (int, optional): The number of lines of file contents to include. Default: 0.

Returns:

Tuple[Dict[str, Union[str, List[Dict[str, Any]]]], List[Document]]: A dictionary containing file and directory names, with file contents, and a list of Document objects for each file.

Source code in langroid/parsing/repo_loader.py
def load(
    self,
    path: Optional[str] = None,
    depth: int = 3,
    lines: int = 0,
) -> Tuple[Dict[str, Union[str, List[Dict[str, Any]]]], List[Document]]:
    """
    From a local folder `path` (if None, the repo clone path), get:
      a nested dictionary (tree) of dicts, files and contents
      a list of Document objects for each file.

    Args:
        path (str): The local folder path; if none, use self.clone_path()
        depth (int): The depth level.
        lines (int): The number of lines of file contents to include.

    Returns:
        Tuple of (dict, List_of_Documents):
            A dictionary containing file and directory names, with file
            contents, and a list of Document objects for each file.
    """
    if path is None:
        if self.clone_path is None or not _has_files(self.clone_path):
            self.clone()
        path = self.clone_path
    if path is None:
        raise ValueError("Unable to clone repo")
    return self.load_from_folder(
        path=path,
        depth=depth,
        lines=lines,
        file_types=self.config.file_types,
        exclude_dirs=self.config.exclude_dirs,
        url=self.url,
    )
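
A sketch combining clone-and-load in one call:

tree, docs = loader.load(depth=2, lines=20)
print(f"loaded {len(docs)} documents from {loader.clone_path}")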

load_from_folder(path, depth=3, lines=0, file_types=None, exclude_dirs=None, url='') staticmethod

From a local folder path (required), get a nested dictionary (tree) of dirs, files and contents, restricted to the desired file_types and excluding undesired directories, and a list of Document objects for each file.

Parameters:

path (str): The local folder path. Required.

depth (int, optional): The depth level. Default: 3.

lines (int, optional): The number of lines of file contents to include. Default: 0 (no lines => empty string).

file_types (List[str], optional): The file types to include. Default: None (all).

exclude_dirs (List[str], optional): The directories to exclude. Default: None (no exclusions).

url (str, optional): URL to be stored in docs as metadata. Default: "".

Returns:

Tuple[Dict[str, Union[str, List[Dict[str, Any]]]], List[Document]]: A dictionary containing file and directory names, with file contents, and a list of Document objects for each file.

Source code in langroid/parsing/repo_loader.py
@staticmethod
def load_from_folder(
    path: str,
    depth: int = 3,
    lines: int = 0,
    file_types: Optional[List[str]] = None,
    exclude_dirs: Optional[List[str]] = None,
    url: str = "",
) -> Tuple[Dict[str, Union[str, List[Dict[str, Any]]]], List[Document]]:
    """
    From a local folder `path` (required), get:
      a nested dictionary (tree) of dicts, files and contents, restricting to
        desired file_types and excluding undesired directories.
      a list of Document objects for each file.

    Args:
        path (str): The local folder path, required.
        depth (int): The depth level. Optional, default 3.
        lines (int): The number of lines of file contents to include.
                Optional, default 0 (no lines => empty string).
        file_types (List[str]): The file types to include.
                Optional, default None (all).
        exclude_dirs (List[str]): The directories to exclude.
                Optional, default None (no exclusions).
        url (str): Optional url, to be stored in docs as metadata. Default "".

    Returns:
        Tuple of (dict, List_of_Documents):
            A dictionary containing file and directory names, with file contents.
            A list of Document objects for each file.
    """

    folder_structure = {
        "type": "dir",
        "name": "",
        "dirs": [],
        "files": [],
        "path": "",
    }
    # A queue of tuples (current_path, current_depth, parent_structure)
    queue = deque([(path, 0, folder_structure)])
    docs = []
    exclude_dirs = exclude_dirs or []
    while queue:
        current_path, current_depth, parent_structure = queue.popleft()

        for item in os.listdir(current_path):
            item_path = os.path.join(current_path, item)
            relative_path = os.path.relpath(item_path, path)
            if (os.path.isdir(item_path) and item in exclude_dirs) or (
                os.path.isfile(item_path)
                and file_types is not None
                and RepoLoader._file_type(item) not in file_types
            ):
                continue

            if os.path.isdir(item_path) and current_depth < depth:
                # Create a new sub-dictionary for this directory
                new_dir = {
                    "type": "dir",
                    "name": item,
                    "dirs": [],
                    "files": [],
                    "path": relative_path,
                }
                parent_structure["dirs"].append(new_dir)
                queue.append((item_path, current_depth + 1, new_dir))
            elif os.path.isfile(item_path):
                # Add the file to the current dictionary
                with open(item_path, "r") as f:
                    file_lines = list(itertools.islice(f, lines))
                file_content = "\n".join(line.strip() for line in file_lines)
                if file_content == "":
                    continue

                file_dict = {
                    "type": "file",
                    "name": item,
                    "content": file_content,
                    "path": relative_path,
                }
                parent_structure["files"].append(file_dict)
                docs.append(
                    Document(
                        content=file_content,
                        metadata=DocMetaData(
                            repo=url,
                            source=relative_path,
                            url=url,
                            filename=item,
                            extension=RepoLoader._file_type(item),
                            language=RepoLoader._file_type(item),
                        ),
                    )
                )
    return folder_structure, docs
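
Being a staticmethod, this also works on any local folder, without a RepoLoader instance (a sketch; the path is hypothetical):

tree, docs = RepoLoader.load_from_folder(
    "/tmp/myrepo",          # hypothetical local folder
    depth=2,
    lines=10,
    file_types=["py", "md"],
    exclude_dirs=[".git"],
)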

get_documents(path, parser=Parser(ParsingConfig()), file_types=None, exclude_dirs=None, depth=-1, lines=None, doc_type=None) staticmethod

Recursively get all files under a path as Document objects.

Parameters:

path (str | bytes): The path to the directory or file, or bytes content. The bytes option supports the case where the content has already been read from a file in an upstream process (e.g. from an API or a database), so we avoid writing it to a temporary file just to read it again, which can be very slow for large files, especially in a Docker container. Required.

parser (Parser, optional): Parser to use to parse files. Default: Parser(ParsingConfig()).

file_types (List[str], optional): List of file extensions OR filenames OR file-path names to include. Default: None (all files).

exclude_dirs (List[str], optional): List of directories to exclude. Default: None (all directories included).

depth (int, optional): Max depth of recursion. Default: -1 (all depths).

lines (int, optional): Number of lines to read from each file. Default: None (all lines).

doc_type (str | DocumentType, optional): The type of document to parse. Default: None.

Returns:

List[Document]: List of Document objects representing files.

Source code in langroid/parsing/repo_loader.py
@staticmethod
def get_documents(
    path: str | bytes,
    parser: Parser = Parser(ParsingConfig()),
    file_types: Optional[List[str]] = None,
    exclude_dirs: Optional[List[str]] = None,
    depth: int = -1,
    lines: Optional[int] = None,
    doc_type: str | DocumentType | None = None,
) -> List[Document]:
    """
    Recursively get all files under a path as Document objects.

    Args:
        path (str|bytes): The path to the directory or file, or bytes content.
            The bytes option is meant to support the case where the content
            has already been read from a file in an upstream process
            (e.g. from an API or a database), and we want to avoid having to
            write it to a temporary file just to read it again.
            (which can be very slow for large files,
            especially in a docker container)
        parser (Parser): Parser to use to parse files.
        file_types (List[str], optional): List of file extensions OR
            filenames OR file_path_names to  include.
            Defaults to None, which includes all files.
        exclude_dirs (List[str], optional): List of directories to exclude.
            Defaults to None, which includes all directories.
        depth (int, optional): Max depth of recursion. Defaults to -1,
            which includes all depths.
        lines (int, optional): Number of lines to read from each file.
            Defaults to None, which reads all lines.
        doc_type (str|DocumentType, optional): The type of document to parse.

    Returns:
        List[Document]: List of Document objects representing files.

    """
    docs = []
    file_paths = []
    if isinstance(path, bytes):
        file_paths.append(path)
    else:
        path_obj = Path(path).resolve()

        if path_obj.is_file():
            file_paths.append(str(path_obj))
        else:
            path_depth = len(path_obj.parts)
            for root, dirs, files in os.walk(path):
                # Exclude directories if needed
                if exclude_dirs:
                    dirs[:] = [d for d in dirs if d not in exclude_dirs]

                current_depth = len(Path(root).resolve().parts) - path_depth
                if depth == -1 or current_depth <= depth:
                    for file in files:
                        file_path = str(Path(root) / file)
                        if (
                            file_types is None
                            or RepoLoader._file_type(file_path) in file_types
                            or os.path.basename(file_path) in file_types
                            or file_path in file_types
                        ):
                            file_paths.append(file_path)

    for file_path in file_paths:
        docs.extend(
            DocumentParser.chunks_from_path_or_bytes(
                file_path,
                parser,
                doc_type=doc_type,
                lines=lines,
            )
        )
    return docs
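
A sketch with the default parser (the path is hypothetical, and the import assumes Parser and ParsingConfig live in langroid.parsing.parser):

from langroid.parsing.parser import Parser, ParsingConfig

docs = RepoLoader.get_documents(
    "/tmp/myrepo",          # hypothetical local folder
    parser=Parser(ParsingConfig()),
    file_types=["py"],
    exclude_dirs=[".git"],
    lines=100,
)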

load_docs_from_github(k=None, depth=None, lines=None)

Directly from GitHub, recursively get all files in a repo that have one of the extensions, possibly up to a max number of files, max depth, and max number of lines per file (if any of these are specified).

Parameters:

k (int, optional): Max number of files to load; None for all files. Default: None.

depth (int, optional): Max depth to recurse; None for infinite depth. Default: None.

lines (int, optional): Max number of lines to get from a file; None for all lines. Default: None.

Returns:

List[Document]: List of Document objects, each with fields `content` and `metadata`, where `metadata` has fields `url`, `filename`, `extension`, `language`.

Source code in langroid/parsing/repo_loader.py
def load_docs_from_github(
    self,
    k: Optional[int] = None,
    depth: Optional[int] = None,
    lines: Optional[int] = None,
) -> List[Document]:
    """
    Directly from GitHub, recursively get all files in a repo that have one of the
    extensions, possibly up to a max number of files, max depth, and max number
    of lines per file (if any of these are specified).

    Args:
        k (int): max number of files to load, or None for all files
        depth (int): max depth to recurse, or None for infinite depth
        lines (int): max number of lines to get, from a file, or None for all lines

    Returns:
        list of Document objects, each has fields `content` and `metadata`,
        and `metadata` has fields `url`, `filename`, `extension`, `language`
    """
    contents = self.repo.get_contents("")
    if not isinstance(contents, list):
        contents = [contents]
    stack = list(zip(contents, [0] * len(contents)))  # stack of (content, depth)
    # recursively get all files in repo that have one of the extensions
    docs = []
    i = 0

    while stack:
        if k is not None and i == k:
            break
        file_content, d = stack.pop()
        if not self._is_allowed(file_content):
            continue
        if file_content.type == "dir":
            if depth is None or d <= depth:
                items = self.repo.get_contents(file_content.path)
                if not isinstance(items, list):
                    items = [items]
                stack.extend(list(zip(items, [d + 1] * len(items))))
        else:
            if depth is None or d <= depth:
                # need to decode the file content, which is in bytes
                contents = self.repo.get_contents(file_content.path)
                if isinstance(contents, list):
                    contents = contents[0]
                text = _get_decoded_content(contents)
                if lines is not None:
                    text = "\n".join(text.split("\n")[:lines])
                i += 1

                # Note `source` is important, it may be used to cite
                # evidence for an answer.
                # See  URLLoader
                # TODO we should use Pydantic to enforce/standardize this

                docs.append(
                    Document(
                        content=text,
                        metadata=DocMetaData(
                            repo=self.url,
                            source=file_content.html_url,
                            url=file_content.html_url,
                            filename=file_content.name,
                            extension=self._file_type(file_content.name),
                            language=self._file_type(file_content.name),
                        ),
                    )
                )
    return docs
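
For example, to pull at most 100 files, two levels deep, 100 lines each (a sketch):

docs = loader.load_docs_from_github(k=100, depth=2, lines=100)
for doc in docs[:3]:
    print(doc.metadata.url)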

select(structure, includes, excludes=[]) staticmethod

Filter a structure dictionary for certain directories and files.

Parameters:

structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary. Required.

includes (List[str]): A list of desired directories and files. For files, either full file names or a "file type" can be specified; e.g. "toml" will include all files with the ".toml" extension, and "Makefile" will include all files named "Makefile". Required.

excludes (List[str], optional): A list of directories and files to exclude. As with includes, full file/dir names or a "file type" can be specified. Default: [].

Returns:

Dict[str, Union[str, List[Dict[str, Any]]]]: The filtered structure dictionary.

Source code in langroid/parsing/repo_loader.py
@staticmethod
def select(
    structure: Dict[str, Union[str, List[Dict[str, Any]]]],
    includes: List[str],
    excludes: List[str] = [],
) -> Dict[str, Union[str, List[Dict[str, Any]]]]:
    """
    Filter a structure dictionary for certain directories and files.

    Args:
        structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
        includes (List[str]): A list of desired directories and files.
            For files, either full file names or "file type" can be specified.
            E.g.  "toml" will include all files with the ".toml" extension,
            or "Makefile" will include all files named "Makefile".
        excludes (List[str]): A list of directories and files to exclude.
            Similar to `includes`, full file/dir names or "file type" can be
            specified. Optional, defaults to empty list.


    Returns:
        Dict[str, Union[str, List[Dict]]]: The filtered structure dictionary.
    """
    filtered_structure = {
        "type": structure["type"],
        "name": structure["name"],
        "dirs": [],
        "files": [],
        "path": structure["path"],
    }

    for dir in structure["dirs"]:
        if (
            dir["name"] in includes
            or RepoLoader._file_type(dir["name"]) in includes
        ) and (
            dir["name"] not in excludes
            and RepoLoader._file_type(dir["name"]) not in excludes
        ):
            # If the directory is in the select list, include the whole subtree
            filtered_structure["dirs"].append(dir)
        else:
            # Otherwise, filter the directory's contents
            # (note: `excludes` is not passed down to the recursive call)
            filtered_dir = RepoLoader.select(dir, includes)
            if (
                filtered_dir["dirs"] or filtered_dir["files"]
            ):  # only add if not empty
                filtered_structure["dirs"].append(filtered_dir)

    for file in structure["files"]:
        if (
            file["name"] in includes
            or RepoLoader._file_type(file["name"]) in includes
        ) and (
            file["name"] not in excludes
            and RepoLoader._file_type(file["name"]) not in excludes
        ):
            filtered_structure["files"].append(file)

    return filtered_structure
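
A sketch that keeps only Python files and Makefiles from a tree built by load (the "tests" exclusion is illustrative):

tree, _ = loader.load(depth=2, lines=5)
filtered = RepoLoader.select(tree, includes=["py", "Makefile"], excludes=["tests"])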

ls(structure, depth=0) staticmethod

Get a list of names of files or directories up to a certain depth from a structure dictionary.

Parameters:

structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary. Required.

depth (int, optional): The depth level. Default: 0.

Returns:

List[str]: A list of names of files or directories.

Source code in langroid/parsing/repo_loader.py
@staticmethod
def ls(structure: Dict[str, Union[str, List[Dict]]], depth: int = 0) -> List[str]:
    """
    Get a list of names of files or directories up to a certain depth from a
    structure dictionary.

    Args:
        structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
        depth (int, optional): The depth level. Defaults to 0.

    Returns:
        List[str]: A list of names of files or directories.
    """
    names = []

    # A queue of tuples (current_structure, current_depth)
    queue = deque([(structure, 0)])

    while queue:
        current_structure, current_depth = queue.popleft()

        if current_depth <= depth:
            names.append(current_structure["name"])

            for dir in current_structure["dirs"]:
                queue.append((dir, current_depth + 1))

            for file in current_structure["files"]:
                # add file names only if depth is less than the limit
                if current_depth < depth:
                    names.append(file["name"])
    names = [n for n in names if n not in ["", None]]
    return names
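
A sketch, assuming tree is a structure dictionary produced by load or load_tree_from_github:

names = RepoLoader.ls(tree, depth=2)
print(names)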

list_files(dir, depth=1, include_types=[], exclude_types=[]) staticmethod

Recursively list all files in a directory, up to a certain depth.

Parameters:

dir (str): The directory path, relative to root. Required.

depth (int, optional): The depth level. Default: 1.

include_types (List[str], optional): A list of file types to include. Default: [].

exclude_types (List[str], optional): A list of file types to exclude. Default: [].

Returns:

List[str]: A list of file names.

Source code in langroid/parsing/repo_loader.py
@staticmethod
def list_files(
    dir: str,
    depth: int = 1,
    include_types: List[str] = [],
    exclude_types: List[str] = [],
) -> List[str]:
    """
    Recursively list all files in a directory, up to a certain depth.

    Args:
        dir (str): The directory path, relative to root.
        depth (int, optional): The depth level. Defaults to 1.
        include_types (List[str], optional): A list of file types to include.
            Defaults to empty list.
        exclude_types (List[str], optional): A list of file types to exclude.
            Defaults to empty list.
    Returns:
        List[str]: A list of file names.
    """
    depth = depth if depth >= 0 else 200
    output = []

    for root, dirs, files in os.walk(dir):
        if root.count(os.sep) - dir.count(os.sep) < depth:
            level = root.count(os.sep) - dir.count(os.sep)
            sub_indent = " " * 4 * (level + 1)
            for d in dirs:
                output.append("{}{}/".format(sub_indent, d))
            for f in files:
                if include_types and RepoLoader._file_type(f) not in include_types:
                    continue
                if exclude_types and RepoLoader._file_type(f) in exclude_types:
                    continue
                output.append("{}{}".format(sub_indent, f))
    return output
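
A sketch on a hypothetical local folder:

listing = RepoLoader.list_files("/tmp/myrepo", depth=2, exclude_types=["pyc"])
print("\n".join(listing))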

show_file_contents(tree) staticmethod

Return the contents of all files from a structure dictionary, concatenated into a single string.

Parameters:

tree (Dict[str, Union[str, List[Dict]]]): The structure dictionary. Required.

Returns:

str: The concatenated contents of all files.
Source code in langroid/parsing/repo_loader.py
@staticmethod
def show_file_contents(tree: Dict[str, Union[str, List[Dict[str, Any]]]]) -> str:
    """
    Print the contents of all files from a structure dictionary.

    Args:
        tree (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
    """
    contents = ""
    for dir in tree["dirs"]:
        contents += RepoLoader.show_file_contents(dir)
    for file in tree["files"]:
        path = file["path"]
        contents += f"""
        {path}:
        --------------------
        {file["content"]}

        """

    return contents
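
A sketch, assuming tree was built with lines > 0 so file contents are non-empty:

print(RepoLoader.show_file_contents(tree))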