Skip to content

file_attachment

langroid/parsing/file_attachment.py

FileAttachment(**data)

Bases: BaseModel

Represents a file attachment to be sent to an LLM API.

Source code in langroid/parsing/file_attachment.py
def __init__(self, **data: Any) -> None:
    """Construct the attachment, filling in a filename when none is given.

    If ``filename`` is absent from ``data`` or explicitly ``None``, a name of
    the form ``attachment_<8 hex chars>.bin`` is generated from a random
    UUID before the keyword arguments are passed on to the parent
    initializer.

    Args:
        **data: Field values forwarded to the parent ``__init__``.
    """
    if data.get("filename") is None:
        # First 8 hex digits of a random UUID keep the name short but unique
        short_id = uuid.uuid4().hex[:8]
        data["filename"] = f"attachment_{short_id}.bin"
    super().__init__(**data)

from_path(path, detail=None) classmethod

Create a FileAttachment from either a local file path or a URL.

Parameters:

Name Type Description Default
path Union[str, Path]

Path to the file or URL to fetch

required

Returns:

Type Description
FileAttachment

FileAttachment instance

Source code in langroid/parsing/file_attachment.py
@classmethod
def from_path(
    cls,
    path: Union[str, Path],
    detail: str | None = None,
) -> "FileAttachment":
    """Create a FileAttachment from either a local file path or a URL.

    Args:
        path_or_url: Path to the file or URL to fetch

    Returns:
        FileAttachment instance
    """
    # Convert to string if Path object
    path_str = str(path)

    # Check if it's a URL
    if path_str.startswith(("http://", "https://", "ftp://")):
        return cls._from_url(url=path_str, detail=detail)
    else:
        # Assume it's a local file path
        return cls._from_path(path_str, detail=detail)

from_bytes(content, filename=None, mime_type=None) classmethod

Create a FileAttachment from bytes content.

Parameters:

Name Type Description Default
content bytes

Raw bytes content

required
filename Optional[str]

Optional name to use for the file

None
mime_type Optional[str]

MIME type of the content, guessed from filename if provided

None

Returns:

Type Description
FileAttachment

FileAttachment instance

Source code in langroid/parsing/file_attachment.py
@classmethod
def from_bytes(
    cls,
    content: bytes,
    filename: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> "FileAttachment":
    """Build a FileAttachment directly from raw bytes.

    Args:
        content: Raw bytes content
        filename: Optional name to use for the file
        mime_type: MIME type of the content. When omitted and a filename is
            given, it is guessed from the filename; if still unknown it
            falls back to ``application/octet-stream``.

    Returns:
        FileAttachment instance
    """
    resolved_type = mime_type
    if resolved_type is None and filename is not None:
        # guess_type returns (type, encoding); only the type is needed
        resolved_type = mimetypes.guess_type(filename)[0]
    if not resolved_type:
        resolved_type = "application/octet-stream"

    return cls(content=content, filename=filename, mime_type=resolved_type)

from_io(file_obj, filename=None, mime_type=None) classmethod

Create a FileAttachment from a file-like object.

Parameters:

Name Type Description Default
file_obj BinaryIO

File-like object with binary content

required
filename Optional[str]

Optional name to use for the file

None
mime_type Optional[str]

MIME type of the content, guessed from filename if provided

None

Returns:

Type Description
FileAttachment

FileAttachment instance

Source code in langroid/parsing/file_attachment.py
@classmethod
def from_io(
    cls,
    file_obj: BinaryIO,
    filename: Optional[str] = None,
    mime_type: Optional[str] = None,
) -> "FileAttachment":
    """Build a FileAttachment by reading a binary file-like object.

    The object is read once via ``read()`` and the resulting content is
    passed, together with the other arguments, to ``from_bytes``.

    Args:
        file_obj: File-like object with binary content
        filename: Optional name to use for the file
        mime_type: MIME type of the content, guessed from filename if provided

    Returns:
        FileAttachment instance
    """
    return cls.from_bytes(file_obj.read(), filename, mime_type)

from_text(text, filename=None, mime_type='text/plain', encoding='utf-8') classmethod

Create a FileAttachment from text content.

Parameters:

Name Type Description Default
text str

Text content to include

required
filename Optional[str]

Optional name to use for the file

None
mime_type str

MIME type of the content

'text/plain'
encoding str

Text encoding to use

'utf-8'

Returns:

Type Description
FileAttachment

FileAttachment instance

Source code in langroid/parsing/file_attachment.py
@classmethod
def from_text(
    cls,
    text: str,
    filename: Optional[str] = None,
    mime_type: str = "text/plain",
    encoding: str = "utf-8",
) -> "FileAttachment":
    """Build a FileAttachment from a text string.

    Args:
        text: Text content to include
        filename: Optional name to use for the file
        mime_type: MIME type of the content
        encoding: Text encoding used to turn ``text`` into bytes

    Returns:
        FileAttachment instance
    """
    return cls(
        content=text.encode(encoding),
        filename=filename,
        mime_type=mime_type,
    )

to_base64()

Convert content to base64 encoding.

Returns:

Type Description
str

Base64 encoded string

Source code in langroid/parsing/file_attachment.py
def to_base64(self) -> str:
    """Return the attachment content encoded as a base64 string.

    Returns:
        Base64 encoded string
    """
    encoded: bytes = base64.b64encode(self.content)
    return encoded.decode("utf-8")

to_data_uri()

Convert content to a data URI.

Returns:

Type Description
str

A data URI string containing the base64-encoded content with MIME type

Source code in langroid/parsing/file_attachment.py
def to_data_uri(self) -> str:
    """Render the content as a ``data:`` URI.

    Returns:
        A data URI string containing the base64-encoded content with MIME type
    """
    return f"data:{self.mime_type};base64,{self.to_base64()}"

to_dict(model)

Convert to a dictionary suitable for API requests. Tested only for PDF files.

Returns:

Type Description
Dict[str, Any]

Dictionary with file data

Source code in langroid/parsing/file_attachment.py
def to_dict(self, model: str) -> Dict[str, Any]:
    """
    Convert to a dictionary suitable for API requests.
    Tested only for PDF files.

    Args:
        model: Name of the target model. If it contains "gemini"
            (case-insensitive), the ``image_url`` form is used for every
            file type, not only for images.

    Returns:
        Dictionary with file data: an ``image_url`` entry for images (and
        for any file when targeting a gemini model), otherwise a ``file``
        entry holding the filename and a base64 data URI.
    """
    is_image = bool(self.mime_type and self.mime_type.startswith("image/"))

    if not (is_image or "gemini" in model.lower()):
        # Non-image file for a non-gemini model: send as an inline file
        return {
            "type": "file",
            "file": {
                "filename": self.filename,
                "file_data": self.to_data_uri(),
            },
        }

    # A full http/https URL is passed through as-is; anything else is
    # embedded as a base64 data URI.
    has_web_url = bool(self.url and self.url.startswith(("http://", "https://")))
    image_url: Dict[str, Any] = {
        "url": self.url if has_web_url else self.to_data_uri()
    }

    # The detail hint is only included when one was specified
    if self.detail:
        image_url["detail"] = self.detail

    return {"type": "image_url", "image_url": image_url}