Skip to content

openai_gpt

langroid/language_models/openai_gpt.py

OpenAICallParams

Bases: BaseModel

Various params that can be sent to an OpenAI API chat-completion call. When specified, any param here overrides the one with same name in the OpenAIGPTConfig. See OpenAI API Reference for details on the params: https://platform.openai.com/docs/api-reference/chat

LiteLLMProxyConfig

Bases: BaseSettings

Configuration for LiteLLM proxy connection.

OpenAIGPTConfig(**kwargs)

Bases: LLMConfig

Class for any LLM with an OpenAI-like API: besides the OpenAI models this includes: (a) locally-served models behind an OpenAI-compatible API (b) non-local models, using a proxy adaptor lib like litellm that provides an OpenAI-compatible API. (We could rename this class to OpenAILikeConfig, but we keep it as-is for now)

Important Note: Due to the env_prefix = "OPENAI_" defined below, all of the fields below can be set AND OVERRIDDEN via env vars, by upper-casing the field name and prefixing it with OPENAI_, e.g. OPENAI_MAX_OUTPUT_TOKENS=1000. If any field is defined this way in the environment (either via an explicit setenv/export, or via a .env file + load_dotenv()), the environment variable takes precedence over the value in the config.

Source code in langroid/language_models/openai_gpt.py
def __init__(self, **kwargs) -> None:  # type: ignore
    """
    Build the config and, when the caller is (implicitly) relying on the
    deprecated GPT-3.5 default chat model, attach a one-time warning hook.

    A model counts as "local" when an explicit `api_base` is supplied, or
    when `chat_model` carries one of the locally-served provider prefixes.
    """
    is_local = kwargs.get("api_base") is not None

    model_name = kwargs.get("chat_model", "")
    if model_name.startswith(
        ("local/", "litellm/", "ollama/", "vllm/", "llamacpp/")
    ):
        is_local = True

    # Warn only when the caller did not pick a model explicitly and the
    # library-wide default is still GPT-3.5-turbo.
    should_warn_gpt_3_5 = (
        "chat_model" not in kwargs
        and not is_local
        and default_openai_chat_model == OpenAIChatModel.GPT3_5_TURBO
    )

    if should_warn_gpt_3_5:
        prior_hook = kwargs.get("run_on_first_use", noop)

        def hook_with_warning() -> None:
            prior_hook()
            gpt_3_5_warning()

        kwargs["run_on_first_use"] = hook_with_warning

    super().__init__(**kwargs)

model_copy(*, update=None, deep=False)

Copy config while preserving nested model instances and subclasses.

Important: Avoid reconstructing via model_dump as that coerces nested models to their annotated base types (dropping subclass-only fields). Instead, defer to Pydantic's native model_copy, which keeps nested BaseModel instances (and their concrete subclasses) intact.

Source code in langroid/language_models/openai_gpt.py
def model_copy(
    self, *, update: Mapping[str, Any] | None = None, deep: bool = False
) -> "OpenAIGPTConfig":
    """
    Return a copy of this config, keeping nested models' concrete types.

    Rebuilding from `model_dump` would coerce nested models down to their
    annotated base classes and silently drop subclass-only fields; so we
    instead delegate to Pydantic's native `model_copy`, which keeps every
    nested `BaseModel` instance (including concrete subclasses) intact.
    """
    # Let the BaseSettings/BaseModel machinery do the copying so nested
    # model instances (and their subclasses) survive unchanged.
    copied = super().model_copy(update=update, deep=deep)
    return copied  # type: ignore[return-value]

create(prefix) classmethod

Create a config class whose params can be set via a desired prefix from the .env file or env vars. E.g., using

OllamaConfig = OpenAIGPTConfig.create("ollama")
ollama_config = OllamaConfig()
you can have a group of params prefixed by "OLLAMA_", to be used with models served via ollama. This way, you can maintain several setting-groups in your .env file, one per model type.

Source code in langroid/language_models/openai_gpt.py
@classmethod
def create(cls, prefix: str) -> Type["OpenAIGPTConfig"]:
    """Build a config subclass whose env-var prefix is `PREFIX_`.

    Example:
    ```python
    OllamaConfig = OpenAIGPTConfig.create("ollama")
    ollama_config = OllamaConfig()
    ```
    Every field of the returned class can then be set via env vars
    prefixed with "OLLAMA_", so you can keep several setting-groups in a
    single .env file, one group per model type.
    """

    class DynamicConfig(OpenAIGPTConfig):
        pass

    # Re-point the settings machinery at the caller's prefix.
    DynamicConfig.model_config = SettingsConfigDict(
        env_prefix=prefix.upper() + "_"
    )
    return DynamicConfig

OpenAIResponse

Bases: BaseModel

OpenAI response model, either completion or chat.

OpenAIGPT(config=OpenAIGPTConfig())

Bases: LanguageModel

Class for OpenAI LLMs

Source code in langroid/language_models/openai_gpt.py
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
def __init__(self, config: OpenAIGPTConfig = OpenAIGPTConfig()):
    """
    Args:
        config: configuration for openai-gpt model

    Deep-copies `config` (so the caller's object is never mutated), then:
    strips any provider prefix from `chat_model` (litellm/, local/,
    ollama/, vllm/, llamacpp/, groq/, cerebras/, gemini/, deepseek/, ...)
    while resolving `api_base`/`api_key` per provider; builds the sync and
    async API clients; and optionally attaches a Redis response cache.
    """
    # copy the config to avoid modifying the original; deep to decouple
    # nested models while preserving their concrete subclasses
    config = config.model_copy(deep=True)
    super().__init__(config)
    self.config: OpenAIGPTConfig = config
    # save original model name such as `provider/model` before
    # we strip out the `provider` - we retain the original in
    # case some params are specific to a provider.
    self.chat_model_orig = self.config.chat_model_orig or self.config.chat_model

    # Run the first time the model is used
    # (presumably functools.cache, so the hook fires at most once —
    # TODO confirm against the file's imports)
    self.run_on_first_use = cache(self.config.run_on_first_use)

    # global override of chat_model,
    # to allow quick testing with other models
    if settings.chat_model != "":
        self.config.chat_model = settings.chat_model
        self.chat_model_orig = settings.chat_model
        self.config.completion_model = settings.chat_model

    # --- optional formatter suffix: "model//formatter" ---
    if len(parts := self.config.chat_model.split("//")) > 1:
        # there is a formatter specified, e.g.
        # "litellm/ollama/mistral//hf" or
        # "local/localhost:8000/v1//mistral-instruct-v0.2"
        formatter = parts[1]
        self.config.chat_model = parts[0]
        if formatter == "hf":
            # e.g. "litellm/ollama/mistral//hf" -> "litellm/ollama/mistral"
            formatter = find_hf_formatter(self.config.chat_model)
            if formatter != "":
                # e.g. "mistral"
                self.config.formatter = formatter
                logging.warning(
                    f"""
                    Using completions (not chat) endpoint with HuggingFace
                    chat_template for {formatter} for
                    model {self.config.chat_model}
                    """
                )
        else:
            # e.g. "local/localhost:8000/v1//mistral-instruct-v0.2"
            self.config.formatter = formatter

    if self.config.formatter is not None:
        self.config.hf_formatter = HFFormatter(
            HFPromptFormatterConfig(model_name=self.config.formatter)
        )

    self.supports_json_schema: bool = self.config.supports_json_schema or False
    self.supports_strict_tools: bool = self.config.supports_strict_tools or False

    # NOTE: this local is used as a *sentinel* throughout the provider
    # branches below: `self.api_key == OPENAI_API_KEY` means the user did
    # not supply a provider-specific key, so the branch may override it.
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", DUMMY_API_KEY)
    self.api_key = config.api_key

    # --- provider-prefix routing: resolve api_base / api_key / flags ---
    # if model name starts with "litellm",
    # set the actual model name by stripping the "litellm/" prefix
    # and set the litellm flag to True
    if self.config.chat_model.startswith("litellm/") or self.config.litellm:
        # e.g. litellm/ollama/mistral
        self.config.litellm = True
        self.api_base = self.config.api_base
        if self.config.chat_model.startswith("litellm/"):
            # strip the "litellm/" prefix
            # e.g. litellm/ollama/llama2 => ollama/llama2
            self.config.chat_model = self.config.chat_model.split("/", 1)[1]
    elif self.config.chat_model.startswith("local/"):
        # expect this to be of the form "local/localhost:8000/v1",
        # depending on how the model is launched locally.
        # In this case the model served locally behind an OpenAI-compatible API
        # so we can just use `openai.*` methods directly,
        # and don't need a adaptor library like litellm
        self.config.litellm = False
        self.config.seed = None  # some models raise an error when seed is set
        # Extract the api_base from the model name after the "local/" prefix
        self.api_base = self.config.chat_model.split("/", 1)[1]
        if not self.api_base.startswith("http"):
            self.api_base = "http://" + self.api_base
    elif self.config.chat_model.startswith("ollama/"):
        self.config.ollama = True

        # use api_base from config if set, else fall back on OLLAMA_BASE_URL
        self.api_base = self.config.api_base or OLLAMA_BASE_URL
        if self.api_key == OPENAI_API_KEY:
            self.api_key = OLLAMA_API_KEY
        self.config.chat_model = self.config.chat_model.replace("ollama/", "")
    elif self.config.chat_model.startswith("vllm/"):
        self.supports_json_schema = True
        self.config.chat_model = self.config.chat_model.replace("vllm/", "")
        if self.api_key == OPENAI_API_KEY:
            self.api_key = os.environ.get("VLLM_API_KEY", DUMMY_API_KEY)
        self.api_base = self.config.api_base or "http://localhost:8000/v1"
        if not self.api_base.startswith("http"):
            self.api_base = "http://" + self.api_base
        if not self.api_base.endswith("/v1"):
            self.api_base = self.api_base + "/v1"
    elif self.config.chat_model.startswith("llamacpp/"):
        self.supports_json_schema = True
        self.api_base = self.config.chat_model.split("/", 1)[1]
        if not self.api_base.startswith("http"):
            self.api_base = "http://" + self.api_base
        if self.api_key == OPENAI_API_KEY:
            self.api_key = os.environ.get("LLAMA_API_KEY", DUMMY_API_KEY)
    else:
        self.api_base = self.config.api_base
        # If api_base is unset we use OpenAI's endpoint, which supports
        # these features (with JSON schema restricted to a limited set of models)
        self.supports_strict_tools = self.api_base is None
        self.supports_json_schema = (
            self.api_base is None and self.info().has_structured_output
        )

    if settings.chat_model != "":
        # if we're overriding chat model globally, set completion model to same
        self.config.completion_model = self.config.chat_model

    if self.config.formatter is not None:
        # we want to format chats -> completions using this specific formatter
        self.config.use_completion_for_chat = True
        self.config.completion_model = self.config.chat_model

    if self.config.use_completion_for_chat:
        self.config.use_chat_for_completion = False

    # --- provider flags for the remaining OpenAI-compatible endpoints ---
    self.is_groq = self.config.chat_model.startswith("groq/")
    self.is_cerebras = self.config.chat_model.startswith("cerebras/")
    self.is_gemini = self.is_gemini_model()
    self.is_deepseek = self.is_deepseek_model()
    self.is_minimax = self.is_minimax_model()
    self.is_glhf = self.config.chat_model.startswith("glhf/")
    self.is_openrouter = self.config.chat_model.startswith("openrouter/")
    self.is_langdb = self.config.chat_model.startswith("langdb/")
    self.is_portkey = self.config.chat_model.startswith("portkey/")
    self.is_litellm_proxy = self.config.chat_model.startswith("litellm-proxy/")

    if self.is_groq:
        # use groq-specific client
        self.config.chat_model = self.config.chat_model.replace("groq/", "")
        if self.api_key == OPENAI_API_KEY:
            self.api_key = os.getenv("GROQ_API_KEY", DUMMY_API_KEY)
        if self.config.use_cached_client:
            self.client = get_groq_client(api_key=self.api_key)
            self.async_client = get_async_groq_client(api_key=self.api_key)
        else:
            # Create new clients without caching
            self.client = Groq(api_key=self.api_key)
            self.async_client = AsyncGroq(api_key=self.api_key)
    elif self.is_cerebras:
        # use cerebras-specific client
        self.config.chat_model = self.config.chat_model.replace("cerebras/", "")
        if self.api_key == OPENAI_API_KEY:
            self.api_key = os.getenv("CEREBRAS_API_KEY", DUMMY_API_KEY)
        if self.config.use_cached_client:
            self.client = get_cerebras_client(api_key=self.api_key)
            # TODO: Cerebras may not offer a true async client — confirm
            # whether anything more is needed here.
            self.async_client = get_async_cerebras_client(api_key=self.api_key)
        else:
            # Create new clients without caching
            self.client = Cerebras(api_key=self.api_key)
            self.async_client = AsyncCerebras(api_key=self.api_key)
    else:
        # in these cases, there's no specific client: OpenAI python client suffices
        if self.is_litellm_proxy:
            self.config.chat_model = self.config.chat_model.replace(
                "litellm-proxy/", ""
            )
            if self.api_key == OPENAI_API_KEY:
                self.api_key = self.config.litellm_proxy.api_key or self.api_key
            self.api_base = self.config.litellm_proxy.api_base or self.api_base
        elif self.is_gemini:
            self.config.chat_model = self.config.chat_model.replace("gemini/", "")
            if self.api_key == OPENAI_API_KEY:
                self.api_key = os.getenv("GEMINI_API_KEY", DUMMY_API_KEY)
            # Use GEMINI_API_BASE env var if set (e.g. for Vertex AI),
            # then config.api_base only if explicitly set by the user
            # (not inherited from OPENAI_API_BASE via env_prefix),
            # then fall back to the default Gemini endpoint.
            gemini_api_base = os.getenv("GEMINI_API_BASE", "")
            openai_api_base = os.getenv("OPENAI_API_BASE")
            explicit_api_base = (
                self.config.api_base
                if self.config.api_base and self.config.api_base != openai_api_base
                else None
            )
            self.api_base = gemini_api_base or explicit_api_base or GEMINI_BASE_URL
        elif self.is_glhf:
            self.config.chat_model = self.config.chat_model.replace("glhf/", "")
            if self.api_key == OPENAI_API_KEY:
                self.api_key = os.getenv("GLHF_API_KEY", DUMMY_API_KEY)
            self.api_base = GLHF_BASE_URL
        elif self.is_openrouter:
            self.config.chat_model = self.config.chat_model.replace(
                "openrouter/", ""
            )
            if self.api_key == OPENAI_API_KEY:
                self.api_key = os.getenv("OPENROUTER_API_KEY", DUMMY_API_KEY)
            self.api_base = OPENROUTER_BASE_URL
        elif self.is_deepseek:
            self.config.chat_model = self.config.chat_model.replace("deepseek/", "")
            self.api_base = DEEPSEEK_BASE_URL
            if self.api_key == OPENAI_API_KEY:
                self.api_key = os.getenv("DEEPSEEK_API_KEY", DUMMY_API_KEY)
        elif self.is_minimax:
            self.config.chat_model = self.config.chat_model.replace("minimax/", "")
            # Honor caller-supplied base URL (e.g. regional endpoints,
            # proxies) instead of always forcing the default.
            openai_api_base = os.getenv("OPENAI_API_BASE")
            explicit_api_base = (
                self.config.api_base
                if self.config.api_base and self.config.api_base != openai_api_base
                else None
            )
            self.api_base = explicit_api_base or MINIMAX_BASE_URL
            if self.api_key == OPENAI_API_KEY:
                # Only overwrite with MINIMAX_API_KEY when it is actually
                # set, so users who intentionally put their MiniMax key in
                # OPENAI_API_KEY are not silently downgraded to a dummy key.
                minimax_key = os.getenv("MINIMAX_API_KEY", "")
                if minimax_key:
                    self.api_key = minimax_key
            # Recompute capabilities now that the prefix has been stripped
            # and self.info() can find the model in MODEL_INFO.
            self.supports_strict_tools = True
            self.supports_json_schema = self.info().has_structured_output
        elif self.is_langdb:
            self.config.chat_model = self.config.chat_model.replace("langdb/", "")
            self.api_base = self.config.langdb_params.base_url
            project_id = self.config.langdb_params.project_id
            if project_id:
                self.api_base += "/" + project_id + "/v1"
            if self.api_key == OPENAI_API_KEY:
                self.api_key = self.config.langdb_params.api_key or DUMMY_API_KEY

            # Propagate langdb routing metadata as request headers.
            if self.config.langdb_params:
                params = self.config.langdb_params
                if params.project_id:
                    self.config.headers["x-project-id"] = params.project_id
                if params.label:
                    self.config.headers["x-label"] = params.label
                if params.run_id:
                    self.config.headers["x-run-id"] = params.run_id
                if params.thread_id:
                    self.config.headers["x-thread-id"] = params.thread_id
        elif self.is_portkey:
            # Parse the model string and extract provider/model
            provider, model = self.config.portkey_params.parse_model_string(
                self.config.chat_model
            )
            self.config.chat_model = model
            if provider:
                self.config.portkey_params.provider = provider

            # Set Portkey base URL
            self.api_base = self.config.portkey_params.base_url + "/v1"

            # Set API key - use provider's API key from env if available
            if self.api_key == OPENAI_API_KEY:
                self.api_key = self.config.portkey_params.get_provider_api_key(
                    self.config.portkey_params.provider, DUMMY_API_KEY
                )

            # Add Portkey-specific headers
            self.config.headers.update(self.config.portkey_params.get_headers())

        # Sanitize the API key: strip leading/trailing whitespace
        # (including stray newlines from .env files or CI secrets).
        self.api_key = self.api_key.strip()

        # Create http_client if needed - Priority order:
        # 1. http_client_factory (most flexibility, not cacheable)
        # 2. http_client_config (cacheable, moderate flexibility)
        # 3. http_verify_ssl=False (cacheable, simple SSL bypass)
        http_client = None
        async_http_client = None
        http_client_config_used = None

        if self.config.http_client_factory is not None:
            # Use the factory to create http_client (not cacheable)
            http_client = self.config.http_client_factory()
            if isinstance(http_client, (list, tuple)):
                if len(http_client) != 2:
                    raise ValueError(
                        "http_client_factory must return either a single "
                        "httpx.Client or a tuple of "
                        "(httpx.Client, httpx.AsyncClient)"
                    )
                http_client, async_http_client = http_client
            else:
                # set async_http_client to None - so that it will
                # be created later
                async_http_client = None
        elif self.config.http_client_config is not None:
            # Use config dict (cacheable)
            http_client_config_used = self.config.http_client_config
        elif not self.config.http_verify_ssl:
            # Simple SSL bypass (cacheable)
            http_client_config_used = {"verify": False}
            logging.warning(
                "SSL verification has been disabled. This is insecure and "
                "should only be used in trusted environments (e.g., "
                "corporate networks with self-signed certificates)."
            )

        if self.config.use_cached_client:
            self.client = get_openai_client(
                api_key=self.api_key,
                base_url=self.api_base,
                organization=self.config.organization,
                timeout=Timeout(self.config.timeout),
                default_headers=self.config.headers,
                http_client=http_client,
                http_client_config=http_client_config_used,
            )
            self.async_client = get_async_openai_client(
                api_key=self.api_key,
                base_url=self.api_base,
                organization=self.config.organization,
                timeout=Timeout(self.config.timeout),
                default_headers=self.config.headers,
                http_client=async_http_client,
                http_client_config=http_client_config_used,
            )
        else:
            # Create new clients without caching
            client_kwargs: Dict[str, Any] = dict(
                api_key=self.api_key,
                base_url=self.api_base,
                organization=self.config.organization,
                timeout=Timeout(self.config.timeout),
                default_headers=self.config.headers,
            )
            if http_client is not None:
                client_kwargs["http_client"] = http_client
            elif http_client_config_used is not None:
                # Create http_client from config for non-cached scenario
                try:
                    from httpx import Client

                    client_kwargs["http_client"] = Client(**http_client_config_used)
                except ImportError:
                    raise ValueError(
                        "httpx is required to use http_client_config. "
                        "Install it with: pip install httpx"
                    )
            self.client = OpenAI(**client_kwargs)

            async_client_kwargs: Dict[str, Any] = dict(
                api_key=self.api_key,
                base_url=self.api_base,
                organization=self.config.organization,
                timeout=Timeout(self.config.timeout),
                default_headers=self.config.headers,
            )
            if async_http_client is not None:
                async_client_kwargs["http_client"] = async_http_client
            elif http_client_config_used is not None:
                # Create async http_client from config for non-cached scenario
                try:
                    from httpx import AsyncClient

                    async_client_kwargs["http_client"] = AsyncClient(
                        **http_client_config_used
                    )
                except ImportError:
                    raise ValueError(
                        "httpx is required to use http_client_config. "
                        "Install it with: pip install httpx"
                    )
            self.async_client = AsyncOpenAI(**async_client_kwargs)

    # --- response-cache setup (redis / fakeredis / none) ---
    self.cache: CacheDB | None = None
    use_cache = self.config.cache_config is not None
    if "redis" in settings.cache_type and use_cache:
        if config.cache_config is None or not isinstance(
            config.cache_config,
            RedisCacheConfig,
        ):
            # switch to fresh redis config if needed
            config.cache_config = RedisCacheConfig(
                fake="fake" in settings.cache_type
            )
        if "fake" in settings.cache_type:
            # force use of fake redis if global cache_type is "fakeredis"
            config.cache_config.fake = True
        self.cache = RedisCache(config.cache_config)
    elif settings.cache_type != "none" and use_cache:
        raise ValueError(
            f"Invalid cache type {settings.cache_type}. "
            "Valid types are redis, fakeredis, none"
        )

    # Final cross-field validation of litellm-related settings.
    self.config._validate_litellm()

is_gemini_model()

Are we using the gemini OpenAI-compatible API?

Source code in langroid/language_models/openai_gpt.py
def is_gemini_model(self) -> bool:
    """True iff the configured model targets the Gemini OpenAI-compatible API."""
    prefix = "gemini/"
    return self.chat_model_orig[: len(prefix)] == prefix

is_minimax_model()

Are we using the MiniMax OpenAI-compatible API?

Source code in langroid/language_models/openai_gpt.py
def is_minimax_model(self) -> bool:
    """True iff the configured model targets MiniMax's OpenAI-compatible API."""
    # Either an explicitly-known MiniMax model name, or a "minimax/" prefix.
    known_names = {member.value for member in MiniMaxModel}
    if self.chat_model_orig in known_names:
        return True
    return self.chat_model_orig.startswith("minimax/")

unsupported_params()

List of params that are not supported by the current model

Source code in langroid/language_models/openai_gpt.py
def unsupported_params(self) -> List[str]:
    """
    Names of API params the current model does not accept
    (de-duplicated; order not guaranteed).
    """
    return [param for param in set(self.info().unsupported_params)]

rename_params()

Map of param name -> new name for specific models. Currently the main troublemaker is the o1* series.

Source code in langroid/language_models/openai_gpt.py
def rename_params(self) -> Dict[str, str]:
    """
    Per-model map of param name -> replacement name.
    The o1* series is currently the main source of renames.
    """
    model_info = self.info()
    return model_info.rename_params

chat_context_length()

Context-length for chat-completion models/endpoints. Get it from the config if explicitly given, otherwise use model_info based on model name, and fall back to generic model_info if there's no match.

Source code in langroid/language_models/openai_gpt.py
def chat_context_length(self) -> int:
    """
    Context length for chat-completion models/endpoints: the value set
    explicitly in the config if truthy, otherwise the model-info lookup
    (which itself falls back to generic info when the model is unknown).
    """
    configured = self.config.chat_context_length
    if configured:
        return configured
    return self.info().context_length

completion_context_length()

Context-length for completion models/endpoints. Get it from the config if explicitly given, otherwise use model_info based on model name, and fall back to generic model_info if there's no match.

Source code in langroid/language_models/openai_gpt.py
def completion_context_length(self) -> int:
    """
    Context length for (non-chat) completion models/endpoints: the value
    set explicitly in the config if truthy, otherwise the completion
    model-info lookup (with its own generic fallback).
    """
    configured = self.config.completion_context_length
    if configured:
        return configured
    return self.completion_info().context_length

chat_cost()

(Prompt, Cached, Generation) cost per 1000 tokens, for chat-completion models/endpoints. Get it from the model-info dict if available, otherwise fall back to the general method.

Source code in langroid/language_models/openai_gpt.py
def chat_cost(self) -> Tuple[float, float, float]:
    """
    (Prompt, Cached, Generation) cost per 1000 tokens for chat-completion
    models/endpoints, derived from this model's info record.
    A missing/zero cached cost falls back to the plain input cost.
    """
    info = self.info()
    # per-million figures; a falsy cached cost means "same as input"
    cached_per_million = (
        info.cached_cost_per_million or info.input_cost_per_million
    )
    return (
        info.input_cost_per_million / 1000,
        cached_per_million / 1000,
        info.output_cost_per_million / 1000,
    )

set_stream(stream)

Enable or disable streaming output from API. Args: stream: enable streaming output from API Returns: previous value of stream

Source code in langroid/language_models/openai_gpt.py
def set_stream(self, stream: bool) -> bool:
    """Enable or disable streaming output from API.
    Args:
        stream: enable streaming output from API
    Returns: previous value of stream
    """
    previous = self.config.stream
    self.config.stream = stream
    return previous

get_stream()

Get streaming status.

Source code in langroid/language_models/openai_gpt.py
def get_stream(self) -> bool:
    """Streaming is effective only when the config flag, the global
    settings flag, and the model's capability all allow it."""
    locally_enabled = self.config.stream and settings.stream
    return locally_enabled and self.info().allows_streaming

tool_deltas_to_tools(tools) staticmethod

Convert accumulated tool-call deltas to OpenAIToolCall objects. Adapted from this excellent code: https://community.openai.com/t/help-for-function-calls-with-streaming/627170/2

Parameters:

Name Type Description Default
tools List[Dict[str, Any]]

list of tool deltas received from streaming API

required

Returns:

Name Type Description
str str

plain text corresponding to tool calls that failed to parse

List[OpenAIToolCall]

List[OpenAIToolCall]: list of OpenAIToolCall objects

List[Dict[str, Any]]

List[Dict[str, Any]]: list of tool dicts (to reconstruct OpenAI API response, so it can be cached)

Source code in langroid/language_models/openai_gpt.py
@staticmethod
def tool_deltas_to_tools(
    tools: List[Dict[str, Any]],
) -> Tuple[
    str,
    List[OpenAIToolCall],
    List[Dict[str, Any]],
]:
    """
    Convert accumulated tool-call deltas to OpenAIToolCall objects.
    Adapted from this excellent code:
     https://community.openai.com/t/help-for-function-calls-with-streaming/627170/2

    Args:
        tools: list of tool deltas received from streaming API

    Returns:
        str: plain text corresponding to tool calls that failed to parse
        List[OpenAIToolCall]: list of OpenAIToolCall objects
        List[Dict[str, Any]]: list of tool dicts
            (to reconstruct OpenAI API response, so it can be cached)
    """
    # Initialize a dictionary with default values

    # idx -> dict repr of tool
    # (used to simulate OpenAIResponse object later, and also to
    # accumulate function args as strings)
    # NOTE(review): keys are the streaming deltas' "index" values, which
    # appear to be ints at runtime despite the Dict[str, ...] annotation
    # — confirm against the streaming API's delta schema.
    idx2tool_dict: Dict[str, Dict[str, Any]] = defaultdict(
        lambda: {
            "id": None,
            "function": {"arguments": "", "name": None},
            "type": None,
            "extra_content": None,
        }
    )

    # Fold each delta into its tool's accumulator: id/name/type arrive
    # once (non-None), while "arguments" arrives as string fragments that
    # must be concatenated in order.
    for tool_delta in tools:
        if tool_delta["id"] is not None:
            idx2tool_dict[tool_delta["index"]]["id"] = tool_delta["id"]

        if tool_delta["function"]["name"] is not None:
            idx2tool_dict[tool_delta["index"]]["function"]["name"] = tool_delta[
                "function"
            ]["name"]

        idx2tool_dict[tool_delta["index"]]["function"]["arguments"] += tool_delta[
            "function"
        ]["arguments"]

        if tool_delta["type"] is not None:
            idx2tool_dict[tool_delta["index"]]["type"] = tool_delta["type"]

        if tool_delta.get("extra_content") is not None:
            idx2tool_dict[tool_delta["index"]]["extra_content"] = tool_delta[
                "extra_content"
            ]

    # (try to) parse the fn args of each tool
    contents: List[str] = []
    good_indices = []
    # tool id -> parsed args dict (None when parsing yielded nothing)
    id2args: Dict[str, None | Dict[str, Any]] = {}
    for idx, tool_dict in idx2tool_dict.items():
        failed_content, args_dict = OpenAIGPT._parse_function_args(
            tool_dict["function"]["arguments"]
        )
        # used to build tool_calls_list below
        id2args[tool_dict["id"]] = args_dict or None  # if {}, store as None
        if failed_content != "":
            contents.append(failed_content)
        else:
            good_indices.append(idx)

    # remove the failed tool calls
    idx2tool_dict = {
        idx: tool_dict
        for idx, tool_dict in idx2tool_dict.items()
        if idx in good_indices
    }

    # create OpenAIToolCall list
    tool_calls_list = [
        OpenAIToolCall(
            id=tool_dict["id"],
            function=LLMFunctionCall(
                name=tool_dict["function"]["name"],
                arguments=id2args.get(tool_dict["id"]),
            ),
            type=tool_dict["type"],
            extra_content=tool_dict.get("extra_content"),
        )
        for tool_dict in idx2tool_dict.values()
    ]
    return "\n".join(contents), tool_calls_list, list(idx2tool_dict.values())

noop()

Does nothing.

Source code in langroid/language_models/openai_gpt.py
def noop() -> None:
    """No-op placeholder (e.g. default first-use hook)."""
    pass

litellm_logging_fn(model_call_dict)

Logging function for litellm

Source code in langroid/language_models/openai_gpt.py
def litellm_logging_fn(model_call_dict: Dict[str, Any]) -> None:
    """Best-effort pretty-printing of litellm's request payload.

    Never raises: logging must not break the surrounding LLM call, so
    every failure (missing keys, non-serializable payloads, etc.) is
    swallowed.
    """
    try:
        payload = model_call_dict.get("additional_args", {}).get(
            "complete_input_dict"
        )
        if payload is None:
            return
        rendered = escape(json.dumps(payload, indent=2))
        print(
            f"[grey37]LITELLM: {rendered}[/grey37]",
        )
    except Exception:
        pass