All settings

```yaml
environment_variables: {}

model_list:
  - model_name: string
    litellm_params: {}
    model_info:
      id: string
      mode: embedding
      input_cost_per_token: 0
      output_cost_per_token: 0
      max_tokens: 2048
      base_model: gpt-4-1106-preview
      additionalProp1: {}

litellm_settings:
  # Logging/Callback settings
  success_callback: ["langfuse"] # list of success callbacks
  failure_callback: ["sentry"] # list of failure callbacks
  callbacks: ["otel"] # list of callbacks - runs on success and failure
  service_callbacks: ["datadog", "prometheus"] # logs redis, postgres failures on datadog, prometheus
  turn_off_message_logging: boolean # prevent messages and responses from being logged to your callbacks; request metadata will still be logged
  redact_user_api_key_info: boolean # Redact information about the user api key (hashed token, user_id, team id, etc.) from logs. Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
  langfuse_default_tags: ["cache_hit", "cache_key", "proxy_base_url", "user_api_key_alias", "user_api_key_user_id", "user_api_key_user_email", "user_api_key_team_alias", "semantic-similarity"] # default tags for Langfuse Logging

  # Networking settings
  request_timeout: 10 # (int) llm request timeout in seconds. Raise a Timeout error if a call takes longer than 10s. Sets litellm.request_timeout
  force_ipv4: boolean # If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API

  set_verbose: boolean # sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION
  json_logs: boolean # if true, logs will be in json format

  # Fallbacks, reliability
  default_fallbacks: ["claude-opus"] # set default_fallbacks, in case a specific model group is misconfigured / bad
  content_policy_fallbacks: [{"gpt-3.5-turbo-small": ["claude-opus"]}] # fallbacks for ContentPolicyErrors
  context_window_fallbacks: [{"gpt-3.5-turbo-small": ["gpt-3.5-turbo-large", "claude-opus"]}] # fallbacks for ContextWindowExceededErrors

  # Caching settings
  cache: true
  cache_params: # set cache params for redis
    type: redis # type of cache to initialize

    # Optional - Redis Settings
    host: "localhost" # The host address for the Redis cache. Required if type is "redis".
    port: 6379 # The port number for the Redis cache. Required if type is "redis".
    password: "your_password" # The password for the Redis cache. Required if type is "redis".
    namespace: "litellm.caching.caching" # namespace for redis cache

    # Optional - Redis Cluster Settings
    redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]

    # Optional - Redis Sentinel Settings
    service_name: "mymaster"
    sentinel_nodes: [["localhost", 26379]]

    # Optional - Qdrant Semantic Cache Settings
    qdrant_semantic_cache_embedding_model: openai-embedding # the model should be defined on the model_list
    qdrant_collection_name: test_collection
    qdrant_quantization_config: binary
    similarity_threshold: 0.8 # similarity threshold for semantic cache

    # Optional - S3 Cache Settings
    s3_bucket_name: cache-bucket-litellm # AWS Bucket Name for S3
    s3_region_name: us-west-2 # AWS Region Name for S3
    s3_aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID # use os.environ/<variable name> to pass environment variables. This is the AWS Access Key ID for S3
    s3_aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY # AWS Secret Access Key for S3
    s3_endpoint_url: https://s3.amazonaws.com # [OPTIONAL] S3 endpoint URL, if you want to use a Backblaze/Cloudflare S3 bucket

    # Common Cache settings
    # Optional - Supported call types for caching
    supported_call_types: ["acompletion", "atext_completion", "aembedding", "atranscription"]
    # /chat/completions, /completions, /embeddings, /audio/transcriptions
    mode: default_off # if default_off, you need to opt in to caching on a per call basis
    ttl: 600 # ttl for caching

callback_settings:
  otel:
    message_logging: boolean # OTEL logging callback specific settings

general_settings:
  completion_model: string
  disable_spend_logs: boolean # turn off writing each transaction to the db
  disable_master_key_return: boolean # turn off returning master key on UI (checked on '/user/info' endpoint)
  disable_retry_on_max_parallel_request_limit_error: boolean # turn off retries when max parallel request limit is reached
  disable_reset_budget: boolean # turn off reset budget scheduled task
  disable_adding_master_key_hash_to_db: boolean # turn off storing master key hash in db, for spend tracking
  enable_jwt_auth: boolean # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
  enforce_user_param: boolean # requires all openai endpoint requests to have a 'user' param
  allowed_routes: ["route1", "route2"] # list of allowed proxy API routes a user can access (currently JWT-Auth only)
  key_management_system: google_kms # either google_kms or azure_kms
  master_key: string

  # Database Settings
  database_url: string
  database_connection_pool_limit: 0 # default 100
  database_connection_timeout: 0 # default 60s
  allow_requests_on_db_unavailable: boolean # if true, allows requests that cannot connect to the DB to verify a Virtual Key to still work

  custom_auth: string
  max_parallel_requests: 0 # the max parallel requests allowed per deployment
  global_max_parallel_requests: 0 # the max parallel requests allowed on the proxy overall
  infer_model_from_keys: true
  background_health_checks: true
  health_check_interval: 300
  alerting: ["slack", "email"]
  alerting_threshold: 0
  use_client_credentials_pass_through_routes: boolean # use client credentials for all pass-through routes, like "/vertex-ai", "/bedrock". When this is true, Virtual Key auth will not be applied on these endpoints
```
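To make the skeleton above concrete, here is a minimal sketch of a config that fills in the placeholder values. The model name, provider route, and environment variable names are illustrative choices, not shipped defaults.

```yaml
model_list:
  - model_name: gpt-4o                       # public name clients pass as `model`
    litellm_params:
      model: openai/gpt-4o                   # provider/model the proxy routes to
      api_key: os.environ/OPENAI_API_KEY     # read the key from an environment variable

litellm_settings:
  request_timeout: 600                       # raise a Timeout error after 600s
  json_logs: true                            # structured logs for easier ingestion

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY  # avoid hardcoding the master key
```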

litellm_settings - Reference

| Name | Type | Description |
|------|------|-------------|
| success_callback | array of strings | List of success callbacks. Doc Proxy logging callbacks, Doc Metrics |
| failure_callback | array of strings | List of failure callbacks. Doc Proxy logging callbacks, Doc Metrics |
| callbacks | array of strings | List of callbacks - runs on success and failure. Doc Proxy logging callbacks, Doc Metrics |
| service_callbacks | array of strings | System health monitoring - logs redis, postgres failures on specified services (e.g. datadog, prometheus). Doc Metrics |
| turn_off_message_logging | boolean | If true, prevents messages and responses from being logged to callbacks, but request metadata will still be logged. Proxy Logging |
| modify_params | boolean | If true, allows modifying the parameters of the request before it is sent to the LLM provider |
| enable_preview_features | boolean | If true, enables preview features - e.g. Azure O1 models with streaming support |
| redact_user_api_key_info | boolean | If true, redacts information about the user api key from logs. Proxy Logging |
| langfuse_default_tags | array of strings | Default tags for Langfuse Logging. Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields as tags. Further docs |
| set_verbose | boolean | If true, sets litellm.set_verbose=True to view verbose debug logs. DO NOT LEAVE THIS ON IN PRODUCTION |
| json_logs | boolean | If true, logs will be in JSON format. Equivalent to setting litellm.json_logs = True. Currently only the raw POST request from litellm is logged as JSON. Further docs |
| default_fallbacks | array of strings | List of fallback models to use if a specific model group is misconfigured / bad. Further docs |
| request_timeout | integer | The timeout for requests in seconds. If not set, the default value is 6000 seconds. For reference, the OpenAI Python SDK defaults to 600 seconds. |
| force_ipv4 | boolean | If true, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6 + Anthropic API |
| content_policy_fallbacks | array of objects | Fallbacks to use when a ContentPolicyViolationError is encountered. Further docs |
| context_window_fallbacks | array of objects | Fallbacks to use when a ContextWindowExceededError is encountered. Further docs |
| cache | boolean | If true, enables caching. Further docs |
| cache_params | object | Parameters for the cache. Further docs |
| disable_end_user_cost_tracking | boolean | If true, turns off end user cost tracking on prometheus metrics and the litellm spend logs table on the proxy |
| disable_end_user_cost_tracking_prometheus_only | boolean | If true, turns off end user cost tracking on prometheus metrics only |
| key_generation_settings | object | Restricts who can generate keys. Further docs |
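Combining the `cache` and `cache_params` rows above, a semantic cache backed by Qdrant could look roughly like the sketch below. The `type: qdrant-semantic` string is an assumption inferred from the Qdrant-specific keys in this reference; verify it against the caching docs before relying on it.

```yaml
litellm_settings:
  cache: true
  cache_params:
    type: qdrant-semantic                                    # assumed cache type string for Qdrant
    qdrant_semantic_cache_embedding_model: openai-embedding  # must be defined in model_list
    qdrant_collection_name: test_collection
    qdrant_quantization_config: binary
    similarity_threshold: 0.8                                # treat prompts with similarity >= 0.8 as cache hits
```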

general_settings - Reference

| Name | Type | Description |
|------|------|-------------|
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
| disable_master_key_return | boolean | If true, turns off returning the master key on the UI (checked on the '/user/info' endpoint) |
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when the max parallel request limit is reached |
| disable_reset_budget | boolean | If true, turns off the reset-budget scheduled task |
| disable_adding_master_key_hash_to_db | boolean | If true, turns off storing the master key hash in the db |
| enable_jwt_auth | boolean | Allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims. Doc on JWT Tokens |
| enforce_user_param | boolean | If true, requires all OpenAI endpoint requests to have a 'user' param. Doc on call hooks |
| allowed_routes | array of strings | List of allowed proxy API routes a user can access. Doc on controlling allowed routes |
| key_management_system | string | Specifies the key management system. Doc Secret Managers |
| master_key | string | The master key for the proxy. Set up Virtual Keys |
| database_url | string | The URL for the database connection. Set up Virtual Keys |
| database_connection_pool_limit | integer | The limit for the database connection pool. Setting DB Connection Pool limit |
| database_connection_timeout | integer | The timeout for database connections in seconds. Setting DB Connection Pool limit, timeout |
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if the DB is unreachable. Only use this if running LiteLLM in your VPC. This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
| custom_auth | string | Write your own custom authentication logic. Doc Custom Auth |
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
| infer_model_from_keys | boolean | If true, infers the model from the provided keys |
| background_health_checks | boolean | If true, enables background health checks. Doc on health checks |
| health_check_interval | integer | The interval for health checks in seconds. Doc on health checks |
| alerting | array of strings | List of alerting methods. Doc on Slack Alerting |
| alerting_threshold | integer | The threshold for triggering alerts. Doc on Slack Alerting |
| use_client_credentials_pass_through_routes | boolean | If true, uses client credentials for all pass-through routes. Doc on pass through routes |
| health_check_details | boolean | If false, hides health check details (e.g. remaining rate limit). Doc on health checks |
| public_routes | List[str] | (Enterprise Feature) Control the list of public routes |
| alert_types | List[str] | Control the list of alert types to send to Slack. [Doc on alert types](./alerting.md) |
| enforced_params | List[str] | (Enterprise Feature) List of params that must be included in all requests to the proxy |
| enable_oauth2_auth | boolean | (Enterprise Feature) If true, enables OAuth 2.0 authentication |
| use_x_forwarded_for | str | If true, uses the X-Forwarded-For header to get the client IP address |
| service_account_settings | List[Dict[str, Any]] | Set service_account_settings if you want to create settings that only apply to service account keys. [Doc on service accounts](./service_accounts.md) |
| image_generation_model | str | The default model to use for image generation - ignores model set in request |
| store_model_in_db | boolean | If true, allows the /model/new endpoint to store model information in the db. Endpoint disabled by default. Doc on /model/new endpoint |
| max_request_size_mb | int | The maximum size for requests in MB. Requests above this size will be rejected |
| max_response_size_mb | int | The maximum size for responses in MB. LLM responses above this size will not be sent |
| proxy_budget_rescheduler_min_time | int | The minimum time (in seconds) to wait before checking the db for budget resets. Default is 597 seconds |
| proxy_budget_rescheduler_max_time | int | The maximum time (in seconds) to wait before checking the db for budget resets. Default is 605 seconds |
| proxy_batch_write_at | int | Time (in seconds) to wait before batch-writing spend logs to the db. Default is 10 seconds |
| alerting_args | dict | Args for Slack Alerting. Doc on Slack Alerting |
| custom_key_generate | str | Custom function for key generation. Doc on custom key generation |
| allowed_ips | List[str] | List of IPs allowed to access the proxy. If not set, all IPs are allowed |
| embedding_model | str | The default model to use for embeddings - ignores model set in request |
| default_team_disabled | boolean | If true, users cannot create 'personal' keys (keys with no team_id) |
| alert_to_webhook_url | Dict[str] | Specify a webhook url for each alert type |
| key_management_settings | List[Dict[str, Any]] | Settings for the key management system (e.g. AWS KMS, Azure Key Vault). Doc on key management |
| allow_user_auth | boolean | (Deprecated) Old approach for user authentication |
| user_api_key_cache_ttl | int | The time (in seconds) to cache user api keys in memory |
| disable_prisma_schema_update | boolean | If true, turns off automatic schema updates to the DB |
| litellm_key_header_name | str | If set, allows passing LiteLLM keys as a custom header. Doc on custom headers |
| moderation_model | str | The default model to use for moderation |
| custom_sso | str | Path to a python file that implements custom SSO logic. Doc on custom SSO |
| allow_client_side_credentials | boolean | If true, allows passing client-side credentials to the proxy. (Useful when testing finetuning models.) Doc on client side credentials |
| admin_only_routes | List[str] | (Enterprise Feature) List of routes that are only accessible to admin users. Doc on admin only routes |
| use_azure_key_vault | boolean | If true, load keys from Azure Key Vault |
| use_google_kms | boolean | If true, load keys from Google KMS |
| spend_report_frequency | str | Specify how often you want a Spend Report to be sent (e.g. "1d", "2d", "30d"). More on this |
| ui_access_mode | Literal["admin_only"] | If set, restricts access to the UI to admin users only. Docs |
| litellm_jwtauth | Dict[str, Any] | Settings for JWT authentication. Docs |
| litellm_license | str | The license key for the proxy. Docs |
| oauth2_config_mappings | Dict[str, str] | Define the OAuth2 config mappings |
| pass_through_endpoints | List[Dict[str, Any]] | Define the pass-through endpoints. Docs |
| enable_oauth2_proxy_auth | boolean | (Enterprise Feature) If true, enables OAuth 2.0 authentication |
| forward_openai_org_id | boolean | If true, forwards the OpenAI Organization ID to the backend LLM call (if it's OpenAI) |
| forward_client_headers_to_llm_api | boolean | If true, forwards the client headers (any x- headers) to the backend LLM call |
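As a worked example of the table above, here is a hedged sketch of a production-leaning `general_settings` block; the specific values are illustrative choices, not recommended defaults.

```yaml
general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  database_url: os.environ/DATABASE_URL
  allow_requests_on_db_unavailable: true  # only safe when running inside a private VPC
  proxy_batch_write_at: 60                # batch spend-log writes every 60s instead of the 10s default
  max_request_size_mb: 16                 # reject request bodies larger than 16 MB
  alerting: ["slack"]
  alerting_threshold: 300                 # alert when requests hang longer than 300s
```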

router_settings - Reference

Info: Most values can also be set via `litellm_settings`. If you see overlapping values, settings in `router_settings` will override those in `litellm_settings`.
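For example, if the same key were set in both blocks, the router value would take effect. A hedged sketch; `num_retries` is just an illustrative choice of overlapping key:

```yaml
litellm_settings:
  num_retries: 2    # overridden by router_settings below

router_settings:
  num_retries: 5    # this value takes effect
```

The full example below shows the remaining router options.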

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2 # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
  redis_host: <your-redis-host> # string
  redis_password: <your-redis-password> # string
  redis_port: <your-redis-port> # string
  enable_pre_call_check: true # bool - Before a call is made, check if it is within the model's context window
  allowed_fails: 3 # cooldown a model if it fails more than this many calls in a minute
  cooldown_time: 30 # (in seconds) how long to cooldown a model if fails/min > allowed_fails
  disable_cooldowns: True # bool - Disable cooldowns for all models
  enable_tag_filtering: True # bool - Use tag based routing for requests
  retry_policy: { # Dict[str, int]: retry policy for different types of exceptions
    "AuthenticationErrorRetries": 3,
    "TimeoutErrorRetries": 3,
    "RateLimitErrorRetries": 3,
    "ContentPolicyViolationErrorRetries": 4,
    "InternalServerErrorRetries": 4
  }
  allowed_fails_policy: {
    "BadRequestErrorAllowedFails": 1000, # Allow 1000 BadRequestErrors before cooling down a deployment
    "AuthenticationErrorAllowedFails": 10, # int
    "TimeoutErrorAllowedFails": 12, # int
    "RateLimitErrorAllowedFails": 10000, # int
    "ContentPolicyViolationErrorAllowedFails": 15, # int
    "InternalServerErrorAllowedFails": 20, # int
  }
  content_policy_fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for content policy violations
  fallbacks: [{"claude-2": ["my-fallback-model"]}] # List[Dict[str, List[str]]]: Fallback model for all errors
```
| Name | Type | Description |
|------|------|-------------|
| routing_strategy | string | The strategy used for routing requests. Options: "simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing". Default is "simple-shuffle". More information here |
| redis_host | string | The host address for the Redis server. Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them |
| redis_password | string | The password for the Redis server. Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them |
| redis_port | string | The port number for the Redis server. Only set this if you have multiple instances of LiteLLM Proxy and want current tpm/rpm tracking to be shared across them |
| enable_pre_call_check | boolean | If true, checks if a call is within the model's context window before making the call. More information here |
| content_policy_fallbacks | array of objects | Specifies fallback models for content policy violations. More information here |
| fallbacks | array of objects | Specifies fallback models for all types of errors. More information here |
| enable_tag_filtering | boolean | If true, uses tag-based routing for requests. Tag Based Routing |
| cooldown_time | integer | The duration (in seconds) to cooldown a model if it exceeds the allowed failures |
| disable_cooldowns | boolean | If true, disables cooldowns for all models. More information here |
| retry_policy | object | Specifies the number of retries for different types of exceptions. More information here |
| allowed_fails | integer | The number of failures allowed before cooling down a model. More information here |
| allowed_fails_policy | object | Specifies the number of allowed failures for different error types before cooling down a deployment. More information here |
| default_max_parallel_requests | Optional[int] | The default maximum number of parallel requests for a deployment |
| default_priority | Optional[int] | The default priority for a request. Only for '.scheduler_acompletion()'. Default is None |
| polling_interval | Optional[float] | Frequency of polling the queue. Only for '.scheduler_acompletion()'. Default is 3ms |
| max_fallbacks | Optional[int] | The maximum number of fallbacks to try before exiting the call. Defaults to 5 |
| default_litellm_params | Optional[dict] | The default litellm parameters to add to all requests (e.g. temperature, max_tokens) |
| timeout | Optional[float] | The default timeout for a request |
| debug_level | Literal["DEBUG", "INFO"] | The debug level for the logging library in the router. Defaults to "INFO" |
| client_ttl | int | Time-to-live for cached clients in seconds. Defaults to 3600 |
| cache_kwargs | dict | Additional keyword arguments for the cache initialization |
| routing_strategy_args | dict | Additional keyword arguments for the routing strategy - e.g. lowest-latency-routing default ttl |
| model_group_alias | dict | Model group alias mapping. E.g. {"claude-3-haiku": "claude-3-haiku-20240229"} |
| num_retries | int | Number of retries for a request. Defaults to 3 |
| default_fallbacks | Optional[List[str]] | Fallbacks to try if no model-group-specific fallbacks are defined |
| caching_groups | Optional[List[tuple]] | List of model groups for caching across model groups. Defaults to None - e.g. caching_groups=[("openai-gpt-3.5-turbo", "azure-gpt-3.5-turbo")] |
| alerting_config | AlertingConfig | [SDK-only arg] Slack alerting configuration. Defaults to None. Further Docs |
| assistants_config | AssistantsConfig | Set on the proxy via `assistant_settings`. Further docs |
| set_verbose | boolean | (DEPRECATED PARAM - see debug docs) If true, sets the logging level to verbose |
| retry_after | int | Time to wait before retrying a request in seconds. Defaults to 0. If `x-retry-after` is received from the LLM API, this value is overridden |
| provider_budget_config | ProviderBudgetConfig | Provider budget configuration. Use this to set llm_provider budget limits, e.g. $100/day to OpenAI, $100/day to Azure. Defaults to None. Further Docs |
| enable_pre_call_checks | boolean | If true, checks if a call is within the model's context window before making the call. More information here |
| model_group_retry_policy | Dict[str, RetryPolicy] | [SDK-only arg] Set retry policy for model groups |
| context_window_fallbacks | List[Dict[str, List[str]]] | Fallback models for context window violations |
| redis_url | str | URL for the Redis server. Known performance issue with Redis URL |
| cache_responses | boolean | Flag to enable caching LLM responses, if the cache is set under router_settings. If true, caches responses. Defaults to False |
| router_general_settings | RouterGeneralSettings | [SDK-Only] Router general settings - contains optimizations like 'async_only_mode'. Docs |
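Tying the Redis rows above together: with several proxy instances behind a load balancer, usage-based routing only works if tpm/rpm counters live in a shared Redis. A sketch, with placeholder connection values read from the environment:

```yaml
router_settings:
  routing_strategy: usage-based-routing-v2
  redis_host: os.environ/REDIS_HOST          # shared across all proxy instances
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
  enable_pre_call_check: true                # skip deployments whose context window is too small
```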

environment variables - Reference

| Name | Description |
|------|-------------|
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions |
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions |
| AISPEND_ACCOUNT_ID | Account ID for AI Spend |
| AISPEND_API_KEY | API Key for AI Spend |
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access |
| ARIZE_API_KEY | API key for Arize platform integration |
| ARIZE_SPACE_KEY | Space key for Arize platform |
| ARGILLA_BATCH_SIZE | Batch size for Argilla logging |
| ARGILLA_API_KEY | API key for Argilla platform |
| ARGILLA_SAMPLING_RATE | Sampling rate for Argilla logging |
| ARGILLA_DATASET_NAME | Dataset name for Argilla logging |
| ARGILLA_BASE_URL | Base URL for Argilla service |
| ATHINA_API_KEY | API key for Athina service |
| AUTH_STRATEGY | Strategy used for authentication (e.g., OAuth, API key) |
| AWS_ACCESS_KEY_ID | Access Key ID for AWS services |
| AWS_PROFILE_NAME | AWS CLI profile name to be used |
| AWS_REGION_NAME | Default AWS region for service interactions |
| AWS_ROLE_NAME | Role name for AWS IAM usage |
| AWS_SECRET_ACCESS_KEY | Secret Access Key for AWS services |
| AWS_SESSION_NAME | Name for AWS session |
| AWS_WEB_IDENTITY_TOKEN | Web identity token for AWS |
| AZURE_API_VERSION | Version of the Azure API being used |
| AZURE_AUTHORITY_HOST | Azure authority host URL |
| AZURE_CLIENT_ID | Client ID for Azure services |
| AZURE_CLIENT_SECRET | Client secret for Azure services |
| AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token |
| AZURE_KEY_VAULT_URI | URI for Azure Key Vault |
| AZURE_TENANT_ID | Tenant ID for Azure Active Directory |
| BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service |
| BRAINTRUST_API_KEY | API key for Braintrust integration |
| CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI |
| CIRCLE_OIDC_TOKEN_V2 | Version 2 of the OpenID Connect token for CircleCI |
| CONFIG_FILE_PATH | File path for configuration file |
| CUSTOM_TIKTOKEN_CACHE_DIR | Custom directory for Tiktoken cache |
| DATABASE_HOST | Hostname for the database server |
| DATABASE_NAME | Name of the database |
| DATABASE_PASSWORD | Password for the database user |
| DATABASE_PORT | Port number for database connection |
| DATABASE_SCHEMA | Schema name used in the database |
| DATABASE_URL | Connection URL for the database |
| DATABASE_USER | Username for database connection |
| DATABASE_USERNAME | Alias for database user |
| DATABRICKS_API_BASE | Base URL for Databricks API |
| DD_BASE_URL | Base URL for Datadog integration |
| DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration |
| _DATADOG_BASE_URL | (Alternative to DD_BASE_URL) Base URL for Datadog integration |
| DD_API_KEY | API key for Datadog integration |
| DD_SITE | Site URL for Datadog (e.g., datadoghq.com) |
| DD_SOURCE | Source identifier for Datadog logs |
| DD_ENV | Environment identifier for Datadog logs. Only supported for datadog_llm_observability callback |
| DD_SERVICE | Service identifier for Datadog logs. Defaults to "litellm-server" |
| DD_VERSION | Version identifier for Datadog logs. Defaults to "unknown" |
| DEBUG_OTEL | Enable debug mode for OpenTelemetry |
| DIRECT_URL | Direct URL for service endpoint |
| DISABLE_ADMIN_UI | Toggle to disable the admin UI |
| DISABLE_SCHEMA_UPDATE | Toggle to disable schema updates |
| DOCS_DESCRIPTION | Description text for documentation pages |
| DOCS_FILTERED | Flag indicating filtered documentation |
| DOCS_TITLE | Title of the documentation pages |
| DOCS_URL | The path to the Swagger API documentation. By default this is "/" |
| EMAIL_SUPPORT_CONTACT | Support contact email address |
| GCS_BUCKET_NAME | Name of the Google Cloud Storage bucket |
| GCS_PATH_SERVICE_ACCOUNT | Path to the Google Cloud service account JSON file |
| GCS_FLUSH_INTERVAL | Flush interval for GCS logging (in seconds). Specify how often you want a log to be sent to GCS. Default is 20 seconds |
| GCS_BATCH_SIZE | Batch size for GCS logging. Specify after how many logs you want to flush to GCS. If BATCH_SIZE is set to 10, logs are flushed every 10 logs. Default is 2048 |
| GENERIC_AUTHORIZATION_ENDPOINT | Authorization endpoint for generic OAuth providers |
| GENERIC_CLIENT_ID | Client ID for generic OAuth providers |
| GENERIC_CLIENT_SECRET | Client secret for generic OAuth providers |
| GENERIC_CLIENT_STATE | State parameter for generic client authentication |
| GENERIC_INCLUDE_CLIENT_ID | Include client ID in requests for OAuth |
| GENERIC_SCOPE | Scope settings for generic OAuth providers |
| GENERIC_TOKEN_ENDPOINT | Token endpoint for generic OAuth providers |
| GENERIC_USER_DISPLAY_NAME_ATTRIBUTE | Attribute for user's display name in generic auth |
| GENERIC_USER_EMAIL_ATTRIBUTE | Attribute for user's email in generic auth |
| GENERIC_USER_FIRST_NAME_ATTRIBUTE | Attribute for user's first name in generic auth |
| GENERIC_USER_ID_ATTRIBUTE | Attribute for user ID in generic auth |
| GENERIC_USER_LAST_NAME_ATTRIBUTE | Attribute for user's last name in generic auth |
| GENERIC_USER_PROVIDER_ATTRIBUTE | Attribute specifying the user's provider |
| GENERIC_USER_ROLE_ATTRIBUTE | Attribute specifying the user's role |
| GENERIC_USERINFO_ENDPOINT | Endpoint to fetch user information in generic OAuth |
| GALILEO_BASE_URL | Base URL for Galileo platform |
| GALILEO_PASSWORD | Password for Galileo authentication |
| GALILEO_PROJECT_ID | Project ID for Galileo usage |
| GALILEO_USERNAME | Username for Galileo authentication |
| GREENSCALE_API_KEY | API key for Greenscale service |
| GREENSCALE_ENDPOINT | Endpoint URL for Greenscale service |
| GOOGLE_APPLICATION_CREDENTIALS | Path to Google Cloud credentials JSON file |
| GOOGLE_CLIENT_ID | Client ID for Google OAuth |
| GOOGLE_CLIENT_SECRET | Client secret for Google OAuth |
| GOOGLE_KMS_RESOURCE_NAME | Name of the resource in Google KMS |
| HF_API_BASE | Base URL for Hugging Face API |
| HELICONE_API_KEY | API key for Helicone service |
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API |
| IAM_TOKEN_DB_AUTH | IAM token for database authentication |
| JSON_LOGS | Enable JSON formatted logging |
| JWT_AUDIENCE | Expected audience for JWT tokens |
| JWT_PUBLIC_KEY_URL | URL to fetch public key for JWT verification |
| LAGO_API_BASE | Base URL for Lago API |
| LAGO_API_CHARGE_BY | Parameter to determine charge basis in Lago |
| LAGO_API_EVENT_CODE | Event code for Lago API events |
| LAGO_API_KEY | API key for accessing Lago services |
| LANGFUSE_DEBUG | Toggle debug mode for Langfuse |
| LANGFUSE_FLUSH_INTERVAL | Interval for flushing Langfuse logs |
| LANGFUSE_HOST | Host URL for Langfuse service |
| LANGFUSE_PUBLIC_KEY | Public key for Langfuse authentication |
| LANGFUSE_RELEASE | Release version of Langfuse integration |
| LANGFUSE_SECRET_KEY | Secret key for Langfuse authentication |
| LANGSMITH_API_KEY | API key for Langsmith platform |
| LANGSMITH_BASE_URL | Base URL for Langsmith service |
| LANGSMITH_BATCH_SIZE | Batch size for operations in Langsmith |
| LANGSMITH_DEFAULT_RUN_NAME | Default name for Langsmith run |
| LANGSMITH_PROJECT | Project name for Langsmith integration |
| LANGSMITH_SAMPLING_RATE | Sampling rate for Langsmith logging |
| LANGTRACE_API_KEY | API key for Langtrace service |
| LITERAL_API_KEY | API key for Literal integration |
| LITERAL_API_URL | API URL for Literal service |
| LITERAL_BATCH_SIZE | Batch size for Literal operations |
| LITELLM_DONT_SHOW_FEEDBACK_BOX | Flag to hide feedback box in LiteLLM UI |
| LITELLM_DROP_PARAMS | Parameters to drop in LiteLLM requests |
| LITELLM_EMAIL | Email associated with LiteLLM account |
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM |
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM |
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM |
| LITELLM_LICENSE | License key for LiteLLM usage |
| LITELLM_LOCAL_MODEL_COST_MAP | Local configuration for model cost mapping in LiteLLM |
| LITELLM_LOG | Enable detailed logging for LiteLLM |
| LITELLM_MODE | Operating mode for LiteLLM (e.g., production, development) |
| LITELLM_SALT_KEY | Salt key for encryption in LiteLLM |
| LITELLM_SECRET_AWS_KMS_LITELLM_LICENSE | AWS KMS encrypted license for LiteLLM |
| LITELLM_TOKEN | Access token for LiteLLM integration |
| LOGFIRE_TOKEN | Token for Logfire logging service |
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services |
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services |
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure |
| NO_DOCS | Flag to disable documentation generation |
| NO_PROXY | List of addresses to bypass proxy |
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval |
| OPENAI_API_BASE | Base URL for OpenAI API |
| OPENAI_API_KEY | API key for OpenAI services |
| OPENAI_ORGANIZATION | Organization identifier for OpenAI |
| OPENID_BASE_URL | Base URL for OpenID Connect services |
| OPENID_CLIENT_ID | Client ID for OpenID Connect authentication |
| OPENID_CLIENT_SECRET | Client secret for OpenID Connect authentication |
| OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration |
| OPENMETER_API_KEY | API key for OpenMeter services |
| OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter |
| OTEL_ENDPOINT | OpenTelemetry endpoint for traces |
| OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry |
| OTEL_EXPORTER | Exporter type for OpenTelemetry |
| OTEL_HEADERS | Headers for OpenTelemetry requests |
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry |
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing |
| PREDIBASE_API_BASE | Base URL for Predibase API |
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service |
| PRESIDIO_ANONYMIZER_API_BASE | Base URL for Presidio Anonymizer service |
| PROMETHEUS_URL | URL for Prometheus service |
| PROMPTLAYER_API_KEY | API key for PromptLayer integration |
| PROXY_ADMIN_ID | Admin identifier for proxy server |
| PROXY_BASE_URL | Base URL for proxy service |
| PROXY_LOGOUT_URL | URL for logging out of the proxy service |
| PROXY_MASTER_KEY | Master key for proxy authentication |
| QDRANT_API_BASE | Base URL for Qdrant API |
| QDRANT_API_KEY | API key for Qdrant service |
| QDRANT_URL | Connection URL for Qdrant database |
| REDIS_HOST | Hostname for Redis server |
| REDIS_PASSWORD | Password for Redis service |
| REDIS_PORT | Port number for Redis server |
| REDOC_URL | The path to the Redoc Fast API documentation. By default this is "/redoc" |
| SERVER_ROOT_PATH | Root path for the server application |
| SET_VERBOSE | Flag to enable verbose logging |
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly) |
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration |
| SMTP_HOST | Hostname for the SMTP server |
| SMTP_PASSWORD | Password for SMTP authentication |
| SMTP_PORT | Port number for SMTP server |
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions |
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP |
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections |
| SMTP_USERNAME | Username for SMTP authentication |
| SPEND_LOGS_URL | URL for retrieving spend logs |
| SSL_CERTIFICATE | Path to the SSL certificate file |
| SSL_VERIFY | Flag to enable or disable SSL certificate verification |
| SUPABASE_KEY | API key for Supabase service |
| SUPABASE_URL | Base URL for Supabase instance |
| TEST_EMAIL_ADDRESS | Email address used for testing purposes |
| UI_LOGO_PATH | Path to the logo image used in the UI |
| UI_PASSWORD | Password for accessing the UI |
| UI_USERNAME | Username for accessing the UI |
| UPSTREAM_LANGFUSE_DEBUG | Flag to enable debugging for upstream Langfuse |
| UPSTREAM_LANGFUSE_HOST | Host URL for upstream Langfuse service |
| UPSTREAM_LANGFUSE_PUBLIC_KEY | Public key for upstream Langfuse authentication |
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse |
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication |
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption |
| WEBHOOK_URL | URL for receiving webhooks from external services |
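Any of these variables can back a config value via the `os.environ/<VARIABLE>` syntax shown earlier, and the top-level `environment_variables` block can set them inline. An illustrative sketch (the key values are placeholders; prefer injecting real secrets at deploy time):

```yaml
environment_variables:
  LANGFUSE_PUBLIC_KEY: "pk-placeholder"   # placeholder value, not a real key
  LANGFUSE_SECRET_KEY: "sk-placeholder"

litellm_settings:
  success_callback: ["langfuse"]          # the Langfuse callback reads the LANGFUSE_* variables above
```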