---
# litellm proxy model configuration.
# "os.environ/NAME" values are resolved by litellm from environment variables at load time.
model_list:
  # Mock endpoint used for load testing; not a real provider.
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  # Wildcard routes: any "anthropic/<model>" request is forwarded as-is.
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "bedrock-useast1/*"
    litellm_params:
      model: "bedrock/*"
      # NOTE(review): unlike useast2/uswest2 below, no aws_region_name is set here —
      # confirm falling back to the default AWS region is intentional.
  - model_name: "bedrock-useast2/*"
    litellm_params:
      model: "bedrock/*"
      aws_region_name: us-east-2
  - model_name: "bedrock-uswest2/*"
    litellm_params:
      model: "bedrock/*"
      aws_region_name: us-west-2
  - model_name: "vertex_ai/*"
    litellm_params:
      model: "vertex_ai/*"
      vertex_project: os.environ/VERTEX_PROJECT
      vertex_location: os.environ/VERTEX_LOCATION
      vertex_credentials: os.environ/VERTEX_SERVICE_ACCOUNT
  - model_name: "gemini/*"
    litellm_params:
      model: "gemini/*"
      api_key: os.environ/GEMINI_API_KEY
  # Same upstream provider as "gemini/*" but keyed with a separate dev API key.
  - model_name: "gemini-dev/*"
    litellm_params:
      model: "gemini/*"
      api_key: os.environ/GEMINI_API_KEY_DEV
  - model_name: "databricks/*"
    litellm_params:
      model: "databricks/*"
      api_key: os.environ/DATABRICKS_API_KEY
      api_base: os.environ/DATABRICKS_API_BASE

litellm_settings:
  # Redis-backed response cache; only the listed call types are cached.
  cache: true
  cache_params:
    type: redis
    host: os.environ/REDIS_HOST
    port: os.environ/REDIS_PORT
    password: os.environ/REDIS_PASSWORD
    supported_call_types:
      - acompletion
      - completion
  request_timeout: 30
  allowed_fails: 3
  # callbacks:
  #   - otel
  #   - prometheus
  failure_callback:
    - sentry
  success_callback:
    - s3_v2
  s3_callback_params:
    s3_bucket_name: load-testing-oct
  disable_token_counter: true
  # Role applied to newly created internal users.
  default_internal_user_params:
    user_role: os.environ/DEFAULT_USER_ROLE

callback_settings:
  otel:
    # Do not include message contents in OpenTelemetry logs.
    message_logging: false

router_settings:
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
  redis_host: os.environ/REDIS_HOST
  redis_port: os.environ/REDIS_PORT
  redis_password: os.environ/REDIS_PASSWORD
  retry_policy:
    # Set the number of retries for each exception type.
    # The logic is as follows:
    # 1. For anything that is likely to repeat the same outcome, don't retry.
    # 2. Internal server errors might be transient, so retry once.
    # 3. For rate limit errors, retry twice.
    # https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type
    # Based on that doc, rate limit retries use exponential backoff whereas others are immediate.
    AuthenticationErrorRetries: 0
    BadRequestErrorRetries: 0
    ContentPolicyViolationErrorRetries: 0
    InternalServerErrorRetries: 1
    RateLimitErrorRetries: 2
    TimeoutErrorRetries: 0

general_settings:
  # Skip per-request spend log rows; spend is still aggregated via the batch writer.
  disable_spend_logs: true
  proxy_batch_write_at: 60
  use_redis_transaction_buffer: true
  alert_types: # https://docs.litellm.ai/docs/proxy/alerting#all-possible-alert-types
    - db_exceptions
    - cooldown_deployment
    - failed_tracking_spend
    - fallback_reports
    # - llm_requests_hanging
    - llm_too_slow
    - new_model_added
    - outage_alerts
    - region_outage_alerts
  alerting: ["slack"]