
Commit c0c04b0

ishaan-jaff and Copilot authored

[Performance]: Add debugging endpoint to track active /asyncio-tasks (#11382)

* feat: add debug/asyncio-tasks
* Update litellm/proxy/common_utils/debug_utils.py

Co-authored-by: Copilot <[email protected]>

1 parent d247a39 commit c0c04b0

File tree

2 files changed (+143, -3 lines)

litellm/proxy/common_utils/debug_utils.py

Lines changed: 39 additions & 0 deletions
@@ -1,7 +1,9 @@
 # Start tracing memory allocations
+import asyncio
 import json
 import os
 import tracemalloc
+from collections import Counter
 
 from fastapi import APIRouter
 
@@ -10,6 +12,43 @@
 
 router = APIRouter()
 
+
+@router.get("/debug/asyncio-tasks")
+async def get_active_tasks_stats():
+    """
+    Returns:
+        total_active_tasks: int
+        by_name: { coroutine_name: count }
+    """
+    MAX_TASKS_TO_CHECK = 5000
+    # Gather all tasks in this event loop (including this endpoint's own task).
+    all_tasks = asyncio.all_tasks()
+
+    # Filter out tasks that are already done.
+    active_tasks = [t for t in all_tasks if not t.done()]
+
+    # Count how many active tasks exist, grouped by coroutine function name.
+    counter = Counter()
+    for idx, task in enumerate(active_tasks):
+
+        # reasonable max circuit breaker
+        if idx >= MAX_TASKS_TO_CHECK:
+            break
+        coro = task.get_coro()
+        # Derive a human-readable name from the coroutine:
+        name = (
+            getattr(coro, "__qualname__", None)
+            or getattr(coro, "__name__", None)
+            or repr(coro)
+        )
+        counter[name] += 1
+
+    return {
+        "total_active_tasks": len(active_tasks),
+        "by_name": dict(counter),
+    }
+
+
 if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
     try:
         import objgraph  # type: ignore
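
For context on how the new endpoint might be queried once the proxy is running, here is a minimal client-side sketch. The base URL (http://localhost:4000) and the use of the requests library are assumptions for illustration, not part of this commit; the response shape matches the handler above.

# Minimal usage sketch (assumptions: proxy listening on localhost:4000,
# `requests` installed). Queries the new endpoint and prints the busiest
# coroutines first, which is the typical way to spot a task leak.
import requests

resp = requests.get("http://localhost:4000/debug/asyncio-tasks", timeout=10)
resp.raise_for_status()
stats = resp.json()

print(f"total active tasks: {stats['total_active_tasks']}")
# Sort coroutine names by task count, descending.
for name, count in sorted(stats["by_name"].items(), key=lambda kv: -kv[1]):
    print(f"{count:6d}  {name}")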

litellm/proxy/proxy_config.yaml

Lines changed: 104 additions & 3 deletions
@@ -1,8 +1,109 @@
 model_list:
-  - model_name: openai/*
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/*
-
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: "anthropic/*"
+    litellm_params:
+      model: "anthropic/*"
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: "bedrock/*"
+    litellm_params:
+      model: "bedrock/*"
+  - model_name: "bedrock-useast1/*"
+    litellm_params:
+      model: "bedrock/*"
+  - model_name: "bedrock-useast2/*"
+    litellm_params:
+      model: "bedrock/*"
+      aws_region_name: us-east-2
+  - model_name: "bedrock-uswest2/*"
+    litellm_params:
+      model: "bedrock/*"
+      aws_region_name: us-west-2
+  - model_name: "vertex_ai/*"
+    litellm_params:
+      model: "vertex_ai/*"
+      vertex_project: os.environ/VERTEX_PROJECT
+      vertex_location: os.environ/VERTEX_LOCATION
+      vertex_credentials: os.environ/VERTEX_SERVICE_ACCOUNT
+  - model_name: "gemini/*"
+    litellm_params:
+      model: "gemini/*"
+      api_key: os.environ/GEMINI_API_KEY
+  - model_name: "gemini-dev/*"
+    litellm_params:
+      model: "gemini/*"
+      api_key: os.environ/GEMINI_API_KEY_DEV
+  - model_name: "databricks/*"
+    litellm_params:
+      model: "databricks/*"
+      api_key: os.environ/DATABRICKS_API_KEY
+      api_base: os.environ/DATABRICKS_API_BASE
 
 litellm_settings:
+  cache: True
+  cache_params:
+    type: redis
+    host: os.environ/REDIS_HOST
+    port: os.environ/REDIS_PORT
+    password: os.environ/REDIS_PASSWORD
+    supported_call_types:
+      - acompletion
+      - completion
+  request_timeout: 30
+  allowed_fails: 3
+  # callbacks:
+  #   - otel
+  #   - prometheus
+  failure_callback:
+    - sentry
+  success_callback:
+    - s3_v2
+  s3_callback_params:
+    s3_bucket_name: load-testing-oct
   disable_token_counter: True
+  default_internal_user_params:
+    user_role: os.environ/DEFAULT_USER_ROLE
+
+callback_settings:
+  otel:
+    message_logging: False
+
+router_settings:
+  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
+  retry_policy: {
+    # Set the number of retries for each exception type.
+    # The logic is as follows:
+    # 1. For anything that is likely to repeat the same outcome, don't retry.
+    # 2. Internal server errors might be transient, so retry once.
+    # 3. For rate limit errors, retry twice.
+    # https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type
+    # Based on that doc, rate limit retries use exponential backoff whereas others are immediate.
+    "AuthenticationErrorRetries": 0,
+    "BadRequestErrorRetries": 0,
+    "ContentPolicyViolationErrorRetries": 0,
+    "InternalServerErrorRetries": 1,
+    "RateLimitErrorRetries": 2,
+    "TimeoutErrorRetries": 0
+  }
+
+general_settings:
+  disable_spend_logs: True
+  proxy_batch_write_at: 60
+  use_redis_transaction_buffer: true
+  alert_types: # https://docs.litellm.ai/docs/proxy/alerting#all-possible-alert-types
+    - db_exceptions
+    - cooldown_deployment
+    - failed_tracking_spend
+    - fallback_reports
+    # - llm_requests_hanging
+    - llm_too_slow
+    - new_model_added
+    - outage_alerts
+    - region_outage_alerts
+  alerting: ["slack"]
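
As a usage note on the wildcard model groups above: the proxy exposes an OpenAI-compatible API, so a client can target a region-pinned group by prefixing the model name. A hedged sketch, assuming the proxy runs at http://localhost:4000; the virtual key and the Bedrock model id are placeholders, not values from this config.

# Hedged sketch: calling a wildcard model group through the proxy's
# OpenAI-compatible API. Base URL, API key, and model id are assumptions.
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# "bedrock-useast2/<anything>" matches the group above and pins us-east-2.
resp = client.chat.completions.create(
    model="bedrock-useast2/anthropic.claude-3-5-sonnet-20240620-v1:0",
    messages=[{"role": "user", "content": "ping"}],
)
print(resp.choices[0].message.content)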
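
The retry_policy comments describe per-exception-type retry budgets, with exponential backoff reserved for rate limit errors and immediate retries otherwise. A standalone sketch of those semantics, mirroring the YAML numbers; this illustrates the described behavior and is not LiteLLM's internal retry code.

import asyncio
import random

# Mirrors the retry_policy above: budgets keyed by exception type name.
RETRY_POLICY = {
    "AuthenticationError": 0,
    "BadRequestError": 0,
    "ContentPolicyViolationError": 0,
    "InternalServerError": 1,
    "RateLimitError": 2,
    "TimeoutError": 0,
}

async def call_with_retries(make_request):
    attempt = 0
    while True:
        try:
            return await make_request()
        except Exception as exc:
            kind = type(exc).__name__
            if attempt >= RETRY_POLICY.get(kind, 0):
                raise  # retry budget exhausted: surface the last error
            if kind == "RateLimitError":
                # Exponential backoff with jitter for rate limits...
                await asyncio.sleep(2 ** attempt + random.random())
            # ...other retryable errors are retried immediately.
            attempt += 1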
