
[Performance]: Add debugging endpoint to track active /asyncio-tasks #11382


Merged · 2 commits · Jun 4, 2025
39 changes: 39 additions & 0 deletions litellm/proxy/common_utils/debug_utils.py
@@ -1,7 +1,9 @@
# Start tracing memory allocations
import asyncio
import json
import os
import tracemalloc
from collections import Counter

from fastapi import APIRouter

@@ -10,6 +12,43 @@

router = APIRouter()


@router.get("/debug/asyncio-tasks")
async def get_active_tasks_stats():
    """
    Returns:
        total_active_tasks: int
        by_name: { coroutine_name: count }
    """
    MAX_TASKS_TO_CHECK = 5000
    # Gather all tasks in this event loop (including this endpoint's own task).
    all_tasks = asyncio.all_tasks()

    # Filter out tasks that are already done.
    active_tasks = [t for t in all_tasks if not t.done()]

    # Count how many active tasks exist, grouped by coroutine function name.
    counter = Counter()
    for idx, task in enumerate(active_tasks):
        # Circuit breaker: cap the work done here on very busy event loops.
        if idx >= MAX_TASKS_TO_CHECK:
            break
        coro = task.get_coro()
        # Derive a human-readable name from the coroutine.
        name = (
            getattr(coro, "__qualname__", None)
            or getattr(coro, "__name__", None)
            or repr(coro)
        )
        counter[name] += 1

    return {
        "total_active_tasks": len(active_tasks),
        "by_name": dict(counter),
    }


if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
    try:
        import objgraph  # type: ignore
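
Once the proxy is running, the new endpoint can be smoke-tested with a plain HTTP GET. A minimal sketch, assuming the proxy listens on http://localhost:4000 (the default port) and a master key of sk-1234; both values are placeholders for your deployment:

import requests  # third-party HTTP client, installed separately

# Placeholder base URL and key; adjust for your deployment.
PROXY_BASE = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

resp = requests.get(f"{PROXY_BASE}/debug/asyncio-tasks", headers=HEADERS)
resp.raise_for_status()
stats = resp.json()

# Expected shape: {"total_active_tasks": <int>, "by_name": {<coroutine name>: <count>, ...}}
print(f"total active tasks: {stats['total_active_tasks']}")
for name, count in sorted(stats["by_name"].items(), key=lambda kv: -kv[1]):
    print(f"{count:6d}  {name}")

Polling this during a load test and diffing the by_name counts between snapshots is a quick way to spot coroutines whose task counts grow without bound.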
107 changes: 104 additions & 3 deletions litellm/proxy/proxy_config.yaml
@@ -1,8 +1,109 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "bedrock-useast1/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "bedrock-useast2/*"
    litellm_params:
      model: "bedrock/*"
      aws_region_name: us-east-2
  - model_name: "bedrock-uswest2/*"
    litellm_params:
      model: "bedrock/*"
      aws_region_name: us-west-2
  - model_name: "vertex_ai/*"
    litellm_params:
      model: "vertex_ai/*"
      vertex_project: os.environ/VERTEX_PROJECT
      vertex_location: os.environ/VERTEX_LOCATION
      vertex_credentials: os.environ/VERTEX_SERVICE_ACCOUNT
  - model_name: "gemini/*"
    litellm_params:
      model: "gemini/*"
      api_key: os.environ/GEMINI_API_KEY
  - model_name: "gemini-dev/*"
    litellm_params:
      model: "gemini/*"
      api_key: os.environ/GEMINI_API_KEY_DEV
  - model_name: "databricks/*"
    litellm_params:
      model: "databricks/*"
      api_key: os.environ/DATABRICKS_API_KEY
      api_base: os.environ/DATABRICKS_API_BASE

litellm_settings:
  cache: True
  cache_params:
    type: redis
    host: os.environ/REDIS_HOST
    port: os.environ/REDIS_PORT
    password: os.environ/REDIS_PASSWORD
    supported_call_types:
      - acompletion
      - completion
  request_timeout: 30
  allowed_fails: 3
  # callbacks:
  #   - otel
  #   - prometheus
  failure_callback:
    - sentry
  success_callback:
    - s3_v2
  s3_callback_params:
    s3_bucket_name: load-testing-oct
  disable_token_counter: True
  default_internal_user_params:
    user_role: os.environ/DEFAULT_USER_ROLE

callback_settings:
  otel:
    message_logging: False

router_settings:
  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"], default="simple-shuffle"
  redis_host: os.environ/REDIS_HOST
  redis_port: os.environ/REDIS_PORT
  redis_password: os.environ/REDIS_PASSWORD
  retry_policy: {
    # Set the number of retries for each exception type.
    # The logic is as follows:
    #   1. For anything that is likely to repeat the same outcome, don't retry.
    #   2. Internal server errors might be transient, so retry once.
    #   3. For rate limit errors, retry twice.
    # https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type
    # Per that doc, rate limit retries use exponential backoff, whereas the others retry immediately.
    # A programmatic equivalent is sketched after this config.
    "AuthenticationErrorRetries": 0,
    "BadRequestErrorRetries": 0,
    "ContentPolicyViolationErrorRetries": 0,
    "InternalServerErrorRetries": 1,
    "RateLimitErrorRetries": 2,
    "TimeoutErrorRetries": 0
  }

general_settings:
  disable_spend_logs: True
  proxy_batch_write_at: 60
  use_redis_transaction_buffer: true
  alert_types: # https://docs.litellm.ai/docs/proxy/alerting#all-possible-alert-types
    - db_exceptions
    - cooldown_deployment
    - failed_tracking_spend
    - fallback_reports
    # - llm_requests_hanging
    - llm_too_slow
    - new_model_added
    - outage_alerts
    - region_outage_alerts
  alerting: ["slack"]