Building a Per-User LLM Usage Dashboard with HTTP POST
Command usage
curl -X POST -H "Content-Type: application/json" -d '{
"user_id": "user id changeme",
"prompt": "question changeme",
"model_name": "gemini-2.0-flash"
}' http://domain_changeme:5000/ask_llm
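On success, the /ask_llm endpoint returns a JSON body built from the user_id, model, and the LLM's reply (keys taken from the Flask handler below; the values here are only placeholders):
{
  "user_id": "user id changeme",
  "model": "gemini-2.0-flash",
  "response": "answer text from the model"
}
If the prompt field is missing, the handler returns {"error": "Prompt is required"} with HTTP 400.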
app.py
Python
import os
import time
import logging
import sys

print("--- app.py script started ---", file=sys.stderr)
sys.stdout.flush()  # flush the output buffer

from prometheus_client import Counter, Histogram, start_http_server
from flask import Flask, request, jsonify
from openai import OpenAI  # OpenAI client, compatible with the litellm proxy

# --- Logging setup ---
# Log to the console
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- 1. Define Prometheus metrics ---
llm_requests_total = Counter(
    'llm_requests_total',
    'Total LLM API requests by user',
    ['user_id', 'model_name', 'status']
)
llm_prompt_tokens_total = Counter(
    'llm_prompt_tokens_total',
    'Total prompt tokens consumed by user',
    ['user_id', 'model_name']
)
llm_completion_tokens_total = Counter(
    'llm_completion_tokens_total',
    'Total completion tokens generated for user',
    ['user_id', 'model_name']
)
llm_response_time_seconds = Histogram(
    'llm_response_time_seconds',
    'LLM API response time in seconds',
    ['user_id', 'model_name'],
    buckets=[0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, float('inf')]
)

# --- 2. Start the Prometheus metrics server ---
def start_prometheus_exporter(port=8000):
    try:
        start_http_server(port)
        logger.info(f"Prometheus exporter started on port {port}. Access at http://0.0.0.0:{port}/metrics")
    except Exception as e:
        logger.error(f"Failed to start Prometheus exporter: {e}", exc_info=True)
        # Failure to start the exporter is fatal here, so shut the app down
        sys.exit(1)

# --- 3. litellm proxy client configuration ---
LITELLM_PROXY_URL = os.getenv("LITELLM_PROXY_URL")
if not LITELLM_PROXY_URL:
    logger.error("LITELLM_PROXY_URL environment variable is not set. Exiting.")
    sys.exit(1)  # exit if the environment variable is missing

LITELLM_PROXY_AUTH_KEY = os.getenv("LITELLM_PROXY_AUTH_KEY")
if not LITELLM_PROXY_AUTH_KEY:
    logger.warning("LITELLM_PROXY_AUTH_KEY environment variable is not set. "
                   "If litellm proxy requires authentication, this will fail.")
    LITELLM_PROXY_AUTH_KEY = "changeme"  # placeholder; replace with the real proxy key

try:
    litellm_client = OpenAI(
        api_key=LITELLM_PROXY_AUTH_KEY,  # the actual litellm proxy auth key (not a dummy!)
        base_url=LITELLM_PROXY_URL,
        timeout=60.0  # request timeout
    )
    logger.info(f"OpenAI client initialized for litellm proxy at {LITELLM_PROXY_URL}")
except Exception as e:
    logger.error(f"Failed to initialize OpenAI client for litellm proxy: {e}", exc_info=True)
    sys.exit(1)  # exit if client initialization fails

# --- 4. LLM call function (via the litellm proxy) ---
def call_llm_with_metrics(user_id: str, prompt: str, model_name: str = "gemini-2.0-flash") -> str:
    start_time = time.time()
    request_status = 'success'
    prompt_tokens_consumed = 0
    completion_tokens_generated = 0
    response_content = ""
    logger.info(f"Processing LLM request for user '{user_id}' with model '{model_name}'")
    logger.debug(f"Prompt: '{prompt[:50]}...'")  # truncate long prompts when logging
    try:
        response = litellm_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "user", "content": prompt}
            ],
            user=user_id  # pass the user to litellm (for logging/tracing)
        )
        logger.info(f"Received response from litellm for user '{user_id}'")
        logger.debug(f"Raw response: {response.model_dump_json()}")  # log the full response JSON
        response_content = response.choices[0].message.content
        logger.info(f"Extracted response content: '{response_content[:100]}...'")  # log the extracted content
        if response.usage:
            prompt_tokens_consumed = response.usage.prompt_tokens
            completion_tokens_generated = response.usage.completion_tokens
            logger.info(f"Tokens consumed (P/C): {prompt_tokens_consumed}/{completion_tokens_generated}")
        else:
            logger.warning("Usage metadata not available from litellm proxy response. Falling back to estimation.")
            prompt_tokens_consumed = len(prompt.split())  # rough estimate
            completion_tokens_generated = len(response_content.split())  # rough estimate
    except Exception as e:
        request_status = 'failure'
        logger.error(f"LLM request failed for user '{user_id}' with model '{model_name}' via litellm: {e}", exc_info=True)
        # On error, return the error message to the caller
        response_content = f"Error processing request: {str(e)}"
    finally:
        duration = time.time() - start_time
        logger.info(f"Request for user '{user_id}' completed in {duration:.2f} seconds with status: {request_status}")
        # --- Update metrics ---
        llm_requests_total.labels(user_id=user_id, model_name=model_name, status=request_status).inc()
        llm_response_time_seconds.labels(user_id=user_id, model_name=model_name).observe(duration)
        if request_status == 'success':
            llm_prompt_tokens_total.labels(user_id=user_id, model_name=model_name).inc(prompt_tokens_consumed)
            llm_completion_tokens_total.labels(user_id=user_id, model_name=model_name).inc(completion_tokens_generated)
    return response_content

# --- Flask app setup ---
app = Flask(__name__)

@app.route('/ask_llm', methods=['POST'])
def ask_llm():
    data = request.get_json(silent=True) or {}  # tolerate missing or malformed JSON bodies
    user_id = data.get('user_id', 'anonymous_user')
    prompt = data.get('prompt')
    model_name = data.get('model_name', 'gemini-2.0-flash')
    if not prompt:
        logger.warning("Received request with missing prompt.")
        return jsonify({"error": "Prompt is required"}), 400
    try:
        response_text = call_llm_with_metrics(user_id, prompt, model_name)
        return jsonify({"user_id": user_id, "model": model_name, "response": response_text})
    except Exception as e:
        logger.critical(f"Unhandled exception in /ask_llm route: {e}", exc_info=True)
        return jsonify({"error": "Internal server error"}), 500

@app.route('/health')
def health_check():
    logger.info("Health check requested.")
    return "OK", 200

# --- Run the application ---
if __name__ == "__main__":
    # Start the Prometheus exporter (start_http_server serves metrics from a background thread)
    start_prometheus_exporter(port=8000)
    # Start the Flask app
    logger.info("My LLM Backend App starting on port 5000.")
    app.run(host='0.0.0.0', port=5000)
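Once the app is running, the metrics defined in section 1 can be checked directly against the exporter port. A quick sanity check (the label values and numbers below are purely illustrative):
curl -s http://localhost:8000/metrics | grep llm_
llm_requests_total{user_id="alice",model_name="gemini-2.0-flash",status="success"} 1.0
llm_prompt_tokens_total{user_id="alice",model_name="gemini-2.0-flash"} 42.0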
Dockerfile
Dockerfile
FROM python:3.9-slim-buster
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 8000
EXPOSE 5000
CMD ["python", "app.py"]
requirements.txt
requirements.txt
flask
openai
prometheus_client
google-generativeai
Add to docker-compose.yml (under the existing services: key)
llm_backend_app:
  build: ./llm_backend_app
  container_name: llm_backend_app
  restart: always
  ports:
    - "5000:5000"   # backend app API port
    - "8000:8000"   # backend app Prometheus metrics port
  environment:
    LITELLM_PROXY_URL: "http://litellm:4000/v1"
    LITELLM_PROXY_AUTH_KEY: ${LITELLM_PROXY_AUTH_KEY}
  depends_on:
    - litellm
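With this service added, a minimal sketch of building and starting it with the Compose v2 CLI (service and directory names as used above; adjust if your layout differs):
docker compose up -d --build llm_backend_app
docker compose logs -f llm_backend_app
curl http://localhost:5000/health   # should return OK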
Add to prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'llm-backend-app'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['llm_backend_app:8000']
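Once Prometheus is scraping the llm-backend-app job, dashboard panels can be driven by PromQL over the metrics exported by app.py. A few example queries (time windows and quantiles are only suggestions):
# Requests per user over the last hour
sum by (user_id) (increase(llm_requests_total[1h]))
# Prompt + completion tokens per user over the last 24 hours
sum by (user_id) (increase(llm_prompt_tokens_total[24h]) + increase(llm_completion_tokens_total[24h]))
# 95th-percentile LLM response time per model
histogram_quantile(0.95, sum by (model_name, le) (rate(llm_response_time_seconds_bucket[5m])))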
Dashboard Report
