Building a Per-User LLM Usage Dashboard Using HTTP POST
Command usage
curl -X POST -H "Content-Type: application/json" -d '{ "user_id": "user id changeme", "prompt": "question changeme", "model_name": "gemini-2.0-flash" }' http://domain_changeme:5000/ask_llm
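On success, the /ask_llm route in app.py below echoes the user_id and model and returns the model's answer. The body has the following shape (the response text here is only an illustrative placeholder):

{
  "user_id": "user id changeme",
  "model": "gemini-2.0-flash",
  "response": "..."
}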
app.py
Python
import os
import time
import logging
import sys

print("--- app.py script started ---", file=sys.stderr)
sys.stdout.flush()  # flush the output buffer

from prometheus_client import Counter, Histogram, start_http_server
from flask import Flask, request, jsonify
from openai import OpenAI  # use the OpenAI client, which is compatible with the litellm proxy

# --- Logging setup ---
# Log to the console
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- 1. Prometheus metric definitions ---
llm_requests_total = Counter(
    'llm_requests_total',
    'Total LLM API requests by user',
    ['user_id', 'model_name', 'status']
)
llm_prompt_tokens_total = Counter(
    'llm_prompt_tokens_total',
    'Total prompt tokens consumed by user',
    ['user_id', 'model_name']
)
llm_completion_tokens_total = Counter(
    'llm_completion_tokens_total',
    'Total completion tokens generated for user',
    ['user_id', 'model_name']
)
llm_response_time_seconds = Histogram(
    'llm_response_time_seconds',
    'LLM API response time in seconds',
    ['user_id', 'model_name'],
    buckets=[0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, float('inf')]
)

# --- 2. Start the Prometheus metrics server ---
def start_prometheus_exporter(port=8000):
    try:
        start_http_server(port)
        logger.info(f"Prometheus exporter started on port {port}. Access at http://0.0.0.0:{port}/metrics")
    except Exception as e:
        logger.error(f"Failed to start Prometheus exporter: {e}", exc_info=True)
        # A failed exporter start can be fatal, so exit the app
        sys.exit(1)

# --- 3. Initialize the OpenAI client against the litellm proxy ---
LITELLM_PROXY_URL = os.getenv("LITELLM_PROXY_URL")
if not LITELLM_PROXY_URL:
    logger.error("LITELLM_PROXY_URL environment variable is not set. Exiting.")
    sys.exit(1)  # exit if the environment variable is missing

LITELLM_PROXY_AUTH_KEY = os.getenv("LITELLM_PROXY_AUTH_KEY")
if not LITELLM_PROXY_AUTH_KEY:
    logger.warning("LITELLM_PROXY_AUTH_KEY environment variable is not set. "
                   "If litellm proxy requires authentication, this will fail.")
    LITELLM_PROXY_AUTH_KEY = "changeme"  # *****************

try:
    litellm_client = OpenAI(
        api_key=LITELLM_PROXY_AUTH_KEY,  # the real litellm proxy auth key (not a dummy!)
        base_url=LITELLM_PROXY_URL,
        timeout=60.0  # add a request timeout
    )
    logger.info(f"OpenAI client initialized for litellm proxy at {LITELLM_PROXY_URL}")
except Exception as e:
    logger.error(f"Failed to initialize OpenAI client for litellm proxy: {e}", exc_info=True)
    sys.exit(1)  # exit if client initialization fails

# --- 4. LLM call function (through the litellm proxy) ---
def call_llm_with_metrics(user_id: str, prompt: str, model_name: str = "gemini-2.0-flash") -> str:
    start_time = time.time()
    request_status = 'success'
    prompt_tokens_consumed = 0
    completion_tokens_generated = 0
    response_content = ""

    logger.info(f"Processing LLM request for user '{user_id}' with model '{model_name}'")
    logger.debug(f"Prompt: '{prompt[:50]}...'")  # truncate long prompts when logging

    try:
        response = litellm_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "user", "content": prompt}
            ],
            user=user_id  # pass the user to litellm (for logging/tracking)
        )
        logger.info(f"Received response from litellm for user '{user_id}'")
        logger.debug(f"Raw response: {response.model_dump_json()}")  # log the full response JSON

        response_content = response.choices[0].message.content
        logger.info(f"Extracted response content: '{response_content[:100]}...'")  # log the extracted content

        if response.usage:
            prompt_tokens_consumed = response.usage.prompt_tokens
            completion_tokens_generated = response.usage.completion_tokens
            logger.info(f"Tokens consumed (P/C): {prompt_tokens_consumed}/{completion_tokens_generated}")
        else:
            logger.warning("Usage metadata not available from litellm proxy response. "
                           "Falling back to estimation.")
            prompt_tokens_consumed = len(prompt.split())  # rough estimate
            completion_tokens_generated = len(response_content.split())  # rough estimate

    except Exception as e:
        request_status = 'failure'
        logger.error(f"LLM request failed for user '{user_id}' with model '{model_name}' via litellm: {e}",
                     exc_info=True)
        # On exception, return an error message to the user
        response_content = f"Error processing request: {str(e)}"

    finally:
        duration = time.time() - start_time
        logger.info(f"Request for user '{user_id}' completed in {duration:.2f} seconds with status: {request_status}")

        # --- Update metrics ---
        llm_requests_total.labels(user_id=user_id, model_name=model_name, status=request_status).inc()
        llm_response_time_seconds.labels(user_id=user_id, model_name=model_name).observe(duration)
        if request_status == 'success':
            llm_prompt_tokens_total.labels(user_id=user_id, model_name=model_name).inc(prompt_tokens_consumed)
            llm_completion_tokens_total.labels(user_id=user_id, model_name=model_name).inc(completion_tokens_generated)

    return response_content

# --- Flask app setup ---
app = Flask(__name__)

@app.route('/ask_llm', methods=['POST'])
def ask_llm():
    data = request.json
    user_id = data.get('user_id', 'anonymous_user')
    prompt = data.get('prompt')
    model_name = data.get('model_name', 'gemini-2.0-flash')

    if not prompt:
        logger.warning("Received request with missing prompt.")
        return jsonify({"error": "Prompt is required"}), 400

    try:
        response_text = call_llm_with_metrics(user_id, prompt, model_name)
        return jsonify({"user_id": user_id, "model": model_name, "response": response_text})
    except Exception as e:
        logger.critical(f"Unhandled exception in /ask_llm route: {e}", exc_info=True)
        return jsonify({"error": "Internal server error"}), 500

@app.route('/health')
def health_check():
    logger.info("Health check requested.")
    return "OK", 200

# --- Run the application ---
if __name__ == "__main__":
    # Start the Prometheus exporter in the background
    start_prometheus_exporter(port=8000)
    # Start the Flask app
    logger.info("My LLM Backend App starting on port 5000.")
    app.run(host='0.0.0.0', port=5000)
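Once the container is running, the two servers started by this script can be sanity-checked with curl. Using localhost here is an assumption; it applies when you run the commands on the Docker host with the ports published as in the compose file below.

curl http://localhost:5000/health
curl -s http://localhost:8000/metrics | grep llm_

The first command should return "OK" from the Flask /health route; the second should list the llm_* metrics defined at the top of app.py.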
Dockerfile
Dockerfile
FROM python:3.9-slim-buster

WORKDIR /app

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

EXPOSE 8000
EXPOSE 5000

CMD ["python", "app.py"]
requirements.txt
requirements.txt
flask
openai
prometheus_client
google-generativeai
Add to docker-compose.yml
llm_backend_app:
  build: ./llm_backend_app
  container_name: llm_backend_app
  restart: always
  ports:
    - "5000:5000"   # backend app API port
    - "8000:8000"   # backend app Prometheus metrics port
  environment:
    LITELLM_PROXY_URL: "http://litellm:4000/v1"
    LITELLM_PROXY_AUTH_KEY: ${LITELLM_PROXY_AUTH_KEY}
  depends_on:
    - litellm
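A minimal way to build and start just this service, assuming the compose file is in the current directory and LITELLM_PROXY_AUTH_KEY is defined in an .env file next to it:

docker compose up -d --build llm_backend_app
curl http://localhost:5000/health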
Add to prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'llm-backend-app'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['llm_backend_app:8000']
Dashboard Report
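The dashboard panels can be driven by PromQL over the metrics exported by app.py. The queries below are a sketch: the metric and label names come from the code above, while the 5-minute rate window and Grafana as the front end are assumptions.

# Requests per second, per user and status
sum by (user_id, status) (rate(llm_requests_total[5m]))

# Total tokens consumed per user (prompt + completion)
sum by (user_id) (llm_prompt_tokens_total) + sum by (user_id) (llm_completion_tokens_total)

# 95th-percentile LLM response time per model
histogram_quantile(0.95, sum by (le, model_name) (rate(llm_response_time_seconds_bucket[5m])))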