1+ from flask import Flask , request , jsonify
2+ import requests
3+ import os
4+ import json
5+ import logging
6+ import openai
7+
# Configure the root logger at INFO and log under this module's own name.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application object that the route decorators in this file attach to.
app = Flask(__name__)

# Secrets and endpoints are read from the environment once, at import time.
# Either value is None when its variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
SLACK_WEBHOOK = os.environ.get("SLACK_WEBHOOK_URL")
15+
16+
def ask_gpt4(pod_name, logs):
    """Ask the OpenAI chat model to diagnose a crashed pod.

    Builds a prompt from the pod name and its logs, sends it through
    ``openai.ChatCompletion.create`` and decodes the reply as JSON.

    Args:
        pod_name: Name of the crashed pod, interpolated into the prompt.
        logs: Log text to include in the prompt.

    Returns:
        dict: The JSON object decoded from the model's reply. The prompt asks
        for the keys ``diagnosis``, ``root_cause``, ``fix`` and
        ``confidence``, but their presence and types are not validated here.

    Raises:
        ValueError: If the reply is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or does not decode to a JSON object.
        Exception: Errors raised by the OpenAI client are propagated.
    """
    # Assembled line by line so the text sent to the model does not depend on
    # how this function happens to be indented.
    prompt_lines = [
        "",
        f"Kubernetes pod '{pod_name} ' has crashed.",
        "Last 50 lines of logs:",
        f"{logs}",
        "Respond ONLY in valid JSON:",
        "{",
        '"diagnosis": "what went wrong in 1-2 sentences",',
        '"root_cause": "specific technical cause",',
        '"fix": "restart_pod or manual_review",',
        '"confidence": 0.95',
        "}",
        "",
    ]
    prompt = "\n".join(prompt_lines)

    # NOTE: despite the function name, the model requested is gpt-3.5-turbo.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a Kubernetes expert. Respond in valid JSON only."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.1,
    )
    raw = response.choices[0].message.content

    # Models often wrap JSON in Markdown fences or add a sentence around it.
    # Drop the fences, then keep only the outermost {...} span if there is one.
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start != -1 and end > start:
        cleaned = cleaned[start:end + 1]

    parsed = json.loads(cleaned)
    if not isinstance(parsed, dict):
        # Callers use dict methods on the result; fail here, where the caller
        # already handles exceptions, instead of later with AttributeError.
        raise ValueError(
            f"Expected a JSON object from the model, got {type(parsed).__name__}"
        )
    return parsed
41+
42+
def send_slack(pod_name, diagnosis, fix_applied, confidence):
    """Post a diagnosis summary to the Slack webhook in ``SLACK_WEBHOOK``.

    Delivery is best-effort: every failure is logged and nothing is raised,
    so a Slack outage cannot break alert handling.

    Args:
        pod_name: Pod name shown in the attachment title.
        diagnosis: Mapping read for the ``diagnosis``, ``root_cause`` and
            ``fix`` keys; missing keys fall back to placeholder text. A
            non-mapping value is treated as empty.
        fix_applied: Truthy selects the green "auto-fixed" styling, falsy the
            red "needs manual review" styling.
        confidence: Fraction (e.g. ``0.95``) rendered as a percentage.
            Values that cannot be converted to a finite number render as 0.

    Returns:
        None.
    """
    if fix_applied:
        color, status = "#36a64f", "✅ Auto-fixed by AI"
    else:
        color, status = "#ff0000", "🚨 Needs manual review"

    # The confidence ultimately comes from model output, so it may be None,
    # a string, NaN or infinite. round() also avoids float truncation:
    # int(0.29 * 100) is 28, round(0.29 * 100) is 29.
    try:
        percent = round(float(confidence) * 100)
    except (TypeError, ValueError, OverflowError):
        percent = 0

    details = diagnosis if isinstance(diagnosis, dict) else {}

    message = {
        "attachments": [{
            "color": color,
            "title": f"🤖 AI Healer — {pod_name} ",
            "fields": [
                {"title": "Status", "value": status, "short": True},
                {"title": "Confidence", "value": f"{percent} %", "short": True},
                {"title": "Diagnosis", "value": details.get("diagnosis", "Unknown"), "short": False},
                {"title": "Root Cause", "value": details.get("root_cause", "Unknown"), "short": False},
                {"title": "Action", "value": details.get("fix", "None"), "short": False},
            ],
            "footer": "AI DevOps Platform",
        }]
    }

    if not SLACK_WEBHOOK:
        # Without a URL the POST can only fail; say why in plain words.
        logger.error("Slack failed: SLACK_WEBHOOK_URL is not set")
        return

    try:
        # A timeout keeps an unresponsive Slack endpoint from blocking the
        # caller indefinitely; requests has no timeout by default.
        response = requests.post(SLACK_WEBHOOK, json=message, timeout=10)
        # A 4xx/5xx reply means the message was not accepted.
        response.raise_for_status()
    except Exception as e:
        # Deliberately broad: notification is best-effort at this boundary.
        logger.error(f"Slack failed: {e} ")
    else:
        logger.info("Slack notification sent")
70+
71+
@app.route('/health')
def health():
    """Report that the service is up.

    Returns:
        A ``(response, status)`` pair: a JSON body with ``status`` set to
        ``"healthy"`` and ``service`` set to ``"ai-healer"``, and HTTP 200.
    """
    body = jsonify(status="healthy", service="ai-healer")
    return body, 200
75+
76+
@app.route('/webhook', methods=['POST'])
def handle_alert():
    """Handle an alert webhook: diagnose each alert and notify Slack.

    The body is expected to be a JSON object with an ``alerts`` list whose
    entries carry a ``labels`` mapping with ``pod`` and ``namespace`` keys
    (presumably an Alertmanager-style payload; confirm against the sender).
    For every alert the pod is sent to ``ask_gpt4`` and the outcome is
    forwarded to ``send_slack``. No remediation is performed here, so every
    alert is reported with ``fix_applied=False``.

    Returns:
        ``({"status": "processed"}, 200)`` once all alerts were handled, or
        ``({"error": ...}, 400)`` when the body is missing, is not JSON, or
        does not have the expected shape.
    """
    # silent=True makes an unparsable or non-JSON body yield None, so the
    # client gets this handler's JSON 400 instead of a framework error page.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No data"}), 400
    if not isinstance(data, dict):
        return jsonify({"error": "Expected a JSON object"}), 400

    # "alerts": null is treated like a missing key.
    alerts = data.get('alerts') or []
    if not isinstance(alerts, list):
        return jsonify({"error": "'alerts' must be a list"}), 400

    for alert in alerts:
        if not isinstance(alert, dict):
            logger.warning("Skipping malformed alert entry: %r", alert)
            continue

        labels = alert.get('labels')
        if not isinstance(labels, dict):
            labels = {}
        pod_name = labels.get('pod', 'unknown-pod')
        namespace = labels.get('namespace', 'default')

        logger.info(f"Processing alert for pod: {pod_name} ")

        # Simulate logs since we are running locally
        logs = f"Pod {pod_name} in namespace {namespace} crashed unexpectedly."

        try:
            diagnosis = ask_gpt4(pod_name, logs)
            if not isinstance(diagnosis, dict):
                raise ValueError(
                    f"expected a JSON object, got {type(diagnosis).__name__}"
                )
        except Exception as e:
            # Any failure of the AI step degrades to a manual-review notice
            # rather than failing the whole webhook request.
            logger.error(f"GPT call failed: {e} ")
            diagnosis = {
                "diagnosis": "AI analysis failed",
                "root_cause": str(e),
                "fix": "manual_review",
                "confidence": 0.0,
            }

        # The confidence is model output: coerce it to a float in [0, 1] so
        # formatting it downstream cannot raise.
        try:
            confidence = float(diagnosis.get('confidence', 0.0))
        except (TypeError, ValueError):
            confidence = 0.0
        if confidence != confidence:  # NaN is the only value unequal to itself
            confidence = 0.0
        confidence = min(max(confidence, 0.0), 1.0)

        # No automatic fix is attempted, regardless of the suggested "fix".
        fix_applied = False

        send_slack(pod_name, diagnosis, fix_applied, confidence)

    return jsonify({"status": "processed"}), 200
110+
111+
if __name__ == "__main__":
    # Direct execution only: start Flask's built-in server, listening on
    # every network interface at port 5010.
    listen_options = {"host": "0.0.0.0", "port": 5010}
    app.run(**listen_options)
0 commit comments