# flask_app.py
import os, tempfile, time, gzip, json, pathlib, shutil, uuid, datetime as dt
from typing import Optional
from urllib.parse import urlparse

from flask import Flask, request, jsonify
from dotenv import load_dotenv

# Load .env locally (App Service uses App Settings instead)
load_dotenv(os.getenv("ENV_FILE", ".env"))

# Agent + email
from agent_runner import build_agent
from notify import send_email

# Azure SDKs (guarded imports so we don't crash at boot)
try:
    from azure.storage.blob import BlobServiceClient, ContentSettings
except Exception:
    BlobServiceClient = None
    ContentSettings = None

try:
    from azure.storage.queue import QueueClient
except Exception:
    QueueClient = None

app = Flask(__name__)
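
# Configuration read from the environment (App Settings in Azure, .env locally),
# as used by this module:
#   AZURE_STORAGE_CONNECTION_STRING  - required for blob/queue access
#   AZURE_STORAGE_QUEUE_NAME         - queue for chunk messages (default "log-chunks")
#   AZURE_STORAGE_CONTAINER          - container for HEC chunks (default "bank-logs")
#   CHUNK_DIR                        - local directory the agent scans (default "./out")
#   ENV_FILE                         - alternate .env path (default ".env")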


# -------- Helpers --------
# Return annotations are quoted because the names are None when the SDK import failed.
def _blob_client() -> "BlobServiceClient":
    if not BlobServiceClient:
        raise RuntimeError("azure-storage-blob not installed")
    cs = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    if not cs:
        raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
    return BlobServiceClient.from_connection_string(cs)


def _queue_client() -> "QueueClient":
    if not QueueClient:
        raise RuntimeError("azure-storage-queue not installed")
    cs = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    if not cs:
        raise RuntimeError("AZURE_STORAGE_CONNECTION_STRING not set")
    qname = os.getenv("AZURE_STORAGE_QUEUE_NAME", "log-chunks")
    qc = QueueClient.from_connection_string(cs, qname)
    try:
        qc.create_queue()
    except Exception:
        pass  # queue already exists
    return qc


def _upload_chunk_blob(container: str, raw_bytes: bytes, compressed: bool = True) -> str:
    svc = _blob_client()
    cc = svc.get_container_client(container)
    try:
        cc.create_container()
    except Exception:
        pass  # container already exists
    ext = "jsonl.gz" if compressed else "jsonl"
    # folder scheme matches the poller: intesa/YYYY/MM/DD/HH (UTC)
    prefix = f"intesa/{dt.datetime.now(dt.timezone.utc).strftime('%Y/%m/%d/%H')}"
    blob_name = f"{prefix}/hec_{uuid.uuid4().hex[:8]}.{ext}"
    data = gzip.compress(raw_bytes) if compressed else raw_bytes
    settings = ContentSettings(
        content_type="application/json",
        content_encoding=("gzip" if compressed else None),
    )
    bc = cc.get_blob_client(blob_name)
    bc.upload_blob(data, overwrite=True, content_settings=settings)
    return blob_name


def _download_blob_to_dir(container: str, blob_name: str, outdir: str) -> str:
    svc = _blob_client()
    blob = svc.get_blob_client(container=container, blob=blob_name)
    data = blob.download_blob().readall()
    fname = os.path.basename(blob_name)
    path = os.path.join(outdir, fname)
    with open(path, "wb") as f:
        f.write(data)
    return path


def _download_sas_to_dir(sas_url: str, outdir: str) -> str:
    if not BlobServiceClient:
        # ultra-light fallback when the SDK is unavailable
        import urllib.request
        data = urllib.request.urlopen(sas_url, timeout=30).read()
    else:
        from azure.storage.blob import BlobClient
        blob = BlobClient.from_blob_url(sas_url)
        data = blob.download_blob().readall()
    # Check the URL path, not the full URL: a SAS URL ends with its query
    # string (?sig=...), so endswith(".gz") on the raw URL never matches.
    is_gz = urlparse(sas_url).path.endswith(".gz")
    name = "chunk_from_sas.jsonl.gz" if is_gz else "chunk_from_sas.jsonl"
    path = os.path.join(outdir, name)
    with open(path, "wb") as f:
        f.write(data)
    return path


# -------- Routes --------
@app.get("/health")
def health():
    return {"status": "ok"}, 200
@app.post("/analyze")
def analyze():
"""
POST JSON:
{
"question": "...optional custom question...",
"email": {"send": true, "to": "override@example.com"},
"blob": {
"container": "bank-logs", "blob_name": "intesa/2025/09/26/..chunk.jsonl[.gz]"
// OR
"sas_url": "https://.../chunk.jsonl.gz?sig=..."
}
}
"""
t0 = time.time()
payload = request.get_json(force=True, silent=True) or {}
question = payload.get("question") or (
"Scan the latest chunks. List any anomalies (rejected EUR >= 10000, vop_no_match, invalid IBAN/BIC). "
"Give a brief summary and next steps."
)
prev_chunk_dir = os.getenv("CHUNK_DIR", "./out")
tmp_dir = None
try:
blob_req = payload.get("blob")
if blob_req:
tmp_dir = tempfile.mkdtemp(prefix="agent_blob_")
if blob_req.get("sas_url"):
_download_sas_to_dir(blob_req["sas_url"], tmp_dir)
elif blob_req.get("container") and blob_req.get("blob_name"):
_download_blob_to_dir(blob_req["container"], blob_req["blob_name"], tmp_dir)
else:
return jsonify({"ok": False, "error": "blob requires sas_url OR (container + blob_name)"}), 400
os.environ["CHUNK_DIR"] = tmp_dir
agent = build_agent()
out = agent.invoke({"input": question, "chat_history": []})
result = out.get("output", "")
email_cfg = payload.get("email") or {}
if email_cfg.get("send"):
to_addr = email_cfg.get("to")
send_email(subject="[Intesa Logs] Agent Report", body_text=result, to_addr=to_addr)
return jsonify({"ok": True, "duration_sec": round(time.time() - t0, 3), "result": result}), 200
except Exception as e:
return jsonify({"ok": False, "error": str(e)}), 500
finally:
os.environ["CHUNK_DIR"] = prev_chunk_dir


# HEC-style collector: writes a one-line-per-event JSONL blob to Storage,
# enqueues a message for the worker, and returns 200 OK (like Splunk HEC).
@app.post("/collect")
@app.post("/services/collector/event")  # alias for Splunk HEC curl compatibility
def collect_hec():
    try:
        container = os.getenv("AZURE_STORAGE_CONTAINER", "bank-logs")
        # Accept either a single JSON object or a list; write one line per event
        body = request.get_json(force=True, silent=True)
        if body is None:
            return jsonify({"ok": False, "error": "invalid JSON"}), 400
        lines = []
        if isinstance(body, list):
            for item in body:
                lines.append(json.dumps(item, separators=(",", ":")))
        else:
            lines.append(json.dumps(body, separators=(",", ":")))
        raw = ("\n".join(lines) + "\n").encode("utf-8")
        blob_name = _upload_chunk_blob(container, raw, compressed=True)
        # Enqueue a message the queue worker understands
        msg = {
            "blob": {"container": container, "blob_name": blob_name},
            "email": {"send": False},  # flip to True for emails by default
        }
        qc = _queue_client()
        qc.send_message(json.dumps(msg, separators=(",", ":")))
        return jsonify({"ok": True, "queued": True, "blob_name": blob_name}), 200
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500