104 lines
3.5 KiB
Python
104 lines
3.5 KiB
Python
import os, sys, time, json, signal, logging, traceback
|
||
from typing import List
|
||
import requests
|
||
from azure.storage.queue import QueueClient
|
||
|
||
# Root logger setup, done once at import time: INFO and above, with
# timestamp, level and message separated by " | ".
logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
|
||
|
||
# Cooperative shutdown flag; flipped to True by the signal handler below.
STOP = False


def _handle_stop(*_signal_args):
    """Signal handler: request shutdown by setting the module-level STOP flag.

    The arguments supplied by the signal machinery are accepted and ignored.
    """
    global STOP
    STOP = True


# Route both termination signals (SIGTERM first, then SIGINT) to the handler.
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, _handle_stop)
|
||
|
||
# --- config via env ---


def _env_str(name, default):
    """Return environment variable ``name``, or ``default`` when unset or blank.

    ``os.getenv(name, default)`` only applies the default when the variable is
    absent; a variable that is set but empty would otherwise yield ``""``.
    """
    return os.getenv(name) or default


def _env_int(name, default, lo=None, hi=None):
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or blank.
        lo: Optional inclusive lower bound.
        hi: Optional inclusive upper bound.

    Returns:
        The parsed integer, clamped into ``[lo, hi]`` when bounds are given
        (a warning is logged if clamping changed the value).

    A value that is present but not an integer logs an error and exits with
    status 1, the same way a missing connection string is handled below,
    instead of surfacing as a bare ``ValueError`` traceback.
    """
    raw = (os.getenv(name) or "").strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        logging.error("%s must be an integer, got %r", name, raw)
        sys.exit(1)

    bounded = value
    if lo is not None and bounded < lo:
        bounded = lo
    if hi is not None and bounded > hi:
        bounded = hi
    if bounded != value:
        logging.warning(
            "%s=%s is outside the allowed range [%s, %s]; using %s",
            name, value, lo, hi, bounded,
        )
    return bounded


AZURE_STORAGE_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
QUEUE_NAME = _env_str("QUEUE_NAME", "log-chunks")
ANALYZER_URL = _env_str("ANALYZER_URL", "http://agent-api:8080/analyze")  # local compose default
# A negative interval would make time.sleep() raise, so floor it at 0.
POLL_INTERVAL_SEC = _env_int("POLL_INTERVAL_SEC", 5, lo=0)
# The queue service accepts between 1 and 32 messages per receive request.
MAX_DEQUEUE = _env_int("MAX_DEQUEUE", 16, lo=1, hi=32)
# Seconds; the service accepts 1 second up to 7 days on receive.
VISIBILITY_TIMEOUT = _env_int("VISIBILITY_TIMEOUT", 120, lo=1, hi=7 * 24 * 3600)
# Seconds. NOTE(review): the default equals VISIBILITY_TIMEOUT and main()
# handles a batch sequentially, so a slow analyzer can let messages become
# visible again before they are deleted - consider a larger visibility timeout.
HTTP_TIMEOUT = _env_int("HTTP_TIMEOUT", 120, lo=1)

if not AZURE_STORAGE_CONNECTION_STRING:
    logging.error("AZURE_STORAGE_CONNECTION_STRING missing")
    sys.exit(1)
|
||
|
||
def process_message(text: str) -> bool:
    """Forward one queue message to the analyzer service.

    The message body is parsed as JSON and POSTed to ``ANALYZER_URL`` with a
    timeout of ``HTTP_TIMEOUT`` seconds.

    Args:
        text: Raw message body as handed over by the queue client.

    Returns:
        True if the message is finished with and should be deleted: either
        the analyzer answered with a 2xx status, or the body cannot be parsed
        as JSON (a retry could never succeed, so it is dropped rather than
        left to poison the queue). False if the analyzer call raised or
        answered with a non-2xx status; the message is then left to reappear
        for a retry.
    """
    try:
        payload = json.loads(text)
    except (ValueError, TypeError, RecursionError):
        # ValueError covers json.JSONDecodeError and undecodable bytes,
        # TypeError a body that is not text at all (e.g. None), and
        # RecursionError pathologically nested input.
        # Only slice real text: slicing None here would raise inside the
        # handler and the bad message would be retried instead of dropped.
        if isinstance(text, (str, bytes, bytearray)):
            preview = text[:500]
        else:
            preview = repr(text)[:500]
        logging.warning("Message is not valid JSON; ignoring: %s", preview)
        return True  # delete bad messages to avoid poison

    try:
        r = requests.post(ANALYZER_URL, json=payload, timeout=HTTP_TIMEOUT)
    except Exception as e:
        # Deliberately broad: this function reports failure through its
        # return value, so any transport error just means "retry later".
        logging.error("Analyzer call failed: %s", e)
        return False

    if 200 <= r.status_code < 300:
        logging.info("Analyzer OK: %s", r.text[:500])
        return True

    logging.warning("Analyzer HTTP %s: %s", r.status_code, r.text[:500])
    return False
|
||
|
||
def _receive_batch(q):
    """Dequeue at most one page (``MAX_DEQUEUE`` messages) from ``q``.

    Returns a list of messages, empty when the queue has nothing visible.
    """
    pages = q.receive_messages(
        messages_per_page=MAX_DEQUEUE,
        visibility_timeout=VISIBILITY_TIMEOUT,
    ).by_page()
    # Iterating the pager itself (or list()-ing it) keeps requesting pages
    # until the queue is empty, which would dequeue a whole backlog at once
    # and start every message's visibility timeout before any is processed.
    # Take just the first page; the caller's loop comes back for the next.
    return list(next(pages, ()))


def _handle_message(q, m):
    """Process one message; delete it on success, otherwise leave it for retry."""
    try:
        # The content is handed over as-is.
        # NOTE(review): SDK v12 clients only base64-decode when a
        # message_decode_policy is configured - confirm producers send plain text.
        ok = process_message(m.content)
    except Exception as ex:
        logging.error("Error processing message: %s\n%s", ex, traceback.format_exc())
        ok = False

    if not ok:
        # Don't delete; it will become visible again after VISIBILITY_TIMEOUT
        logging.info("Kept message for retry id=%s", m.id)
        return

    try:
        q.delete_message(m)
        logging.info("Deleted message id=%s", m.id)
    except Exception as de:
        logging.warning("Delete failed (will reappear later): %s", de)


def main():
    """Run the worker loop until the STOP flag is set.

    Connects to the queue named ``QUEUE_NAME``, tries to create it, then
    repeatedly receives one batch of messages and passes each message body to
    ``process_message``. Messages it reports as handled are deleted; the
    others are left in the queue. When no messages are available, or when a
    receive cycle raises, the loop sleeps ``POLL_INTERVAL_SEC`` seconds
    before trying again.
    """
    logging.info("queue-worker starting; queue=%s analyzer=%s", QUEUE_NAME, ANALYZER_URL)
    q = QueueClient.from_connection_string(
        conn_str=AZURE_STORAGE_CONNECTION_STRING,
        queue_name=QUEUE_NAME,
    )

    # create queue if missing
    try:
        q.create_queue()
    except Exception as ce:
        # Best effort: the expected failure is that the queue already exists.
        # Log it instead of hiding it; a real connectivity or auth problem
        # will show up again on the first receive below.
        logging.info("create_queue skipped: %s", ce)

    while not STOP:
        try:
            msgs = _receive_batch(q)
            if not msgs:
                time.sleep(POLL_INTERVAL_SEC)
                continue

            for done, m in enumerate(msgs):
                if STOP:
                    # Stop between messages so shutdown does not wait for the
                    # whole batch; untouched messages become visible again
                    # once their visibility timeout expires.
                    logging.info(
                        "Stop requested; leaving %d received message(s) for retry",
                        len(msgs) - done,
                    )
                    break
                _handle_message(q, m)

        except Exception as loop_ex:
            # logging.exception keeps the traceback, which the plain message loses.
            logging.exception("Receive loop error: %s", loop_ex)
            time.sleep(POLL_INTERVAL_SEC)

    logging.info("queue-worker stopping gracefully")
|
||
|
||
# Script entry point: run the worker only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|