""" LLM Client Abstraction Layer Supports multiple LLM providers with easy swapping """ from abc import ABC, abstractmethod from typing import List, Dict, Optional import requests import json import os class LLMClient(ABC): """Abstract base class for LLM clients""" @abstractmethod def chat(self, messages: List[Dict[str, str]], context: Optional[str] = None) -> str: """ Send messages to LLM and get response Args: messages: List of {"role": "user"/"assistant", "content": "text"} context: Optional document context to include Returns: LLM response text """ pass @abstractmethod def is_available(self) -> bool: """Check if LLM service is available""" pass class OllamaClient(LLMClient): """Ollama LLM client (local deployment)""" def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3.2", timeout: int = 120): """ Initialize Ollama client Args: base_url: Ollama server URL model: Model name (e.g., llama3.2, mistral, codellama) timeout: Request timeout in seconds """ self.base_url = base_url.rstrip('/') self.model = model self.timeout = timeout def chat(self, messages: List[Dict[str, str]], context: Optional[str] = None) -> str: """Send chat request to Ollama""" # Build prompt with context if provided formatted_messages = [] if context: # Add system message with document context formatted_messages.append({ "role": "system", "content": f"You are a helpful assistant. Use the following document content to answer questions:\n\n{context}" }) # Add conversation history formatted_messages.extend(messages) # Call Ollama API try: response = requests.post( f"{self.base_url}/api/chat", json={ "model": self.model, "messages": formatted_messages, "stream": False }, timeout=self.timeout ) response.raise_for_status() result = response.json() return result["message"]["content"] except requests.exceptions.RequestException as e: raise Exception(f"Ollama API error: {str(e)}") def chat_stream(self, messages: List[Dict[str, str]], context: Optional[str] = None): """Send chat request to Ollama with streaming response""" # Build prompt with context if provided formatted_messages = [] if context: # Add system message with document context formatted_messages.append({ "role": "system", "content": f"You are a helpful assistant. 
class OpenAIClient(LLMClient):
    """OpenAI API client (for easy swapping)."""

    def __init__(self, api_key: str, model: str = "gpt-4", timeout: int = 120):
        """
        Initialize the OpenAI client.

        Args:
            api_key: OpenAI API key
            model: Model name (e.g., gpt-4, gpt-3.5-turbo)
            timeout: Request timeout in seconds
        """
        self.api_key = api_key
        self.model = model
        self.timeout = timeout

    def chat(self, messages: List[Dict[str, str]], context: Optional[str] = None) -> str:
        """Send a chat request to OpenAI."""
        formatted_messages = []
        if context:
            formatted_messages.append({
                "role": "system",
                "content": (
                    "You are a helpful assistant. Use the following document "
                    f"content to answer questions:\n\n{context}"
                )
            })
        formatted_messages.extend(messages)

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": self.model,
                    "messages": formatted_messages
                },
                timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
            return result["choices"][0]["message"]["content"]
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"OpenAI API error: {e}") from e

    def is_available(self) -> bool:
        """Check whether the API key is valid."""
        try:
            response = requests.get(
                "https://api.openai.com/v1/models",
                headers={"Authorization": f"Bearer {self.api_key}"},
                timeout=5
            )
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False


class AnthropicClient(LLMClient):
    """Anthropic Claude API client (for easy swapping)."""

    def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022", timeout: int = 120):
        """
        Initialize the Anthropic client.

        Args:
            api_key: Anthropic API key
            model: Model name
            timeout: Request timeout in seconds
        """
        self.api_key = api_key
        self.model = model
        self.timeout = timeout

    def chat(self, messages: List[Dict[str, str]], context: Optional[str] = None) -> str:
        """Send a chat request to Anthropic."""
        # Anthropic takes the system prompt as a top-level field, not a message
        formatted_messages = []
        system_prompt = None
        if context:
            system_prompt = (
                "You are a helpful assistant. Use the following document "
                f"content to answer questions:\n\n{context}"
            )
        formatted_messages.extend(messages)

        try:
            payload = {
                "model": self.model,
                "messages": formatted_messages,
                "max_tokens": 4096
            }
            if system_prompt:
                payload["system"] = system_prompt

            response = requests.post(
                "https://api.anthropic.com/v1/messages",
                headers={
                    "x-api-key": self.api_key,
                    "anthropic-version": "2023-06-01",
                    "Content-Type": "application/json"
                },
                json=payload,
                timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
            return result["content"][0]["text"]
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Anthropic API error: {e}") from e

    def is_available(self) -> bool:
        """Check whether an API key is set."""
        # Anthropic has no lightweight health-check endpoint, so just verify
        # that a key is present
        return bool(self.api_key)
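
# Extension sketch (illustrative only; EchoClient is hypothetical, not part of
# this module): any additional provider can be plugged in by subclassing
# LLMClient and implementing the two abstract methods:
#
#     class EchoClient(LLMClient):
#         """Trivial offline client, handy for tests."""
#         def chat(self, messages, context=None):
#             return messages[-1]["content"] if messages else ""
#         def is_available(self):
#             return True
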
def create_llm_client(provider: str = "ollama", **kwargs) -> LLMClient:
    """
    Factory function to create an LLM client.

    Args:
        provider: LLM provider name ("ollama", "openai", "anthropic")
        **kwargs: Provider-specific configuration

    Returns:
        LLMClient instance

    Example:
        # Ollama (default)
        client = create_llm_client("ollama", model="llama3.2")

        # OpenAI
        client = create_llm_client("openai", api_key="sk-...", model="gpt-4")

        # Anthropic
        client = create_llm_client("anthropic", api_key="sk-ant-...",
                                   model="claude-3-5-sonnet-20241022")
    """
    provider_name = provider.lower()

    if provider_name == "ollama":
        return OllamaClient(
            base_url=kwargs.get("base_url", os.getenv("OLLAMA_URL", "http://localhost:11434")),
            model=kwargs.get("model", os.getenv("OLLAMA_MODEL", "llama3.2")),
            timeout=kwargs.get("timeout", 120)
        )
    elif provider_name == "openai":
        api_key = kwargs.get("api_key", os.getenv("OPENAI_API_KEY"))
        if not api_key:
            raise ValueError("OpenAI API key required")
        return OpenAIClient(
            api_key=api_key,
            model=kwargs.get("model", "gpt-4"),
            timeout=kwargs.get("timeout", 120)
        )
    elif provider_name == "anthropic":
        api_key = kwargs.get("api_key", os.getenv("ANTHROPIC_API_KEY"))
        if not api_key:
            raise ValueError("Anthropic API key required")
        return AnthropicClient(
            api_key=api_key,
            model=kwargs.get("model", "claude-3-5-sonnet-20241022"),
            timeout=kwargs.get("timeout", 120)
        )
    else:
        raise ValueError(f"Unknown LLM provider: {provider}")
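
# Minimal smoke test (a sketch, assuming a local Ollama install with the
# default model pulled; swap the provider and kwargs to try OpenAI or Anthropic):
if __name__ == "__main__":
    client = create_llm_client("ollama")
    if client.is_available():
        print(client.chat([{"role": "user", "content": "Say hello in one sentence."}]))
    else:
        print("LLM service is not available")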