# services/llm/openai_service.py
"""
OpenAI service implementation
"""

import json
from typing import Dict, List, Optional

from openai import OpenAI

from .base import LLMService
from config.api_keys import APIKeyManager
from utils.logger import setup_logger


class OpenAIService(LLMService):
    """LLMService implementation that talks to the OpenAI chat-completions API."""

    def __init__(
        self,
        model: str = "gpt-4o-mini",
        temperature: float = 0.3,
        max_tokens: int = 16000,
    ):
        """Create the OpenAI client and store the generation settings.

        Args:
            model: Model name sent with every chat-completions request.
            temperature: Sampling temperature sent with every request.
            max_tokens: ``max_tokens`` value sent with every request.

        Raises:
            ValueError: If ``APIKeyManager.get_openai_key()`` returns a falsy
                value.
        """
        api_key = APIKeyManager.get_openai_key()
        if not api_key:
            raise ValueError(
                "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."
            )
        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.logger = setup_logger("openai")

    def generate_text(self, prompt: str) -> Optional[str]:
        """Send ``prompt`` as a single user message and return the reply text.

        The prompt and the response are both written to the service logger.

        Args:
            prompt: Text sent as the only (user) message of the conversation.

        Returns:
            The content of the first choice, or ``None`` if the API call (or
            reading its result) raised; the error is logged and printed.
        """
        self.logger.info(f"--- PROMPT ---\n{prompt}")
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            response_content = response.choices[0].message.content
            self.logger.info(f"--- RESPONSE ---\n{response_content}")
            return response_content
        except Exception as e:
            # Best-effort boundary: callers receive None instead of an exception.
            self.logger.error(f"Error in OpenAI API call: {e}")
            print(f"Error in OpenAI API call: {e}")
            return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove an optional Markdown code fence (```json ... ```) around ``text``."""
        # str.strip(chars) treats its argument as a *set of characters*, so
        # strip("```json") would also eat leading/trailing 'j', 's', 'o', 'n'
        # from the payload itself. Remove the fence as whole tokens instead.
        cleaned = text.strip().strip("'").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # Drop the optional language tag that follows the opening fence.
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def get_similarity_scores(
        self, texts_pairs: Dict[str, List[str]]
    ) -> Optional[List[float]]:
        """Ask the model for similarity scores for a table of text pairs.

        ``texts_pairs`` is serialised with ``json.dumps`` and sent as the user
        message, together with a system prompt requesting a JSON list of
        numbers.

        Args:
            texts_pairs: JSON-serialisable mapping describing the text pairs.

        Returns:
            The parsed list when the reply is a JSON list, or the value under
            ``"similarity_scores"`` when the reply is a JSON object with that
            key. ``None`` if the API call fails or the reply is empty, not
            valid JSON, or has any other shape; the error is logged and
            printed.

        Raises:
            TypeError: Propagated from ``json.dumps`` when ``texts_pairs`` is
                not JSON-serialisable (serialisation happens before the
                guarded section).
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )
        request_payload = json.dumps(texts_pairs)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": request_payload},
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens,
            )
            response_content = response.choices[0].message.content
            if response_content is None:
                # Fail with a clear message instead of an AttributeError below.
                raise ValueError("Empty response content")
            cleaned_response = self._strip_code_fence(response_content)
            try:
                scores = json.loads(cleaned_response)
            except json.JSONDecodeError as err:
                raise ValueError("Could not decode response as JSON") from err
            if isinstance(scores, dict) and "similarity_scores" in scores:
                return scores["similarity_scores"]
            if isinstance(scores, list):
                return scores
            raise ValueError("Unexpected response format")
        except Exception as e:
            # Best-effort boundary, consistent with generate_text: log, print,
            # and hand None back to the caller.
            self.logger.error(f"Error in OpenAI similarity calculation: {e}")
            print(f"Error in OpenAI similarity calculation: {e}")
            return None