Agregado de librerias

2025-02-09 13:22:25 +01:00 · 2025-02-09 13:22:25 +01:00 · 6fca251249
parent caa983c8da
commit 6fca251249
22 changed files with 1313 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,27 @@
 # Python cache files
 __pycache__/
 *.py[cod]
 # Environment directories
 venv/
 env/
 .env/
 # IDE configurations
 .vscode/
 .idea/
 # Logs and data files
 data/log.txt
 data/data.json
 # Allow script groups and their configurations
 !backend/script_groups/
 !backend/script_groups/*/
 !backend/script_groups/*/*.py
 !backend/script_groups/*/schema.json
 !backend/script_groups/*/esquema.json
 !backend/script_groups/*/description.json
 # But ignore working directory configurations
 backend/script_groups/*/work_dir.json
--- a/commands.sh
+++ b/commands.sh
@ -0,0 +1,3 @@
 # Create a .gitkeep in each script-group directory.
 # Only the example_group directory is handled by the commands below.
 mkdir -p backend/script_groups/example_group
 touch backend/script_groups/example_group/.gitkeep
--- a/services/excel/excel_service.py
+++ b/services/excel/excel_service.py
@ -0,0 +1,241 @@
 # services/excel/excel_service.py
 """
 Excel file handling service with retry and formatting capabilities
 """
 import pandas as pd
 import time
 from typing import Optional, Union, Dict, Any
 from pathlib import Path
 import openpyxl
 from openpyxl.utils import get_column_letter
 from openpyxl.styles import PatternFill, Alignment, Font
 from openpyxl.worksheet.worksheet import Worksheet
 class ExcelService:
    """Service for handling Excel files with advanced features"""
    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
        self.max_retries = max_retries
        self.retry_delay = retry_delay
    def read_excel(
        self,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        **kwargs
    ) -> pd.DataFrame:
        """
        Read an Excel sheet into a DataFrame, retrying on failure
        Every non-null cell is passed through _clean_special_chars. When
        openpyxl rejects the workbook because of an auto-filter definition,
        the sheet is re-read from a temporary copy with the filters removed.
        Args:
            file_path: Path to Excel file
            sheet_name: Name of sheet to read
            **kwargs: Additional arguments for pd.read_excel
        Returns:
            DataFrame with the cleaned Excel content
        Raises:
            Exception: If the file could not be read after max_retries
                attempts; the last underlying error is chained as the cause
        """
        last_error: Optional[Exception] = None
        for attempt in range(1, self.max_retries + 1):
            try:
                # Try to read the file with openpyxl
                df = pd.read_excel(file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
                df = self._clean_dataframe(df)
                print(f"Archivo leído y limpiado exitosamente: {file_path}")
                return df
            except ValueError as ve:
                last_error = ve
                if "must be either numerical or a string containing a wildcard" in str(ve):
                    print(f"Error al leer el archivo: {ve}")
                    print("Intentando eliminar filtros y leer el archivo nuevamente...")
                    try:
                        return self._read_without_filters(file_path, sheet_name, **kwargs)
                    except Exception as e:
                        last_error = e
                        print(f"Error al intentar eliminar filtros y leer el archivo: {e}")
                else:
                    print(f"Error de valor: {ve}")
            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")
            # No point in waiting once the last attempt has already failed
            if attempt < self.max_retries:
                time.sleep(self.retry_delay)
        raise Exception(
            f"No se pudo leer el archivo después de {self.max_retries} intentos."
        ) from last_error
    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply _clean_special_chars to every non-null cell of df (in place) and return it"""
        for col in df.columns:
            df[col] = df[col].apply(
                lambda x: self._clean_special_chars(x) if pd.notna(x) else x
            )
        return df
    def _read_without_filters(
        self,
        file_path: Union[str, Path],
        sheet_name: str,
        **kwargs
    ) -> pd.DataFrame:
        """Read sheet_name from a temporary copy of the workbook whose auto-filters were removed"""
        temp_file = Path(str(file_path) + "_temp.xlsx")
        try:
            wb = openpyxl.load_workbook(filename=file_path)
            try:
                # Clear filters on every worksheet: the requested sheet is
                # not necessarily the active one
                for sheet in wb.worksheets:
                    if sheet.auto_filter:
                        sheet.auto_filter.ref = None
                wb.save(str(temp_file))
            finally:
                wb.close()
            df = pd.read_excel(temp_file, engine="openpyxl", sheet_name=sheet_name, **kwargs)
        finally:
            # Remove the temporary copy even when saving or re-reading failed
            if temp_file.exists():
                temp_file.unlink()
        return self._clean_dataframe(df)
    def save_excel(
        self,
        df: pd.DataFrame,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        format_options: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        """
        Save DataFrame to Excel with formatting, retrying on failure
        Args:
            df: DataFrame to save
            file_path: Path to save Excel file
            sheet_name: Name of sheet
            format_options: Dictionary with formatting options
                (see _format_worksheet); defaults are used when None
            **kwargs: Additional arguments for DataFrame.to_excel;
                index defaults to False unless given here
        Raises:
            Exception: If the file could not be written after max_retries
                attempts; the last underlying error is chained as the cause
        """
        if format_options is None:
            format_options = {}
        # Forward caller options to to_excel; index=False stays the default
        to_excel_kwargs = {"index": False, **kwargs}
        last_error: Optional[Exception] = None
        for attempt in range(1, self.max_retries + 1):
            try:
                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
                    # Save DataFrame
                    df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
                    # Apply formatting before the writer closes and saves
                    self._format_worksheet(
                        writer.sheets[sheet_name],
                        format_options
                    )
                print(f"Archivo guardado exitosamente en: {file_path}")
                return
            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")
            # No point in waiting once the last attempt has already failed
            if attempt < self.max_retries:
                time.sleep(self.retry_delay)
        raise Exception(
            f"No se pudo guardar el archivo después de {self.max_retries} intentos."
        ) from last_error
    def _format_worksheet(self, worksheet: Worksheet, options: Dict[str, Any]) -> None:
        """
        Apply freeze panes, column widths and header styling to a worksheet
        Args:
            worksheet: Worksheet to format
            options: Formatting options. Recognised keys and defaults:
                freeze_row (2), freeze_col (1), max_column_width (50),
                min_column_width (8), wrap_threshold (50), header_row (1),
                header_color ('F2F2F2')
        """
        # Freeze panes if specified
        freeze_row = options.get('freeze_row', 2)
        freeze_col = options.get('freeze_col', 1)
        if freeze_row or freeze_col:
            # A missing/zero coordinate falls back to 1 so that freezing only
            # rows (or only columns) still yields a valid cell reference
            freeze_cell = f"{get_column_letter(freeze_col or 1)}{freeze_row or 1}"
            worksheet.freeze_panes = freeze_cell
        # Auto-adjust column widths
        max_width = options.get('max_column_width', 50)
        min_width = options.get('min_column_width', 8)
        wrap_threshold = options.get('wrap_threshold', 50)
        for col in worksheet.columns:
            max_length = 0
            column = col[0].column_letter
            for cell in col:
                if cell.value is None:
                    continue
                text = str(cell.value)
                text_length = len(text)
                if text_length > wrap_threshold:
                    # Long text wraps, so only its longest word constrains the width
                    cell.alignment = Alignment(wrap_text=True, vertical='top')
                    # default=0 covers whitespace-only text, where split() is empty
                    longest_word = max((len(word) for word in text.split()), default=0)
                    text_length = min(wrap_threshold, longest_word)
                max_length = max(max_length, text_length)
            adjusted_width = min(max_width, max(min_width, max_length + 2))
            worksheet.column_dimensions[column].width = adjusted_width
        # Apply header styles
        header_row = options.get('header_row', 1)
        if header_row:
            header_color = options.get('header_color', 'F2F2F2')
            header_fill = PatternFill(
                start_color=header_color,
                end_color=header_color,
                fill_type='solid'
            )
            header_font = Font(bold=True)
            for cell in worksheet[header_row]:
                cell.fill = header_fill
                cell.font = header_font
    def _clean_special_chars(self, text: Any) -> Any:
        """Clean special characters and normalize line breaks"""
        if isinstance(text, str):
            # Normalize line breaks
            text = text.replace('\r\n', '\n').replace('\r', '\n')
            # Replace other special characters if needed
            return text
        return text
 # Example usage:
 """
 from services.excel.excel_service import ExcelService
 # Create service
 excel_service = ExcelService()
 # Read Excel file
 try:
    df = excel_service.read_excel("input.xlsx")
    print("Data loaded successfully")
    # Modify data...
    # Save with formatting
    format_options = {
        'freeze_row': 2,
        'freeze_col': 1,
        'max_column_width': 50,
        'min_column_width': 8,
        'wrap_threshold': 50,
        'header_color': 'E6E6E6'
    }
    excel_service.save_excel(
        df,
        "output.xlsx",
        format_options=format_options
    )
 except Exception as e:
    print(f"Error handling Excel file: {e}")
 """
--- a/services/language/base.py
+++ b/services/language/base.py
@ -0,0 +1,25 @@
 # services/language/base.py
 """
 Base class for language detection services
 """
 from abc import ABC, abstractmethod
 from typing import Optional, List, Dict, Tuple
 class LanguageDetectionService(ABC):
    """Interface that every language detection backend implements"""
    @abstractmethod
    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Detect the language of a single text
        Args:
            text: Text to analyze
        Returns:
            Tuple of (language_code, confidence_score)
        """
    @abstractmethod
    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Detect the language of several texts
        Args:
            texts: Texts to analyze
        Returns:
            List of tuples (language_code, confidence_score)
        """
--- a/services/language/langid_service.py
+++ b/services/language/langid_service.py
@ -0,0 +1,52 @@
 # services/language/langid_service.py
 """
 Language detection service using langid
 """
 from typing import List, Tuple, Optional, Set
 import langid
 from .base import LanguageDetectionService
 class LangIdService(LanguageDetectionService):
    """Language detection service backed by the langid library"""
    def __init__(self, allowed_languages: Optional[Set[str]] = None):
        """
        Initialize langid service
        Args:
            allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'})
                             If None, all languages supported by langid will be allowed
        """
        # NOTE(review): set_languages is called on the langid module itself,
        # so the restriction presumably applies to every user of langid in
        # this process, not just this instance - confirm.
        if allowed_languages:
            langid.set_languages(list(allowed_languages))
        self.allowed_languages = allowed_languages
    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Detect language of a text using langid
        Args:
            text: Text to analyze
        Returns:
            Tuple of (language_code, confidence_score). ("unknown", 0.0) is
            returned for non-string input, for text shorter than 3
            characters after stripping, and when langid raises.
        """
        # NOTE(review): the score is whatever langid.classify returns; it is
        # presumably not normalised to the 0-1 range - confirm before
        # comparing it against a probability threshold.
        # Non-string input (e.g. NaN from an empty spreadsheet cell) cannot
        # be classified; report it as unknown without going through the
        # error path.
        if not isinstance(text, str):
            return ("unknown", 0.0)
        stripped = text.strip()
        if len(stripped) < 3:
            return ("unknown", 0.0)
        try:
            lang, score = langid.classify(stripped)
            return (lang, score)
        except Exception as e:
            print(f"Error in language detection: {e}")
            return ("unknown", 0.0)
    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Detect language of multiple texts
        Args:
            texts: List of texts to analyze
        Returns:
            List of tuples (language_code, confidence_score), one per input
            text and in the same order
        """
        return [self.detect_language(text) for text in texts]
--- a/services/language/language_factory.py
+++ b/services/language/language_factory.py
@ -0,0 +1,33 @@
 # services/language/language_factory.py
 """
 Factory class for creating language detection services
 """
 from typing import Optional, Set
 from .langid_service import LangIdService
 class LanguageFactory:
    """Factory class for creating language detection service instances"""
    @staticmethod
    def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> 'LanguageDetectionService':
        """
        Create an instance of the specified language detection service
        Args:
            service_type: Type of language detection service ("langid", etc.);
                matched case-insensitively
            allowed_languages: Set of allowed language codes
            **kwargs: Additional arguments for service initialization
        Returns:
            LanguageDetectionService instance
        Raises:
            ValueError: If service_type is not recognized
        """
        services = {
            "langid": LangIdService,
            # Add other language detection services here
        }
        service_class = services.get(service_type.lower())
        if service_class is None:
            raise ValueError(f"Unknown language detection service type: {service_type}")
        return service_class(allowed_languages=allowed_languages, **kwargs)
--- a/services/language/language_utils.py
+++ b/services/language/language_utils.py
@ -0,0 +1,68 @@
 # services/language/language_utils.py
 """
 Utility functions for language detection and validation
 """
 from typing import Dict, Set
 class LanguageUtils:
    """Lookup helpers for the language codes known to the application"""
    # Short code -> (English language name, full locale code)
    LANGUAGE_CODES = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }
    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Return the language name for a short code, or 'Unknown' when the code is not listed"""
        entry = cls.LANGUAGE_CODES.get(code)
        if entry is None:
            return 'Unknown'
        return entry[0]
    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Return the full code (e.g., 'en-GB' for 'en'), or 'unknown' when the code is not listed"""
        entry = cls.LANGUAGE_CODES.get(short_code)
        if entry is None:
            return 'unknown'
        return entry[1]
    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Return the part before the first '-' (e.g., 'en' for 'en-GB'); codes without '-' are returned as is"""
        prefix, _separator, _region = full_code.partition('-')
        return prefix
    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Tell whether the short form of code is one of LANGUAGE_CODES"""
        return cls.get_short_code(code) in cls.LANGUAGE_CODES
    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Return the short codes of all listed languages as a new set"""
        return set(cls.LANGUAGE_CODES)
 # Example usage:
 """
 from services.language.language_factory import LanguageFactory
 from services.language.language_utils import LanguageUtils
 # Create language detection service with specific languages
 allowed_languages = LanguageUtils.get_available_languages()
 detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
 # Detect language of a text
 text = "Hello, how are you?"
 lang, confidence = detector.detect_language(text)
 print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
 print(f"Confidence: {confidence}")
 # Detect language of multiple texts
 texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
 results = detector.detect_batch(texts)
 for text, (lang, confidence) in zip(texts, results):
    print(f"Text: {text}")
    print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
    print(f"Confidence: {confidence}")
 """
--- a/services/llm/base.py
+++ b/services/llm/base.py
@ -0,0 +1,20 @@
 # services/llm/base.py
 """
 Base class for LLM services
 """
 from abc import ABC, abstractmethod
 import json
 from typing import List, Union, Dict, Any
 class LLMService(ABC):
    """Interface that every LLM backend implements"""
    @abstractmethod
    def generate_text(self, prompt: str) -> str:
        """
        Generate text based on a prompt
        Args:
            prompt: Prompt sent to the model
        Returns:
            Generated text
        """
    @abstractmethod
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Calculate similarity scores for pairs of texts
        Args:
            texts_pairs: Pairs of texts to compare
        Returns:
            List of similarity scores
        """
--- a/services/llm/batch_processor.py
+++ b/services/llm/batch_processor.py
@ -0,0 +1,228 @@
 # services/llm/batch_processor.py
 """
 Batch processing service for LLM operations
 """
 from typing import List, Dict, Any, Optional, Callable
 import json
 from dataclasses import dataclass
 import time
 from .base import LLMService
 from utils.progress_bar import ProgressBar
@dataclass
 class BatchConfig:
    """Configuration for batch processing"""
    # Number of items sent to the LLM per request
    batch_size: int = 20
    # Number of attempts made for each batch before giving up on it
    max_retries: int = 3
    # Seconds to sleep between attempts on the same batch
    retry_delay: int = 3
    # Optional sink for progress output and per-batch error messages
    progress_callback: Optional[Callable[[str], None]] = None
 class BatchProcessor:
    """
    Handles batch processing for LLM operations
    """
    def __init__(
        self,
        llm_service: LLMService,
        config: Optional[BatchConfig] = None
    ):
        self.llm_service = llm_service
        self.config = config or BatchConfig()
    def process_batch(
        self,
        items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """
        Process items in batches with consistent context
        Each batch is serialised to JSON together with the template and sent
        to the LLM service in a single prompt that starts with system_prompt.
        Args:
            items: List of dictionaries containing data to process
            system_prompt: System prompt for context
            template: Template string for formatting requests
            output_processor: Optional function to process LLM responses
        Returns:
            List of processed results, one per input item and in input
            order; items of a batch that failed on every attempt are None
        """
        results = []
        total_items = len(items)
        # Setup progress tracking
        progress = ProgressBar(
            total_items,
            "Processing batches:",
            "Complete"
        )
        if self.config.progress_callback:
            progress.set_output_callback(self.config.progress_callback)
        progress.start()
        # Process in batches
        for start_idx in range(0, total_items, self.config.batch_size):
            end_idx = min(start_idx + self.config.batch_size, total_items)
            batch_items = items[start_idx:end_idx]
            # Prepare batch request
            request_payload = json.dumps({
                "items": batch_items,
                "template": template
            })
            # LLMService.generate_text accepts a single prompt, so the system
            # prompt and the payload are joined into one string
            prompt = f"{system_prompt}\n\n{request_payload}"
            batch_results = None
            # Process batch with retries
            for attempt in range(self.config.max_retries):
                try:
                    response = self.llm_service.generate_text(prompt)
                    # Parse and process response
                    candidate = self._process_response(
                        response,
                        output_processor
                    )
                    if len(candidate) != len(batch_items):
                        raise ValueError(
                            "Response count doesn't match input count"
                        )
                    batch_results = candidate
                    break
                except Exception as e:
                    is_last_attempt = attempt >= self.config.max_retries - 1
                    if self.config.progress_callback:
                        if is_last_attempt:
                            self.config.progress_callback(
                                f"Error in batch {start_idx}-{end_idx}: {e}"
                            )
                        else:
                            self.config.progress_callback(
                                f"Error in batch {start_idx}-{end_idx}: {e}. Retrying..."
                            )
                    if not is_last_attempt:
                        time.sleep(self.config.retry_delay)
            # Keep results aligned with items even when the batch never succeeded
            if batch_results is None:
                batch_results = [None] * len(batch_items)
            results.extend(batch_results)
            # Update progress
            progress.update(end_idx)
        progress.finish()
        return results
    def _process_response(
        self,
        response: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """Process LLM response"""
        try:
            # Parse JSON response
            parsed = json.loads(response)
            # Apply custom processing if provided
            if output_processor:
                return [output_processor(item) for item in parsed]
            return parsed
        except json.JSONDecodeError:
            raise ValueError("Failed to parse LLM response as JSON")
 # Example specialized batch processor for translations
 class TranslationBatchProcessor(BatchProcessor):
    """Specialized batch processor for translations"""
    def translate_batch(
        self,
        texts: List[str],
        source_lang: str,
        target_lang: str
    ) -> List[str]:
        """
        Translate a batch of texts
        Args:
            texts: List of texts to translate
            source_lang: Source language code
            target_lang: Target language code
        Returns:
            List of translated texts, as returned by process_batch
        """
        # Prepare items
        items = [{"text": text} for text in texts]
        # Setup prompts
        system_prompt = (
            "You are a translator. Translate the provided texts "
            "maintaining special fields like <> and <#>."
        )
        # {{text}} is escaped so that str.format only fills in the language
        # codes and leaves a literal {text} placeholder in the template;
        # unescaped, format() raises KeyError because no 'text' is supplied
        template = (
            "Translate the following texts from {source_lang} to {target_lang}. "
            "Return translations as a JSON array of strings:"
            "\n\n{{text}}"
        )
        # Process batch
        results = self.process_batch(
            items=items,
            system_prompt=system_prompt,
            template=template.format(
                source_lang=source_lang,
                target_lang=target_lang
            )
        )
        return results
 # Example usage:
 """
 from services.llm.llm_factory import LLMFactory
 from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor
 # Create LLM service
 llm_service = LLMFactory.create_service("openai")
 # Setup batch processor with progress callback
 def progress_callback(message: str):
    print(message)
 config = BatchConfig(
    batch_size=20,
    progress_callback=progress_callback
 )
 # General batch processor
 processor = BatchProcessor(llm_service, config)
 # Example batch process for custom task
 items = [
    {"text": "Hello", "context": "greeting"},
    {"text": "Goodbye", "context": "farewell"}
 ]
 system_prompt = "You are a helpful assistant."
 template = "Process these items considering their context: {items}"
 results = processor.process_batch(
    items=items,
    system_prompt=system_prompt,
    template=template
 )
 # Example translation batch
 translator = TranslationBatchProcessor(llm_service, config)
 texts = ["Hello world", "How are you?"]
 translations = translator.translate_batch(
    texts=texts,
    source_lang="en",
    target_lang="es"
 )
 """
--- a/services/llm/grok_service.py
+++ b/services/llm/grok_service.py
@ -0,0 +1,63 @@
 # services/llm/grok_service.py
 """
 Grok service implementation
 """
 from typing import Dict, List, Optional
 import json
 from .base import LLMService
 from config.api_keys import APIKeyManager
 class GrokService(LLMService):
    """Placeholder LLM service for Grok; no API call is implemented yet"""
    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
        """
        Store the API key and model settings
        Args:
            model: Model name to use
            temperature: Sampling temperature to use
        Raises:
            ValueError: If APIKeyManager.get_grok_key() returns no key
        """
        api_key = APIKeyManager.get_grok_key()
        if not api_key:
            raise ValueError("Grok API key not found. Please set up your API keys.")
        self.api_key = api_key
        self.model = model
        self.temperature = temperature
    def generate_text(self, prompt: str) -> Optional[str]:
        """
        Generate text using the Grok API
        Currently always prints an error and returns None, because the
        placeholder raises NotImplementedError inside its own try block.
        TODO: Update this method when Grok API is available
        """
        try:
            # Placeholder for Grok API implementation
            # Update this when the API is released
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as e:
            print(f"Error in Grok API call: {e}")
            return None
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> Optional[List[float]]:
        """
        Calculate similarity scores using the Grok API
        Currently always prints an error and returns None; the prompt and
        payload are built but not sent anywhere.
        TODO: Update this method when Grok API is available
        """
        try:
            system_prompt = (
                "Evaluate the semantic similarity between the following table of pairs of texts "
                "in json format on a scale from 0 to 1. Return the similarity scores for every "
                "row in JSON format as a list of numbers, without any additional text or formatting."
            )
            request_payload = json.dumps(texts_pairs)
            # Placeholder for Grok API implementation
            # Update this when the API is released
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as e:
            print(f"Error in Grok similarity calculation: {e}")
            return None
 # Update config/api_keys.py to include Grok
 # NOTE(review): this snippet looks like it is meant to be added to
 # APIKeyManager in config/api_keys.py. Left at module level in this file,
 # `cls` has no class to bind to and `os` is not among the imports visible
 # at the top of this module - move it before relying on get_grok_key().
@classmethod
 def get_grok_key(cls) -> Optional[str]:
    """Get Grok API key from environment or stored configuration"""
    # The GROK_API_KEY environment variable takes precedence over the stored key
    return (
        os.getenv('GROK_API_KEY') or 
        cls._get_stored_key('grok')
    )
--- a/services/llm/llm_factory.py
+++ b/services/llm/llm_factory.py
@ -0,0 +1,33 @@
 # services/llm/llm_factory.py
 """
 Factory class for creating LLM services
 """
 from typing import Optional
 from .openai_service import OpenAIService
 from .ollama_service import OllamaService
 from .grok_service import GrokService
 class LLMFactory:
    """Factory class for creating LLM service instances"""
    @staticmethod
    def create_service(service_type: str, **kwargs) -> Optional['LLMService']:
        """
        Build the LLM service registered under service_type
        Args:
            service_type: Type of LLM service ("openai", "ollama", "grok");
                matched case-insensitively
            **kwargs: Additional arguments for service initialization
        Returns:
            The new service instance, or None (after printing a message)
            when service_type is not recognized
        """
        registry = {
            "openai": OpenAIService,
            "ollama": OllamaService,
            "grok": GrokService
        }
        service_class = registry.get(service_type.lower())
        if not service_class:
            print(f"Unknown service type: {service_type}")
            return None
        return service_class(**kwargs)
--- a/services/llm/ollama_service.py
+++ b/services/llm/ollama_service.py
@ -0,0 +1,53 @@
 # services/llm/ollama_service.py
 """
 Ollama service implementation
 """
 import ollama
 import json
 from typing import Dict, List
 from .base import LLMService
 class OllamaService(LLMService):
    """LLM service that sends prompts to a model through the ollama package"""
    def __init__(self, model: str = "llama3.1"):
        """
        Args:
            model: Name of the model passed to ollama.generate
        """
        self.model = model
    def generate_text(self, prompt: str) -> str:
        """
        Generate text for a prompt
        Returns:
            The "response" field of the ollama reply, or None (after
            printing the error) when the call fails
        """
        try:
            reply = ollama.generate(model=self.model, prompt=prompt)
            return reply["response"]
        except Exception as e:
            print(f"Error in Ollama API call: {e}")
            return None
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Ask the model for similarity scores of the given text pairs
        Returns:
            The list of scores parsed from the reply (either a bare JSON
            list or the "similarity_scores" entry of a JSON object), or
            None (after printing the error) when the call or parsing fails
        """
        instructions = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )
        payload = json.dumps(texts_pairs)
        prompt = f"{instructions}\n\n{payload}"
        try:
            reply = ollama.generate(model=self.model, prompt=prompt)
            raw_text = reply["response"].strip()
            try:
                parsed = json.loads(raw_text)
            except json.JSONDecodeError:
                raise ValueError("Could not decode response as JSON")
            if isinstance(parsed, dict) and "similarity_scores" in parsed:
                return parsed["similarity_scores"]
            if isinstance(parsed, list):
                return parsed
            raise ValueError("Unexpected response format")
        except Exception as e:
            print(f"Error in Ollama similarity calculation: {e}")
            return None
--- a/services/llm/openai_service.py
+++ b/services/llm/openai_service.py
@ -0,0 +1,69 @@
 # services/llm/openai_service.py
 """
 OpenAI service implementation
 """
 from openai import OpenAI
 from typing import Dict, List
 import json
 from .base import LLMService
 from config.api_keys import APIKeyManager
 class OpenAIService(LLMService):
    """LLM service backed by the OpenAI chat completions API"""
    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3):
        """
        Create the OpenAI client
        Args:
            model: Model name sent with every request
            temperature: Sampling temperature sent with every request
        Raises:
            ValueError: If APIKeyManager.get_openai_key() returns no key
        """
        api_key = APIKeyManager.get_openai_key()
        if not api_key:
            raise ValueError("OpenAI API key not found. Please set up your API keys.")
        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature
    def generate_text(self, prompt: str) -> str:
        """
        Generate text for a prompt sent as a single user message
        Returns:
            The content of the first choice, or None (after printing the
            error) when the call fails
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=1500
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error in OpenAI API call: {e}")
            return None
    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove surrounding quotes and a Markdown code fence (``` or ```json) from text"""
        cleaned = text.strip().strip("'").strip()
        # str.strip(chars) removes a set of characters, not a prefix, so the
        # fence markers are removed explicitly instead
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Ask the model for similarity scores of the given text pairs
        Returns:
            The list of scores parsed from the reply (either a bare JSON
            list or the "similarity_scores" entry of a JSON object), or
            None (after printing the error) when the call or parsing fails
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )
        request_payload = json.dumps(texts_pairs)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": request_payload}
                ],
                temperature=self.temperature,
                max_tokens=1500
            )
            response_content = response.choices[0].message.content
            cleaned_response = self._strip_code_fence(response_content)
            try:
                scores = json.loads(cleaned_response)
            except json.JSONDecodeError:
                raise ValueError("Could not decode response as JSON")
            if isinstance(scores, dict) and "similarity_scores" in scores:
                return scores["similarity_scores"]
            if isinstance(scores, list):
                return scores
            raise ValueError("Unexpected response format")
        except Exception as e:
            print(f"Error in OpenAI similarity calculation: {e}")
            return None
--- a/services/translation/base.py
+++ b/services/translation/base.py
@ -0,0 +1,19 @@
 # services/translation/base.py
 """
 Base class for translation services
 """
 from abc import ABC, abstractmethod
 from typing import Optional, List, Dict
 class TranslationService(ABC):
    """Interface that every translation backend implements"""
    @abstractmethod
    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """
        Translate a single text
        Args:
            text: Text to translate
            target_language: Language code to translate into
            source_language: Language code of the input, if known
        Returns:
            Translated text
        """
    @abstractmethod
    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """
        Translate a batch of texts
        Args:
            texts: Texts to translate
            target_language: Language code to translate into
            source_language: Language code of the inputs, if known
        Returns:
            List of translated texts
        """
--- a/services/translation/google_translate.py
+++ b/services/translation/google_translate.py
@ -0,0 +1,77 @@
 # services/translation/google_translate.py
 """
 Google Translation service implementation
 """
 from typing import Optional, List
 import html
 from google.cloud import translate_v2 as translate
 from google.oauth2 import service_account
 from config.api_keys import APIKeyManager
 from .base import TranslationService
 class GoogleTranslateService(TranslationService):
    """Translation service that delegates to a Google Cloud Translate (v2) client."""
    def __init__(self, credentials_file: Optional[str] = None):
        """
        Initialize Google Translate service
        Args:
            credentials_file: Path to Google service account credentials JSON file.
                            If None, an API key must be available from APIKeyManager.
        Raises:
            ValueError: If a client cannot be built from the credentials file, or
                if no credentials file is given and no API key is configured.
        """
        if credentials_file:
            # Use service account credentials
            try:
                credentials = service_account.Credentials.from_service_account_file(credentials_file)
                self.client = translate.Client(credentials=credentials)
            except Exception as e:
                # Chain the original error so the root cause stays in the traceback.
                raise ValueError(f"Error initializing Google Translate with credentials: {e}") from e
        else:
            # Use API key
            api_key = APIKeyManager.get_google_key()
            if not api_key:
                raise ValueError("Google API key not found. Please set up your API keys.")
            # NOTE(review): the key is only checked for presence here; it is not
            # handed to the client, which is built with its default credential
            # lookup. Confirm whether the key should be passed explicitly.
            self.client = translate.Client()
    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """
        Translate a single text.
        Args:
            text: Text to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.
        Returns:
            Translated text, with HTML entities in the result unescaped
        Raises:
            ValueError: If the client call fails or its result has no translation
        """
        try:
            result = self.client.translate(
                text,
                target_language=target_language,
                source_language=source_language
            )
            return html.unescape(result["translatedText"])
        except Exception as e:
            raise ValueError(f"Error in Google Translate: {e}") from e
    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """
        Translate multiple texts in batch.
        Args:
            texts: List of texts to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.
        Returns:
            List of translated texts (HTML entities unescaped), in the order the
            client returns them. An empty input yields an empty list.
        Raises:
            ValueError: If the client call fails or a result has no translation
        """
        if not texts:
            # Nothing to translate: skip the remote call entirely.
            return []
        try:
            results = self.client.translate(
                texts,
                target_language=target_language,
                source_language=source_language
            )
            return [html.unescape(result["translatedText"]) for result in results]
        except Exception as e:
            raise ValueError(f"Error in Google Translate batch: {e}") from e
--- a/services/translation/translation_factory.py
+++ b/services/translation/translation_factory.py
@ -0,0 +1,32 @@
 # services/translation/translation_factory.py
 """
 Factory class for creating translation services
 """
 from typing import Optional
 from .google_translate import GoogleTranslateService
 class TranslationFactory:
    """Builds translation service objects from a short service-type name."""
    @staticmethod
    def create_service(service_type: str, **kwargs) -> Optional['TranslationService']:
        """
        Instantiate the translation service registered under service_type.
        Args:
            service_type: Type of translation service ("google", etc.);
                the lookup is case-insensitive
            **kwargs: Passed through unchanged to the service constructor
        Returns:
            A new instance of the selected service class
        Raises:
            ValueError: If service_type does not name a registered service
        """
        registry = {
            "google": GoogleTranslateService,
            # Add other translation services here
        }
        service_class = registry.get(service_type.lower())
        if not service_class:
            raise ValueError(f"Unknown translation service type: {service_type}")
        return service_class(**kwargs)
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@ -0,0 +1,39 @@
 # utils/file_utils.py
 """
 File handling utilities
 """
 import os
 import tkinter as tk
 from tkinter import filedialog
 import pandas as pd
 def select_file(title="Select file", filetypes=None):
    """
    Show an "open file" dialog and return the chosen path.
    Args:
        title: Dialog window title
        filetypes: List of (label, pattern) pairs for the dialog's type filter.
            Defaults to Excel files plus an "All files" entry.
    Returns:
        The selected file path, or None if the dialog returned nothing
    """
    if filetypes is None:
        filetypes = [
            # Patterns are space-separated: Tk treats the value as a list of
            # patterns, and a ';'-joined string only works on Windows.
            ("Excel files", "*.xlsx *.xls"),
            ("All files", "*.*"),
        ]
    root = tk.Tk()
    try:
        # Hide the helper root window so that only the dialog is shown.
        root.withdraw()
        file_path = filedialog.askopenfilename(
            title=title,
            filetypes=filetypes
        )
    finally:
        # Dispose of the helper window; otherwise one hidden root leaks per call.
        root.destroy()
    return file_path or None
 def select_directory(title="Select directory"):
    """
    Show a "choose directory" dialog and return the chosen path.
    Args:
        title: Dialog window title
    Returns:
        The selected directory path, or None if the dialog returned nothing
    """
    root = tk.Tk()
    try:
        # Hide the helper root window so that only the dialog is shown.
        root.withdraw()
        dir_path = filedialog.askdirectory(title=title)
    finally:
        # Dispose of the helper window; otherwise one hidden root leaks per call.
        root.destroy()
    return dir_path or None
 def safe_read_excel(file_path, **kwargs):
    """
    Read an Excel file with pandas without letting errors propagate.
    Args:
        file_path: Path of the Excel file to read
        **kwargs: Forwarded unchanged to pd.read_excel
    Returns:
        Whatever pd.read_excel returns, or None if it raised; in that case
        the error is printed to standard output.
    """
    try:
        frame = pd.read_excel(file_path, **kwargs)
    except Exception as exc:
        print(f"Error reading Excel file: {exc}")
        frame = None
    return frame
--- a/utils/logger_utils.py
+++ b/utils/logger_utils.py
@ -0,0 +1,31 @@
 # utils/logger_utils.py
 """
 Logging configuration and utilities
 """
 import logging
 import os
 from config.settings import LOG_DIR
 def setup_logger(name, log_file=None):
    """
    Return the logger called ``name``, configuring it on first use.
    A logger that already has handlers is returned untouched. Otherwise its
    level is set to INFO, a console handler is attached, and, when
    ``log_file`` is given, a file handler writing to LOG_DIR/log_file is
    attached before it (LOG_DIR is created if missing).
    Args:
        name: Logger name passed to logging.getLogger
        log_file: Optional file name, relative to LOG_DIR
    Returns:
        The configured logging.Logger
    """
    logger = logging.getLogger(name)
    if logger.handlers:
        # Already configured by an earlier call; do not add handlers twice.
        return logger
    logger.setLevel(logging.INFO)
    if log_file:
        os.makedirs(LOG_DIR, exist_ok=True)
        file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        to_file = logging.FileHandler(os.path.join(LOG_DIR, log_file))
        to_file.setFormatter(file_format)
        logger.addHandler(to_file)
    console_format = logging.Formatter('%(levelname)s: %(message)s')
    to_console = logging.StreamHandler()
    to_console.setFormatter(console_format)
    logger.addHandler(to_console)
    return logger
--- a/utils/logging_manager.py
+++ b/utils/logging_manager.py
@ -0,0 +1,64 @@
 # utils/logging_manager.py
 import logging
 import os
 from typing import Optional
 from datetime import datetime
 import tkinter as tk
 from queue import Queue, Empty
 from .output_redirector import OutputRedirector
 class LoggingManager:
    """
    File and GUI logging for the application, rooted in a working directory.
    All instances share the logger named "app_logger". Log files are written
    to <work_dir>/logs, one file per calendar day.
    """
    def __init__(self, work_dir: str):
        """
        Create the log directory if needed and attach the file handler.
        Args:
            work_dir: Directory under which the "logs" folder is created
        """
        self.work_dir = work_dir
        self.log_dir = os.path.join(work_dir, "logs")
        os.makedirs(self.log_dir, exist_ok=True)
        self.logger = self._setup_logger()
        self.queue: Optional[Queue] = None
        self.text_widget: Optional[tk.Text] = None
        # GUI handler installed by setup_gui_logging; kept so that a later
        # call can replace it instead of stacking a second one.
        self._gui_handler: Optional[logging.Handler] = None
    def _setup_logger(self) -> logging.Logger:
        """
        Configure the shared "app_logger" to write to today's log file.
        Returns:
            The shared logger, at level INFO
        """
        logger = logging.getLogger("app_logger")
        logger.setLevel(logging.INFO)
        # File handler
        log_file = os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")
        # The logger is process-wide, so another instance may already have
        # attached a handler for this very file. Adding a second one would
        # write every record twice and leak a file handle.
        target = os.path.abspath(log_file)
        already_attached = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_file, encoding="utf-8")
            file_handler.setLevel(logging.INFO)
            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
        return logger
    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
        """
        Setup logging to GUI text widget
        Log messages are pushed onto ``queue`` and later copied into
        ``text_widget`` by process_log_queue. Calling this again replaces the
        handler installed by the previous call on this instance.
        Args:
            text_widget: Widget that will display the messages
            queue: Queue carrying the formatted messages to the widget
        """
        self.text_widget = text_widget
        self.queue = queue
        # Drop the handler from an earlier call: its queue is no longer
        # drained, and keeping it would also duplicate every message.
        if self._gui_handler is not None:
            self.logger.removeHandler(self._gui_handler)
        # Add handler for GUI logging
        gui_handler = logging.StreamHandler(OutputRedirector(queue))
        gui_handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(message)s")
        gui_handler.setFormatter(formatter)
        self.logger.addHandler(gui_handler)
        self._gui_handler = gui_handler
    def process_log_queue(self):
        """Move every message currently queued into the text widget."""
        if self.queue and self.text_widget:
            while True:
                try:
                    message = self.queue.get_nowait()
                    self.text_widget.insert(tk.END, message)
                    # Keep the newest line visible.
                    self.text_widget.see(tk.END)
                    self.text_widget.update_idletasks()
                except Empty:
                    # Queue drained; stop until the next call.
                    break
    def clear_output(self):
        """Clear the text widget"""
        if self.text_widget:
            self.text_widget.delete("1.0", tk.END)
            self.text_widget.update_idletasks()
--- a/utils/output_redirector.py
+++ b/utils/output_redirector.py
@ -0,0 +1,17 @@
 # utils/output_redirector.py
 """
 Output redirector for capturing stdout/stderr
 """
 import sys
 from queue import Queue
 from typing import Optional
 class OutputRedirector:
    """Minimal stream-like object that forwards written strings to a queue."""
    def __init__(self, queue: Queue):
        # Destination for every string passed to write().
        self.queue = queue
    def write(self, string: str):
        """Put ``string`` on the target queue."""
        sink = self.queue
        sink.put(string)
    def flush(self):
        """Do nothing; there is no buffer to flush."""
        return None
--- a/utils/progress_bar.py
+++ b/utils/progress_bar.py
@ -0,0 +1,51 @@
 # utils/progress_bar.py
 """
 Progress bar implementation
 """
 import tkinter as tk
 from tkinter import ttk
 from typing import Optional, Callable
 import sys
 from queue import Queue
 import threading
 class ProgressBar:
    """
    Incremental text progress bar drawn as a row of dots.
    Output has the form "<prefix> [.....] <suffix>". Dots are emitted as
    progress advances, so the bar works on sinks that cannot rewrite a line.
    """
    def __init__(self, total: int, prefix: str = "", suffix: str = "", max_points: int = 30):
        """
        Args:
            total: Value of ``current`` that corresponds to a full bar
            prefix: Text written before the opening bracket
            suffix: Text written after the closing bracket
            max_points: Number of dots in a full bar
        """
        self.total = total
        self.prefix = prefix
        self.suffix = suffix
        self.max_points = max_points
        self.current = 0
        # Number of dots already written.
        self.last_points = 0
        self.output_callback: Optional[Callable] = None
    def set_output_callback(self, callback: Callable[[str], None]):
        """Set callback function for output"""
        self.output_callback = callback
    def update(self, current: int):
        """
        Record the new progress value and write any dots that are now due.
        Args:
            current: Absolute progress value, measured against ``total``
        """
        self.current = current
        if self.total > 0:
            points = min(int((current / self.total) * self.max_points), self.max_points)
        else:
            # Nothing to wait for: show the bar as complete rather than
            # dividing by zero.
            points = self.max_points
        # Only ever add dots; a lower value than before writes nothing.
        if points > self.last_points:
            new_points = points - self.last_points
            self._write_output("." * new_points)
            self.last_points = points
    def increment(self):
        """Advance progress by one step."""
        self.update(self.current + 1)
    def finish(self):
        """Fill in any missing dots and write the closing bracket and suffix."""
        remaining_points = self.max_points - self.last_points
        if remaining_points > 0:
            self._write_output("." * remaining_points)
        self._write_output(f"] {self.suffix}\n")
    def start(self):
        """Write the prefix and the opening bracket."""
        self._write_output(f"\r{self.prefix} [")
    def _write_output(self, text: str):
        """Send ``text`` to the output callback if set, otherwise print it."""
        if self.output_callback:
            self.output_callback(text)
        else:
            print(text, end="", flush=True)
--- a/utils/script_registry.py
+++ b/utils/script_registry.py
@ -0,0 +1,68 @@
 # utils/script_registry.py
 from typing import Dict, Callable, List, Optional
 import importlib
 import inspect
 import os
 from pathlib import Path
 from config.profile_manager import Profile, ProfileManager
 class ScriptRegistry:
    """Keeps named script operations together with their descriptions."""
    def __init__(self):
        # Operation name -> callable to run.
        self.operations: Dict[str, Callable] = {}
        # Operation name -> human-readable description.
        self.descriptions: Dict[str, str] = {}
    def register(self, name: str, operation: Callable, description: str = ""):
        """
        Store an operation under ``name``, replacing any earlier entry.
        Args:
            name: Key used to look the operation up later
            operation: Callable to run for this name
            description: Optional text describing the operation
        """
        self.descriptions[name] = description
        self.operations[name] = operation
    def auto_discover(self, scripts_dir: str = "scripts"):
        """
        Import every script_*.py module found in the scripts directory.
        The directory is resolved relative to the parent of this file's
        directory. A module is registered only if it defines ``main``; the
        operation name is the file stem without "script_" and the description
        is the module docstring. A module that fails to load is reported with
        print and skipped.
        Args:
            scripts_dir: Directory name, also used as the package name on import
        """
        base_dir = Path(__file__).parent.parent / scripts_dir
        for file in base_dir.glob("script_*.py"):
            dotted_name = f"{scripts_dir}.{file.stem}"
            try:
                module = importlib.import_module(dotted_name)
                if not hasattr(module, 'main'):
                    # No entry point, nothing to register.
                    continue
                self.register(
                    file.stem.replace('script_', ''),
                    module.main,
                    module.__doc__ or "",
                )
            except Exception as e:
                print(f"Error loading script {file}: {e}")
    def get_operations(self) -> List[tuple]:
        """Return a (name, description) tuple for every registered operation."""
        return [(op_name, self.descriptions[op_name]) for op_name in self.operations]
    def run_operation(self, name: str, profile: Optional[Profile] = None, **kwargs):
        """
        Call a registered operation with the arguments it declares.
        Args:
            name: Name of the operation to run
            profile: Current profile instance (optional); passed only if the
                operation has a parameter called "profile"
            **kwargs: Candidate arguments; only those whose names appear in the
                operation's signature are passed on
        Returns:
            Whatever the operation returns
        Raises:
            ValueError: If no operation is registered under ``name``
        """
        if name not in self.operations:
            raise ValueError(f"Unknown operation: {name}")
        operation = self.operations[name]
        accepted = inspect.signature(operation).parameters
        # Start with the profile when the operation asks for it...
        call_args = {'profile': profile} if 'profile' in accepted else {}
        # ...then add every supplied keyword that the signature names.
        call_args.update(
            (param_name, kwargs[param_name])
            for param_name in accepted
            if param_name in kwargs
        )
        return operation(**call_args)