Agregado de librerias

2025-02-09 13:22:25 +01:00 · 2025-02-09 13:22:25 +01:00 · 6fca251249
parent caa983c8da
commit 6fca251249
22 changed files with 1313 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,27 @@
+# Python cache files
+__pycache__/
+*.py[cod]
+
+# Environment directories
+venv/
+env/
+.env/
+
+# IDE configurations
+.vscode/
+.idea/
+
+# Logs and data files
+data/log.txt
+data/data.json
+
+# Allow script groups and their configurations
+!backend/script_groups/
+!backend/script_groups/*/
+!backend/script_groups/*/*.py
+!backend/script_groups/*/schema.json
+!backend/script_groups/*/esquema.json
+!backend/script_groups/*/description.json
+
+# But ignore working directory configurations
+backend/script_groups/*/work_dir.json
--- a/commands.sh
+++ b/commands.sh
@ -0,0 +1,3 @@
+# Create a .gitkeep in each script group directory
+# (only the example_group directory is handled here).
+mkdir -p backend/script_groups/example_group
+touch backend/script_groups/example_group/.gitkeep
--- a/services/excel/excel_service.py
+++ b/services/excel/excel_service.py
@ -0,0 +1,241 @@
+# services/excel/excel_service.py
+"""
+Excel file handling service with retry and formatting capabilities
+"""
+import pandas as pd
+import time
+from typing import Optional, Union, Dict, Any
+from pathlib import Path
+import openpyxl
+from openpyxl.utils import get_column_letter
+from openpyxl.styles import PatternFill, Alignment, Font
+from openpyxl.worksheet.worksheet import Worksheet
+
+class ExcelService:
+    """Service for reading and writing Excel files with retries and formatting.
+
+    Failed attempts are reported with ``print`` and retried up to
+    ``max_retries`` times, sleeping ``retry_delay`` seconds between attempts.
+    """
+
+    # Fragment of the ValueError text that makes read_excel retry after
+    # removing the worksheet auto-filters.
+    _FILTER_ERROR = "must be either numerical or a string containing a wildcard"
+
+    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
+        """
+        Args:
+            max_retries: Maximum number of attempts for each read/save call
+            retry_delay: Seconds to wait between two attempts
+        """
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+
+    def read_excel(
+        self,
+        file_path: Union[str, Path],
+        sheet_name: str = "Sheet1",
+        **kwargs
+    ) -> pd.DataFrame:
+        """
+        Read Excel file with retries and cleanup
+
+        If pandas rejects the file because of its auto-filter definition, the
+        filters are removed in a temporary copy and that copy is read instead.
+
+        Args:
+            file_path: Path to Excel file
+            sheet_name: Name of sheet to read
+            **kwargs: Additional arguments for pd.read_excel
+
+        Returns:
+            DataFrame with the Excel content (line breaks normalized)
+
+        Raises:
+            Exception: If the file could not be read after max_retries attempts
+        """
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                df = pd.read_excel(file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
+                df = self._clean_dataframe(df)
+                print(f"Archivo leído y limpiado exitosamente: {file_path}")
+                return df
+
+            except ValueError as ve:
+                if self._FILTER_ERROR in str(ve):
+                    print(f"Error al leer el archivo: {ve}")
+                    print("Intentando eliminar filtros y leer el archivo nuevamente...")
+                    try:
+                        return self._read_without_filters(file_path, sheet_name, **kwargs)
+                    except Exception as e:
+                        print(f"Error al intentar eliminar filtros y leer el archivo: {e}")
+                else:
+                    print(f"Error de valor: {ve}")
+
+            except PermissionError as e:
+                print(
+                    f"Error de permiso: {e}. Por favor cierre el archivo. "
+                    f"Reintentando en {self.retry_delay} segundos..."
+                )
+            except Exception as e:
+                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")
+
+            # Waiting only makes sense when another attempt follows.
+            if attempt < self.max_retries:
+                time.sleep(self.retry_delay)
+
+        raise Exception(f"No se pudo leer el archivo después de {self.max_retries} intentos.")
+
+    def _read_without_filters(
+        self,
+        file_path: Union[str, Path],
+        sheet_name: str,
+        **kwargs
+    ) -> pd.DataFrame:
+        """Read the requested sheet from a temporary copy with auto-filters removed."""
+        workbook = openpyxl.load_workbook(filename=file_path)
+        # The requested sheet is not necessarily the active one, so clear the
+        # filter reference on every worksheet.
+        for sheet in workbook.worksheets:
+            if sheet.auto_filter:
+                sheet.auto_filter.ref = None
+
+        temp_path = Path(str(file_path) + "_temp.xlsx")
+        try:
+            workbook.save(str(temp_path))
+            df = pd.read_excel(temp_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
+        finally:
+            # Remove the temporary copy even when saving or re-reading failed.
+            if temp_path.exists():
+                temp_path.unlink()
+
+        return self._clean_dataframe(df)
+
+    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Apply _clean_special_chars to every non-null cell of df and return df."""
+        for col in df.columns:
+            df[col] = df[col].apply(
+                lambda x: self._clean_special_chars(x) if pd.notna(x) else x
+            )
+        return df
+
+    def save_excel(
+        self,
+        df: pd.DataFrame,
+        file_path: Union[str, Path],
+        sheet_name: str = "Sheet1",
+        format_options: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> None:
+        """
+        Save DataFrame to Excel with formatting
+
+        Args:
+            df: DataFrame to save
+            file_path: Path to save Excel file
+            sheet_name: Name of sheet
+            format_options: Dictionary with formatting options
+                (see _format_worksheet for the recognized keys)
+            **kwargs: Additional arguments for DataFrame.to_excel; ``index``
+                defaults to False when not given
+
+        Raises:
+            Exception: If the file could not be saved after max_retries attempts
+        """
+        if format_options is None:
+            format_options = {}
+
+        # The index is omitted by default, but callers may override that.
+        to_excel_kwargs = {"index": False, **kwargs}
+
+        for attempt in range(1, self.max_retries + 1):
+            try:
+                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
+                    df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
+                    self._format_worksheet(writer.sheets[sheet_name], format_options)
+
+                print(f"Archivo guardado exitosamente en: {file_path}")
+                return
+
+            except PermissionError as e:
+                print(
+                    f"Error de permiso: {e}. Por favor cierre el archivo. "
+                    f"Reintentando en {self.retry_delay} segundos..."
+                )
+            except Exception as e:
+                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")
+
+            # Waiting only makes sense when another attempt follows.
+            if attempt < self.max_retries:
+                time.sleep(self.retry_delay)
+
+        raise Exception(f"No se pudo guardar el archivo después de {self.max_retries} intentos.")
+
+    def _format_worksheet(self, worksheet: Worksheet, options: Dict[str, Any]) -> None:
+        """
+        Apply formatting to worksheet
+
+        Args:
+            worksheet: Worksheet to format
+            options: Formatting options. Recognized keys and their defaults:
+                freeze_row (2), freeze_col (1), max_column_width (50),
+                min_column_width (8), wrap_threshold (50), header_row (1),
+                header_color ('F2F2F2')
+        """
+        # Freeze panes if specified. A falsy row/column falls back to 1 so a
+        # caller can give only one of the two without producing an invalid
+        # cell reference.
+        freeze_row = options.get('freeze_row', 2)
+        freeze_col = options.get('freeze_col', 1)
+        if freeze_row or freeze_col:
+            worksheet.freeze_panes = f"{get_column_letter(freeze_col or 1)}{freeze_row or 1}"
+
+        # Auto-adjust column widths
+        max_width = options.get('max_column_width', 50)
+        min_width = options.get('min_column_width', 8)
+        wrap_threshold = options.get('wrap_threshold', 50)
+
+        for col in worksheet.columns:
+            max_length = 0
+            column = get_column_letter(col[0].column)
+
+            for cell in col:
+                if cell.value is None:
+                    continue
+                text = str(cell.value)
+                text_length = len(text)
+                if text_length > wrap_threshold:
+                    # Long texts are wrapped; the column then only has to fit
+                    # the longest single word (capped at the threshold).
+                    cell.alignment = Alignment(wrap_text=True, vertical='top')
+                    longest_word = max((len(word) for word in text.split()), default=0)
+                    text_length = min(wrap_threshold, longest_word)
+                max_length = max(max_length, text_length)
+
+            adjusted_width = min(max_width, max(min_width, max_length + 2))
+            worksheet.column_dimensions[column].width = adjusted_width
+
+        # Apply header styles
+        header_row = options.get('header_row', 1)
+        if header_row:
+            header_color = options.get('header_color', 'F2F2F2')
+            header_fill = PatternFill(
+                start_color=header_color,
+                end_color=header_color,
+                fill_type='solid'
+            )
+            header_font = Font(bold=True)
+
+            for cell in worksheet[header_row]:
+                cell.fill = header_fill
+                cell.font = header_font
+
+    def _clean_special_chars(self, text: Any) -> Any:
+        """Normalize line breaks to '\\n' in strings; return other values unchanged"""
+        if isinstance(text, str):
+            return text.replace('\r\n', '\n').replace('\r', '\n')
+        return text
+
+# Example usage:
+"""
+from services.excel.excel_service import ExcelService
+
+# Create service
+excel_service = ExcelService()
+
+# Read Excel file
+try:
+    df = excel_service.read_excel("input.xlsx")
+    print("Data loaded successfully")
+    
+    # Modify data...
+    
+    # Save with formatting
+    format_options = {
+        'freeze_row': 2,
+        'freeze_col': 1,
+        'max_column_width': 50,
+        'min_column_width': 8,
+        'wrap_threshold': 50,
+        'header_color': 'E6E6E6'
+    }
+    
+    excel_service.save_excel(
+        df,
+        "output.xlsx",
+        format_options=format_options
+    )
+    
+except Exception as e:
+    print(f"Error handling Excel file: {e}")
+"""
--- a/services/language/base.py
+++ b/services/language/base.py
@ -0,0 +1,25 @@
+# services/language/base.py
+"""
+Base class for language detection services
+"""
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict, Tuple
+
+class LanguageDetectionService(ABC):
+    """Interface that every language detection backend has to implement"""
+
+    @abstractmethod
+    def detect_language(self, text: str) -> Tuple[str, float]:
+        """
+        Identify the language of a single text
+        Returns: Tuple of (language_code, confidence_score)
+        """
+        ...
+
+    @abstractmethod
+    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
+        """
+        Identify the language of each text in a list
+        Returns: List of tuples (language_code, confidence_score)
+        """
+        ...
--- a/services/language/langid_service.py
+++ b/services/language/langid_service.py
@ -0,0 +1,52 @@
+# services/language/langid_service.py
+"""
+Language detection service using langid
+"""
+from typing import List, Tuple, Optional, Set
+import langid
+from .base import LanguageDetectionService
+
+class LangIdService(LanguageDetectionService):
+    """LanguageDetectionService implementation backed by the langid module"""
+
+    def __init__(self, allowed_languages: Optional[Set[str]] = None):
+        """
+        Set up the langid-based detector
+
+        Args:
+            allowed_languages: Set of language codes (e.g., {'en', 'es', 'fr'}) passed
+                             to langid.set_languages. When None or empty,
+                             langid.set_languages is not called.
+        """
+        if allowed_languages:
+            langid.set_languages(list(allowed_languages))
+        self.allowed_languages = allowed_languages
+
+    def detect_language(self, text: str) -> Tuple[str, float]:
+        """
+        Classify one text with langid
+
+        Args:
+            text: Text to analyze
+
+        Returns:
+            Tuple of (language_code, score) as returned by langid.classify, or
+            ("unknown", 0.0) when the stripped text has fewer than 3
+            characters or any error occurs
+        """
+        try:
+            stripped = text.strip() if text else ""
+            if len(stripped) < 3:
+                return ("unknown", 0.0)
+
+            language_code, score = langid.classify(stripped)
+            return (language_code, score)
+        except Exception as e:
+            print(f"Error in language detection: {e}")
+            return ("unknown", 0.0)
+
+    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
+        """
+        Classify several texts, one detect_language call per text
+
+        Args:
+            texts: List of texts to analyze
+
+        Returns:
+            List of tuples (language_code, score), in input order
+        """
+        detections = []
+        for text in texts:
+            detections.append(self.detect_language(text))
+        return detections
--- a/services/language/language_factory.py
+++ b/services/language/language_factory.py
@ -0,0 +1,33 @@
+# services/language/language_factory.py
+"""
+Factory class for creating language detection services
+"""
+from typing import Optional, Set
+from .langid_service import LangIdService
+
+class LanguageFactory:
+    """Factory class for creating language detection service instances"""
+
+    @staticmethod
+    def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> Optional['LanguageDetectionService']:
+        """
+        Create an instance of the specified language detection service
+
+        Args:
+            service_type: Type of language detection service ("langid", etc.),
+                matched case-insensitively
+            allowed_languages: Set of allowed language codes
+            **kwargs: Additional arguments for service initialization
+
+        Returns:
+            LanguageDetectionService instance
+
+        Raises:
+            ValueError: If service_type is not recognized
+        """
+        services = {
+            "langid": LangIdService,
+            # Add other language detection services here
+        }
+
+        service_class = services.get(service_type.lower())
+        if service_class is None:
+            raise ValueError(f"Unknown language detection service type: {service_type}")
+        return service_class(allowed_languages=allowed_languages, **kwargs)
--- a/services/language/language_utils.py
+++ b/services/language/language_utils.py
@ -0,0 +1,68 @@
+# services/language/language_utils.py
+"""
+Utility functions for language detection and validation
+"""
+from typing import Dict, Set
+
+class LanguageUtils:
+    """Lookup helpers around a fixed table of language codes"""
+
+    # Short code -> (English language name, regional code)
+    LANGUAGE_CODES = {
+        'it': ('Italian', 'it-IT'),
+        'en': ('English', 'en-GB'),
+        'pt': ('Portuguese', 'pt-PT'),
+        'es': ('Spanish', 'es-ES'),
+        'ru': ('Russian', 'ru-RU'),
+        'fr': ('French', 'fr-FR'),
+        'de': ('German', 'de-DE'),
+        'tr': ('Turkish', 'tr-TR'),
+    }
+
+    @classmethod
+    def get_language_name(cls, code: str) -> str:
+        """Return the language name for a short code, or 'Unknown' if not in the table"""
+        name, _ = cls.LANGUAGE_CODES.get(code, ('Unknown', ''))
+        return name
+
+    @classmethod
+    def get_full_code(cls, short_code: str) -> str:
+        """Return the regional code (e.g., 'en-GB' for 'en'), or 'unknown' if not in the table"""
+        entry = cls.LANGUAGE_CODES.get(short_code)
+        if entry is None:
+            return 'unknown'
+        return entry[1]
+
+    @classmethod
+    def get_short_code(cls, full_code: str) -> str:
+        """Return the part before the first '-' (e.g., 'en' for 'en-GB')"""
+        return full_code.partition('-')[0]
+
+    @classmethod
+    def is_valid_language(cls, code: str) -> bool:
+        """Tell whether the short form of a code is in the table"""
+        return cls.get_short_code(code) in cls.LANGUAGE_CODES
+
+    @classmethod
+    def get_available_languages(cls) -> Set[str]:
+        """Return the short codes of the table as a new set"""
+        return set(cls.LANGUAGE_CODES)
+
+# Example usage:
+"""
+from services.language.language_factory import LanguageFactory
+from services.language.language_utils import LanguageUtils
+
+# Create language detection service with specific languages
+allowed_languages = LanguageUtils.get_available_languages()
+detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
+
+# Detect language of a text
+text = "Hello, how are you?"
+lang, confidence = detector.detect_language(text)
+print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
+print(f"Confidence: {confidence}")
+
+# Detect language of multiple texts
+texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
+results = detector.detect_batch(texts)
+for text, (lang, confidence) in zip(texts, results):
+    print(f"Text: {text}")
+    print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
+    print(f"Confidence: {confidence}")
+"""
--- a/services/llm/base.py
+++ b/services/llm/base.py
@ -0,0 +1,20 @@
+# services/llm/base.py
+"""
+Base class for LLM services
+"""
+from abc import ABC, abstractmethod
+import json
+from typing import List, Union, Dict, Any
+
+class LLMService(ABC):
+    """Interface that every LLM backend has to implement"""
+
+    @abstractmethod
+    def generate_text(self, prompt: str) -> str:
+        """Produce a text completion for the given prompt"""
+        ...
+
+    @abstractmethod
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        """Compute similarity scores for the given pairs of texts"""
+        ...
--- a/services/llm/batch_processor.py
+++ b/services/llm/batch_processor.py
@ -0,0 +1,228 @@
+# services/llm/batch_processor.py
+"""
+Batch processing service for LLM operations
+"""
+from typing import List, Dict, Any, Optional, Callable
+import json
+from dataclasses import dataclass
+import time
+from .base import LLMService
+from utils.progress_bar import ProgressBar
+
+@dataclass
+class BatchConfig:
+    """Configuration for batch processing"""
+    # Number of items sent to the LLM service per request.
+    batch_size: int = 20
+    # Number of attempts made for each batch.
+    max_retries: int = 3
+    # Seconds to sleep between two attempts of the same batch.
+    retry_delay: int = 3
+    # Optional sink for progress and error messages; called with one string.
+    progress_callback: Optional[Callable[[str], None]] = None
+
+class BatchProcessor:
+    """
+    Handles batch processing for LLM operations
+    """
+    def __init__(
+        self,
+        llm_service: LLMService,
+        config: Optional[BatchConfig] = None
+    ):
+        """
+        Args:
+            llm_service: Service used to send each batch request
+            config: Batch settings; a default BatchConfig is used when omitted
+        """
+        self.llm_service = llm_service
+        self.config = config or BatchConfig()
+
+    def process_batch(
+        self,
+        items: List[Dict[str, Any]],
+        system_prompt: str,
+        template: str,
+        output_processor: Optional[Callable] = None
+    ) -> List[Any]:
+        """
+        Process items in batches with consistent context
+
+        Args:
+            items: List of dictionaries containing data to process
+            system_prompt: System prompt for context
+            template: Template string for formatting requests
+            output_processor: Optional function to process LLM responses
+
+        Returns:
+            List with one result per input item, in input order. Items of a
+            batch that failed on every attempt are represented by None.
+        """
+        results: List[Any] = []
+        total_items = len(items)
+
+        # Setup progress tracking
+        progress = ProgressBar(
+            total_items,
+            "Processing batches:",
+            "Complete"
+        )
+        if self.config.progress_callback:
+            progress.set_output_callback(self.config.progress_callback)
+        progress.start()
+
+        # Process in batches
+        for start_idx in range(0, total_items, self.config.batch_size):
+            end_idx = min(start_idx + self.config.batch_size, total_items)
+            batch_items = items[start_idx:end_idx]
+
+            request_payload = json.dumps({
+                "items": batch_items,
+                "template": template
+            })
+            # LLMService.generate_text accepts a single prompt, so the system
+            # prompt and the JSON payload are sent as one text.
+            prompt = f"{system_prompt}\n\n{request_payload}"
+
+            results.extend(
+                self._run_batch_with_retries(
+                    prompt,
+                    len(batch_items),
+                    f"{start_idx}-{end_idx}",
+                    output_processor
+                )
+            )
+
+            # Update progress
+            progress.update(end_idx)
+
+        progress.finish()
+        return results
+
+    def _run_batch_with_retries(
+        self,
+        prompt: str,
+        expected_count: int,
+        batch_label: str,
+        output_processor: Optional[Callable] = None
+    ) -> List[Any]:
+        """Send one batch, retrying on failure; return None placeholders if every attempt fails."""
+        for attempt in range(self.config.max_retries):
+            try:
+                response = self.llm_service.generate_text(prompt)
+                batch_results = self._process_response(response, output_processor)
+
+                if len(batch_results) != expected_count:
+                    raise ValueError(
+                        "Response count doesn't match input count"
+                    )
+                return batch_results
+
+            except Exception as e:
+                is_last_attempt = attempt >= self.config.max_retries - 1
+                if self.config.progress_callback:
+                    suffix = "" if is_last_attempt else ". Retrying..."
+                    self.config.progress_callback(
+                        f"Error in batch {batch_label}: {e}{suffix}"
+                    )
+                if not is_last_attempt:
+                    time.sleep(self.config.retry_delay)
+
+        # Keep results aligned with the input items even when nothing succeeded.
+        return [None] * expected_count
+
+    def _process_response(
+        self,
+        response: str,
+        output_processor: Optional[Callable] = None
+    ) -> List[Any]:
+        """
+        Parse an LLM response that is expected to be a JSON array
+
+        Args:
+            response: Raw response text from the LLM service
+            output_processor: Optional function applied to every array element
+
+        Returns:
+            The parsed (and optionally processed) list
+
+        Raises:
+            ValueError: If the response is None, is not valid JSON, or is not
+                a JSON array
+        """
+        if response is None:
+            raise ValueError("LLM service returned no response")
+
+        try:
+            parsed = json.loads(response)
+        except json.JSONDecodeError as err:
+            raise ValueError("Failed to parse LLM response as JSON") from err
+
+        if not isinstance(parsed, list):
+            raise ValueError("LLM response is not a JSON array")
+
+        # Apply custom processing if provided
+        if output_processor:
+            return [output_processor(item) for item in parsed]
+        return parsed
+
+# Example specialized batch processor for translations
+class TranslationBatchProcessor(BatchProcessor):
+    """Specialized batch processor for translations"""
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        source_lang: str,
+        target_lang: str
+    ) -> List[str]:
+        """
+        Translate a batch of texts
+
+        Args:
+            texts: List of texts to translate
+            source_lang: Source language code
+            target_lang: Target language code
+
+        Returns:
+            List of results from process_batch, one per input text
+        """
+        # Prepare items
+        items = [{"text": text} for text in texts]
+
+        # Setup prompts
+        system_prompt = (
+            "You are a translator. Translate the provided texts "
+            "maintaining special fields like <> and <#>."
+        )
+
+        # The text placeholder is escaped with double braces so that
+        # str.format only fills in the two language codes and leaves a
+        # literal "{text}" in the template sent with the items.
+        template = (
+            "Translate the following texts from {source_lang} to {target_lang}. "
+            "Return translations as a JSON array of strings:"
+            "\n\n{{text}}"
+        )
+
+        # Process batch
+        results = self.process_batch(
+            items=items,
+            system_prompt=system_prompt,
+            template=template.format(
+                source_lang=source_lang,
+                target_lang=target_lang
+            )
+        )
+
+        return results
+
+# Example usage:
+"""
+from services.llm.llm_factory import LLMFactory
+from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor
+
+# Create LLM service
+llm_service = LLMFactory.create_service("openai")
+
+# Setup batch processor with progress callback
+def progress_callback(message: str):
+    print(message)
+
+config = BatchConfig(
+    batch_size=20,
+    progress_callback=progress_callback
+)
+
+# General batch processor
+processor = BatchProcessor(llm_service, config)
+
+# Example batch process for custom task
+items = [
+    {"text": "Hello", "context": "greeting"},
+    {"text": "Goodbye", "context": "farewell"}
+]
+
+system_prompt = "You are a helpful assistant."
+template = "Process these items considering their context: {items}"
+
+results = processor.process_batch(
+    items=items,
+    system_prompt=system_prompt,
+    template=template
+)
+
+# Example translation batch
+translator = TranslationBatchProcessor(llm_service, config)
+texts = ["Hello world", "How are you?"]
+translations = translator.translate_batch(
+    texts=texts,
+    source_lang="en",
+    target_lang="es"
+)
+"""
--- a/services/llm/grok_service.py
+++ b/services/llm/grok_service.py
@ -0,0 +1,63 @@
+# services/llm/grok_service.py
+"""
+Grok service implementation
+"""
+from typing import Dict, List, Optional
+import json
+from .base import LLMService
+from config.api_keys import APIKeyManager
+
+class GrokService(LLMService):
+    """Placeholder LLMService for Grok; neither method performs an API call yet"""
+
+    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
+        """
+        Args:
+            model: Model name stored on the instance
+            temperature: Temperature value stored on the instance
+
+        Raises:
+            ValueError: If APIKeyManager.get_grok_key() returns no key
+        """
+        key = APIKeyManager.get_grok_key()
+        if not key:
+            raise ValueError("Grok API key not found. Please set up your API keys.")
+
+        self.api_key, self.model, self.temperature = key, model, temperature
+
+    def generate_text(self, prompt: str) -> str:
+        """
+        Generate text using the Grok API
+
+        Currently always prints an error and returns None.
+        TODO: Update this method when Grok API is available
+        """
+        try:
+            # Placeholder until a Grok API implementation is added
+            raise NotImplementedError("Grok API is not implemented yet")
+        except Exception as err:
+            print(f"Error in Grok API call: {err}")
+        return None
+
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        """
+        Calculate similarity scores using the Grok API
+
+        Currently always prints an error and returns None.
+        TODO: Update this method when Grok API is available
+        """
+        try:
+            system_prompt = (
+                "Evaluate the semantic similarity between the following table of pairs of texts "
+                "in json format on a scale from 0 to 1. Return the similarity scores for every "
+                "row in JSON format as a list of numbers, without any additional text or formatting."
+            )
+            request_payload = json.dumps(texts_pairs)
+
+            # Placeholder until a Grok API implementation is added
+            raise NotImplementedError("Grok API is not implemented yet")
+        except Exception as err:
+            print(f"Error in Grok similarity calculation: {err}")
+        return None
+
+# Update config/api_keys.py to include Grok
+# NOTE(review): this looks like a snippet meant to be moved into the
+# APIKeyManager class in config/api_keys.py rather than to live in this
+# module — confirm. As written it is a module-level function wrapped in
+# @classmethod, and it references `os`, which the imports shown for this
+# file do not provide.
+@classmethod
+def get_grok_key(cls) -> Optional[str]:
+    """Return the GROK_API_KEY environment variable if set and non-empty,
+    otherwise the result of cls._get_stored_key('grok')"""
+    return (
+        os.getenv('GROK_API_KEY') or 
+        cls._get_stored_key('grok')
+    )
--- a/services/llm/llm_factory.py
+++ b/services/llm/llm_factory.py
@ -0,0 +1,33 @@
+# services/llm/llm_factory.py
+"""
+Factory class for creating LLM services
+"""
+from typing import Optional
+from .openai_service import OpenAIService
+from .ollama_service import OllamaService
+from .grok_service import GrokService
+
+class LLMFactory:
+    """Builds LLM service instances from a service type name"""
+
+    @staticmethod
+    def create_service(service_type: str, **kwargs) -> Optional['LLMService']:
+        """
+        Instantiate the LLM service registered under the given name
+
+        Args:
+            service_type: Type of LLM service ("openai", "ollama", "grok"),
+                matched case-insensitively
+            **kwargs: Additional arguments for service initialization
+
+        Returns:
+            The new service instance, or None (after printing a message)
+            when service_type is not recognized
+        """
+        registry = {
+            "openai": OpenAIService,
+            "ollama": OllamaService,
+            "grok": GrokService
+        }
+
+        chosen = registry.get(service_type.lower())
+        if not chosen:
+            print(f"Unknown service type: {service_type}")
+            return None
+        return chosen(**kwargs)
--- a/services/llm/ollama_service.py
+++ b/services/llm/ollama_service.py
@ -0,0 +1,53 @@
+# services/llm/ollama_service.py
+"""
+Ollama service implementation
+"""
+import ollama
+import json
+from typing import Dict, List
+from .base import LLMService
+
+class OllamaService(LLMService):
+    """LLMService implementation that calls ollama.generate"""
+
+    def __init__(self, model: str = "llama3.1"):
+        """
+        Args:
+            model: Model name passed to every ollama.generate call
+        """
+        self.model = model
+
+    def generate_text(self, prompt: str) -> str:
+        """Return the "response" field of ollama.generate, or None (after printing) on any error"""
+        try:
+            reply = ollama.generate(model=self.model, prompt=prompt)
+            return reply["response"]
+        except Exception as err:
+            print(f"Error in Ollama API call: {err}")
+            return None
+
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        """
+        Ask the model for similarity scores of the given text pairs
+
+        The reply must be a JSON list, or a JSON object with a
+        "similarity_scores" key.
+
+        Returns:
+            The scores, or None (after printing) when the call fails or the
+            reply has an unexpected format
+        """
+        instructions = (
+            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
+            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
+        )
+        full_prompt = f"{instructions}\n\n{json.dumps(texts_pairs)}"
+
+        try:
+            reply = ollama.generate(model=self.model, prompt=full_prompt)
+
+            try:
+                parsed = json.loads(reply["response"].strip())
+            except json.JSONDecodeError:
+                raise ValueError("Could not decode response as JSON")
+
+            if isinstance(parsed, dict) and "similarity_scores" in parsed:
+                return parsed["similarity_scores"]
+            if isinstance(parsed, list):
+                return parsed
+            raise ValueError("Unexpected response format")
+
+        except Exception as err:
+            print(f"Error in Ollama similarity calculation: {err}")
+            return None
--- a/services/llm/openai_service.py
+++ b/services/llm/openai_service.py
@ -0,0 +1,69 @@
+# services/llm/openai_service.py
+"""
+OpenAI service implementation
+"""
+from openai import OpenAI
+from typing import Dict, List
+import json
+from .base import LLMService
+from config.api_keys import APIKeyManager
+
+class OpenAIService(LLMService):
+    """LLMService implementation that uses the OpenAI chat completions API"""
+
+    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3):
+        """
+        Args:
+            model: Model name sent with every request
+            temperature: Sampling temperature sent with every request
+
+        Raises:
+            ValueError: If APIKeyManager.get_openai_key() returns no key
+        """
+        api_key = APIKeyManager.get_openai_key()
+        if not api_key:
+            raise ValueError("OpenAI API key not found. Please set up your API keys.")
+
+        self.client = OpenAI(api_key=api_key)
+        self.model = model
+        self.temperature = temperature
+
+    def generate_text(self, prompt: str) -> str:
+        """Return the first completion for prompt, or None (after printing) on any error"""
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=self.temperature,
+                max_tokens=1500
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"Error in OpenAI API call: {e}")
+            return None
+
+    @staticmethod
+    def _strip_code_fence(text: str) -> str:
+        """Remove surrounding whitespace, single quotes and a ``` / ```json fence from text"""
+        # str.strip with a multi-character argument removes a *set* of
+        # characters, so the fence is removed explicitly as prefix/suffix.
+        cleaned = text.strip().strip("'").strip()
+        if cleaned.startswith("```"):
+            cleaned = cleaned[3:]
+            if cleaned.lower().startswith("json"):
+                cleaned = cleaned[4:]
+        if cleaned.endswith("```"):
+            cleaned = cleaned[:-3]
+        return cleaned.strip()
+
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        """
+        Ask the model for similarity scores of the given text pairs
+
+        The reply must be a JSON list, or a JSON object with a
+        "similarity_scores" key, optionally wrapped in a code fence.
+
+        Returns:
+            The scores, or None (after printing) when the call fails or the
+            reply has an unexpected format
+        """
+        system_prompt = (
+            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
+            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
+        )
+
+        request_payload = json.dumps(texts_pairs)
+
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": request_payload}
+                ],
+                temperature=self.temperature,
+                max_tokens=1500
+            )
+
+            response_content = response.choices[0].message.content
+            if response_content is None:
+                raise ValueError("Empty response content")
+            cleaned_response = self._strip_code_fence(response_content)
+
+            try:
+                scores = json.loads(cleaned_response)
+            except json.JSONDecodeError:
+                raise ValueError("Could not decode response as JSON")
+
+            if isinstance(scores, dict) and "similarity_scores" in scores:
+                return scores["similarity_scores"]
+            if isinstance(scores, list):
+                return scores
+            raise ValueError("Unexpected response format")
+
+        except Exception as e:
+            print(f"Error in OpenAI similarity calculation: {e}")
+            return None
--- a/services/translation/base.py
+++ b/services/translation/base.py
@ -0,0 +1,19 @@
+# services/translation/base.py
+"""
+Base class for translation services
+"""
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict
+
+class TranslationService(ABC):
+    """Common interface that every translation backend has to implement."""
+
+    @abstractmethod
+    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
+        """
+        Translate one string.
+
+        Args:
+            text: Text to translate
+            target_language: Language code to translate into
+            source_language: Language code of the input; defaults to None
+
+        Returns:
+            Translated text
+        """
+
+    @abstractmethod
+    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
+        """
+        Translate several strings in one call.
+
+        Args:
+            texts: Texts to translate
+            target_language: Language code to translate into
+            source_language: Language code of the inputs; defaults to None
+
+        Returns:
+            List of translated texts
+        """
--- a/services/translation/google_translate.py
+++ b/services/translation/google_translate.py
@ -0,0 +1,77 @@
+# services/translation/google_translate.py
+"""
+Google Translation service implementation
+"""
+from typing import Optional, List
+import html
+from google.cloud import translate_v2 as translate
+from google.oauth2 import service_account
+from config.api_keys import APIKeyManager
+from .base import TranslationService
+
+class GoogleTranslateService(TranslationService):
+    """Translation backend that delegates to a Google Cloud ``translate_v2`` client."""
+
+    def __init__(self, credentials_file: Optional[str] = None):
+        """
+        Initialize Google Translate service
+
+        Args:
+            credentials_file: Path to Google service account credentials JSON file.
+                            If None, an API key must be available from APIKeyManager.
+
+        Raises:
+            ValueError: If the credentials cannot be loaded, or if no
+                credentials file is given and no API key is configured.
+        """
+        if credentials_file:
+            # Use service account credentials
+            try:
+                credentials = service_account.Credentials.from_service_account_file(credentials_file)
+                self.client = translate.Client(credentials=credentials)
+            except Exception as e:
+                # Chain the cause so the original traceback is not lost.
+                raise ValueError(f"Error initializing Google Translate with credentials: {e}") from e
+        else:
+            # Use API key
+            api_key = APIKeyManager.get_google_key()
+            if not api_key:
+                raise ValueError("Google API key not found. Please set up your API keys.")
+            # NOTE(review): the key is only checked for presence; the client is
+            # built without it, so authentication presumably relies on the
+            # library's default credential lookup - confirm this is intended.
+            self.client = translate.Client()
+
+    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
+        """
+        Translate a single text.
+
+        Args:
+            text: Text to translate
+            target_language: Target language code (e.g., 'es' for Spanish)
+            source_language: Source language code. If None, will be auto-detected.
+
+        Returns:
+            Translated text, with HTML entities unescaped
+
+        Raises:
+            ValueError: If the client call fails or the result is malformed.
+        """
+        try:
+            result = self.client.translate(
+                text,
+                target_language=target_language,
+                source_language=source_language
+            )
+            return html.unescape(result["translatedText"])
+        except Exception as e:
+            raise ValueError(f"Error in Google Translate: {e}") from e
+
+    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
+        """
+        Translate multiple texts in batch.
+
+        Args:
+            texts: List of texts to translate
+            target_language: Target language code (e.g., 'es' for Spanish)
+            source_language: Source language code. If None, will be auto-detected.
+
+        Returns:
+            List of translated texts, with HTML entities unescaped. An empty
+            input yields an empty list without contacting the service.
+
+        Raises:
+            ValueError: If the client call fails or a result is malformed.
+        """
+        # Nothing to translate: skip the remote call entirely.
+        if not texts:
+            return []
+
+        try:
+            results = self.client.translate(
+                texts,
+                target_language=target_language,
+                source_language=source_language
+            )
+            return [html.unescape(result["translatedText"]) for result in results]
+        except Exception as e:
+            raise ValueError(f"Error in Google Translate batch: {e}") from e
--- a/services/translation/translation_factory.py
+++ b/services/translation/translation_factory.py
@ -0,0 +1,32 @@
+# services/translation/translation_factory.py
+"""
+Factory class for creating translation services
+"""
+from typing import Optional
+from .google_translate import GoogleTranslateService
+
+class TranslationFactory:
+    """Builds translation service objects from a short service name."""
+
+    @staticmethod
+    def create_service(service_type: str, **kwargs) -> Optional['TranslationService']:
+        """
+        Instantiate the translation backend registered under ``service_type``.
+
+        Args:
+            service_type: Backend name, matched case-insensitively ("google", etc.)
+            **kwargs: Forwarded unchanged to the backend constructor
+
+        Returns:
+            A new instance of the selected backend
+
+        Raises:
+            ValueError: If ``service_type`` is not a registered backend
+        """
+        registry = {
+            "google": GoogleTranslateService,
+            # Add other translation services here
+        }
+
+        backend = registry.get(service_type.lower())
+        if not backend:
+            raise ValueError(f"Unknown translation service type: {service_type}")
+        return backend(**kwargs)
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@ -0,0 +1,39 @@
+# utils/file_utils.py
+"""
+File handling utilities
+"""
+import os
+import tkinter as tk
+from tkinter import filedialog
+import pandas as pd
+
+def select_file(title="Select file", filetypes=None):
+    """
+    Show a native "open file" dialog and return the chosen path.
+
+    Args:
+        title: Dialog window title
+        filetypes: Sequence of (label, pattern) pairs passed to the dialog.
+            Defaults to Excel workbooks plus an "All files" entry.
+
+    Returns:
+        The selected path, or None if the dialog was cancelled
+    """
+    if filetypes is None:
+        filetypes = [
+            # A tuple of patterns is portable; a single "a;b" string is only
+            # understood as two patterns by the Windows native dialog.
+            ("Excel files", ("*.xlsx", "*.xls")),
+            ("All files", "*.*")
+        ]
+
+    # A temporary hidden root is needed to host the dialog; it is destroyed
+    # afterwards so repeated calls do not accumulate Tk interpreters.
+    root = tk.Tk()
+    try:
+        root.withdraw()
+        file_path = filedialog.askopenfilename(
+            parent=root,
+            title=title,
+            filetypes=filetypes
+        )
+    finally:
+        root.destroy()
+
+    return file_path if file_path else None
+
+def select_directory(title="Select directory"):
+    """
+    Show a native "choose directory" dialog and return the chosen path.
+
+    Args:
+        title: Dialog window title
+
+    Returns:
+        The selected directory, or None if the dialog was cancelled
+    """
+    # Temporary hidden root hosting the dialog; destroyed afterwards so
+    # repeated calls do not accumulate Tk interpreters.
+    root = tk.Tk()
+    try:
+        root.withdraw()
+        dir_path = filedialog.askdirectory(parent=root, title=title)
+    finally:
+        root.destroy()
+
+    return dir_path if dir_path else None
+
+def safe_read_excel(file_path, **kwargs):
+    """
+    Read an Excel file with pandas, reporting failures instead of raising.
+
+    Args:
+        file_path: Path handed to ``pd.read_excel``
+        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``
+
+    Returns:
+        Whatever ``pd.read_excel`` returns, or None if it raised (the error
+        is printed to stdout)
+    """
+    loaded = None
+    try:
+        loaded = pd.read_excel(file_path, **kwargs)
+    except Exception as exc:
+        print(f"Error reading Excel file: {exc}")
+    return loaded
--- a/utils/logger_utils.py
+++ b/utils/logger_utils.py
@ -0,0 +1,31 @@
+# utils/logger_utils.py
+"""
+Logging configuration and utilities
+"""
+import logging
+import os
+from config.settings import LOG_DIR
+
+def setup_logger(name, log_file=None):
+    """
+    Return the logger called ``name``, configuring it on first use.
+
+    A logger that already has handlers is returned untouched. Otherwise its
+    level is set to INFO, a console handler is attached and, when
+    ``log_file`` is given, a file handler writing to ``LOG_DIR/log_file`` is
+    attached as well (``LOG_DIR`` is created if missing).
+
+    Args:
+        name: Logger name passed to ``logging.getLogger``
+        log_file: Optional file name, relative to ``LOG_DIR``
+
+    Returns:
+        The configured ``logging.Logger``
+    """
+    logger = logging.getLogger(name)
+
+    # Already configured by an earlier call: do not stack more handlers.
+    if logger.handlers:
+        return logger
+
+    logger.setLevel(logging.INFO)
+
+    if log_file:
+        os.makedirs(LOG_DIR, exist_ok=True)
+        target_path = os.path.join(LOG_DIR, log_file)
+        to_file = logging.FileHandler(target_path)
+        file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+        to_file.setFormatter(file_format)
+        logger.addHandler(to_file)
+
+    to_console = logging.StreamHandler()
+    console_format = logging.Formatter('%(levelname)s: %(message)s')
+    to_console.setFormatter(console_format)
+    logger.addHandler(to_console)
+
+    return logger
--- a/utils/logging_manager.py
+++ b/utils/logging_manager.py
@ -0,0 +1,64 @@
+# utils/logging_manager.py
+import logging
+import os
+from typing import Optional
+from datetime import datetime
+import tkinter as tk
+from queue import Queue, Empty
+from .output_redirector import OutputRedirector
+
+class LoggingManager:
+    """
+    Manages the shared "app_logger": writes it to a dated file under
+    ``<work_dir>/logs`` and can mirror it into a Tk text widget via a queue.
+    """
+
+    def __init__(self, work_dir: str):
+        """
+        Create the log directory and attach the file handler.
+
+        Args:
+            work_dir: Directory under which the ``logs`` folder is created
+        """
+        self.work_dir = work_dir
+        self.log_dir = os.path.join(work_dir, "logs")
+        os.makedirs(self.log_dir, exist_ok=True)
+
+        self.logger = self._setup_logger()
+        self.queue: Optional[Queue] = None
+        self.text_widget: Optional[tk.Text] = None
+        # GUI handler installed by setup_gui_logging, kept so it can be replaced
+        self._gui_handler: Optional[logging.Handler] = None
+
+    def _setup_logger(self) -> logging.Logger:
+        """
+        Configure "app_logger" at INFO level with a per-day log file.
+
+        Returns:
+            The shared logger instance
+        """
+        logger = logging.getLogger("app_logger")
+        logger.setLevel(logging.INFO)
+
+        # File handler
+        log_file = os.path.abspath(
+            os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")
+        )
+
+        # logging.getLogger returns the same object for the same name, so a
+        # second LoggingManager must not attach another handler for the same
+        # file (that would duplicate every line and leak a file handle).
+        for handler in logger.handlers:
+            if isinstance(handler, logging.FileHandler) and handler.baseFilename == log_file:
+                return logger
+
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setLevel(logging.INFO)
+
+        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+        file_handler.setFormatter(formatter)
+
+        logger.addHandler(file_handler)
+        return logger
+
+    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
+        """
+        Setup logging to GUI text widget
+
+        Args:
+            text_widget: Widget that ``process_log_queue`` writes into
+            queue: Queue that receives the formatted log messages
+        """
+        self.text_widget = text_widget
+        self.queue = queue
+
+        # Replace the handler from an earlier call instead of stacking a new
+        # one, so each record reaches the GUI exactly once.
+        previous = getattr(self, "_gui_handler", None)
+        if previous is not None:
+            self.logger.removeHandler(previous)
+
+        # Add handler for GUI logging
+        gui_handler = logging.StreamHandler(OutputRedirector(queue))
+        gui_handler.setLevel(logging.INFO)
+        formatter = logging.Formatter("%(message)s")
+        gui_handler.setFormatter(formatter)
+
+        self.logger.addHandler(gui_handler)
+        self._gui_handler = gui_handler
+
+    def process_log_queue(self):
+        """Move every message currently in the queue into the text widget."""
+        if self.queue and self.text_widget:
+            while True:
+                try:
+                    message = self.queue.get_nowait()
+                    self.text_widget.insert(tk.END, message)
+                    self.text_widget.see(tk.END)
+                    self.text_widget.update_idletasks()
+                except Empty:
+                    # Queue drained: stop until the next call
+                    break
+
+    def clear_output(self):
+        """Clear the text widget"""
+        if self.text_widget:
+            self.text_widget.delete("1.0", tk.END)
+            self.text_widget.update_idletasks()
--- a/utils/output_redirector.py
+++ b/utils/output_redirector.py
@ -0,0 +1,17 @@
+# utils/output_redirector.py
+"""
+Output redirector for capturing stdout/stderr
+"""
+import sys
+from queue import Queue
+from typing import Optional
+
+class OutputRedirector:
+    """Minimal stream-like object that forwards written text to a queue."""
+
+    def __init__(self, queue: Queue):
+        """Remember the queue that receives everything passed to ``write``."""
+        self.queue = queue
+
+    def write(self, string: str):
+        """Put ``string`` on the queue; returns nothing."""
+        target = self.queue
+        target.put(string)
+
+    def flush(self):
+        """Do nothing; exists so the object can stand in for a stream."""
+        return None
--- a/utils/progress_bar.py
+++ b/utils/progress_bar.py
@ -0,0 +1,51 @@
+# utils/progress_bar.py
+"""
+Progress bar implementation
+"""
+import tkinter as tk
+from tkinter import ttk
+from typing import Optional, Callable
+import sys
+from queue import Queue
+import threading
+
+class ProgressBar:
+    """
+    Text progress bar rendered as ``prefix [.....] suffix``.
+
+    Output goes to the callback set with ``set_output_callback`` or, when none
+    is set, to stdout.
+    """
+
+    def __init__(self, total: int, prefix: str = "", suffix: str = "", max_points: int = 30):
+        """
+        Args:
+            total: Value of ``current`` that corresponds to a full bar
+            prefix: Text written before the opening bracket
+            suffix: Text written after the closing bracket
+            max_points: Number of dots in a full bar
+        """
+        self.total = total
+        self.prefix = prefix
+        self.suffix = suffix
+        self.max_points = max_points
+        self.current = 0
+        self.last_points = 0  # dots already written
+        self.output_callback: Optional[Callable] = None
+
+    def set_output_callback(self, callback: Callable[[str], None]):
+        """Set callback function for output"""
+        self.output_callback = callback
+
+    def update(self, current: int):
+        """
+        Record the new position and write any dots that became due.
+
+        Args:
+            current: Absolute progress value, measured against ``total``
+        """
+        self.current = current
+        if self.total > 0:
+            points = min(int((current / self.total) * self.max_points), self.max_points)
+        else:
+            # No work to measure against: treat the bar as complete instead
+            # of dividing by zero.
+            points = self.max_points
+
+        if points > self.last_points:
+            new_points = points - self.last_points
+            self._write_output("." * new_points)
+            self.last_points = points
+
+    def increment(self):
+        """Advance the position by one step."""
+        self.update(self.current + 1)
+
+    def finish(self):
+        """Fill the bar with any missing dots and write the closing bracket and suffix."""
+        remaining_points = self.max_points - self.last_points
+        if remaining_points > 0:
+            self._write_output("." * remaining_points)
+            # Remember the bar is full so the padding is not written twice.
+            self.last_points = self.max_points
+        self._write_output(f"] {self.suffix}\n")
+
+    def start(self):
+        """Write the prefix and the opening bracket."""
+        self._write_output(f"\r{self.prefix} [")
+
+    def _write_output(self, text: str):
+        """Send ``text`` to the callback if one is set, otherwise print it unbuffered."""
+        if self.output_callback:
+            self.output_callback(text)
+        else:
+            print(text, end="", flush=True)
--- a/utils/script_registry.py
+++ b/utils/script_registry.py
@ -0,0 +1,68 @@
+# utils/script_registry.py
+from typing import Dict, Callable, List, Optional
+import importlib
+import inspect
+import os
+from pathlib import Path
+from config.profile_manager import Profile, ProfileManager
+
+class ScriptRegistry:
+    """Registry for script operations"""
+
+    def __init__(self):
+        """Start with no registered operations."""
+        self.operations: Dict[str, Callable] = {}    # name -> callable
+        self.descriptions: Dict[str, str] = {}       # name -> description text
+
+    def register(self, name: str, operation: Callable, description: str = ""):
+        """
+        Register a new operation
+
+        Args:
+            name: Key under which the operation is stored (replaces any
+                previous entry with the same name)
+            operation: Callable to run
+            description: Text returned alongside the name by ``get_operations``
+        """
+        self.operations[name] = operation
+        self.descriptions[name] = description
+
+    def auto_discover(self, scripts_dir: str = "scripts"):
+        """
+        Auto-discover scripts in the scripts directory
+
+        Every ``script_*.py`` file in ``scripts_dir`` (resolved relative to
+        the parent of this module's directory) is imported as
+        ``<scripts_dir>.<file stem>``. Modules exposing ``main`` are
+        registered under the file stem without the ``script_`` prefix, using
+        the module docstring as description. Import errors are printed and
+        the file is skipped.
+
+        Args:
+            scripts_dir: Directory (and package) name to scan
+        """
+        scripts_path = Path(__file__).parent.parent / scripts_dir
+
+        for file in scripts_path.glob("script_*.py"):
+            module_name = f"{scripts_dir}.{file.stem}"
+            try:
+                module = importlib.import_module(module_name)
+
+                # Look for main function and docstring
+                if hasattr(module, 'main'):
+                    name = file.stem.replace('script_', '')
+                    description = module.__doc__ or ""
+                    self.register(name, module.main, description)
+
+            except Exception as e:
+                # One broken script must not stop discovery of the others.
+                print(f"Error loading script {file}: {e}")
+
+    def get_operations(self) -> List[tuple]:
+        """Get list of available operations as ``(name, description)`` tuples"""
+        return [(name, self.descriptions[name]) for name in self.operations]
+
+    def run_operation(self, name: str, profile: Optional[Profile] = None, **kwargs):
+        """
+        Run a registered operation
+
+        Only arguments the operation can accept are passed: ``profile`` when
+        the operation declares a ``profile`` parameter, each keyword argument
+        whose name matches a declared parameter, and every keyword argument
+        when the operation declares a ``**kwargs`` catch-all.
+
+        Args:
+            name: Name of the operation to run
+            profile: Current profile instance (optional)
+            **kwargs: Additional arguments for the operation
+
+        Returns:
+            Whatever the operation returns
+
+        Raises:
+            ValueError: If no operation is registered under ``name``
+        """
+        if name not in self.operations:
+            raise ValueError(f"Unknown operation: {name}")
+
+        operation = self.operations[name]
+        params = inspect.signature(operation).parameters
+
+        # An operation declaring **kwargs explicitly asks for extra arguments.
+        accepts_extra = any(
+            param.kind is inspect.Parameter.VAR_KEYWORD for param in params.values()
+        )
+
+        call_args = {}
+        # Check if operation accepts profile parameter
+        if 'profile' in params:
+            call_args['profile'] = profile
+
+        # Add other kwargs the operation is able to receive
+        for key, value in kwargs.items():
+            if accepts_extra or key in params:
+                call_args[key] = value
+
+        return operation(**call_args)