From 6fca251249a2fba8a1d149722c718c7baad344e4 Mon Sep 17 00:00:00 2001 From: Miguel Date: Sun, 9 Feb 2025 13:22:25 +0100 Subject: [PATCH] Agregado de librerias --- .gitignore | 27 +++ commands.sh | 3 + services/excel/excel_service.py | 241 ++++++++++++++++++++ services/language/base.py | 25 ++ services/language/langid_service.py | 52 +++++ services/language/language_factory.py | 33 +++ services/language/language_utils.py | 68 ++++++ services/llm/base.py | 20 ++ services/llm/batch_processor.py | 228 ++++++++++++++++++ services/llm/grok_service.py | 63 +++++ services/llm/llm_factory.py | 33 +++ services/llm/ollama_service.py | 53 +++++ services/llm/openai_service.py | 69 ++++++ services/translation/base.py | 19 ++ services/translation/google_translate.py | 77 +++++++ services/translation/translation_factory.py | 32 +++ utils/file_utils.py | 39 ++++ utils/logger_utils.py | 31 +++ utils/logging_manager.py | 64 ++++++ utils/output_redirector.py | 17 ++ utils/progress_bar.py | 51 +++++ utils/script_registry.py | 68 ++++++ 22 files changed, 1313 insertions(+) create mode 100644 .gitignore create mode 100644 commands.sh create mode 100644 services/excel/excel_service.py create mode 100644 services/language/base.py create mode 100644 services/language/langid_service.py create mode 100644 services/language/language_factory.py create mode 100644 services/language/language_utils.py create mode 100644 services/llm/base.py create mode 100644 services/llm/batch_processor.py create mode 100644 services/llm/grok_service.py create mode 100644 services/llm/llm_factory.py create mode 100644 services/llm/ollama_service.py create mode 100644 services/llm/openai_service.py create mode 100644 services/translation/base.py create mode 100644 services/translation/google_translate.py create mode 100644 services/translation/translation_factory.py create mode 100644 utils/file_utils.py create mode 100644 utils/logger_utils.py create mode 100644 utils/logging_manager.py create mode 100644 
class ExcelService:
    """Read and write Excel workbooks with retries and basic formatting.

    Reads go through ``pandas.read_excel`` (openpyxl engine) and writes through
    ``pandas.ExcelWriter``. Both operations are attempted up to ``max_retries``
    times, sleeping ``retry_delay`` seconds between attempts, so a workbook
    that is still open in Excel can be closed by the user and picked up on the
    next attempt.
    """

    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    def read_excel(
        self,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        **kwargs
    ) -> pd.DataFrame:
        """
        Read an Excel sheet into a DataFrame, retrying on failure.

        Line breaks in string cells are normalized to ``\\n``. When openpyxl
        rejects the workbook because of a malformed auto-filter, the filters
        are stripped from a temporary copy and that copy is read instead.

        Args:
            file_path: Path to Excel file
            sheet_name: Name of sheet to read
            **kwargs: Additional arguments for pd.read_excel

        Returns:
            DataFrame with the Excel content

        Raises:
            Exception: If the file could not be read after ``max_retries``
                attempts; the last underlying error is chained as the cause.
        """
        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                df = pd.read_excel(
                    file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs
                )
                df = self._clean_dataframe(df)
                print(f"Archivo leído y limpiado exitosamente: {file_path}")
                return df

            except ValueError as ve:
                last_error = ve
                if "must be either numerical or a string containing a wildcard" in str(ve):
                    print(f"Error al leer el archivo: {ve}")
                    print("Intentando eliminar filtros y leer el archivo nuevamente...")
                    try:
                        return self._read_without_filters(file_path, sheet_name, **kwargs)
                    except Exception as e:
                        last_error = e
                        print(f"Error al intentar eliminar filtros y leer el archivo: {e}")
                else:
                    print(f"Error de valor: {ve}")

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            # No point in waiting once the last attempt has already failed.
            if attempt < self.max_retries:
                time.sleep(self.retry_delay)

        raise Exception(
            f"No se pudo leer el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _read_without_filters(
        self,
        file_path: Union[str, Path],
        sheet_name: str,
        **kwargs
    ) -> pd.DataFrame:
        """Read ``sheet_name`` from a temporary copy of the workbook with auto-filters removed."""
        wb = openpyxl.load_workbook(filename=file_path)

        # Clear the filter on every worksheet, not only the active one: the
        # requested sheet is not necessarily the active sheet.
        for sheet in wb.worksheets:
            if sheet.auto_filter:
                sheet.auto_filter.ref = None

        temp_file = Path(str(file_path) + "_temp.xlsx")
        try:
            wb.save(str(temp_file))
            df = pd.read_excel(
                temp_file, engine="openpyxl", sheet_name=sheet_name, **kwargs
            )
        finally:
            # Remove the temporary copy even when saving or reading failed.
            if temp_file.exists():
                temp_file.unlink()

        return self._clean_dataframe(df)

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_clean_special_chars`` to every non-null cell of ``df`` (in place)."""
        for col in df.columns:
            df[col] = df[col].apply(
                lambda x: self._clean_special_chars(x) if pd.notna(x) else x
            )
        return df

    def save_excel(
        self,
        df: pd.DataFrame,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        format_options: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        """
        Save a DataFrame to Excel with formatting, retrying on failure.

        Args:
            df: DataFrame to save
            file_path: Path to save Excel file
            sheet_name: Name of sheet
            format_options: Dictionary with formatting options
                (see ``_format_worksheet`` for the recognized keys)
            **kwargs: Additional arguments for DataFrame.to_excel
                (``index`` defaults to False unless given here)

        Raises:
            Exception: If the file could not be written after ``max_retries``
                attempts; the last underlying error is chained as the cause.
        """
        if format_options is None:
            format_options = {}

        # Caller-supplied options win over the historical index=False default.
        to_excel_kwargs = {"index": False, **kwargs}

        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
                    df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
                    self._format_worksheet(writer.sheets[sheet_name], format_options)

                print(f"Archivo guardado exitosamente en: {file_path}")
                return

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            if attempt < self.max_retries:
                time.sleep(self.retry_delay)

        raise Exception(
            f"No se pudo guardar el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _format_worksheet(self, worksheet: "Worksheet", options: Dict[str, Any]) -> None:
        """
        Apply freeze panes, column widths, text wrapping and header styling.

        Args:
            worksheet: Worksheet to format
            options: Formatting options. Recognized keys: ``freeze_row``,
                ``freeze_col``, ``max_column_width``, ``min_column_width``,
                ``wrap_threshold``, ``header_row``, ``header_color``.
        """
        # Freeze panes: rows above / columns left of the anchor cell stay fixed.
        freeze_row = options.get('freeze_row', 2)
        freeze_col = options.get('freeze_col', 1)
        if freeze_row or freeze_col:
            # A missing or zero coordinate means "freeze nothing on that axis";
            # index 1 is the first valid row/column for the anchor cell.
            anchor = f"{get_column_letter(freeze_col or 1)}{freeze_row or 1}"
            worksheet.freeze_panes = anchor

        # Auto-adjust column widths
        max_width = options.get('max_column_width', 50)
        min_width = options.get('min_column_width', 8)
        wrap_threshold = options.get('wrap_threshold', 50)

        for column_cells in worksheet.columns:
            column_letter = column_cells[0].column_letter
            longest = 0

            for cell in column_cells:
                try:
                    if not cell.value:
                        continue
                    text = str(cell.value)
                    text_length = len(text)
                    if text_length > wrap_threshold:
                        # Long text is wrapped; size the column by its longest
                        # word (0 for whitespace-only text), capped at the
                        # wrap threshold.
                        cell.alignment = Alignment(wrap_text=True, vertical='top')
                        text_length = min(
                            wrap_threshold,
                            max((len(word) for word in text.split()), default=0)
                        )
                except (AttributeError, TypeError, ValueError):
                    # Best effort: a cell that cannot be measured or styled
                    # must not abort formatting of the whole sheet.
                    continue
                longest = max(longest, text_length)

            worksheet.column_dimensions[column_letter].width = min(
                max_width, max(min_width, longest + 2)
            )

        # Header styling
        header_row = options.get('header_row', 1)
        if header_row:
            header_color = options.get('header_color', 'F2F2F2')
            header_fill = PatternFill(
                start_color=header_color,
                end_color=header_color,
                fill_type='solid'
            )
            header_font = Font(bold=True)

            for cell in worksheet[header_row]:
                cell.fill = header_fill
                cell.font = header_font

    def _clean_special_chars(self, text: Any) -> Any:
        """Normalize line breaks in strings to ``\\n``; return other values unchanged."""
        if isinstance(text, str):
            text = text.replace('\r\n', '\n').replace('\r', '\n')
        return text
allowed_languages: Optional[Set[str]] = None): + """ + Initialize langid service + + Args: + allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'}) + If None, all languages supported by langid will be allowed + """ + if allowed_languages: + langid.set_languages(list(allowed_languages)) + self.allowed_languages = allowed_languages + + def detect_language(self, text: str) -> Tuple[str, float]: + """ + Detect language of a text using langid + + Args: + text: Text to analyze + + Returns: + Tuple of (language_code, confidence_score) + """ + try: + if not text or len(text.strip()) < 3: + return ("unknown", 0.0) + + lang, score = langid.classify(text.strip()) + return (lang, score) + except Exception as e: + print(f"Error in language detection: {e}") + return ("unknown", 0.0) + + def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]: + """ + Detect language of multiple texts + + Args: + texts: List of texts to analyze + + Returns: + List of tuples (language_code, confidence_score) + """ + return [self.detect_language(text) for text in texts] \ No newline at end of file diff --git a/services/language/language_factory.py b/services/language/language_factory.py new file mode 100644 index 0000000..ac65561 --- /dev/null +++ b/services/language/language_factory.py @@ -0,0 +1,33 @@ +# services/language/language_factory.py +""" +Factory class for creating language detection services +""" +from typing import Optional, Set +from .langid_service import LangIdService + +class LanguageFactory: + """Factory class for creating language detection service instances""" + + @staticmethod + def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> Optional['LanguageDetectionService']: + """ + Create an instance of the specified language detection service + + Args: + service_type: Type of language detection service ("langid", etc.) 
class LanguageUtils:
    """Lookup helpers for the language codes this project works with.

    All lookups are case-insensitive and accept either a short code (``en``)
    or a full tag using ``-`` or ``_`` as separator (``en-GB``, ``en_GB``).
    """

    # short code -> (English language name, full locale code)
    LANGUAGE_CODES = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }

    @staticmethod
    def _normalize(code) -> str:
        """Reduce any language tag to its lower-case primary subtag ('' for non-strings)."""
        if not isinstance(code, str):
            return ''
        return code.strip().replace('_', '-').split('-')[0].lower()

    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Get full language name from code ('Unknown' if the code is not listed)"""
        return cls.LANGUAGE_CODES.get(cls._normalize(code), ('Unknown', ''))[0]

    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Get full language code (e.g., 'en-GB' from 'en'; 'unknown' if not listed)"""
        return cls.LANGUAGE_CODES.get(cls._normalize(short_code), ('Unknown', 'unknown'))[1]

    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Get short language code (e.g., 'en' from 'en-GB')"""
        return cls._normalize(full_code)

    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Check if a language code (short or full) is one of LANGUAGE_CODES"""
        return cls._normalize(code) in cls.LANGUAGE_CODES

    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Get set of available (short) language codes"""
        return set(cls.LANGUAGE_CODES)
@dataclass
class BatchConfig:
    """Configuration for batch processing"""
    # Number of items sent to the LLM in one request.
    batch_size: int = 20
    # Attempts made per batch before its results are recorded as None.
    max_retries: int = 3
    # Seconds to wait between attempts on the same batch.
    retry_delay: int = 3
    # Optional sink for progress and error messages.
    progress_callback: Optional[Callable[[str], None]] = None


class BatchProcessor:
    """
    Handles batch processing for LLM operations
    """

    def __init__(
        self,
        llm_service: "LLMService",
        config: Optional[BatchConfig] = None
    ):
        self.llm_service = llm_service
        self.config = config or BatchConfig()

    def _notify(self, message: str) -> None:
        """Forward ``message`` to the configured progress callback, if any."""
        if self.config.progress_callback:
            self.config.progress_callback(message)

    def process_batch(
        self,
        items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """
        Process items in batches with consistent context

        Each batch is serialized as JSON (``{"items": [...], "template": ...}``),
        appended to ``system_prompt`` and sent through
        ``llm_service.generate_text``. The response must be a JSON array with
        one entry per input item.

        Args:
            items: List of dictionaries containing data to process
            system_prompt: System prompt for context
            template: Template string for formatting requests
            output_processor: Optional function applied to each response entry

        Returns:
            List with one result per input item, in input order. Items of a
            batch that still fails after all retries are returned as None.

        Raises:
            ValueError: If the configured batch size is smaller than 1.
        """
        if self.config.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        results: List[Any] = []
        total_items = len(items)
        # Always try each batch at least once, even if max_retries is 0.
        attempts = max(1, self.config.max_retries)

        # Setup progress tracking
        progress = ProgressBar(
            total_items,
            "Processing batches:",
            "Complete"
        )
        if self.config.progress_callback:
            progress.set_output_callback(self.config.progress_callback)
        progress.start()

        # Process in batches
        for start_idx in range(0, total_items, self.config.batch_size):
            end_idx = min(start_idx + self.config.batch_size, total_items)
            batch_items = items[start_idx:end_idx]

            request_payload = json.dumps({
                "items": batch_items,
                "template": template
            })
            # LLMService.generate_text takes a single prompt, so the system
            # prompt and the payload are joined the same way the similarity
            # helpers of the concrete services do it.
            prompt = f"{system_prompt}\n\n{request_payload}"

            batch_results = None
            for attempt in range(attempts):
                try:
                    response = self.llm_service.generate_text(prompt)
                    candidate = self._process_response(response, output_processor)

                    if len(candidate) != len(batch_items):
                        raise ValueError(
                            "Response count doesn't match input count"
                        )

                    batch_results = candidate
                    break

                except Exception as e:
                    if attempt < attempts - 1:
                        self._notify(
                            f"Error in batch {start_idx}-{end_idx}: {e}. Retrying..."
                        )
                        time.sleep(self.config.retry_delay)
                    else:
                        self._notify(f"Error in batch {start_idx}-{end_idx}: {e}")

            if batch_results is None:
                # Keep results aligned with the input even when a batch fails.
                batch_results = [None] * len(batch_items)
            results.extend(batch_results)

            # Update progress
            progress.update(end_idx)

        progress.finish()
        return results

    def _process_response(
        self,
        response: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """
        Parse an LLM response into a list of results.

        Args:
            response: Raw text returned by the LLM service
            output_processor: Optional function applied to each parsed entry

        Returns:
            The parsed JSON array, with ``output_processor`` applied if given.

        Raises:
            ValueError: If the response is not a string, is not valid JSON,
                or is not a JSON array.
        """
        # The concrete services return None when the API call itself failed.
        if not isinstance(response, str):
            raise ValueError("LLM service returned no text response")

        try:
            parsed = json.loads(response)
        except json.JSONDecodeError as exc:
            raise ValueError("Failed to parse LLM response as JSON") from exc

        if not isinstance(parsed, list):
            raise ValueError("LLM response is not a JSON array")

        if output_processor:
            return [output_processor(item) for item in parsed]
        return parsed


# Example specialized batch processor for translations
class TranslationBatchProcessor(BatchProcessor):
    """Specialized batch processor for translations"""

    def translate_batch(
        self,
        texts: List[str],
        source_lang: str,
        target_lang: str
    ) -> List[str]:
        """
        Translate a batch of texts

        Args:
            texts: List of texts to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            List of translated texts (None for texts whose batch failed)
        """
        # Prepare items
        items = [{"text": text} for text in texts]

        # Setup prompts
        system_prompt = (
            "You are a translator. Translate the provided texts "
            "maintaining special fields like <> and <#>."
        )

        # "{{text}}" is escaped so that format() fills in only the two
        # languages and leaves a literal "{text}" placeholder in the template.
        template = (
            "Translate the following texts from {source_lang} to {target_lang}. "
            "Return translations as a JSON array of strings:"
            "\n\n{{text}}"
        ).format(source_lang=source_lang, target_lang=target_lang)

        # Process batch
        return self.process_batch(
            items=items,
            system_prompt=system_prompt,
            template=template
        )
class GrokService(LLMService):
    """Placeholder LLM service for Grok.

    No API call is implemented yet: both public methods report that fact on
    stdout and return None.
    """

    # Prompt to send with the serialized text pairs once the API call exists.
    _SIMILARITY_PROMPT = (
        "Evaluate the semantic similarity between the following table of pairs of texts "
        "in json format on a scale from 0 to 1. Return the similarity scores for every "
        "row in JSON format as a list of numbers, without any additional text or formatting."
    )

    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
        """
        Args:
            model: Model name to request
            temperature: Sampling temperature to request

        Raises:
            ValueError: If APIKeyManager provides no Grok API key.
        """
        api_key = APIKeyManager.get_grok_key()
        if not api_key:
            raise ValueError("Grok API key not found. Please set up your API keys.")

        self.api_key = api_key
        self.model = model
        self.temperature = temperature

    def generate_text(self, prompt: str) -> Optional[str]:
        """
        Generate text using the Grok API

        TODO: Update this method when Grok API is available

        Returns:
            Always None for now; the missing implementation is reported on stdout.
        """
        print("Error in Grok API call: Grok API is not implemented yet")
        return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> Optional[List[float]]:
        """
        Calculate similarity scores using the Grok API

        TODO: Update this method when Grok API is available

        Returns:
            Always None for now; the missing implementation is reported on stdout.
        """
        print("Error in Grok similarity calculation: Grok API is not implemented yet")
        return None


# Update config/api_keys.py to include Grok
# NOTE(review): this snippet is meant to be moved into APIKeyManager. As a
# module-level ``classmethod`` object it cannot be called from here.
@classmethod
def get_grok_key(cls) -> Optional[str]:
    """Get Grok API key from environment or stored configuration"""
    import os  # this module does not import os at the top

    return (
        os.getenv('GROK_API_KEY') or
        cls._get_stored_key('grok')
    )
class OpenAIService(LLMService):
    """LLM service backed by the OpenAI chat-completions API."""

    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3):
        """
        Args:
            model: Chat model name
            temperature: Sampling temperature

        Raises:
            ValueError: If APIKeyManager provides no OpenAI API key.
        """
        api_key = APIKeyManager.get_openai_key()
        if not api_key:
            raise ValueError("OpenAI API key not found. Please set up your API keys.")

        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove surrounding quotes and a Markdown code fence (``` or ```json) from ``text``."""
        cleaned = text.strip().strip("'").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # Optional language tag right after the opening fence.
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            if cleaned.rstrip().endswith("```"):
                cleaned = cleaned.rstrip()[:-3]
        return cleaned.strip()

    def generate_text(self, prompt: str) -> str:
        """
        Send ``prompt`` as a single user message and return the reply text.

        Returns:
            The model's reply, or None if the API call failed (the error is
            printed, not raised).
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=1500
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error in OpenAI API call: {e}")
            return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Ask the model for a similarity score (0 to 1) per row of ``texts_pairs``.

        Args:
            texts_pairs: Table of text pairs; it is sent to the model as JSON.

        Returns:
            List of scores, or None if the call failed or the reply could not
            be interpreted (the error is printed, not raised).
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )

        request_payload = json.dumps(texts_pairs)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": request_payload}
                ],
                temperature=self.temperature,
                max_tokens=1500
            )

            # str.strip() takes a set of characters, not a prefix/suffix, so
            # the fence is removed explicitly instead.
            cleaned_response = self._strip_code_fence(
                response.choices[0].message.content
            )

            try:
                scores = json.loads(cleaned_response)
            except json.JSONDecodeError:
                raise ValueError("Could not decode response as JSON")

            if isinstance(scores, dict) and "similarity_scores" in scores:
                return scores["similarity_scores"]
            if isinstance(scores, list):
                return scores
            raise ValueError("Unexpected response format")

        except Exception as e:
            print(f"Error in OpenAI similarity calculation: {e}")
            return None
class GoogleTranslateService(TranslationService):
    """Translation service backed by the Google Cloud Translation v2 client."""

    def __init__(self, credentials_file: Optional[str] = None):
        """
        Initialize Google Translate service

        Args:
            credentials_file: Path to Google service account credentials JSON file.
                            If None, an API key must be available from APIKeyManager.

        Raises:
            ValueError: If the credentials file cannot be used, or if no
                credentials file is given and no Google API key is configured.
        """
        if credentials_file:
            # Use service account credentials
            try:
                credentials = service_account.Credentials.from_service_account_file(credentials_file)
                self.client = translate.Client(credentials=credentials)
            except Exception as e:
                raise ValueError(f"Error initializing Google Translate with credentials: {e}") from e
        else:
            api_key = APIKeyManager.get_google_key()
            if not api_key:
                raise ValueError("Google API key not found. Please set up your API keys.")
            # NOTE(review): the key is only checked for presence; it is not
            # passed to the client, which is created with its default
            # credential lookup. Confirm whether the key should be handed over.
            self.client = translate.Client()

    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """
        Translate a single text.

        Args:
            text: Text to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.

        Returns:
            Translated text, with HTML entities unescaped

        Raises:
            ValueError: If the translation request fails (original error chained).
        """
        try:
            result = self.client.translate(
                text,
                target_language=target_language,
                source_language=source_language
            )
            return html.unescape(result["translatedText"])
        except Exception as e:
            raise ValueError(f"Error in Google Translate: {e}") from e

    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """
        Translate multiple texts in batch.

        Args:
            texts: List of texts to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.

        Returns:
            List of translated texts, with HTML entities unescaped
            (empty list for empty input, without calling the API)

        Raises:
            ValueError: If the translation request fails (original error chained).
        """
        if not texts:
            return []

        try:
            results = self.client.translate(
                texts,
                target_language=target_language,
                source_language=source_language
            )
            return [html.unescape(result["translatedText"]) for result in results]
        except Exception as e:
            raise ValueError(f"Error in Google Translate batch: {e}") from e
def select_file(title="Select file", filetypes=None):
    """Show a native "open file" dialog and return the chosen path.

    Args:
        title: Dialog window title.
        filetypes: Sequence of ``(label, pattern)`` pairs passed to the dialog.
            Defaults to Excel workbooks plus an "All files" entry.

    Returns:
        The selected path, or None if the dialog was cancelled.
    """
    if filetypes is None:
        filetypes = [
            # Patterns are given as a tuple: a semicolon-joined string is a
            # Windows convention and matches nothing on other platforms.
            ("Excel files", ("*.xlsx", "*.xls")),
            ("All files", "*.*")
        ]

    # A hidden root is required to host the dialog without showing a window.
    root = tk.Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(
            title=title,
            filetypes=filetypes
        )
    finally:
        # Tear the hidden root down so repeated calls do not pile up Tk instances.
        root.destroy()

    return file_path if file_path else None


def select_directory(title="Select directory"):
    """Show a native "choose directory" dialog and return the chosen path.

    Args:
        title: Dialog window title.

    Returns:
        The selected directory, or None if the dialog was cancelled.
    """
    root = tk.Tk()
    root.withdraw()
    try:
        dir_path = filedialog.askdirectory(title=title)
    finally:
        # Same clean-up as select_file: never leave the hidden root alive.
        root.destroy()

    return dir_path if dir_path else None


def safe_read_excel(file_path, **kwargs):
    """Read an Excel file with pandas, returning None instead of raising.

    Args:
        file_path: Path of the workbook to read.
        **kwargs: Forwarded unchanged to ``pandas.read_excel``.

    Returns:
        Whatever ``pandas.read_excel`` returns, or None if reading failed
        (the error is printed to stdout).
    """
    try:
        return pd.read_excel(file_path, **kwargs)
    except Exception as e:
        # Deliberate best-effort: callers test for None rather than catching.
        print(f"Error reading Excel file: {e}")
        return None
class LoggingManager:
    """Configure the shared "app_logger" to write to a dated file and, optionally, a GUI text widget."""

    def __init__(self, work_dir: str):
        """Create ``<work_dir>/logs`` if needed and attach the file handler.

        Args:
            work_dir: Base directory; log files go into its ``logs`` sub-directory.
        """
        self.work_dir = work_dir
        self.log_dir = os.path.join(work_dir, "logs")
        os.makedirs(self.log_dir, exist_ok=True)

        self.logger = self._setup_logger()
        self.queue: Optional[Queue] = None
        self.text_widget: Optional[tk.Text] = None
        # Handler installed by setup_gui_logging, remembered so it can be replaced.
        self._gui_handler: Optional[logging.Handler] = None

    def _setup_logger(self) -> logging.Logger:
        """Return the "app_logger" logger with exactly one handler for today's log file."""
        logger = logging.getLogger("app_logger")
        logger.setLevel(logging.INFO)

        # File handler
        log_file = os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")

        # logging.getLogger returns the same object on every call, so a second
        # LoggingManager would otherwise stack another handler on it and every
        # record would be written twice.
        target = os.path.abspath(log_file)
        already_attached = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_file, encoding="utf-8")
            file_handler.setLevel(logging.INFO)

            formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
            file_handler.setFormatter(formatter)

            logger.addHandler(file_handler)
        return logger

    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
        """Setup logging to GUI text widget

        Args:
            text_widget: Widget that process_log_queue() appends messages to.
            queue: Queue that receives formatted log messages.
        """
        self.text_widget = text_widget
        self.queue = queue

        # Replace (rather than accumulate) a handler from an earlier call.
        previous = getattr(self, "_gui_handler", None)
        if previous is not None:
            self.logger.removeHandler(previous)

        # Add handler for GUI logging
        gui_handler = logging.StreamHandler(OutputRedirector(queue))
        gui_handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(message)s")
        gui_handler.setFormatter(formatter)

        self.logger.addHandler(gui_handler)
        self._gui_handler = gui_handler

    def process_log_queue(self):
        """Process pending log messages"""
        if self.queue is None or self.text_widget is None:
            return

        appended = False
        while True:
            try:
                message = self.queue.get_nowait()
            except Empty:
                break
            self.text_widget.insert(tk.END, message)
            appended = True

        # Scroll and redraw once per drain instead of once per message.
        if appended:
            self.text_widget.see(tk.END)
            self.text_widget.update_idletasks()

    def clear_output(self):
        """Clear the text widget"""
        if self.text_widget:
            self.text_widget.delete("1.0", tk.END)
            self.text_widget.update_idletasks()
class ScriptRegistry:
    """Registry mapping operation names to callables and their descriptions."""

    _SCRIPT_PREFIX = "script_"

    def __init__(self):
        self.operations: Dict[str, Callable] = {}
        self.descriptions: Dict[str, str] = {}

    def register(self, name: str, operation: Callable, description: str = ""):
        """Register a new operation (an existing entry with the same name is replaced)."""
        self.operations[name] = operation
        self.descriptions[name] = description

    def auto_discover(self, scripts_dir: str = "scripts"):
        """Auto-discover scripts in the scripts directory

        Every ``script_*.py`` file in ``<parent of this file's directory>/<scripts_dir>``
        is imported as ``<scripts_dir>.<stem>``. Modules exposing a ``main``
        attribute are registered under the file stem minus the ``script_``
        prefix, with the module docstring as description. Failures are printed
        and do not stop discovery.
        """
        scripts_path = Path(__file__).parent.parent / scripts_dir
        prefix = self._SCRIPT_PREFIX

        # Sorted so registration order does not depend on directory listing order.
        for file in sorted(scripts_path.glob(f"{prefix}*.py")):
            module_name = f"{scripts_dir}.{file.stem}"
            try:
                module = importlib.import_module(module_name)

                # Look for main function and docstring
                if hasattr(module, 'main'):
                    # Drop only the leading prefix; str.replace would also eat
                    # later occurrences (script_my_script_x -> my_x).
                    name = file.stem[len(prefix):]
                    description = module.__doc__ or ""
                    self.register(name, module.main, description)

            except Exception as e:
                # Best-effort: one broken script must not block the others.
                print(f"Error loading script {file}: {e}")

    def get_operations(self) -> List[tuple]:
        """Get list of available operations as ``(name, description)`` tuples"""
        return [(name, self.descriptions[name]) for name in self.operations]

    def run_operation(self, name: str, profile: Optional["Profile"] = None, **kwargs):
        """
        Run a registered operation

        Args:
            name: Name of the operation to run
            profile: Current profile instance (optional); passed only if the
                operation declares a ``profile`` parameter
            **kwargs: Additional arguments for the operation. Arguments the
                operation does not declare are dropped, unless the operation
                itself accepts ``**kwargs``, in which case all are forwarded.

        Returns:
            Whatever the operation returns.

        Raises:
            ValueError: If no operation is registered under ``name``.
        """
        if name not in self.operations:
            raise ValueError(f"Unknown operation: {name}")

        operation = self.operations[name]
        params = inspect.signature(operation).parameters

        # Check if operation accepts profile parameter
        call_args = {}
        if 'profile' in params:
            call_args['profile'] = profile

        accepts_any_keyword = any(
            param.kind is inspect.Parameter.VAR_KEYWORD for param in params.values()
        )
        if accepts_any_keyword:
            # The operation opted in to arbitrary keywords: do not filter.
            call_args.update(kwargs)
        else:
            # Add other kwargs that match the signature
            call_args.update(
                {key: value for key, value in kwargs.items() if key in params}
            )

        return operation(**call_args)