commit aa543d650fe2613482a8990075aa76608446d261 Author: Miguel Date: Thu Feb 6 14:11:57 2025 +0100 Original diff --git a/config/__pycache__/api_keys.cpython-310.pyc b/config/__pycache__/api_keys.cpython-310.pyc new file mode 100644 index 0000000..645dd64 Binary files /dev/null and b/config/__pycache__/api_keys.cpython-310.pyc differ diff --git a/config/__pycache__/api_setup.cpython-310.pyc b/config/__pycache__/api_setup.cpython-310.pyc new file mode 100644 index 0000000..939d458 Binary files /dev/null and b/config/__pycache__/api_setup.cpython-310.pyc differ diff --git a/config/__pycache__/settings.cpython-310.pyc b/config/__pycache__/settings.cpython-310.pyc new file mode 100644 index 0000000..d3c3bcc Binary files /dev/null and b/config/__pycache__/settings.cpython-310.pyc differ diff --git a/config/api_keys.py b/config/api_keys.py new file mode 100644 index 0000000..b590b18 --- /dev/null +++ b/config/api_keys.py @@ -0,0 +1,86 @@ +# config/api_keys.py +""" +API keys configuration +The keys can be set through environment variables or stored in a .env file +""" +import os +from pathlib import Path +from dotenv import load_dotenv +import json +from typing import Optional + +# Load .env file if it exists +env_path = Path(__file__).parent.parent / '.env' +if env_path.exists(): + load_dotenv(env_path) + +class APIKeyManager: + """Manages API keys and their storage/retrieval""" + + # Define default paths + DEFAULT_KEY_FILE = Path(__file__).parent / 'stored_keys.json' + + @classmethod + def get_openai_key(cls) -> Optional[str]: + """Get OpenAI API key from environment or stored configuration""" + return ( + os.getenv('OPENAI_API_KEY') or + cls._get_stored_key('openai') + ) + + @classmethod + def get_google_key(cls) -> Optional[str]: + """Get Google API key from environment or stored configuration""" + return ( + os.getenv('GOOGLE_API_KEY') or + cls._get_stored_key('google') + ) + + @classmethod + def get_anthropic_key(cls) -> Optional[str]: + """Get Anthropic API key from environment or stored configuration""" + return ( + os.getenv('ANTHROPIC_API_KEY') or + cls._get_stored_key('anthropic') + ) + + @classmethod + def get_grok_key(cls) -> Optional[str]: + """Get Grok API key from environment or stored configuration""" + return ( + os.getenv('GROK_API_KEY') or + cls._get_stored_key('grok') + ) + + @classmethod + def store_key(cls, service: str, key: str) -> None: + """Store an API key in the configuration file""" + stored_keys = cls._read_stored_keys() + stored_keys[service] = key + + cls.DEFAULT_KEY_FILE.parent.mkdir(exist_ok=True) + with open(cls.DEFAULT_KEY_FILE, 'w') as f: + json.dump(stored_keys, f) + + @classmethod + def get_google_credentials_path(cls) -> Optional[str]: + """Get path to Google credentials JSON file""" + return os.getenv('GOOGLE_CREDENTIALS_PATH') or cls._get_stored_key('google_credentials_path') + + @classmethod + def _get_stored_key(cls, service: str) -> Optional[str]: + """Get a stored API key from the configuration file""" + stored_keys = cls._read_stored_keys() + return stored_keys.get(service) + + @classmethod + def _read_stored_keys(cls) -> dict: + """Read stored keys from configuration file""" + if cls.DEFAULT_KEY_FILE.exists(): + try: + with open(cls.DEFAULT_KEY_FILE, 'r') as f: + return json.load(f) + except json.JSONDecodeError: + print(f"Error reading stored keys from {cls.DEFAULT_KEY_FILE}") + return {} + return {} \ No newline at end of file diff --git a/config/api_setup.py b/config/api_setup.py new file mode 100644 index 0000000..82b0c2f --- /dev/null +++ 
b/config/api_setup.py @@ -0,0 +1,65 @@ +# config/api_setup.py +""" +Setup script for API keys +""" +import tkinter as tk +from tkinter import ttk +from .api_keys import APIKeyManager +import sys + +class APISetupGUI: + def __init__(self): + self.root = tk.Tk() + self.root.title("API Key Setup") + self.root.geometry("400x300") + + self.services = { + "OpenAI": APIKeyManager.get_openai_key, + "Google": APIKeyManager.get_google_key, + "Anthropic": APIKeyManager.get_anthropic_key + } + + self.setup_ui() + + def setup_ui(self): + # Create main frame + main_frame = ttk.Frame(self.root, padding="10") + main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) + + # Create entries for each service + self.entries = {} + for i, (service, _) in enumerate(self.services.items()): + ttk.Label(main_frame, text=f"{service} API Key:").grid( + row=i, column=0, pady=5, sticky=tk.W + ) + + entry = ttk.Entry(main_frame, width=40) + entry.grid(row=i, column=1, pady=5, padx=5) + current_key = self.services[service]() + if current_key: + entry.insert(0, current_key) + + self.entries[service.lower()] = entry + + # Add save button + ttk.Button( + main_frame, + text="Save Keys", + command=self.save_keys + ).grid(row=len(self.services), column=0, columnspan=2, pady=20) + + def save_keys(self): + for service, entry in self.entries.items(): + key = entry.get().strip() + if key: + APIKeyManager.store_key(service, key) + + self.root.destroy() + + def run(self): + self.root.mainloop() + +def setup_apis(): + """Run the API setup GUI""" + app = APISetupGUI() + app.run() \ No newline at end of file diff --git a/config/settings.py b/config/settings.py new file mode 100644 index 0000000..6ab9a18 --- /dev/null +++ b/config/settings.py @@ -0,0 +1,15 @@ +# config/settings.py +""" +General configuration settings +""" +import os + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +LOG_DIR = os.path.join(BASE_DIR, 'logs') +DEFAULT_WORK_DIR = os.path.expanduser("~/Documents/WorkFolder") + +LLM_SETTINGS = { + "default_model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1000 +} \ No newline at end of file diff --git a/library-docs.md b/library-docs.md new file mode 100644 index 0000000..b569ba9 --- /dev/null +++ b/library-docs.md @@ -0,0 +1,282 @@ +# Base Project Library Documentation + +## Overview +A modular Python library designed for building applications that work with LLMs, translation services, and file processing, with a focus on Windows environments. + +## Table of Contents +1. [Installation](#installation) +2. [Project Structure](#project-structure) +3. [Core Components](#core-components) +4. [Services](#services) +5. [Utilities](#utilities) +6. 
[Examples](#examples) + +## Installation + +### Requirements +``` +Python 3.8+ +``` + +### Dependencies +```bash +pip install -r requirements.txt +``` + +Required packages: +```text +python-dotenv>=1.0.0 +openai>=1.0.0 +google-cloud-translate>=3.0.0 +pandas>=2.0.0 +openpyxl>=3.0.0 +langid>=1.1.6 +tk>=0.1.0 +``` + +## Project Structure +``` +base_project/ +├── config/ +│ ├── __init__.py +│ ├── api_keys.py # API key management +│ ├── api_setup.py # GUI for API configuration +│ └── settings.py # General settings +│ +├── services/ +│ ├── llm/ # LLM Services +│ │ ├── base.py +│ │ ├── openai_service.py +│ │ ├── grok_service.py +│ │ ├── ollama_service.py +│ │ ├── batch_processor.py +│ │ └── llm_factory.py +│ │ +│ ├── excel/ # Excel Services +│ │ └── excel_service.py +│ │ +│ └── translation/ # Translation Services +│ ├── base.py +│ ├── google_translate.py +│ └── translation_factory.py +│ +├── utils/ +│ ├── file_utils.py # File handling utilities +│ ├── gui_utils.py # GUI utilities +│ ├── logger_utils.py # Logging utilities +│ └── progress_bar.py # Progress tracking +│ +├── menu.py # Main application interface +├── requirements.txt +└── README.md +``` + +## Core Components + +### 1. API Key Management +The APIKeyManager provides secure storage and retrieval of API keys: + +```python +from config.api_keys import APIKeyManager + +# Get API keys +openai_key = APIKeyManager.get_openai_key() +google_key = APIKeyManager.get_google_key() + +# Store new key +APIKeyManager.store_key("openai", "your-key-here") +``` + +### 2. GUI Interface +The main menu provides a user-friendly interface: + +```python +from menu import MainMenu +import tkinter as tk + +def main(): + root = tk.Tk() + app = MainMenu(root) + root.mainloop() +``` + +## Services + +### LLM Services +Modular services for different LLM providers with a unified interface. + +#### Factory Pattern Usage +```python +from services.llm.llm_factory import LLMFactory + +# Create service instance +llm_service = LLMFactory.create_service("openai", model="gpt-4") + +# Generate text +response = llm_service.generate_text("Tell me a joke") +``` + +#### Batch Processing +Efficient processing of multiple items: + +```python +from services.llm.batch_processor import BatchProcessor, BatchConfig + +# Configure batch processing +config = BatchConfig( + batch_size=20, + progress_callback=lambda msg: print(msg) +) + +# Create processor +processor = BatchProcessor(llm_service, config) + +# Process items +items = [{"text": "Process this"}, {"text": "And this"}] +results = processor.process_batch( + items=items, + system_prompt="You are a helpful assistant.", + template="Process these items: {items}" +) +``` + +### Translation Services +Translation capabilities with multiple provider support. + +#### Google Translate +```python +from services.translation.translation_factory import TranslationFactory + +# Create translator +translator = TranslationFactory.create_service("google") + +# Translate text +translated = translator.translate_text( + "Hello world", + target_language="es" +) + +# Batch translation +texts = ["Hello", "World"] +translations = translator.translate_batch( + texts, + target_language="es" +) +``` + +### Excel Services +Robust Excel file handling with retry mechanism and formatting. 
+
+```python
+from services.excel.excel_service import ExcelService
+
+# Create service
+excel_service = ExcelService()
+
+# Read with retries
+df = excel_service.read_excel("input.xlsx")
+
+# Save with formatting
+format_options = {
+    'freeze_row': 2,
+    'max_column_width': 50,
+    'wrap_threshold': 50,
+}
+
+excel_service.save_excel(
+    df,
+    "output.xlsx",
+    format_options=format_options
+)
+```
+
+## Utilities
+
+### Progress Bar
+Visual progress tracking in console:
+
+```python
+from utils.progress_bar import ProgressBar
+
+# Create progress bar
+progress = ProgressBar(
+    total=100,
+    prefix="Processing:",
+    suffix="Complete"
+)
+
+# Update progress
+progress.start()
+for i in range(100):
+    progress.increment()
+progress.finish()
+```
+
+### Logging
+Centralized logging with file and GUI output:
+
+```python
+from utils.logging_manager import LoggingManager
+
+# Initialize logging (log files are written to <work_dir>/logs)
+log_manager = LoggingManager(".")
+
+# Log messages
+log_manager.logger.info("Process started")
+log_manager.logger.error("An error occurred")
+```
+
+## Best Practices
+
+1. **API Keys**
+   - Never hardcode API keys
+   - Use environment variables or the APIKeyManager
+   - Store sensitive data securely
+
+2. **Error Handling**
+   - Always use try-except blocks for external services
+   - Implement retries for network operations
+   - Log errors appropriately
+
+3. **Resource Management**
+   - Use context managers for file operations
+   - Close connections and handlers properly
+   - Clean up temporary files
+
+4. **Batch Processing**
+   - Use appropriate batch sizes
+   - Implement progress tracking
+   - Handle partial failures gracefully
+
+## Common Issues and Solutions
+
+### 1. File Access Errors
+```python
+# Use retry mechanism
+excel_service = ExcelService(max_retries=5)
+```
+
+### 2. API Rate Limits
+```python
+# Configure batch size appropriately
+config = BatchConfig(batch_size=10, retry_delay=3)
+```
+
+### 3. Memory Management
+```python
+# pandas.read_excel has no chunksize option; read bounded row ranges instead
+chunk_size = 1000
+start = 0
+while True:
+    chunk = pd.read_excel("large.xlsx", skiprows=range(1, start + 1), nrows=chunk_size)
+    if chunk.empty:
+        break
+    process_chunk(chunk)
+    start += chunk_size
+```
+
+## Contributing Guidelines
+
+1. Follow PEP 8 style guide
+2. Add type hints to new functions
+3. Include docstrings for all modules and functions
+4. Add unit tests for new features
+5. Update documentation when adding features
+
+## License
+
+MIT License - Feel free to use and modify as needed.
diff --git a/logs/menu.log b/logs/menu.log
new file mode 100644
index 0000000..e69de29
diff --git a/menu.py b/menu.py
new file mode 100644
index 0000000..85c5985
--- /dev/null
+++ b/menu.py
@@ -0,0 +1,331 @@
+# menu.py
+"""
+Main menu interface for the application.
+Integrates logging, progress bar, and provides access to all functionality.
+""" +import tkinter as tk +from tkinter import ttk, filedialog +import subprocess +import sys +import os +import json +from queue import Queue +import threading +from pathlib import Path +from typing import Optional, Dict, Any + +from utils.logging_manager import LoggingManager +from utils.progress_bar import ProgressBar +from utils.file_utils import select_file, select_directory +from config.api_setup import setup_apis +from services.llm.llm_factory import LLMFactory + +class MainMenu: + def load_config(self): + """Load configuration from JSON file""" + config_file = os.path.join(self.work_dir, "config.json") + if os.path.exists(config_file): + try: + with open(config_file, 'r') as f: + config = json.load(f) + self.work_dir = config.get('work_dir', self.work_dir) + self.work_dir_var.set(self.work_dir) + self.logging_manager.logger.info("Configuration loaded successfully") + except Exception as e: + self.logging_manager.logger.error(f"Error loading configuration: {e}") + + def save_config(self): + """Save configuration to JSON file""" + config_file = os.path.join(self.work_dir, "config.json") + try: + config = { + 'work_dir': self.work_dir + } + with open(config_file, 'w') as f: + json.dump(config, f, indent=4) + self.logging_manager.logger.info("Configuration saved successfully") + except Exception as e: + self.logging_manager.logger.error(f"Error saving configuration: {e}") + + def __init__(self, root: tk.Tk): + self.root = root + self.root.title("Process Manager") + self.root.geometry("1200x800") + + # Initialize state + self.work_dir = os.path.expanduser("~/Documents/WorkFolder") + self.queue = Queue() + self.logging_manager = LoggingManager(self.work_dir) + self.after_id = None + + # Create UI + self.setup_ui() + self.setup_logging() + + # Load configuration + try: + self.load_config() + except Exception as e: + self.logging_manager.logger.error(f"Error loading initial configuration: {e}") + + # Configure closing behavior + self.root.protocol("WM_DELETE_WINDOW", self.on_closing) + + def on_closing(self): + """Handle window closing""" + try: + # Cancel any pending after callbacks + if self.after_id: + self.root.after_cancel(self.after_id) + + # Save current configuration + self.save_config() + + # Stop all running threads + for thread in threading.enumerate(): + if thread != threading.main_thread(): + try: + thread.join(timeout=1.0) # Give threads 1 second to finish + except: + pass + + # Destroy all widgets + for widget in self.root.winfo_children(): + widget.destroy() + + # Quit the application + self.root.quit() + self.root.destroy() + sys.exit(0) # Ensure complete exit + + except Exception as e: + print(f"Error during shutdown: {e}") + sys.exit(1) # Force exit if there's an error + + def setup_ui(self): + """Setup the main user interface""" + # Main frame + self.main_frame = ttk.Frame(self.root, padding="10") + self.main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) + + # Make the main frame expandable + self.root.grid_rowconfigure(0, weight=1) + self.root.grid_columnconfigure(0, weight=1) + + # Setup UI sections + self.setup_config_section() + self.setup_buttons_section() + self.setup_output_section() + + # Configure grid weights for main frame + self.main_frame.grid_columnconfigure(0, weight=1) + self.main_frame.grid_rowconfigure(2, weight=1) # Make output section expandable + + def setup_config_section(self): + """Setup the configuration section""" + config_frame = ttk.LabelFrame(self.main_frame, text="Configuration", padding="5") + config_frame.grid(row=0, 
column=0, sticky=(tk.W, tk.E), pady=5) + + # Work directory selection + ttk.Label(config_frame, text="Work Directory:").grid(row=0, column=0, padx=5) + + self.work_dir_var = tk.StringVar(value=self.work_dir) + work_dir_entry = ttk.Entry(config_frame, textvariable=self.work_dir_var, width=50) + work_dir_entry.grid(row=0, column=1, padx=5) + + ttk.Button( + config_frame, + text="Browse", + command=self.select_work_dir + ).grid(row=0, column=2, padx=5) + + ttk.Button( + config_frame, + text="Open in Explorer", + command=self.open_work_dir + ).grid(row=0, column=3, padx=5) + + # API Setup button + ttk.Button( + config_frame, + text="Setup APIs", + command=self.setup_apis + ).grid(row=0, column=4, padx=5) + + config_frame.grid_columnconfigure(1, weight=1) + + def setup_apis(self): + """Run API setup and log the result""" + try: + setup_apis() + self.logging_manager.logger.info("API setup completed") + except Exception as e: + self.logging_manager.logger.error(f"Error in API setup: {e}") + + def run_llm_operation(self): + """Example of running an LLM operation""" + def run(): + logger = self.logging_manager.logger + logger.info("Starting LLM operation...") + + try: + # Check if API keys are configured + from config.api_keys import APIKeyManager + if not APIKeyManager.get_openai_key(): + logger.error("OpenAI API key not configured. Please use 'Setup APIs' first.") + return + + # Create LLM service + llm_service = LLMFactory.create_service("openai") + + # Example operation + response = llm_service.generate_text("Tell me a joke") + logger.info(f"LLM Response: {response}") + + except Exception as e: + logger.error(f"Error in LLM operation: {e}") + + logger.info("LLM operation completed!") + + threading.Thread(target=run, daemon=True).start() + + def setup_buttons_section(self): + """Setup the buttons section""" + buttons_frame = ttk.LabelFrame(self.main_frame, text="Operations", padding="5") + buttons_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=5) + + # Add buttons for different operations + ttk.Button( + buttons_frame, + text="Process Files", + command=self.run_process_files + ).grid(row=0, column=0, padx=5, pady=5) + + ttk.Button( + buttons_frame, + text="LLM Operation", + command=self.run_llm_operation + ).grid(row=0, column=1, padx=5, pady=5) + + ttk.Button( + buttons_frame, + text="Clear Output", + command=self.clear_output + ).grid(row=0, column=2, padx=5, pady=5) + + buttons_frame.grid_columnconfigure(3, weight=1) # Push buttons to the left + + def setup_output_section(self): + """Setup the output section with text widget and scrollbars""" + output_frame = ttk.LabelFrame(self.main_frame, text="Output", padding="5") + output_frame.grid(row=2, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=5) + + # Create text widget + self.output_text = tk.Text(output_frame, wrap="none", height=20) + self.output_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) + + # Scrollbars + scrollbar_y = ttk.Scrollbar( + output_frame, + orient="vertical", + command=self.output_text.yview + ) + scrollbar_y.grid(row=0, column=1, sticky=(tk.N, tk.S)) + + scrollbar_x = ttk.Scrollbar( + output_frame, + orient="horizontal", + command=self.output_text.xview + ) + scrollbar_x.grid(row=1, column=0, sticky=(tk.W, tk.E)) + + self.output_text.configure( + yscrollcommand=scrollbar_y.set, + xscrollcommand=scrollbar_x.set + ) + + # Configure grid weights + output_frame.grid_rowconfigure(0, weight=1) + output_frame.grid_columnconfigure(0, weight=1) + + def setup_logging(self): + """Setup logging to GUI""" + 
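+        # Wire the LoggingManager to the output Text widget through the shared
+        # queue, then start polling that queue from the Tk event loop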
+        self.logging_manager.setup_gui_logging(self.output_text, self.queue)
+        self.update_output()
+
+    def update_output(self):
+        """Process logging queue and schedule next update"""
+        self.logging_manager.process_log_queue()
+        self.after_id = self.root.after(100, self.update_output)
+
+    def clear_output(self):
+        """Clear output text"""
+        self.logging_manager.clear_output()
+
+    def select_work_dir(self):
+        """Select working directory"""
+        dir_path = select_directory("Select Work Directory")
+        if dir_path:
+            self.work_dir = dir_path
+            self.work_dir_var.set(dir_path)
+            self.logging_manager.logger.info(f"Work directory set to: {dir_path}")
+            os.makedirs(os.path.join(dir_path, "logs"), exist_ok=True)
+
+    def open_work_dir(self):
+        """Open working directory in file explorer"""
+        if os.path.exists(self.work_dir):
+            if sys.platform == "win32":
+                os.startfile(self.work_dir)
+            else:
+                subprocess.run(["xdg-open", self.work_dir])
+        else:
+            self.logging_manager.logger.error(f"Directory does not exist: {self.work_dir}")
+
+    def run_process_files(self):
+        """Example of running a file processing operation"""
+        def run():
+            import time  # used only to simulate work below
+
+            logger = self.logging_manager.logger
+            logger.info("Starting file processing...")
+
+            # Example progress bar usage
+            total_steps = 100
+            progress = ProgressBar(total_steps, "Processing files:", "Complete")
+            progress.set_output_callback(lambda x: self.queue.put(x))
+
+            progress.start()
+            for i in range(total_steps):
+                # Simulate work
+                time.sleep(0.1)
+                progress.increment()
+                logger.info(f"Processing step {i + 1}")
+            progress.finish()
+
+            logger.info("File processing completed!")
+
+        threading.Thread(target=run, daemon=True).start()
+
+def main():
+    try:
+        root = tk.Tk()
+        app = MainMenu(root)
+        root.mainloop()
+    except Exception as e:
+        print(f"Error in main: {e}")
+        sys.exit(1)
+    finally:
+        # Give worker threads a moment to finish; do not call sys.exit() here,
+        # or the error exit code from the except branch would be overwritten
+        for thread in threading.enumerate():
+            if thread != threading.main_thread():
+                try:
+                    thread.join(timeout=1.0)
+                except Exception:
+                    pass
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f740cce
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+# API and LLM dependencies
+python-dotenv>=1.0.0
+openai>=1.0.0
+google-cloud-translate>=3.0.0
+langid>=1.1.6
+ollama>=0.1.0
+
+# Data processing
+pandas>=2.0.0
+openpyxl>=3.0.0
+
+# GUI and interface
+tk>=0.1.0
+
+# Utilities
+requests>=2.31.0
\ No newline at end of file
diff --git a/scripts/script_x1.py b/scripts/script_x1.py
new file mode 100644
index 0000000..96a0bc9
--- /dev/null
+++ b/scripts/script_x1.py
@@ -0,0 +1,39 @@
+# scripts/script_x1.py
+"""
+Example script showing parameter handling and logging
+"""
+import argparse
+from utils.logger_utils import setup_logger
+from utils.file_utils import select_file, safe_read_excel
+from services.llm.openai_service import OpenAIService
+
+logger = setup_logger(__name__, "script_x1.log")
+
+def process_file(input_file, llm_service):
+    logger.info(f"Processing file: {input_file}")
+
+    df = safe_read_excel(input_file)
+    if df is None:
+        return
+
+    # Example processing
+    for index, row in df.iterrows():
+        # Process each row
+        pass
+
+    logger.info("Processing complete")
+
+def main(args):
+    input_file = args.input_file or select_file("Select input file")
+    if not input_file:
+        logger.error("No input file selected")
+        return
+
+    llm_service = OpenAIService()
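+    # Note: OpenAIService() raises ValueError if no API key is configured
+    # (environment variable, .env file, or stored_keys.json); a production
+    # script would wrap it in try/except and exit with a clear message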
+    process_file(input_file, llm_service)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input-file", help="Path to input file")
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
diff --git a/services/excel/excel_service.py b/services/excel/excel_service.py
new file mode 100644
index 0000000..7b5338a
--- /dev/null
+++ b/services/excel/excel_service.py
@@ -0,0 +1,241 @@
+# services/excel/excel_service.py
+"""
+Excel file handling service with retry and formatting capabilities
+"""
+import pandas as pd
+import time
+from typing import Optional, Union, Dict, Any
+from pathlib import Path
+import openpyxl
+from openpyxl.utils import get_column_letter
+from openpyxl.styles import PatternFill, Alignment, Font
+from openpyxl.worksheet.worksheet import Worksheet
+
+class ExcelService:
+    """Service for handling Excel files with advanced features"""
+
+    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+
+    def read_excel(
+        self,
+        file_path: Union[str, Path],
+        sheet_name: str = "Sheet1",
+        **kwargs
+    ) -> pd.DataFrame:
+        """
+        Read Excel file with retries and cleanup
+
+        Args:
+            file_path: Path to Excel file
+            sheet_name: Name of sheet to read
+            **kwargs: Additional arguments for pd.read_excel
+
+        Returns:
+            DataFrame with the Excel content
+        """
+        retries = 0
+        while retries < self.max_retries:
+            try:
+                # Try to read the file with openpyxl
+                df = pd.read_excel(file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
+
+                # Clean special characters and normalize line breaks
+                for col in df.columns:
+                    df[col] = df[col].apply(
+                        lambda x: self._clean_special_chars(x) if pd.notna(x) else x
+                    )
+
+                print(f"File read and cleaned successfully: {file_path}")
+                return df
+
+            except ValueError as ve:
+                if "must be either numerical or a string containing a wildcard" in str(ve):
+                    print(f"Error reading the file: {ve}")
+                    print("Trying to remove auto-filters and read the file again...")
+                    try:
+                        # Load the workbook
+                        wb = openpyxl.load_workbook(filename=file_path)
+                        sheet = wb.active
+
+                        # Remove auto-filters if present
+                        if sheet.auto_filter.ref:
+                            sheet.auto_filter.ref = None
+
+                        # Save a temporary copy without filters
+                        temp_file = str(file_path) + "_temp.xlsx"
+                        wb.save(temp_file)
+
+                        # Read the temporary file (keep the requested sheet)
+                        df = pd.read_excel(temp_file, engine="openpyxl", sheet_name=sheet_name, **kwargs)
+
+                        # Delete the temporary file
+                        Path(temp_file).unlink()
+
+                        return df
+                    except Exception as e:
+                        print(f"Error removing filters and re-reading the file: {e}")
+                else:
+                    print(f"Value error: {ve}")
+
+            except PermissionError as e:
+                print(
+                    f"Permission error: {e}. Please close the file. "
+                    f"Retrying in {self.retry_delay} seconds..."
+                )
+            except Exception as e:
+                print(f"Unexpected error: {e}. Retrying in {self.retry_delay} seconds...")
+
+            retries += 1
+            time.sleep(self.retry_delay)
+
+        raise Exception(f"Could not read the file after {self.max_retries} attempts.")
+
+    def save_excel(
+        self,
+        df: pd.DataFrame,
+        file_path: Union[str, Path],
+        sheet_name: str = "Sheet1",
+        format_options: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> None:
+        """
+        Save DataFrame to Excel with formatting
+
+        Args:
+            df: DataFrame to save
+            file_path: Path to save Excel file
+            sheet_name: Name of sheet
+            format_options: Dictionary with formatting options
+            **kwargs: Additional arguments for pd.to_excel
+        """
+        if format_options is None:
+            format_options = {}
+
+        retries = 0
+        while retries < self.max_retries:
+            try:
+                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
+                    # Save DataFrame
+                    df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+                    # Apply formatting
+                    self._format_worksheet(
+                        writer.sheets[sheet_name],
+                        format_options
+                    )
+
+                print(f"File saved successfully to: {file_path}")
+                return
+
+            except PermissionError as e:
+                print(
+                    f"Permission error: {e}. Please close the file. "
+                    f"Retrying in {self.retry_delay} seconds..."
+                )
+            except Exception as e:
+                print(f"Unexpected error: {e}. Retrying in {self.retry_delay} seconds...")
+
+            retries += 1
+            time.sleep(self.retry_delay)
+
+        raise Exception(f"Could not save the file after {self.max_retries} attempts.")
+
+    def _format_worksheet(self, worksheet: Worksheet, options: Dict[str, Any]) -> None:
+        """
+        Apply formatting to worksheet
+
+        Args:
+            worksheet: Worksheet to format
+            options: Formatting options
+        """
+        # Freeze panes if specified
+        freeze_row = options.get('freeze_row', 2)
+        freeze_col = options.get('freeze_col', 1)
+        if freeze_row or freeze_col:
+            freeze_cell = f"{get_column_letter(freeze_col)}{freeze_row}"
+            worksheet.freeze_panes = freeze_cell
+
+        # Auto-adjust column widths
+        max_width = options.get('max_column_width', 50)
+        min_width = options.get('min_column_width', 8)
+        wrap_threshold = options.get('wrap_threshold', 50)
+
+        for col in worksheet.columns:
+            max_length = 0
+            column = col[0].column_letter
+
+            for cell in col:
+                try:
+                    if cell.value:
+                        text_length = len(str(cell.value))
+                        if text_length > wrap_threshold:
+                            cell.alignment = Alignment(wrap_text=True, vertical='top')
+                            text_length = min(
+                                wrap_threshold,
+                                max(len(word) for word in str(cell.value).split())
+                            )
+                        max_length = max(max_length, text_length)
+                except Exception:
+                    pass
+
+            adjusted_width = min(max_width, max(min_width, max_length + 2))
+            worksheet.column_dimensions[column].width = adjusted_width
+
+        # Apply custom styles
+        header_row = options.get('header_row', 1)
+        if header_row:
+            header_fill = PatternFill(
+                start_color=options.get('header_color', 'F2F2F2'),
+                end_color=options.get('header_color', 'F2F2F2'),
+                fill_type='solid'
+            )
+            header_font = Font(bold=True)
+
+            for cell in worksheet[header_row]:
+                cell.fill = header_fill
+                cell.font = header_font
+
+    def _clean_special_chars(self, text: Any) -> Any:
+        """Clean special characters and normalize line breaks"""
+        if isinstance(text, str):
+            # Normalize line breaks
+            text = text.replace('\r\n', '\n').replace('\r', '\n')
+            # Replace other special characters if needed
+            return text
+        return text
+
+# Example usage:
+"""
+from services.excel.excel_service import ExcelService
+
+# Create service
+excel_service = ExcelService()
+
+# Read Excel file
+try:
+    df = excel_service.read_excel("input.xlsx")
+    print("Data loaded successfully")
+
+    #
Modify data... + + # Save with formatting + format_options = { + 'freeze_row': 2, + 'freeze_col': 1, + 'max_column_width': 50, + 'min_column_width': 8, + 'wrap_threshold': 50, + 'header_color': 'E6E6E6' + } + + excel_service.save_excel( + df, + "output.xlsx", + format_options=format_options + ) + +except Exception as e: + print(f"Error handling Excel file: {e}") +""" \ No newline at end of file diff --git a/services/language/base.py b/services/language/base.py new file mode 100644 index 0000000..5823797 --- /dev/null +++ b/services/language/base.py @@ -0,0 +1,25 @@ +# services/language/base.py +""" +Base class for language detection services +""" +from abc import ABC, abstractmethod +from typing import Optional, List, Dict, Tuple + +class LanguageDetectionService(ABC): + """Abstract base class for language detection services""" + + @abstractmethod + def detect_language(self, text: str) -> Tuple[str, float]: + """ + Detect language of a text + Returns: Tuple of (language_code, confidence_score) + """ + pass + + @abstractmethod + def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]: + """ + Detect language of multiple texts + Returns: List of tuples (language_code, confidence_score) + """ + pass diff --git a/services/language/langid_service.py b/services/language/langid_service.py new file mode 100644 index 0000000..ebe1d50 --- /dev/null +++ b/services/language/langid_service.py @@ -0,0 +1,52 @@ +# services/language/langid_service.py +""" +Language detection service using langid +""" +from typing import List, Tuple, Optional, Set +import langid +from .base import LanguageDetectionService + +class LangIdService(LanguageDetectionService): + def __init__(self, allowed_languages: Optional[Set[str]] = None): + """ + Initialize langid service + + Args: + allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'}) + If None, all languages supported by langid will be allowed + """ + if allowed_languages: + langid.set_languages(list(allowed_languages)) + self.allowed_languages = allowed_languages + + def detect_language(self, text: str) -> Tuple[str, float]: + """ + Detect language of a text using langid + + Args: + text: Text to analyze + + Returns: + Tuple of (language_code, confidence_score) + """ + try: + if not text or len(text.strip()) < 3: + return ("unknown", 0.0) + + lang, score = langid.classify(text.strip()) + return (lang, score) + except Exception as e: + print(f"Error in language detection: {e}") + return ("unknown", 0.0) + + def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]: + """ + Detect language of multiple texts + + Args: + texts: List of texts to analyze + + Returns: + List of tuples (language_code, confidence_score) + """ + return [self.detect_language(text) for text in texts] \ No newline at end of file diff --git a/services/language/language_factory.py b/services/language/language_factory.py new file mode 100644 index 0000000..ac65561 --- /dev/null +++ b/services/language/language_factory.py @@ -0,0 +1,33 @@ +# services/language/language_factory.py +""" +Factory class for creating language detection services +""" +from typing import Optional, Set +from .langid_service import LangIdService + +class LanguageFactory: + """Factory class for creating language detection service instances""" + + @staticmethod + def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> Optional['LanguageDetectionService']: + """ + Create an instance of the specified language detection service + + Args: + 
+            service_type: Type of language detection service ("langid", etc.)
+            allowed_languages: Set of allowed language codes
+            **kwargs: Additional arguments for service initialization
+
+        Returns:
+            LanguageDetectionService instance
+
+        Raises:
+            ValueError: If service_type is not recognized
+        """
+        services = {
+            "langid": LangIdService,
+            # Add other language detection services here
+        }
+
+        service_class = services.get(service_type.lower())
+        if service_class:
+            return service_class(allowed_languages=allowed_languages, **kwargs)
+        else:
+            raise ValueError(f"Unknown language detection service type: {service_type}")
\ No newline at end of file
diff --git a/services/language/language_utils.py b/services/language/language_utils.py
new file mode 100644
index 0000000..4834210
--- /dev/null
+++ b/services/language/language_utils.py
@@ -0,0 +1,68 @@
+# services/language/language_utils.py
+"""
+Utility functions for language detection and validation
+"""
+from typing import Dict, Set
+
+class LanguageUtils:
+    # Common language codes
+    LANGUAGE_CODES = {
+        'it': ('Italian', 'it-IT'),
+        'en': ('English', 'en-GB'),
+        'pt': ('Portuguese', 'pt-PT'),
+        'es': ('Spanish', 'es-ES'),
+        'ru': ('Russian', 'ru-RU'),
+        'fr': ('French', 'fr-FR'),
+        'de': ('German', 'de-DE'),
+        'tr': ('Turkish', 'tr-TR'),
+    }
+
+    @classmethod
+    def get_language_name(cls, code: str) -> str:
+        """Get full language name from code"""
+        return cls.LANGUAGE_CODES.get(code, ('Unknown', ''))[0]
+
+    @classmethod
+    def get_full_code(cls, short_code: str) -> str:
+        """Get full language code (e.g., 'en-GB' from 'en')"""
+        return cls.LANGUAGE_CODES.get(short_code, ('Unknown', 'unknown'))[1]
+
+    @classmethod
+    def get_short_code(cls, full_code: str) -> str:
+        """Get short language code (e.g., 'en' from 'en-GB')"""
+        return full_code.split('-')[0] if '-' in full_code else full_code
+
+    @classmethod
+    def is_valid_language(cls, code: str) -> bool:
+        """Check if a language code is valid"""
+        short_code = cls.get_short_code(code)
+        return short_code in cls.LANGUAGE_CODES
+
+    @classmethod
+    def get_available_languages(cls) -> Set[str]:
+        """Get set of available language codes"""
+        return set(cls.LANGUAGE_CODES.keys())
+
+# Example usage:
+"""
+from services.language.language_factory import LanguageFactory
+from services.language.language_utils import LanguageUtils
+
+# Create language detection service with specific languages
+allowed_languages = LanguageUtils.get_available_languages()
+detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
+
+# Detect language of a text
+text = "Hello, how are you?"
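+# Note: langid's classify() score is an unnormalized log-probability, not a
+# value in [0, 1]; to get normalized confidences, build an identifier with
+# langid.langid.LanguageIdentifier.from_modelstring(model, norm_probs=True)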
+lang, confidence = detector.detect_language(text) +print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})") +print(f"Confidence: {confidence}") + +# Detect language of multiple texts +texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"] +results = detector.detect_batch(texts) +for text, (lang, confidence) in zip(texts, results): + print(f"Text: {text}") + print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})") + print(f"Confidence: {confidence}") +""" \ No newline at end of file diff --git a/services/llm/__pycache__/base.cpython-310.pyc b/services/llm/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000..e5aadd3 Binary files /dev/null and b/services/llm/__pycache__/base.cpython-310.pyc differ diff --git a/services/llm/__pycache__/grok_service.cpython-310.pyc b/services/llm/__pycache__/grok_service.cpython-310.pyc new file mode 100644 index 0000000..e12073b Binary files /dev/null and b/services/llm/__pycache__/grok_service.cpython-310.pyc differ diff --git a/services/llm/__pycache__/llm_factory.cpython-310.pyc b/services/llm/__pycache__/llm_factory.cpython-310.pyc new file mode 100644 index 0000000..8f8a6fb Binary files /dev/null and b/services/llm/__pycache__/llm_factory.cpython-310.pyc differ diff --git a/services/llm/__pycache__/ollama_service.cpython-310.pyc b/services/llm/__pycache__/ollama_service.cpython-310.pyc new file mode 100644 index 0000000..e8106f6 Binary files /dev/null and b/services/llm/__pycache__/ollama_service.cpython-310.pyc differ diff --git a/services/llm/__pycache__/openai_service.cpython-310.pyc b/services/llm/__pycache__/openai_service.cpython-310.pyc new file mode 100644 index 0000000..e34a380 Binary files /dev/null and b/services/llm/__pycache__/openai_service.cpython-310.pyc differ diff --git a/services/llm/base.py b/services/llm/base.py new file mode 100644 index 0000000..e533497 --- /dev/null +++ b/services/llm/base.py @@ -0,0 +1,20 @@ +# services/llm/base.py +""" +Base class for LLM services +""" +from abc import ABC, abstractmethod +import json +from typing import List, Union, Dict, Any + +class LLMService(ABC): + """Abstract base class for LLM services""" + + @abstractmethod + def generate_text(self, prompt: str) -> str: + """Generate text based on a prompt""" + pass + + @abstractmethod + def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]: + """Calculate similarity scores for pairs of texts""" + pass \ No newline at end of file diff --git a/services/llm/batch_processor.py b/services/llm/batch_processor.py new file mode 100644 index 0000000..0a0a6fa --- /dev/null +++ b/services/llm/batch_processor.py @@ -0,0 +1,228 @@ +# services/llm/batch_processor.py +""" +Batch processing service for LLM operations +""" +from typing import List, Dict, Any, Optional, Callable +import json +from dataclasses import dataclass +import time +from .base import LLMService +from utils.progress_bar import ProgressBar + +@dataclass +class BatchConfig: + """Configuration for batch processing""" + batch_size: int = 20 + max_retries: int = 3 + retry_delay: int = 3 + progress_callback: Optional[Callable[[str], None]] = None + +class BatchProcessor: + """ + Handles batch processing for LLM operations + """ + def __init__( + self, + llm_service: LLMService, + config: Optional[BatchConfig] = None + ): + self.llm_service = llm_service + self.config = config or BatchConfig() + + def process_batch( + self, + items: List[Dict[str, Any]], + system_prompt: str, + template: str, + output_processor: 
Optional[Callable] = None
+    ) -> List[Any]:
+        """
+        Process items in batches with consistent context
+
+        Args:
+            items: List of dictionaries containing data to process
+            system_prompt: System prompt for context
+            template: Template string for formatting requests
+            output_processor: Optional function to process LLM responses
+
+        Returns:
+            List of processed results
+        """
+        results = []
+        total_items = len(items)
+
+        # Setup progress tracking
+        progress = ProgressBar(
+            total_items,
+            "Processing batches:",
+            "Complete"
+        )
+        if self.config.progress_callback:
+            progress.set_output_callback(self.config.progress_callback)
+        progress.start()
+
+        # Process in batches
+        for start_idx in range(0, total_items, self.config.batch_size):
+            end_idx = min(start_idx + self.config.batch_size, total_items)
+            batch_items = items[start_idx:end_idx]
+
+            # Prepare batch request
+            batch_data = {
+                "items": batch_items,
+                "template": template
+            }
+            request_payload = json.dumps(batch_data)
+
+            # Process batch with retries
+            for attempt in range(self.config.max_retries):
+                try:
+                    # LLMService.generate_text takes a single prompt string,
+                    # so fold the system prompt and the payload together
+                    response = self.llm_service.generate_text(
+                        f"{system_prompt}\n\n{request_payload}"
+                    )
+
+                    # Parse and process response
+                    batch_results = self._process_response(
+                        response,
+                        output_processor
+                    )
+
+                    if len(batch_results) != len(batch_items):
+                        raise ValueError(
+                            "Response count doesn't match input count"
+                        )
+
+                    results.extend(batch_results)
+                    break
+
+                except Exception as e:
+                    if attempt < self.config.max_retries - 1:
+                        if self.config.progress_callback:
+                            self.config.progress_callback(
+                                f"Error in batch {start_idx}-{end_idx}: {e}. Retrying..."
+                            )
+                        time.sleep(self.config.retry_delay)
+                    else:
+                        if self.config.progress_callback:
+                            self.config.progress_callback(
+                                f"Error in batch {start_idx}-{end_idx}: {e}"
+                            )
+                        # On final retry failure, add None results
+                        results.extend([None] * len(batch_items))
+
+            # Update progress
+            progress.update(end_idx)
+
+        progress.finish()
+        return results
+
+    def _process_response(
+        self,
+        response: str,
+        output_processor: Optional[Callable] = None
+    ) -> List[Any]:
+        """Process LLM response"""
+        try:
+            # Parse JSON response
+            parsed = json.loads(response)
+
+            # Apply custom processing if provided
+            if output_processor:
+                return [output_processor(item) for item in parsed]
+            return parsed
+
+        except json.JSONDecodeError:
+            raise ValueError("Failed to parse LLM response as JSON")
+
+# Example specialized batch processor for translations
+class TranslationBatchProcessor(BatchProcessor):
+    """Specialized batch processor for translations"""
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        source_lang: str,
+        target_lang: str
+    ) -> List[str]:
+        """
+        Translate a batch of texts
+
+        Args:
+            texts: List of texts to translate
+            source_lang: Source language code
+            target_lang: Target language code
+
+        Returns:
+            List of translated texts
+        """
+        # Prepare items
+        items = [{"text": text} for text in texts]
+
+        # Setup prompts
+        system_prompt = (
+            "You are a translator. Translate the provided texts "
+            "maintaining special fields like <> and <#>."
+        )
+
+        # The texts travel inside the JSON payload built by process_batch, so
+        # the template only needs the language pair (a literal {text}
+        # placeholder here would make str.format raise KeyError below)
+        template = (
+            "Translate the following texts from {source_lang} to {target_lang}. "
+            "Return the translations as a JSON array of strings."
+        )
+
+        # Process batch
+        results = self.process_batch(
+            items=items,
+            system_prompt=system_prompt,
+            template=template.format(
+                source_lang=source_lang,
+                target_lang=target_lang
+            )
+        )
+
+        return results
+
+# Example usage:
+"""
+from services.llm.llm_factory import LLMFactory
+from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor
+
+# Create LLM service
+llm_service = LLMFactory.create_service("openai")
+
+# Setup batch processor with progress callback
+def progress_callback(message: str):
+    print(message)
+
+config = BatchConfig(
+    batch_size=20,
+    progress_callback=progress_callback
+)
+
+# General batch processor
+processor = BatchProcessor(llm_service, config)
+
+# Example batch process for custom task
+items = [
+    {"text": "Hello", "context": "greeting"},
+    {"text": "Goodbye", "context": "farewell"}
+]
+
+system_prompt = "You are a helpful assistant."
+template = "Process these items considering their context: {items}"
+
+results = processor.process_batch(
+    items=items,
+    system_prompt=system_prompt,
+    template=template
+)
+
+# Example translation batch
+translator = TranslationBatchProcessor(llm_service, config)
+texts = ["Hello world", "How are you?"]
+translations = translator.translate_batch(
+    texts=texts,
+    source_lang="en",
+    target_lang="es"
+)
+"""
\ No newline at end of file
diff --git a/services/llm/grok_service.py b/services/llm/grok_service.py
new file mode 100644
index 0000000..b7e5434
--- /dev/null
+++ b/services/llm/grok_service.py
@@ -0,0 +1,63 @@
+# services/llm/grok_service.py
+"""
+Grok service implementation
+"""
+from typing import Dict, List, Optional
+import json
+from .base import LLMService
+from config.api_keys import APIKeyManager
+
+class GrokService(LLMService):
+    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
+        api_key = APIKeyManager.get_grok_key()
+        if not api_key:
+            raise ValueError("Grok API key not found. Please set up your API keys.")
+
+        self.api_key = api_key
+        self.model = model
+        self.temperature = temperature
+
+    def generate_text(self, prompt: str) -> str:
+        """
+        Generate text using the Grok API
+        TODO: Update this method when Grok API is available
+        """
+        try:
+            # Placeholder for Grok API implementation
+            # Update this when the API is released
+            raise NotImplementedError("Grok API is not implemented yet")
+
+        except Exception as e:
+            print(f"Error in Grok API call: {e}")
+            return None
+
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        """
+        Calculate similarity scores using the Grok API
+        TODO: Update this method when Grok API is available
+        """
+        try:
+            system_prompt = (
+                "Evaluate the semantic similarity between the following table of pairs of texts "
+                "in json format on a scale from 0 to 1. Return the similarity scores for every "
+                "row in JSON format as a list of numbers, without any additional text or formatting."
+            )
+
+            request_payload = json.dumps(texts_pairs)
+
+            # Placeholder for Grok API implementation
+            # Update this when the API is released
+            raise NotImplementedError("Grok API is not implemented yet")
+
+        except Exception as e:
+            print(f"Error in Grok similarity calculation: {e}")
+            return None
\ No newline at end of file
diff --git a/services/llm/llm_factory.py b/services/llm/llm_factory.py
new file mode 100644
index 0000000..9405c61
--- /dev/null
+++ b/services/llm/llm_factory.py
@@ -0,0 +1,33 @@
+# services/llm/llm_factory.py
+"""
+Factory class for creating LLM services
+"""
+from typing import Optional
+from .openai_service import OpenAIService
+from .ollama_service import OllamaService
+from .grok_service import GrokService
+
+class LLMFactory:
+    """Factory class for creating LLM service instances"""
+
+    @staticmethod
+    def create_service(service_type: str, **kwargs) -> Optional['LLMService']:
+        """
+        Create an instance of the specified LLM service
+
+        Args:
+            service_type: Type of LLM service ("openai", "ollama", "grok")
+            **kwargs: Additional arguments for service initialization
+        """
+        services = {
+            "openai": OpenAIService,
+            "ollama": OllamaService,
+            "grok": GrokService
+        }
+
+        service_class = services.get(service_type.lower())
+        if service_class:
+            return service_class(**kwargs)
+        else:
+            print(f"Unknown service type: {service_type}")
+            return None
diff --git a/services/llm/ollama_service.py b/services/llm/ollama_service.py
new file mode 100644
index 0000000..65a6885
--- /dev/null
+++ b/services/llm/ollama_service.py
@@ -0,0 +1,53 @@
+# services/llm/ollama_service.py
+"""
+Ollama service implementation
+"""
+import ollama
+import json
+from typing import Dict, List
+from .base import LLMService
+
+class OllamaService(LLMService):
+    def __init__(self, model: str = "llama3.1"):
+        self.model = model
+
+    def generate_text(self, prompt: str) -> str:
+        try:
+            response = ollama.generate(
+                model=self.model,
+                prompt=prompt
+            )
+            return response["response"]
+        except Exception as e:
+            print(f"Error in Ollama API call: {e}")
+            return None
+
+    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
+        system_prompt = (
+            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
+            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
+ ) + + request_payload = json.dumps(texts_pairs) + prompt = f"{system_prompt}\n\n{request_payload}" + + try: + response = ollama.generate( + model=self.model, + prompt=prompt + ) + + try: + scores = json.loads(response["response"].strip()) + if isinstance(scores, dict) and "similarity_scores" in scores: + return scores["similarity_scores"] + elif isinstance(scores, list): + return scores + else: + raise ValueError("Unexpected response format") + except json.JSONDecodeError: + raise ValueError("Could not decode response as JSON") + + except Exception as e: + print(f"Error in Ollama similarity calculation: {e}") + return None \ No newline at end of file diff --git a/services/llm/openai_service.py b/services/llm/openai_service.py new file mode 100644 index 0000000..7363bbc --- /dev/null +++ b/services/llm/openai_service.py @@ -0,0 +1,69 @@ +# services/llm/openai_service.py +""" +OpenAI service implementation +""" +from openai import OpenAI +from typing import Dict, List +import json +from .base import LLMService +from config.api_keys import APIKeyManager + +class OpenAIService(LLMService): + def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3): + api_key = APIKeyManager.get_openai_key() + if not api_key: + raise ValueError("OpenAI API key not found. Please set up your API keys.") + + self.client = OpenAI(api_key=api_key) + self.model = model + self.temperature = temperature + + def generate_text(self, prompt: str) -> str: + try: + response = self.client.chat.completions.create( + model=self.model, + messages=[{"role": "user", "content": prompt}], + temperature=self.temperature, + max_tokens=1500 + ) + return response.choices[0].message.content + except Exception as e: + print(f"Error in OpenAI API call: {e}") + return None + + def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]: + system_prompt = ( + "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. " + "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting." 
+ ) + + request_payload = json.dumps(texts_pairs) + + try: + response = self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": request_payload} + ], + temperature=self.temperature, + max_tokens=1500 + ) + + response_content = response.choices[0].message.content + cleaned_response = response_content.strip().strip("'```json").strip("```") + + try: + scores = json.loads(cleaned_response) + if isinstance(scores, dict) and "similarity_scores" in scores: + return scores["similarity_scores"] + elif isinstance(scores, list): + return scores + else: + raise ValueError("Unexpected response format") + except json.JSONDecodeError: + raise ValueError("Could not decode response as JSON") + + except Exception as e: + print(f"Error in OpenAI similarity calculation: {e}") + return None \ No newline at end of file diff --git a/services/translation/base.py b/services/translation/base.py new file mode 100644 index 0000000..e3946ff --- /dev/null +++ b/services/translation/base.py @@ -0,0 +1,19 @@ +# services/translation/base.py +""" +Base class for translation services +""" +from abc import ABC, abstractmethod +from typing import Optional, List, Dict + +class TranslationService(ABC): + """Abstract base class for translation services""" + + @abstractmethod + def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str: + """Translate a single text""" + pass + + @abstractmethod + def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]: + """Translate a batch of texts""" + pass diff --git a/services/translation/google_translate.py b/services/translation/google_translate.py new file mode 100644 index 0000000..eb5f49d --- /dev/null +++ b/services/translation/google_translate.py @@ -0,0 +1,77 @@ +# services/translation/google_translate.py +""" +Google Translation service implementation +""" +from typing import Optional, List +import html +from google.cloud import translate_v2 as translate +from google.oauth2 import service_account +from config.api_keys import APIKeyManager +from .base import TranslationService + +class GoogleTranslateService(TranslationService): + def __init__(self, credentials_file: Optional[str] = None): + """ + Initialize Google Translate service + + Args: + credentials_file: Path to Google service account credentials JSON file. + If None, will use API key from APIKeyManager. + """ + if credentials_file: + # Use service account credentials + try: + credentials = service_account.Credentials.from_service_account_file(credentials_file) + self.client = translate.Client(credentials=credentials) + except Exception as e: + raise ValueError(f"Error initializing Google Translate with credentials: {e}") + else: + # Use API key + api_key = APIKeyManager.get_google_key() + if not api_key: + raise ValueError("Google API key not found. Please set up your API keys.") + self.client = translate.Client() + + def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str: + """ + Translate a single text. + + Args: + text: Text to translate + target_language: Target language code (e.g., 'es' for Spanish) + source_language: Source language code. If None, will be auto-detected. 
+ + Returns: + Translated text + """ + try: + result = self.client.translate( + text, + target_language=target_language, + source_language=source_language + ) + return html.unescape(result["translatedText"]) + except Exception as e: + raise ValueError(f"Error in Google Translate: {e}") + + def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]: + """ + Translate multiple texts in batch. + + Args: + texts: List of texts to translate + target_language: Target language code (e.g., 'es' for Spanish) + source_language: Source language code. If None, will be auto-detected. + + Returns: + List of translated texts + """ + try: + results = self.client.translate( + texts, + target_language=target_language, + source_language=source_language + ) + return [html.unescape(result["translatedText"]) for result in results] + except Exception as e: + raise ValueError(f"Error in Google Translate batch: {e}") \ No newline at end of file diff --git a/services/translation/translation_factory.py b/services/translation/translation_factory.py new file mode 100644 index 0000000..3df8563 --- /dev/null +++ b/services/translation/translation_factory.py @@ -0,0 +1,32 @@ +# services/translation/translation_factory.py +""" +Factory class for creating translation services +""" +from typing import Optional +from .google_translate import GoogleTranslateService + +class TranslationFactory: + """Factory class for creating translation service instances""" + + @staticmethod + def create_service(service_type: str, **kwargs) -> Optional['TranslationService']: + """ + Create an instance of the specified translation service + + Args: + service_type: Type of translation service ("google", etc.) + **kwargs: Additional arguments for service initialization + + Returns: + TranslationService instance or None if service_type is not recognized + """ + services = { + "google": GoogleTranslateService, + # Add other translation services here + } + + service_class = services.get(service_type.lower()) + if service_class: + return service_class(**kwargs) + else: + raise ValueError(f"Unknown translation service type: {service_type}") \ No newline at end of file diff --git a/utils/__pycache__/file_utils.cpython-310.pyc b/utils/__pycache__/file_utils.cpython-310.pyc new file mode 100644 index 0000000..9547000 Binary files /dev/null and b/utils/__pycache__/file_utils.cpython-310.pyc differ diff --git a/utils/__pycache__/logger_utils.cpython-310.pyc b/utils/__pycache__/logger_utils.cpython-310.pyc new file mode 100644 index 0000000..919480d Binary files /dev/null and b/utils/__pycache__/logger_utils.cpython-310.pyc differ diff --git a/utils/__pycache__/logging_manager.cpython-310.pyc b/utils/__pycache__/logging_manager.cpython-310.pyc new file mode 100644 index 0000000..e160826 Binary files /dev/null and b/utils/__pycache__/logging_manager.cpython-310.pyc differ diff --git a/utils/__pycache__/output_redirector.cpython-310.pyc b/utils/__pycache__/output_redirector.cpython-310.pyc new file mode 100644 index 0000000..aa01a22 Binary files /dev/null and b/utils/__pycache__/output_redirector.cpython-310.pyc differ diff --git a/utils/__pycache__/progress_bar.cpython-310.pyc b/utils/__pycache__/progress_bar.cpython-310.pyc new file mode 100644 index 0000000..c4f1012 Binary files /dev/null and b/utils/__pycache__/progress_bar.cpython-310.pyc differ diff --git a/utils/file_utils.py b/utils/file_utils.py new file mode 100644 index 0000000..84e2309 --- /dev/null +++ b/utils/file_utils.py @@ -0,0 +1,39 
@@
+# utils/file_utils.py
+"""
+File handling utilities
+"""
+import os
+import tkinter as tk
+from tkinter import filedialog
+import pandas as pd
+
+def select_file(title="Select file", filetypes=None):
+    if filetypes is None:
+        filetypes = [
+            ("Excel files", "*.xlsx;*.xls"),
+            ("All files", "*.*")
+        ]
+
+    root = tk.Tk()
+    root.withdraw()
+
+    file_path = filedialog.askopenfilename(
+        title=title,
+        filetypes=filetypes
+    )
+
+    # Destroy the hidden root so repeated calls don't leak Tk instances
+    root.destroy()
+
+    return file_path if file_path else None
+
+def select_directory(title="Select directory"):
+    root = tk.Tk()
+    root.withdraw()
+
+    dir_path = filedialog.askdirectory(title=title)
+    root.destroy()
+    return dir_path if dir_path else None
+
+def safe_read_excel(file_path, **kwargs):
+    try:
+        return pd.read_excel(file_path, **kwargs)
+    except Exception as e:
+        print(f"Error reading Excel file: {e}")
+        return None
\ No newline at end of file
diff --git a/utils/logger_utils.py b/utils/logger_utils.py
new file mode 100644
index 0000000..df2f815
--- /dev/null
+++ b/utils/logger_utils.py
@@ -0,0 +1,31 @@
+# utils/logger_utils.py
+"""
+Logging configuration and utilities
+"""
+import logging
+import os
+from config.settings import LOG_DIR
+
+def setup_logger(name, log_file=None):
+    logger = logging.getLogger(name)
+
+    if not logger.handlers:
+        logger.setLevel(logging.INFO)
+
+        if log_file:
+            os.makedirs(LOG_DIR, exist_ok=True)
+            file_handler = logging.FileHandler(
+                os.path.join(LOG_DIR, log_file)
+            )
+            file_handler.setFormatter(
+                logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+            )
+            logger.addHandler(file_handler)
+
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(
+            logging.Formatter('%(levelname)s: %(message)s')
+        )
+        logger.addHandler(console_handler)
+
+    return logger
diff --git a/utils/logging_manager.py b/utils/logging_manager.py
new file mode 100644
index 0000000..1901fc2
--- /dev/null
+++ b/utils/logging_manager.py
@@ -0,0 +1,69 @@
+# utils/logging_manager.py
+"""
+Logging manager with GUI integration
+"""
+import logging
+import os
+from typing import Optional
+from datetime import datetime
+from pathlib import Path
+import tkinter as tk
+from tkinter import ttk
+from queue import Queue, Empty
+import threading
+from .output_redirector import OutputRedirector
+
+class LoggingManager:
+    def __init__(self, work_dir: str):
+        self.work_dir = work_dir
+        self.log_dir = os.path.join(work_dir, "logs")
+        os.makedirs(self.log_dir, exist_ok=True)
+
+        self.logger = self._setup_logger()
+        self.queue: Optional[Queue] = None
+        self.text_widget: Optional[tk.Text] = None
+
+    def _setup_logger(self) -> logging.Logger:
+        logger = logging.getLogger("app_logger")
+        logger.setLevel(logging.INFO)
+
+        # File handler
+        log_file = os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setLevel(logging.INFO)
+
+        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+        file_handler.setFormatter(formatter)
+
+        logger.addHandler(file_handler)
+        return logger
+
+    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
+        """Setup logging to GUI text widget"""
+        self.text_widget = text_widget
+        self.queue = queue
+
+        # Add handler for GUI logging
+        gui_handler = logging.StreamHandler(OutputRedirector(queue))
+        gui_handler.setLevel(logging.INFO)
+        formatter = logging.Formatter("%(message)s")
+        gui_handler.setFormatter(formatter)
+
+        self.logger.addHandler(gui_handler)
+
+    def process_log_queue(self):
+        """Process pending log messages"""
+        if
self.queue and self.text_widget: + while True: + try: + message = self.queue.get_nowait() + self.text_widget.insert(tk.END, message) + self.text_widget.see(tk.END) + self.text_widget.update_idletasks() + except Empty: + break + + def clear_output(self): + """Clear the text widget""" + if self.text_widget: + self.text_widget.delete("1.0", tk.END) \ No newline at end of file diff --git a/utils/output_redirector.py b/utils/output_redirector.py new file mode 100644 index 0000000..953c067 --- /dev/null +++ b/utils/output_redirector.py @@ -0,0 +1,17 @@ +# utils/output_redirector.py +""" +Output redirector for capturing stdout/stderr +""" +import sys +from queue import Queue +from typing import Optional + +class OutputRedirector: + def __init__(self, queue: Queue): + self.queue = queue + + def write(self, string: str): + self.queue.put(string) + + def flush(self): + pass \ No newline at end of file diff --git a/utils/progress_bar.py b/utils/progress_bar.py new file mode 100644 index 0000000..0180577 --- /dev/null +++ b/utils/progress_bar.py @@ -0,0 +1,51 @@ +# utils/progress_bar.py +""" +Progress bar implementation +""" +import tkinter as tk +from tkinter import ttk +from typing import Optional, Callable +import sys +from queue import Queue +import threading + +class ProgressBar: + def __init__(self, total: int, prefix: str = "", suffix: str = "", max_points: int = 30): + self.total = total + self.prefix = prefix + self.suffix = suffix + self.max_points = max_points + self.current = 0 + self.last_points = 0 + self.output_callback: Optional[Callable] = None + + def set_output_callback(self, callback: Callable[[str], None]): + """Set callback function for output""" + self.output_callback = callback + + def update(self, current: int): + self.current = current + points = min(int((current / self.total) * self.max_points), self.max_points) + + if points > self.last_points: + new_points = points - self.last_points + self._write_output("." * new_points) + self.last_points = points + + def increment(self): + self.update(self.current + 1) + + def finish(self): + remaining_points = self.max_points - self.last_points + if remaining_points > 0: + self._write_output("." * remaining_points) + self._write_output(f"] {self.suffix}\n") + + def start(self): + self._write_output(f"\r{self.prefix} [") + + def _write_output(self, text: str): + if self.output_callback: + self.output_callback(text) + else: + print(text, end="", flush=True) \ No newline at end of file