Agregado de librerias
This commit is contained in:
parent
caa983c8da
commit
6fca251249
|
@ -0,0 +1,27 @@
|
||||||
|
# Python cache files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
|
||||||
|
# Environment directories
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
.env/
|
||||||
|
|
||||||
|
# IDE configurations
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Logs and data files
|
||||||
|
data/log.txt
|
||||||
|
data/data.json
|
||||||
|
|
||||||
|
# Allow script groups and their configurations
|
||||||
|
!backend/script_groups/
|
||||||
|
!backend/script_groups/*/
|
||||||
|
!backend/script_groups/*/*.py
|
||||||
|
!backend/script_groups/*/schema.json
|
||||||
|
!backend/script_groups/*/esquema.json
|
||||||
|
!backend/script_groups/*/description.json
|
||||||
|
|
||||||
|
# But ignore working directory configurations
|
||||||
|
backend/script_groups/*/work_dir.json
|
|
@ -0,0 +1,3 @@
|
||||||
|
# Create a .gitkeep in each script group directory
|
||||||
|
mkdir -p backend/script_groups/example_group
|
||||||
|
touch backend/script_groups/example_group/.gitkeep
|
|
@ -0,0 +1,241 @@
|
||||||
|
# services/excel/excel_service.py
|
||||||
|
"""
|
||||||
|
Excel file handling service with retry and formatting capabilities
|
||||||
|
"""
|
||||||
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
from typing import Optional, Union, Dict, Any
|
||||||
|
from pathlib import Path
|
||||||
|
import openpyxl
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
from openpyxl.styles import PatternFill, Alignment, Font
|
||||||
|
from openpyxl.worksheet.worksheet import Worksheet
|
||||||
|
|
||||||
|
class ExcelService:
    """Service for reading and writing Excel files with retries and formatting.

    Reads and writes are attempted up to ``max_retries`` times, pausing
    ``retry_delay`` seconds between failed attempts.
    """

    # Fragment of the ValueError message that triggers the filter-removal
    # fallback in ``read_excel``.
    _FILTER_ERROR_MARKER = "must be either numerical or a string containing a wildcard"

    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
        """
        Args:
            max_retries: Maximum number of attempts per read/save call
            retry_delay: Seconds to wait between failed attempts
        """
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    def read_excel(
        self,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        **kwargs
    ) -> pd.DataFrame:
        """
        Read Excel file with retries and cleanup

        If reading fails with the auto-filter ValueError, the workbook is
        re-saved to a temporary copy without filters and read from there.

        Args:
            file_path: Path to Excel file
            sheet_name: Name of sheet to read
            **kwargs: Additional arguments for pd.read_excel

        Returns:
            DataFrame with the Excel content (line breaks normalized)

        Raises:
            Exception: If the file could not be read after ``max_retries`` attempts
        """
        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                # Try to read the file with openpyxl
                df = pd.read_excel(file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
                df = self._clean_dataframe(df)
                print(f"Archivo leído y limpiado exitosamente: {file_path}")
                return df

            except ValueError as ve:
                last_error = ve
                if self._FILTER_ERROR_MARKER in str(ve):
                    print(f"Error al leer el archivo: {ve}")
                    print("Intentando eliminar filtros y leer el archivo nuevamente...")
                    try:
                        return self._read_without_filters(file_path, sheet_name, **kwargs)
                    except Exception as e:
                        last_error = e
                        print(f"Error al intentar eliminar filtros y leer el archivo: {e}")
                else:
                    print(f"Error de valor: {ve}")

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            # No point in waiting once the last attempt has failed
            if attempt < self.max_retries:
                time.sleep(self.retry_delay)

        raise Exception(
            f"No se pudo leer el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _read_without_filters(
        self,
        file_path: Union[str, Path],
        sheet_name: str,
        **kwargs
    ) -> pd.DataFrame:
        """Read the workbook from a temporary copy saved without auto-filters.

        The temporary file is always removed, even when saving or reading fails.
        """
        # Load the workbook and drop the filters on every worksheet
        wb = openpyxl.load_workbook(filename=file_path)
        for sheet in wb.worksheets:
            if sheet.auto_filter:
                sheet.auto_filter.ref = None

        temp_path = Path(str(file_path) + "_temp.xlsx")
        try:
            wb.save(str(temp_path))
            # Read the same sheet the caller asked for, like the main path does
            df = pd.read_excel(temp_path, engine="openpyxl", sheet_name=sheet_name, **kwargs)
        finally:
            if temp_path.exists():
                temp_path.unlink()

        return self._clean_dataframe(df)

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalize line breaks in every string cell of ``df`` and return it."""
        for col in df.columns:
            # _clean_special_chars leaves non-string values (NaN included) untouched
            df[col] = df[col].apply(self._clean_special_chars)
        return df

    def save_excel(
        self,
        df: pd.DataFrame,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        format_options: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        """
        Save DataFrame to Excel with formatting

        Args:
            df: DataFrame to save
            file_path: Path to save Excel file
            sheet_name: Name of sheet
            format_options: Dictionary with formatting options
                (see ``_format_worksheet`` for the recognized keys)
            **kwargs: Additional arguments for pd.to_excel
                (``index`` defaults to False unless given)

        Raises:
            Exception: If the file could not be saved after ``max_retries`` attempts
        """
        if format_options is None:
            format_options = {}

        # Keep the historical default of not writing the index, but let the
        # caller override it through kwargs.
        to_excel_kwargs = {"index": False, **kwargs}

        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
                    # Save DataFrame
                    df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)

                    # Apply formatting before the writer closes and saves
                    self._format_worksheet(
                        writer.sheets[sheet_name],
                        format_options
                    )

                print(f"Archivo guardado exitosamente en: {file_path}")
                return

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            if attempt < self.max_retries:
                time.sleep(self.retry_delay)

        raise Exception(
            f"No se pudo guardar el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _format_worksheet(self, worksheet: "Worksheet", options: Dict[str, Any]) -> None:
        """
        Apply formatting to worksheet

        Recognized option keys (with defaults): ``freeze_row`` (2),
        ``freeze_col`` (1), ``max_column_width`` (50), ``min_column_width`` (8),
        ``wrap_threshold`` (50), ``header_row`` (1), ``header_color`` ('F2F2F2').

        Args:
            worksheet: Worksheet to format
            options: Formatting options
        """
        # Freeze panes if specified
        freeze_row = options.get('freeze_row', 2)
        freeze_col = options.get('freeze_col', 1)
        if freeze_row or freeze_col:
            # A missing/zero coordinate falls back to row 1 / column A so that
            # only the other axis is frozen instead of building an invalid cell.
            freeze_cell = f"{get_column_letter(freeze_col or 1)}{freeze_row or 1}"
            worksheet.freeze_panes = freeze_cell

        # Auto-adjust column widths
        max_width = options.get('max_column_width', 50)
        min_width = options.get('min_column_width', 8)
        wrap_threshold = options.get('wrap_threshold', 50)

        for col in worksheet.columns:
            max_length = 0
            column = col[0].column_letter

            for cell in col:
                if not cell.value:
                    continue
                try:
                    text = str(cell.value)
                    text_length = len(text)
                    if text_length > wrap_threshold:
                        # Long text wraps, so size the column by its longest word
                        cell.alignment = Alignment(wrap_text=True, vertical='top')
                        longest_word = max((len(word) for word in text.split()), default=0)
                        text_length = min(wrap_threshold, longest_word)
                    max_length = max(max_length, text_length)
                except (AttributeError, TypeError, ValueError):
                    # Best effort: a cell that cannot be measured or styled
                    # must not abort formatting of the whole sheet.
                    continue

            adjusted_width = min(max_width, max(min_width, max_length + 2))
            worksheet.column_dimensions[column].width = adjusted_width

        # Apply custom styles
        header_row = options.get('header_row', 1)
        if header_row:
            header_color = options.get('header_color', 'F2F2F2')
            header_fill = PatternFill(
                start_color=header_color,
                end_color=header_color,
                fill_type='solid'
            )
            header_font = Font(bold=True)

            for cell in worksheet[header_row]:
                cell.fill = header_fill
                cell.font = header_font

    def _clean_special_chars(self, text: Any) -> Any:
        """Normalize line breaks to ``\\n`` in strings; return other values unchanged"""
        if isinstance(text, str):
            # Normalize line breaks
            text = text.replace('\r\n', '\n').replace('\r', '\n')
        return text
|
||||||
|
|
||||||
|
# Example usage:
|
||||||
|
"""
|
||||||
|
from services.excel.excel_service import ExcelService
|
||||||
|
|
||||||
|
# Create service
|
||||||
|
excel_service = ExcelService()
|
||||||
|
|
||||||
|
# Read Excel file
|
||||||
|
try:
|
||||||
|
df = excel_service.read_excel("input.xlsx")
|
||||||
|
print("Data loaded successfully")
|
||||||
|
|
||||||
|
# Modify data...
|
||||||
|
|
||||||
|
# Save with formatting
|
||||||
|
format_options = {
|
||||||
|
'freeze_row': 2,
|
||||||
|
'freeze_col': 1,
|
||||||
|
'max_column_width': 50,
|
||||||
|
'min_column_width': 8,
|
||||||
|
'wrap_threshold': 50,
|
||||||
|
'header_color': 'E6E6E6'
|
||||||
|
}
|
||||||
|
|
||||||
|
excel_service.save_excel(
|
||||||
|
df,
|
||||||
|
"output.xlsx",
|
||||||
|
format_options=format_options
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error handling Excel file: {e}")
|
||||||
|
"""
|
|
@ -0,0 +1,25 @@
|
||||||
|
# services/language/base.py
|
||||||
|
"""
|
||||||
|
Base class for language detection services
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional, List, Dict, Tuple
|
||||||
|
|
||||||
|
class LanguageDetectionService(ABC):
    """Interface that every language detection backend implements."""

    @abstractmethod
    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Identify the language of a single text.

        Args:
            text: Text to analyze

        Returns:
            Tuple of (language_code, confidence_score)
        """
        ...

    @abstractmethod
    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Identify the language of several texts.

        Args:
            texts: Texts to analyze

        Returns:
            List of tuples (language_code, confidence_score)
        """
        ...
|
|
@ -0,0 +1,52 @@
|
||||||
|
# services/language/langid_service.py
|
||||||
|
"""
|
||||||
|
Language detection service using langid
|
||||||
|
"""
|
||||||
|
from typing import List, Tuple, Optional, Set
|
||||||
|
import langid
|
||||||
|
from .base import LanguageDetectionService
|
||||||
|
|
||||||
|
class LangIdService(LanguageDetectionService):
    """Language detection service backed by the ``langid`` package."""

    def __init__(self, allowed_languages: Optional[Set[str]] = None):
        """
        Initialize langid service

        Args:
            allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'})
                If None (or empty), no restriction is configured here
        """
        if allowed_languages:
            # NOTE(review): this is a module-level langid call, so the
            # restriction presumably applies to every langid user in the
            # process, not just this instance - confirm.
            langid.set_languages(list(allowed_languages))
        self.allowed_languages = allowed_languages

    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Detect language of a text using langid

        Args:
            text: Text to analyze

        Returns:
            Tuple of (language_code, confidence_score); ("unknown", 0.0) when
            the input is not a string, has fewer than 3 non-blank characters,
            or classification fails
        """
        # Non-string values (None, NaN, numbers) cannot be classified; answer
        # directly instead of going through the error path.
        if not isinstance(text, str):
            return ("unknown", 0.0)

        stripped = text.strip()
        if len(stripped) < 3:
            return ("unknown", 0.0)

        try:
            lang, score = langid.classify(stripped)
            # Return a plain float regardless of the numeric type langid uses
            return (lang, float(score))
        except Exception as e:
            print(f"Error in language detection: {e}")
            return ("unknown", 0.0)

    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Detect language of multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of tuples (language_code, confidence_score), one per input text
        """
        return [self.detect_language(text) for text in texts]
|
|
@ -0,0 +1,33 @@
|
||||||
|
# services/language/language_factory.py
|
||||||
|
"""
|
||||||
|
Factory class for creating language detection services
|
||||||
|
"""
|
||||||
|
from typing import Optional, Set
|
||||||
|
from .langid_service import LangIdService
|
||||||
|
|
||||||
|
class LanguageFactory:
    """Builds language detection service objects from a short backend name."""

    @staticmethod
    def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> Optional['LanguageDetectionService']:
        """
        Instantiate the language detection service registered under ``service_type``.

        Args:
            service_type: Backend name, matched case-insensitively ("langid", etc.)
            allowed_languages: Set of allowed language codes, forwarded to the service
            **kwargs: Additional arguments for service initialization

        Returns:
            The newly created service instance

        Raises:
            ValueError: If ``service_type`` is not a registered backend
        """
        registry = {
            "langid": LangIdService,
            # Add other language detection services here
        }

        backend = registry.get(service_type.lower())
        if not backend:
            raise ValueError(f"Unknown language detection service type: {service_type}")

        return backend(allowed_languages=allowed_languages, **kwargs)
|
|
@ -0,0 +1,68 @@
|
||||||
|
# services/language/language_utils.py
|
||||||
|
"""
|
||||||
|
Utility functions for language detection and validation
|
||||||
|
"""
|
||||||
|
from typing import Dict, Set
|
||||||
|
|
||||||
|
class LanguageUtils:
    """Lookup helpers around a fixed table of supported languages."""

    # short code -> (English language name, full locale code)
    LANGUAGE_CODES = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }

    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Return the language name for a short code, or 'Unknown' if not listed"""
        entry = cls.LANGUAGE_CODES.get(code)
        if entry is None:
            return 'Unknown'
        return entry[0]

    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Return the full code (e.g., 'en-GB' for 'en'), or 'unknown' if not listed"""
        entry = cls.LANGUAGE_CODES.get(short_code)
        if entry is None:
            return 'unknown'
        return entry[1]

    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Return the part before the first '-' (e.g., 'en' for 'en-GB')"""
        if '-' not in full_code:
            return full_code
        return full_code.split('-')[0]

    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Tell whether the short form of ``code`` is in LANGUAGE_CODES"""
        return cls.get_short_code(code) in cls.LANGUAGE_CODES

    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Return the set of short codes in LANGUAGE_CODES"""
        return set(cls.LANGUAGE_CODES)
|
||||||
|
|
||||||
|
# Example usage:
|
||||||
|
"""
|
||||||
|
from services.language.language_factory import LanguageFactory
|
||||||
|
from services.language.language_utils import LanguageUtils
|
||||||
|
|
||||||
|
# Create language detection service with specific languages
|
||||||
|
allowed_languages = LanguageUtils.get_available_languages()
|
||||||
|
detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
|
||||||
|
|
||||||
|
# Detect language of a text
|
||||||
|
text = "Hello, how are you?"
|
||||||
|
lang, confidence = detector.detect_language(text)
|
||||||
|
print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||||||
|
print(f"Confidence: {confidence}")
|
||||||
|
|
||||||
|
# Detect language of multiple texts
|
||||||
|
texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
|
||||||
|
results = detector.detect_batch(texts)
|
||||||
|
for text, (lang, confidence) in zip(texts, results):
|
||||||
|
print(f"Text: {text}")
|
||||||
|
print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||||||
|
print(f"Confidence: {confidence}")
|
||||||
|
"""
|
|
@ -0,0 +1,20 @@
|
||||||
|
# services/llm/base.py
|
||||||
|
"""
|
||||||
|
Base class for LLM services
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
import json
|
||||||
|
from typing import List, Union, Dict, Any
|
||||||
|
|
||||||
|
class LLMService(ABC):
    """Interface that every LLM backend implements."""

    @abstractmethod
    def generate_text(self, prompt: str) -> str:
        """
        Produce a text completion for ``prompt``.

        Args:
            prompt: Full prompt sent to the model

        Returns:
            The generated text
        """
        ...

    @abstractmethod
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Score the similarity of pairs of texts.

        Args:
            texts_pairs: Mapping of names to lists of texts describing the pairs

        Returns:
            List of similarity scores
        """
        ...
|
|
@ -0,0 +1,228 @@
|
||||||
|
# services/llm/batch_processor.py
|
||||||
|
"""
|
||||||
|
Batch processing service for LLM operations
|
||||||
|
"""
|
||||||
|
from typing import List, Dict, Any, Optional, Callable
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
import time
|
||||||
|
from .base import LLMService
|
||||||
|
from utils.progress_bar import ProgressBar
|
||||||
|
|
||||||
|
@dataclass
class BatchConfig:
    """Configuration for batch processing.

    Raises:
        ValueError: On construction, if ``batch_size`` or ``max_retries`` is
            below 1 or ``retry_delay`` is negative.
    """
    # Number of items sent to the LLM per request
    batch_size: int = 20
    # Number of attempts made for each batch before giving up on it
    max_retries: int = 3
    # Seconds to wait between failed attempts
    retry_delay: int = 3
    # Optional sink for progress and error messages
    progress_callback: Optional[Callable[[str], None]] = None

    def __post_init__(self) -> None:
        """Reject values that would make batch processing fail or skip work."""
        # batch_size is used as a range() step: 0 raises there and a negative
        # value would silently process nothing.
        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        # With no attempts at all, a batch would never be sent to the LLM.
        if self.max_retries < 1:
            raise ValueError("max_retries must be at least 1")
        if self.retry_delay < 0:
            raise ValueError("retry_delay must not be negative")
|
||||||
|
|
||||||
|
class BatchProcessor:
    """
    Handles batch processing for LLM operations

    Items are sent to the LLM service in slices of ``config.batch_size``; each
    slice is retried up to ``config.max_retries`` times.
    """
    def __init__(
        self,
        llm_service: "LLMService",
        config: Optional["BatchConfig"] = None
    ):
        """
        Args:
            llm_service: Service used to generate the responses
            config: Batch settings; a default BatchConfig is used when omitted
        """
        self.llm_service = llm_service
        self.config = config or BatchConfig()

    def process_batch(
        self,
        items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """
        Process items in batches with consistent context

        Args:
            items: List of dictionaries containing data to process
            system_prompt: System prompt for context
            template: Template string for formatting requests
            output_processor: Optional function to process LLM responses

        Returns:
            List of processed results, aligned with ``items``; the entries of a
            batch that failed on every attempt are None

        Raises:
            ValueError: If ``config.batch_size`` is below 1
        """
        if self.config.batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        results = []
        total_items = len(items)

        # Setup progress tracking
        progress = ProgressBar(
            total_items,
            "Processing batches:",
            "Complete"
        )
        if self.config.progress_callback:
            progress.set_output_callback(self.config.progress_callback)
        progress.start()

        # Process in batches
        for start_idx in range(0, total_items, self.config.batch_size):
            end_idx = min(start_idx + self.config.batch_size, total_items)
            batch_items = items[start_idx:end_idx]

            results.extend(
                self._run_batch(
                    batch_items,
                    system_prompt,
                    template,
                    output_processor,
                    f"{start_idx}-{end_idx}"
                )
            )

            # Update progress
            progress.update(end_idx)

        progress.finish()
        return results

    def _run_batch(
        self,
        batch_items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable],
        label: str
    ) -> List[Any]:
        """Send one batch to the LLM with retries; return None entries on failure."""
        # Prepare batch request
        request_payload = json.dumps({
            "items": batch_items,
            "template": template
        })
        # generate_text takes a single prompt, so the system prompt is
        # prepended to the payload instead of being passed separately.
        prompt = f"{system_prompt}\n\n{request_payload}"

        for attempt in range(self.config.max_retries):
            try:
                response = self.llm_service.generate_text(prompt)

                # Parse and process response
                batch_results = self._process_response(
                    response,
                    output_processor
                )

                if len(batch_results) != len(batch_items):
                    raise ValueError(
                        "Response count doesn't match input count"
                    )

                return batch_results

            except Exception as e:
                if attempt < self.config.max_retries - 1:
                    self._notify(f"Error in batch {label}: {e}. Retrying...")
                    time.sleep(self.config.retry_delay)
                else:
                    self._notify(f"Error in batch {label}: {e}")

        # Every attempt failed (or none was allowed): keep results aligned
        # with the inputs by returning one None per item.
        return [None] * len(batch_items)

    def _notify(self, message: str) -> None:
        """Forward ``message`` to the configured progress callback, if any."""
        if self.config.progress_callback:
            self.config.progress_callback(message)

    def _process_response(
        self,
        response: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """
        Process LLM response

        Args:
            response: Raw text returned by the LLM, expected to be a JSON array
            output_processor: Optional function applied to each parsed element

        Returns:
            The parsed list, with ``output_processor`` applied to each element
            when one is given

        Raises:
            ValueError: If the response is missing, is not valid JSON, or is
                not a JSON array
        """
        # The service implementations return None when their API call fails
        if response is None:
            raise ValueError("LLM service returned no response")

        try:
            # Parse JSON response
            parsed = json.loads(response)
        except json.JSONDecodeError as err:
            raise ValueError("Failed to parse LLM response as JSON") from err

        if not isinstance(parsed, list):
            raise ValueError("LLM response is not a JSON array")

        # Apply custom processing if provided
        if output_processor:
            return [output_processor(item) for item in parsed]
        return parsed
|
||||||
|
|
||||||
|
# Example specialized batch processor for translations
|
||||||
|
class TranslationBatchProcessor(BatchProcessor):
    """Specialized batch processor for translations"""

    def translate_batch(
        self,
        texts: List[str],
        source_lang: str,
        target_lang: str
    ) -> List[str]:
        """
        Translate a batch of texts

        Args:
            texts: List of texts to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            List of translated texts, as returned by ``process_batch``
        """
        # Prepare items
        items = [{"text": text} for text in texts]

        # Setup prompts
        system_prompt = (
            "You are a translator. Translate the provided texts "
            "maintaining special fields like <> and <#>."
        )

        # Only the first literal is an f-string: the trailing "{text}" must
        # stay a literal placeholder. Calling str.format() on the whole
        # template raised KeyError('text') because no such argument was given.
        template = (
            f"Translate the following texts from {source_lang} to {target_lang}. "
            "Return translations as a JSON array of strings:"
            "\n\n{text}"
        )

        # Process batch
        return self.process_batch(
            items=items,
            system_prompt=system_prompt,
            template=template
        )
|
||||||
|
|
||||||
|
# Example usage:
|
||||||
|
"""
|
||||||
|
from services.llm.llm_factory import LLMFactory
|
||||||
|
from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor
|
||||||
|
|
||||||
|
# Create LLM service
|
||||||
|
llm_service = LLMFactory.create_service("openai")
|
||||||
|
|
||||||
|
# Setup batch processor with progress callback
|
||||||
|
def progress_callback(message: str):
|
||||||
|
print(message)
|
||||||
|
|
||||||
|
config = BatchConfig(
|
||||||
|
batch_size=20,
|
||||||
|
progress_callback=progress_callback
|
||||||
|
)
|
||||||
|
|
||||||
|
# General batch processor
|
||||||
|
processor = BatchProcessor(llm_service, config)
|
||||||
|
|
||||||
|
# Example batch process for custom task
|
||||||
|
items = [
|
||||||
|
{"text": "Hello", "context": "greeting"},
|
||||||
|
{"text": "Goodbye", "context": "farewell"}
|
||||||
|
]
|
||||||
|
|
||||||
|
system_prompt = "You are a helpful assistant."
|
||||||
|
template = "Process these items considering their context: {items}"
|
||||||
|
|
||||||
|
results = processor.process_batch(
|
||||||
|
items=items,
|
||||||
|
system_prompt=system_prompt,
|
||||||
|
template=template
|
||||||
|
)
|
||||||
|
|
||||||
|
# Example translation batch
|
||||||
|
translator = TranslationBatchProcessor(llm_service, config)
|
||||||
|
texts = ["Hello world", "How are you?"]
|
||||||
|
translations = translator.translate_batch(
|
||||||
|
texts=texts,
|
||||||
|
source_lang="en",
|
||||||
|
target_lang="es"
|
||||||
|
)
|
||||||
|
"""
|
|
@ -0,0 +1,63 @@
|
||||||
|
# services/llm/grok_service.py
|
||||||
|
"""
|
||||||
|
Grok service implementation
|
||||||
|
"""
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
import json
|
||||||
|
from .base import LLMService
|
||||||
|
from config.api_keys import APIKeyManager
|
||||||
|
|
||||||
|
class GrokService(LLMService):
    """Placeholder LLM service for Grok; no API call is implemented yet."""

    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
        """
        Args:
            model: Model name to use
            temperature: Sampling temperature

        Raises:
            ValueError: If APIKeyManager provides no Grok API key
        """
        key = APIKeyManager.get_grok_key()
        if not key:
            raise ValueError("Grok API key not found. Please set up your API keys.")

        self.api_key, self.model, self.temperature = key, model, temperature

    def generate_text(self, prompt: str) -> str:
        """
        Generate text using the Grok API

        Currently always prints an error and returns None.
        TODO: Update this method when Grok API is available
        """
        try:
            # Placeholder for Grok API implementation
            # Update this when the API is released
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as err:
            print(f"Error in Grok API call: {err}")
            return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Calculate similarity scores using the Grok API

        Currently always prints an error and returns None.
        TODO: Update this method when Grok API is available
        """
        try:
            # Prompt and payload are prepared for the future API call
            system_prompt = (
                "Evaluate the semantic similarity between the following table of pairs of texts "
                "in json format on a scale from 0 to 1. Return the similarity scores for every "
                "row in JSON format as a list of numbers, without any additional text or formatting."
            )
            request_payload = json.dumps(texts_pairs)

            # Placeholder for Grok API implementation
            # Update this when the API is released
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as err:
            print(f"Error in Grok similarity calculation: {err}")
            return None
|
||||||
|
|
||||||
|
# Update config/api_keys.py to include Grok
# NOTE(review): this snippet appears intended for the APIKeyManager class in
# config/api_keys.py rather than for this module - confirm and move it there.
# It reads `os`, which the imports visible in this file do not provide.
@classmethod
def get_grok_key(cls) -> Optional[str]:
    """
    Get Grok API key from environment or stored configuration

    Returns:
        The GROK_API_KEY environment variable when it is set and non-empty,
        otherwise the result of ``cls._get_stored_key('grok')``
    """
    return (
        os.getenv('GROK_API_KEY') or
        cls._get_stored_key('grok')
    )
|
|
@ -0,0 +1,33 @@
|
||||||
|
# services/llm/llm_factory.py
|
||||||
|
"""
|
||||||
|
Factory class for creating LLM services
|
||||||
|
"""
|
||||||
|
from typing import Optional
|
||||||
|
from .openai_service import OpenAIService
|
||||||
|
from .ollama_service import OllamaService
|
||||||
|
from .grok_service import GrokService
|
||||||
|
|
||||||
|
class LLMFactory:
    """Builds LLM service objects from a short backend name."""

    @staticmethod
    def create_service(service_type: str, **kwargs) -> Optional['LLMService']:
        """
        Instantiate the LLM service registered under ``service_type``.

        Args:
            service_type: Backend name, matched case-insensitively
                ("openai", "ollama", "grok")
            **kwargs: Additional arguments for service initialization

        Returns:
            The new service instance, or None (after printing a message) when
            the backend name is not recognized
        """
        registry = {
            "openai": OpenAIService,
            "ollama": OllamaService,
            "grok": GrokService
        }

        backend = registry.get(service_type.lower())
        if not backend:
            print(f"Unknown service type: {service_type}")
            return None

        return backend(**kwargs)
|
|
@ -0,0 +1,53 @@
|
||||||
|
# services/llm/ollama_service.py
|
||||||
|
"""
|
||||||
|
Ollama service implementation
|
||||||
|
"""
|
||||||
|
import ollama
|
||||||
|
import json
|
||||||
|
from typing import Dict, List
|
||||||
|
from .base import LLMService
|
||||||
|
|
||||||
|
class OllamaService(LLMService):
    """LLM service backed by a model served through ``ollama``."""

    def __init__(self, model: str = "llama3.1"):
        """
        Args:
            model: Name of the Ollama model to use
        """
        self.model = model

    def generate_text(self, prompt: str) -> str:
        """
        Generate text for ``prompt``.

        Returns:
            The model's response text, or None if the call fails (the error is printed)
        """
        try:
            response = ollama.generate(
                model=self.model,
                prompt=prompt
            )
            return response["response"]
        except Exception as e:
            print(f"Error in Ollama API call: {e}")
            return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Ask the model for similarity scores of the given pairs of texts.

        Args:
            texts_pairs: JSON-serializable table of text pairs

        Returns:
            List of scores parsed from the model's JSON answer, or None if the
            call or the parsing fails (the error is printed)
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )

        request_payload = json.dumps(texts_pairs)
        prompt = f"{system_prompt}\n\n{request_payload}"

        try:
            response = ollama.generate(
                model=self.model,
                prompt=prompt
            )
            return self._parse_scores(response["response"])
        except Exception as e:
            print(f"Error in Ollama similarity calculation: {e}")
            return None

    @staticmethod
    def _parse_scores(raw: str) -> List[float]:
        """Extract the score list from the model's answer.

        Accepts a bare JSON list or an object with a "similarity_scores" key,
        optionally wrapped in a Markdown code fence.

        Raises:
            ValueError: If the answer is not JSON or has an unexpected shape
        """
        cleaned = raw.strip()
        # Models sometimes wrap JSON in ```json ... ``` despite the instructions
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()

        try:
            scores = json.loads(cleaned)
        except json.JSONDecodeError as err:
            raise ValueError("Could not decode response as JSON") from err

        if isinstance(scores, dict) and "similarity_scores" in scores:
            return scores["similarity_scores"]
        if isinstance(scores, list):
            return scores
        raise ValueError("Unexpected response format")
|
|
@ -0,0 +1,69 @@
|
||||||
|
# services/llm/openai_service.py
|
||||||
|
"""
|
||||||
|
OpenAI service implementation
|
||||||
|
"""
|
||||||
|
from openai import OpenAI
|
||||||
|
from typing import Dict, List
|
||||||
|
import json
|
||||||
|
from .base import LLMService
|
||||||
|
from config.api_keys import APIKeyManager
|
||||||
|
|
||||||
|
class OpenAIService(LLMService):
    """LLM service backed by the OpenAI chat completions API."""

    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3, max_tokens: int = 1500):
        """
        Args:
            model: Chat model name
            temperature: Sampling temperature sent with every request
            max_tokens: Response token limit sent with every request

        Raises:
            ValueError: If APIKeyManager provides no OpenAI API key
        """
        api_key = APIKeyManager.get_openai_key()
        if not api_key:
            raise ValueError("OpenAI API key not found. Please set up your API keys.")

        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens

    def generate_text(self, prompt: str) -> str:
        """
        Generate text for ``prompt`` sent as a single user message.

        Returns:
            The content of the first choice, or None if the call fails
            (the error is printed)
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error in OpenAI API call: {e}")
            return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """
        Ask the model for similarity scores of the given pairs of texts.

        Args:
            texts_pairs: JSON-serializable table of text pairs

        Returns:
            List of scores parsed from the model's JSON answer, or None if the
            call or the parsing fails (the error is printed)
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )

        request_payload = json.dumps(texts_pairs)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": request_payload}
                ],
                temperature=self.temperature,
                max_tokens=self.max_tokens
            )
            return self._parse_scores(response.choices[0].message.content)
        except Exception as e:
            print(f"Error in OpenAI similarity calculation: {e}")
            return None

    @staticmethod
    def _parse_scores(raw: str) -> List[float]:
        """Extract the score list from the model's answer.

        Accepts a bare JSON list or an object with a "similarity_scores" key,
        optionally wrapped in a Markdown code fence and/or single quotes.

        Raises:
            ValueError: If the answer is not JSON or has an unexpected shape
        """
        # str.strip(chars) removes a *set* of characters, so the fence and the
        # "json" tag are removed explicitly as prefix/suffix instead.
        cleaned = raw.strip().strip("'").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()

        try:
            scores = json.loads(cleaned)
        except json.JSONDecodeError as err:
            raise ValueError("Could not decode response as JSON") from err

        if isinstance(scores, dict) and "similarity_scores" in scores:
            return scores["similarity_scores"]
        if isinstance(scores, list):
            return scores
        raise ValueError("Unexpected response format")
|
|
@ -0,0 +1,19 @@
|
||||||
|
# services/translation/base.py
|
||||||
|
"""
|
||||||
|
Base class for translation services
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Optional, List, Dict
|
||||||
|
|
||||||
|
class TranslationService(ABC):
    """Interface that every translation backend has to implement."""

    @abstractmethod
    def translate_text(
        self,
        text: str,
        target_language: str,
        source_language: Optional[str] = None,
    ) -> str:
        """Translate one text into ``target_language``.

        ``source_language`` may be left as ``None`` when the caller does not
        specify the language of ``text``.
        """

    @abstractmethod
    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: Optional[str] = None,
    ) -> List[str]:
        """Translate several texts into ``target_language`` in one call.

        ``source_language`` may be left as ``None`` when the caller does not
        specify the language of ``texts``.
        """
|
|
@ -0,0 +1,77 @@
|
||||||
|
# services/translation/google_translate.py
|
||||||
|
"""
|
||||||
|
Google Translation service implementation
|
||||||
|
"""
|
||||||
|
from typing import Optional, List
|
||||||
|
import html
|
||||||
|
from google.cloud import translate_v2 as translate
|
||||||
|
from google.oauth2 import service_account
|
||||||
|
from config.api_keys import APIKeyManager
|
||||||
|
from .base import TranslationService
|
||||||
|
|
||||||
|
class GoogleTranslateService(TranslationService):
    """Translation service backed by the ``google.cloud.translate_v2`` client."""

    def __init__(self, credentials_file: Optional[str] = None):
        """
        Initialize Google Translate service

        Args:
            credentials_file: Path to Google service account credentials JSON file.
                            If None, an API key must be available from APIKeyManager.

        Raises:
            ValueError: If the credentials file cannot be used to build the
                client, or if no credentials file is given and APIKeyManager
                returns no Google key.
        """
        if credentials_file:
            # Use service account credentials
            try:
                credentials = service_account.Credentials.from_service_account_file(credentials_file)
                self.client = translate.Client(credentials=credentials)
            except Exception as e:
                raise ValueError(f"Error initializing Google Translate with credentials: {e}") from e
        else:
            # Use API key
            api_key = APIKeyManager.get_google_key()
            if not api_key:
                raise ValueError("Google API key not found. Please set up your API keys.")
            # NOTE(review): api_key is only checked for presence; it is not
            # passed to translate.Client(). Confirm how the client is expected
            # to authenticate in this branch.
            self.client = translate.Client()

    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """
        Translate a single text.

        Args:
            text: Text to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.

        Returns:
            Translated text, with HTML entities unescaped

        Raises:
            ValueError: If the client call fails; the original exception is
                attached as the cause.
        """
        try:
            result = self.client.translate(
                text,
                target_language=target_language,
                source_language=source_language
            )
            return html.unescape(result["translatedText"])
        except Exception as e:
            raise ValueError(f"Error in Google Translate: {e}") from e

    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """
        Translate multiple texts in batch.

        Args:
            texts: List of texts to translate
            target_language: Target language code (e.g., 'es' for Spanish)
            source_language: Source language code. If None, will be auto-detected.

        Returns:
            List of translated texts, with HTML entities unescaped. An empty
            input yields an empty list without contacting the service.

        Raises:
            ValueError: If the client call fails; the original exception is
                attached as the cause.
        """
        # Nothing to translate: skip the request entirely.
        if not texts:
            return []

        try:
            results = self.client.translate(
                texts,
                target_language=target_language,
                source_language=source_language
            )
            return [html.unescape(result["translatedText"]) for result in results]
        except Exception as e:
            raise ValueError(f"Error in Google Translate batch: {e}") from e
|
|
@ -0,0 +1,32 @@
|
||||||
|
# services/translation/translation_factory.py
|
||||||
|
"""
|
||||||
|
Factory class for creating translation services
|
||||||
|
"""
|
||||||
|
from typing import Optional
|
||||||
|
from .google_translate import GoogleTranslateService
|
||||||
|
|
||||||
|
class TranslationFactory:
    """Factory class for creating translation service instances"""

    @staticmethod
    def create_service(service_type: str, **kwargs) -> Optional['TranslationService']:
        """
        Create an instance of the specified translation service

        Args:
            service_type: Type of translation service ("google", etc.);
                matched case-insensitively
            **kwargs: Additional arguments forwarded to the service constructor

        Returns:
            Instance of the service class registered for service_type

        Raises:
            ValueError: If service_type is not a registered service name
        """
        services = {
            "google": GoogleTranslateService,
            # Add other translation services here
        }

        service_class = services.get(service_type.lower())
        if not service_class:
            raise ValueError(f"Unknown translation service type: {service_type}")
        return service_class(**kwargs)
|
|
@ -0,0 +1,39 @@
|
||||||
|
# utils/file_utils.py
|
||||||
|
"""
|
||||||
|
File handling utilities
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import filedialog
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def select_file(title="Select file", filetypes=None):
    """Show an "open file" dialog and return the chosen path.

    Args:
        title: Dialog window title.
        filetypes: Sequence of ``(label, pattern)`` entries passed to
            ``filedialog.askopenfilename``. Defaults to Excel files plus
            "All files".

    Returns:
        The selected path, or None when the dialog returns an empty value.
    """
    if filetypes is None:
        filetypes = [
            # Several extensions are given as a tuple, the portable Tk form
            # (a ';'-joined pattern is not understood on every platform).
            ("Excel files", ("*.xlsx", "*.xls")),
            ("All files", "*.*")
        ]

    # Hidden root window so only the dialog is shown.
    root = tk.Tk()
    root.withdraw()

    try:
        file_path = filedialog.askopenfilename(
            title=title,
            filetypes=filetypes
        )
    finally:
        # Release the temporary root; otherwise every call leaks a Tk instance.
        root.destroy()

    return file_path if file_path else None
|
||||||
|
|
||||||
|
def select_directory(title="Select directory"):
    """Show a "choose directory" dialog and return the chosen path.

    Args:
        title: Dialog window title.

    Returns:
        The selected directory, or None when the dialog returns an empty value.
    """
    # Hidden root window so only the dialog is shown.
    root = tk.Tk()
    root.withdraw()

    try:
        dir_path = filedialog.askdirectory(title=title)
    finally:
        # Release the temporary root; otherwise every call leaks a Tk instance.
        root.destroy()

    return dir_path if dir_path else None
|
||||||
|
|
||||||
|
def safe_read_excel(file_path, **kwargs):
    """Read an Excel file with ``pd.read_excel`` without ever raising.

    Args:
        file_path: Location of the workbook, forwarded to pandas unchanged.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        Whatever ``pd.read_excel`` returns, or None when it raises (the
        error is printed).
    """
    frame = None
    try:
        frame = pd.read_excel(file_path, **kwargs)
    except Exception as err:
        print(f"Error reading Excel file: {err}")
    return frame
|
|
@ -0,0 +1,31 @@
|
||||||
|
# utils/logger_utils.py
|
||||||
|
"""
|
||||||
|
Logging configuration and utilities
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from config.settings import LOG_DIR
|
||||||
|
|
||||||
|
def setup_logger(name, log_file=None):
    """Return the logger called ``name``, configuring it on first use.

    A logger that already has handlers is returned untouched. Otherwise its
    level is set to INFO, a console handler is attached, and - when
    ``log_file`` is given - a file handler writing to ``LOG_DIR/log_file``
    is attached as well (``LOG_DIR`` is created if missing).
    """
    log = logging.getLogger(name)

    # Already configured by an earlier call: do not stack more handlers.
    if log.handlers:
        return log

    log.setLevel(logging.INFO)

    if log_file:
        os.makedirs(LOG_DIR, exist_ok=True)
        target = os.path.join(LOG_DIR, log_file)
        to_file = logging.FileHandler(target)
        file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        to_file.setFormatter(file_format)
        log.addHandler(to_file)

    to_console = logging.StreamHandler()
    console_format = logging.Formatter('%(levelname)s: %(message)s')
    to_console.setFormatter(console_format)
    log.addHandler(to_console)

    return log
|
|
@ -0,0 +1,64 @@
|
||||||
|
# utils/logging_manager.py
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Optional
|
||||||
|
from datetime import datetime
|
||||||
|
import tkinter as tk
|
||||||
|
from queue import Queue, Empty
|
||||||
|
from .output_redirector import OutputRedirector
|
||||||
|
|
||||||
|
class LoggingManager:
    """Owns the shared ``"app_logger"`` logger: daily log file plus optional GUI output."""

    def __init__(self, work_dir: str):
        """Create ``<work_dir>/logs`` if needed and attach the file handler.

        Args:
            work_dir: Directory under which the ``logs`` folder is placed.
        """
        self.work_dir = work_dir
        self.log_dir = os.path.join(work_dir, "logs")
        os.makedirs(self.log_dir, exist_ok=True)

        self.logger = self._setup_logger()
        self.queue: Optional[Queue] = None
        self.text_widget: Optional[tk.Text] = None
        # GUI handler installed by setup_gui_logging, kept so it can be replaced.
        self._gui_handler: Optional[logging.Handler] = None

    def _setup_logger(self) -> logging.Logger:
        """Return ``"app_logger"`` with an INFO-level handler for today's log file."""
        logger = logging.getLogger("app_logger")
        logger.setLevel(logging.INFO)

        # File handler
        log_file = os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")

        # The logger is process-wide; a second manager for the same file must
        # not add another handler, or every record would be written twice.
        target = os.path.abspath(log_file)
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler) and handler.baseFilename == target:
                return logger

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        file_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        return logger

    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
        """Setup logging to GUI text widget

        Log messages are written to ``queue`` through an OutputRedirector;
        ``process_log_queue`` later moves them into ``text_widget``. Calling
        this again replaces the GUI handler installed by the previous call.
        """
        self.text_widget = text_widget
        self.queue = queue

        # Drop the handler from an earlier call so messages are not duplicated.
        if self._gui_handler is not None:
            self.logger.removeHandler(self._gui_handler)

        # Add handler for GUI logging
        gui_handler = logging.StreamHandler(OutputRedirector(queue))
        gui_handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(message)s")
        gui_handler.setFormatter(formatter)

        self.logger.addHandler(gui_handler)
        self._gui_handler = gui_handler

    def process_log_queue(self):
        """Process pending log messages

        Drains the queue without blocking, appending each message to the
        text widget. Does nothing until both queue and widget are set.
        """
        if self.queue and self.text_widget:
            while True:
                try:
                    message = self.queue.get_nowait()
                    self.text_widget.insert(tk.END, message)
                    self.text_widget.see(tk.END)
                    self.text_widget.update_idletasks()
                except Empty:
                    break

    def clear_output(self):
        """Clear the text widget"""
        if self.text_widget:
            self.text_widget.delete("1.0", tk.END)
            self.text_widget.update_idletasks()
|
|
@ -0,0 +1,17 @@
|
||||||
|
# utils/output_redirector.py
|
||||||
|
"""
|
||||||
|
Output redirector for capturing stdout/stderr
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
from queue import Queue
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
class OutputRedirector:
    """Minimal file-like object that forwards written text to a queue."""

    def __init__(self, queue: Queue):
        """Remember the queue that receives every written chunk."""
        self.queue = queue

    def write(self, string: str):
        """Put ``string`` on the queue exactly as received."""
        sink = self.queue
        sink.put(string)

    def flush(self):
        """Do nothing; present so the object can be used where a stream is expected."""
        return None
|
|
@ -0,0 +1,51 @@
|
||||||
|
# utils/progress_bar.py
|
||||||
|
"""
|
||||||
|
Progress bar implementation
|
||||||
|
"""
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import ttk
|
||||||
|
from typing import Optional, Callable
|
||||||
|
import sys
|
||||||
|
from queue import Queue
|
||||||
|
import threading
|
||||||
|
|
||||||
|
class ProgressBar:
    """Text progress bar drawn incrementally as a row of dots.

    Output has the form ``<prefix> [....] <suffix>``: ``start`` writes the
    opening part, ``update``/``increment`` append dots as progress grows,
    and ``finish`` pads the bar to ``max_points`` dots and closes it.
    """

    def __init__(self, total: int, prefix: str = "", suffix: str = "", max_points: int = 30):
        """
        Args:
            total: Value of ``current`` that corresponds to a full bar.
            prefix: Text written before the opening bracket.
            suffix: Text written after the closing bracket.
            max_points: Number of dots in a full bar.
        """
        self.total = total
        self.prefix = prefix
        self.suffix = suffix
        self.max_points = max_points
        self.current = 0
        self.last_points = 0
        self.output_callback: Optional[Callable] = None

    def set_output_callback(self, callback: Callable[[str], None]):
        """Set callback function for output"""
        self.output_callback = callback

    def update(self, current: int):
        """Record ``current`` and emit any dots newly earned by it."""
        self.current = current

        # A non-positive total has no meaningful ratio (and would divide by
        # zero); leave the bar as is and let finish() fill it.
        if self.total <= 0:
            return

        points = min(int((current / self.total) * self.max_points), self.max_points)

        if points > self.last_points:
            new_points = points - self.last_points
            self._write_output("." * new_points)
            self.last_points = points

    def increment(self):
        """Advance progress by one step."""
        self.update(self.current + 1)

    def finish(self):
        """Fill the remaining dots and write the closing bracket and suffix."""
        remaining_points = self.max_points - self.last_points
        if remaining_points > 0:
            self._write_output("." * remaining_points)
            # Remember the bar is full so a repeated call adds no more dots.
            self.last_points = self.max_points
        self._write_output(f"] {self.suffix}\n")

    def start(self):
        """Write the prefix and the opening bracket."""
        self._write_output(f"\r{self.prefix} [")

    def _write_output(self, text: str):
        """Send ``text`` to the callback if one is set, otherwise print it unbuffered."""
        if self.output_callback:
            self.output_callback(text)
        else:
            print(text, end="", flush=True)
|
|
@ -0,0 +1,68 @@
|
||||||
|
# utils/script_registry.py
|
||||||
|
from typing import Dict, Callable, List, Optional
|
||||||
|
import importlib
|
||||||
|
import inspect
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from config.profile_manager import Profile, ProfileManager
|
||||||
|
|
||||||
|
class ScriptRegistry:
    """Registry for script operations"""

    def __init__(self):
        # Operation name -> callable to run.
        self.operations: Dict[str, Callable] = {}
        # Operation name -> human-readable description.
        self.descriptions: Dict[str, str] = {}

    def register(self, name: str, operation: Callable, description: str = ""):
        """Register a new operation (replacing any existing one with the same name)"""
        self.operations[name] = operation
        self.descriptions[name] = description

    def auto_discover(self, scripts_dir: str = "scripts"):
        """Auto-discover scripts in the scripts directory

        Every ``script_*.py`` file in ``<parent of this file's directory>/<scripts_dir>``
        is imported as ``<scripts_dir>.<file stem>``. Modules exposing a
        ``main`` attribute are registered under the file stem without the
        ``script_`` prefix, using the module docstring as description.
        Import errors are printed and the file is skipped.
        """
        scripts_path = Path(__file__).parent.parent / scripts_dir

        # Sorted so the registration order does not depend on the filesystem.
        for file in sorted(scripts_path.glob("script_*.py")):
            module_name = f"{scripts_dir}.{file.stem}"
            try:
                module = importlib.import_module(module_name)

                # Look for main function and docstring
                if hasattr(module, 'main'):
                    name = file.stem.replace('script_', '')
                    description = module.__doc__ or ""
                    self.register(name, module.main, description)

            except Exception as e:
                print(f"Error loading script {file}: {e}")

    def get_operations(self) -> List[tuple]:
        """Get list of available operations as ``(name, description)`` tuples"""
        return [(name, self.descriptions[name]) for name in self.operations]

    def run_operation(self, name: str, profile: Optional["Profile"] = None, **kwargs):
        """
        Run a registered operation

        Only arguments the operation can accept are passed: ``profile`` when
        the operation declares a parameter with that name, and the entries of
        ``kwargs`` whose names appear in its signature. An operation that
        declares ``**kwargs`` itself receives all of them.

        Args:
            name: Name of the operation to run
            profile: Current profile instance (optional)
            **kwargs: Additional arguments for the operation

        Returns:
            Whatever the operation returns

        Raises:
            ValueError: If no operation is registered under ``name``
        """
        if name not in self.operations:
            raise ValueError(f"Unknown operation: {name}")

        operation = self.operations[name]
        params = inspect.signature(operation).parameters

        accepts_any_keyword = any(
            param.kind is inspect.Parameter.VAR_KEYWORD for param in params.values()
        )
        if accepts_any_keyword:
            # The operation collects arbitrary keywords; forward everything.
            call_args = dict(kwargs)
        else:
            # Add only the kwargs that match the signature
            call_args = {key: value for key, value in kwargs.items() if key in params}

        # Check if operation accepts profile parameter
        if 'profile' in params:
            call_args['profile'] = profile

        return operation(**call_args)
|
Loading…
Reference in New Issue