Agregado de librerias
This commit is contained in:
parent
caa983c8da
commit
6fca251249
|
@ -0,0 +1,27 @@
|
|||
# Python cache files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# Environment directories
|
||||
venv/
|
||||
env/
|
||||
.env/
|
||||
|
||||
# IDE configurations
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Logs and data files
|
||||
data/log.txt
|
||||
data/data.json
|
||||
|
||||
# Allow script groups and their configurations
|
||||
!backend/script_groups/
|
||||
!backend/script_groups/*/
|
||||
!backend/script_groups/*/*.py
|
||||
!backend/script_groups/*/schema.json
|
||||
!backend/script_groups/*/esquema.json
|
||||
!backend/script_groups/*/description.json
|
||||
|
||||
# But ignore working directory configurations
|
||||
backend/script_groups/*/work_dir.json
|
|
@ -0,0 +1,3 @@
|
|||
# Create a .gitkeep file in each script group directory
|
||||
mkdir -p backend/script_groups/example_group
|
||||
touch backend/script_groups/example_group/.gitkeep
|
|
@ -0,0 +1,241 @@
|
|||
# services/excel/excel_service.py
|
||||
"""
|
||||
Excel file handling service with retry and formatting capabilities
|
||||
"""
|
||||
import pandas as pd
|
||||
import time
|
||||
from typing import Optional, Union, Dict, Any
|
||||
from pathlib import Path
|
||||
import openpyxl
|
||||
from openpyxl.utils import get_column_letter
|
||||
from openpyxl.styles import PatternFill, Alignment, Font
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
class ExcelService:
    """Read and write Excel files with retries, cleanup and basic formatting.

    Reads and writes are retried up to ``max_retries`` times, waiting
    ``retry_delay`` seconds between attempts (typically to give the user time
    to close a workbook that is locked by another program).
    """

    def __init__(self, max_retries: int = 5, retry_delay: int = 5):
        """Store the retry policy.

        Args:
            max_retries: Maximum number of attempts for each read/save.
            retry_delay: Seconds to wait between two attempts.
        """
        self.max_retries = max_retries
        self.retry_delay = retry_delay

    def read_excel(
        self,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        **kwargs
    ) -> pd.DataFrame:
        """Read an Excel sheet into a DataFrame, retrying on failure.

        String cells are normalized with ``_clean_special_chars``. If the
        read fails because of an auto-filter definition, a filter-free
        temporary copy of the workbook is read instead.

        Args:
            file_path: Path to the Excel file.
            sheet_name: Name of the sheet to read.
            **kwargs: Additional arguments forwarded to ``pd.read_excel``.

        Returns:
            DataFrame with the cleaned sheet content.

        Raises:
            Exception: If the file could not be read after ``max_retries``
                attempts; the last underlying error is attached as the cause.
        """
        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                # Try to read the file with openpyxl
                df = pd.read_excel(
                    file_path, engine="openpyxl", sheet_name=sheet_name, **kwargs
                )
                df = self._clean_dataframe(df)
                print(f"Archivo leído y limpiado exitosamente: {file_path}")
                return df

            except ValueError as ve:
                last_error = ve
                if "must be either numerical or a string containing a wildcard" in str(ve):
                    print(f"Error al leer el archivo: {ve}")
                    print("Intentando eliminar filtros y leer el archivo nuevamente...")
                    try:
                        return self._read_without_filters(file_path, sheet_name, **kwargs)
                    except Exception as e:
                        last_error = e
                        print(f"Error al intentar eliminar filtros y leer el archivo: {e}")
                else:
                    print(f"Error de valor: {ve}")

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            self._wait_before_retry(attempt)

        raise Exception(
            f"No se pudo leer el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _read_without_filters(
        self,
        file_path: Union[str, Path],
        sheet_name: str,
        **kwargs
    ) -> pd.DataFrame:
        """Read ``sheet_name`` from a temporary copy of the workbook without auto-filters.

        The temporary copy is always removed, even when saving or reading fails.
        """
        temp_path = Path(str(file_path) + "_temp.xlsx")
        try:
            # Load the workbook and drop the auto-filter of every worksheet;
            # the copy is throwaway, so clearing all of them is harmless.
            workbook = openpyxl.load_workbook(filename=file_path)
            for sheet in workbook.worksheets:
                if sheet.auto_filter:
                    sheet.auto_filter.ref = None

            # Save a filter-free copy and read the requested sheet from it
            workbook.save(str(temp_path))
            df = pd.read_excel(
                str(temp_path), engine="openpyxl", sheet_name=sheet_name, **kwargs
            )
        finally:
            if temp_path.exists():
                temp_path.unlink()
        return self._clean_dataframe(df)

    def _clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply ``_clean_special_chars`` to every non-null cell of ``df`` (in place)."""
        for col in df.columns:
            df[col] = df[col].apply(
                lambda x: self._clean_special_chars(x) if pd.notna(x) else x
            )
        return df

    def _wait_before_retry(self, attempt: int) -> None:
        """Sleep ``retry_delay`` seconds unless ``attempt`` was the last one."""
        if attempt < self.max_retries:
            time.sleep(self.retry_delay)

    def save_excel(
        self,
        df: pd.DataFrame,
        file_path: Union[str, Path],
        sheet_name: str = "Sheet1",
        format_options: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> None:
        """Save a DataFrame to an Excel file and format the resulting sheet.

        Args:
            df: DataFrame to save.
            file_path: Destination path of the Excel file.
            sheet_name: Name of the sheet to write.
            format_options: Formatting options, see ``_format_worksheet``.
            **kwargs: Additional arguments forwarded to ``DataFrame.to_excel``
                (``index`` defaults to ``False``).

        Raises:
            Exception: If the file could not be saved after ``max_retries``
                attempts; the last underlying error is attached as the cause.
        """
        if format_options is None:
            format_options = {}

        # Forward caller options, keeping the historical index=False default.
        to_excel_kwargs = dict(kwargs)
        to_excel_kwargs.setdefault("index", False)

        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
                    df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)
                    self._format_worksheet(writer.sheets[sheet_name], format_options)

                print(f"Archivo guardado exitosamente en: {file_path}")
                return

            except PermissionError as e:
                last_error = e
                print(
                    f"Error de permiso: {e}. Por favor cierre el archivo. "
                    f"Reintentando en {self.retry_delay} segundos..."
                )
            except Exception as e:
                last_error = e
                print(f"Error inesperado: {e}. Reintentando en {self.retry_delay} segundos...")

            self._wait_before_retry(attempt)

        raise Exception(
            f"No se pudo guardar el archivo después de {self.max_retries} intentos."
        ) from last_error

    def _format_worksheet(self, worksheet: "Worksheet", options: Dict[str, Any]) -> None:
        """Apply freeze panes, column widths, wrapping and header styling.

        Args:
            worksheet: Worksheet to format.
            options: Formatting options. Recognized keys (with defaults):
                ``freeze_row`` (2), ``freeze_col`` (1), ``max_column_width``
                (50), ``min_column_width`` (8), ``wrap_threshold`` (50),
                ``header_row`` (1) and ``header_color`` ('F2F2F2').
        """
        # Freeze panes if specified; a missing/zero coordinate falls back to 1
        # so that a valid cell reference (e.g. "B1") is always produced.
        freeze_row = options.get('freeze_row', 2)
        freeze_col = options.get('freeze_col', 1)
        if freeze_row or freeze_col:
            worksheet.freeze_panes = (
                f"{get_column_letter(freeze_col or 1)}{freeze_row or 1}"
            )

        # Auto-adjust column widths
        max_width = options.get('max_column_width', 50)
        min_width = options.get('min_column_width', 8)
        wrap_threshold = options.get('wrap_threshold', 50)

        for col in worksheet.columns:
            max_length = 0
            column = get_column_letter(col[0].column)

            for cell in col:
                if cell.value is None:
                    continue
                text = str(cell.value)
                text_length = len(text)
                if text_length > wrap_threshold:
                    try:
                        cell.alignment = Alignment(wrap_text=True, vertical='top')
                    except AttributeError:
                        # Read-only cells (e.g. merged cells) cannot be styled.
                        pass
                    # Wrapped text only needs room for its longest word;
                    # default=0 covers values made only of whitespace.
                    longest_word = max((len(word) for word in text.split()), default=0)
                    text_length = min(wrap_threshold, longest_word)
                max_length = max(max_length, text_length)

            adjusted_width = min(max_width, max(min_width, max_length + 2))
            worksheet.column_dimensions[column].width = adjusted_width

        # Apply header styles
        header_row = options.get('header_row', 1)
        if header_row:
            header_color = options.get('header_color', 'F2F2F2')
            header_fill = PatternFill(
                start_color=header_color,
                end_color=header_color,
                fill_type='solid'
            )
            header_font = Font(bold=True)

            for cell in worksheet[header_row]:
                cell.fill = header_fill
                cell.font = header_font

    def _clean_special_chars(self, text: Any) -> Any:
        """Normalize line breaks to ``\\n`` in strings; return other values unchanged."""
        if isinstance(text, str):
            return text.replace('\r\n', '\n').replace('\r', '\n')
        return text
|
||||
|
||||
# Example usage:
|
||||
"""
|
||||
from services.excel.excel_service import ExcelService
|
||||
|
||||
# Create service
|
||||
excel_service = ExcelService()
|
||||
|
||||
# Read Excel file
|
||||
try:
|
||||
df = excel_service.read_excel("input.xlsx")
|
||||
print("Data loaded successfully")
|
||||
|
||||
# Modify data...
|
||||
|
||||
# Save with formatting
|
||||
format_options = {
|
||||
'freeze_row': 2,
|
||||
'freeze_col': 1,
|
||||
'max_column_width': 50,
|
||||
'min_column_width': 8,
|
||||
'wrap_threshold': 50,
|
||||
'header_color': 'E6E6E6'
|
||||
}
|
||||
|
||||
excel_service.save_excel(
|
||||
df,
|
||||
"output.xlsx",
|
||||
format_options=format_options
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error handling Excel file: {e}")
|
||||
"""
|
|
@ -0,0 +1,25 @@
|
|||
# services/language/base.py
|
||||
"""
|
||||
Base class for language detection services
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
|
||||
class LanguageDetectionService(ABC):
    """Interface that every language detection backend has to implement."""

    @abstractmethod
    def detect_language(self, text: str) -> Tuple[str, float]:
        """Identify the language of one text.

        Args:
            text: Text to analyze.

        Returns:
            A ``(language_code, confidence_score)`` tuple.
        """

    @abstractmethod
    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """Identify the language of several texts.

        Args:
            texts: Texts to analyze.

        Returns:
            One ``(language_code, confidence_score)`` tuple per input text.
        """
|
|
@ -0,0 +1,52 @@
|
|||
# services/language/langid_service.py
|
||||
"""
|
||||
Language detection service using langid
|
||||
"""
|
||||
from typing import List, Tuple, Optional, Set
|
||||
import langid
|
||||
from .base import LanguageDetectionService
|
||||
|
||||
class LangIdService(LanguageDetectionService):
    """Language detection backed by the ``langid`` library.

    Each instance owns its own ``LanguageIdentifier`` so that restricting the
    allowed languages of one service does not change the behavior of other
    instances or of direct ``langid.classify`` callers (the module-level
    ``langid.set_languages`` mutates shared global state).
    """

    def __init__(
        self,
        allowed_languages: Optional[Set[str]] = None,
        normalize_scores: bool = False,
    ):
        """Initialize the langid service.

        Args:
            allowed_languages: Set of allowed language codes
                (e.g., {'en', 'es', 'fr'}). If None or empty, all languages
                supported by langid are allowed.
            normalize_scores: When True, scores are normalized probabilities
                in [0, 1]. The default (False) keeps langid's raw,
                unnormalized scores, matching ``langid.classify``.
        """
        # Local import: only this constructor needs the identifier class.
        from langid.langid import LanguageIdentifier, model

        self._identifier = LanguageIdentifier.from_modelstring(
            model, norm_probs=normalize_scores
        )
        if allowed_languages:
            self._identifier.set_languages(list(allowed_languages))
        self.allowed_languages = allowed_languages

    def detect_language(self, text: str) -> Tuple[str, float]:
        """Detect the language of a text using langid.

        Args:
            text: Text to analyze.

        Returns:
            Tuple of (language_code, confidence_score). ``("unknown", 0.0)``
            is returned for non-string input, for texts shorter than three
            characters after stripping, and when classification fails.
        """
        if not isinstance(text, str):
            return ("unknown", 0.0)

        stripped = text.strip()
        if len(stripped) < 3:
            return ("unknown", 0.0)

        try:
            lang, score = self._identifier.classify(stripped)
            return (lang, float(score))
        except Exception as e:
            print(f"Error in language detection: {e}")
            return ("unknown", 0.0)

    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """Detect the language of multiple texts.

        Args:
            texts: List of texts to analyze.

        Returns:
            List of (language_code, confidence_score) tuples, one per text.
        """
        return [self.detect_language(text) for text in texts]
|
|
@ -0,0 +1,33 @@
|
|||
# services/language/language_factory.py
|
||||
"""
|
||||
Factory class for creating language detection services
|
||||
"""
|
||||
from typing import Optional, Set
|
||||
from .langid_service import LangIdService
|
||||
|
||||
class LanguageFactory:
    """Builds language detection services from a short type name."""

    @staticmethod
    def create_service(service_type: str, allowed_languages: Optional[Set[str]] = None, **kwargs) -> Optional['LanguageDetectionService']:
        """Instantiate the language detection service named by ``service_type``.

        Args:
            service_type: Case-insensitive service name ("langid", etc.).
            allowed_languages: Set of allowed language codes, passed to the service.
            **kwargs: Extra keyword arguments passed to the service constructor.

        Returns:
            The newly created service instance.

        Raises:
            ValueError: If ``service_type`` does not name a known service.
        """
        registry = {
            "langid": LangIdService,
            # Register other language detection services here
        }

        key = service_type.lower()
        if key not in registry:
            raise ValueError(f"Unknown language detection service type: {service_type}")
        return registry[key](allowed_languages=allowed_languages, **kwargs)
|
|
@ -0,0 +1,68 @@
|
|||
# services/language/language_utils.py
|
||||
"""
|
||||
Utility functions for language detection and validation
|
||||
"""
|
||||
from typing import Dict, Set
|
||||
|
||||
class LanguageUtils:
    """Lookup helpers for the language codes known to the application."""

    # Short code -> (English language name, full locale code)
    LANGUAGE_CODES = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }

    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Return the language name for a short code, or 'Unknown'."""
        entry = cls.LANGUAGE_CODES.get(code)
        if entry is None:
            return 'Unknown'
        return entry[0]

    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Return the full code (e.g. 'en-GB' for 'en'), or 'unknown'."""
        entry = cls.LANGUAGE_CODES.get(short_code)
        if entry is None:
            return 'unknown'
        return entry[1]

    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Return the part before the first '-' (e.g. 'en' for 'en-GB')."""
        prefix, _, _ = full_code.partition('-')
        return prefix

    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Tell whether the short form of ``code`` is a known language."""
        return cls.get_short_code(code) in cls.LANGUAGE_CODES

    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Return the set of known short language codes."""
        return set(cls.LANGUAGE_CODES)
|
||||
|
||||
# Example usage:
|
||||
"""
|
||||
from services.language.language_factory import LanguageFactory
|
||||
from services.language.language_utils import LanguageUtils
|
||||
|
||||
# Create language detection service with specific languages
|
||||
allowed_languages = LanguageUtils.get_available_languages()
|
||||
detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
|
||||
|
||||
# Detect language of a text
|
||||
text = "Hello, how are you?"
|
||||
lang, confidence = detector.detect_language(text)
|
||||
print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||||
print(f"Confidence: {confidence}")
|
||||
|
||||
# Detect language of multiple texts
|
||||
texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
|
||||
results = detector.detect_batch(texts)
|
||||
for text, (lang, confidence) in zip(texts, results):
|
||||
print(f"Text: {text}")
|
||||
print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||||
print(f"Confidence: {confidence}")
|
||||
"""
|
|
@ -0,0 +1,20 @@
|
|||
# services/llm/base.py
|
||||
"""
|
||||
Base class for LLM services
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
import json
|
||||
from typing import List, Union, Dict, Any
|
||||
|
||||
class LLMService(ABC):
    """Interface that every LLM backend has to implement."""

    @abstractmethod
    def generate_text(self, prompt: str) -> str:
        """Produce text for the given prompt.

        Args:
            prompt: Prompt sent to the model.

        Returns:
            The generated text.
        """

    @abstractmethod
    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """Compute similarity scores for pairs of texts.

        Args:
            texts_pairs: Mapping of names to lists of texts describing the pairs.

        Returns:
            A list of similarity scores.
        """
|
|
@ -0,0 +1,228 @@
|
|||
# services/llm/batch_processor.py
|
||||
"""
|
||||
Batch processing service for LLM operations
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional, Callable
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
import time
|
||||
from .base import LLMService
|
||||
from utils.progress_bar import ProgressBar
|
||||
|
||||
@dataclass
class BatchConfig:
    """Settings that control how batches are sent to the LLM."""

    # Number of items sent in one request
    batch_size: int = 20
    # Number of attempts made for each batch
    max_retries: int = 3
    # Seconds to wait between two attempts of the same batch
    retry_delay: int = 3
    # Optional sink for progress and error messages
    progress_callback: Optional[Callable[[str], None]] = None
|
||||
|
||||
class BatchProcessor:
    """Send items to an LLM service in fixed-size batches, with retries.

    Each batch is serialized as JSON (``{"items": [...], "template": ...}``),
    appended to the system prompt and sent through
    ``LLMService.generate_text(prompt)``. The response must be a JSON array
    with one element per input item.
    """

    def __init__(
        self,
        llm_service: "LLMService",
        config: "Optional[BatchConfig]" = None
    ):
        """Store the LLM service and the batch configuration.

        Args:
            llm_service: Service used to generate the responses.
            config: Batch settings; a default ``BatchConfig`` is used if None.
        """
        self.llm_service = llm_service
        self.config = config or BatchConfig()

    def process_batch(
        self,
        items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """Process items in batches with a consistent context.

        Args:
            items: List of dictionaries containing data to process.
            system_prompt: System prompt for context.
            template: Template string describing the request.
            output_processor: Optional function applied to each response element.

        Returns:
            List of processed results, aligned with ``items``. Items of a
            batch that failed on every attempt are reported as ``None``.
        """
        results = []
        total_items = len(items)

        # Setup progress tracking
        progress = ProgressBar(
            total_items,
            "Processing batches:",
            "Complete"
        )
        if self.config.progress_callback:
            progress.set_output_callback(self.config.progress_callback)
        progress.start()

        # Process in batches
        for start_idx in range(0, total_items, self.config.batch_size):
            end_idx = min(start_idx + self.config.batch_size, total_items)
            batch_items = items[start_idx:end_idx]

            results.extend(
                self._run_batch(
                    batch_items,
                    system_prompt,
                    template,
                    output_processor,
                    f"{start_idx}-{end_idx}",
                )
            )

            # Update progress
            progress.update(end_idx)

        progress.finish()
        return results

    def _run_batch(
        self,
        batch_items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable],
        batch_label: str,
    ) -> List[Any]:
        """Send one batch to the LLM, retrying up to ``config.max_retries`` times.

        Returns the processed results, or one ``None`` per item when every
        attempt failed (so the overall result list stays aligned with the input).
        """
        request_payload = json.dumps({"items": batch_items, "template": template})
        # LLMService.generate_text takes a single prompt, so the system prompt
        # and the JSON payload are combined into one string.
        prompt = f"{system_prompt}\n\n{request_payload}"

        attempts = self.config.max_retries
        for attempt in range(attempts):
            try:
                response = self.llm_service.generate_text(prompt)
                batch_results = self._process_response(response, output_processor)

                if len(batch_results) != len(batch_items):
                    raise ValueError("Response count doesn't match input count")

                return batch_results

            except Exception as e:
                is_last_attempt = attempt >= attempts - 1
                if self.config.progress_callback:
                    suffix = "" if is_last_attempt else ". Retrying..."
                    self.config.progress_callback(
                        f"Error in batch {batch_label}: {e}{suffix}"
                    )
                if not is_last_attempt:
                    time.sleep(self.config.retry_delay)

        # Every attempt failed (or no attempt was allowed): keep alignment.
        return [None] * len(batch_items)

    def _process_response(
        self,
        response: str,
        output_processor: Optional[Callable] = None
    ) -> List[Any]:
        """Parse the LLM response as a JSON array and post-process its elements.

        Args:
            response: Raw text returned by the LLM service.
            output_processor: Optional function applied to each parsed element.

        Returns:
            The parsed (and optionally processed) list.

        Raises:
            ValueError: If the response is missing, is not valid JSON, or is
                not a JSON array.
        """
        if not isinstance(response, str):
            # Services signal a failed call by returning None.
            raise ValueError("LLM service returned no response")

        try:
            parsed = json.loads(response)
        except json.JSONDecodeError as err:
            raise ValueError("Failed to parse LLM response as JSON") from err

        if not isinstance(parsed, list):
            raise ValueError("LLM response is not a JSON array")

        # Apply custom processing if provided
        if output_processor:
            return [output_processor(item) for item in parsed]
        return parsed
|
||||
|
||||
# Example specialized batch processor for translations
|
||||
class TranslationBatchProcessor(BatchProcessor):
    """Specialized batch processor for translations."""

    def translate_batch(
        self,
        texts: List[str],
        source_lang: str,
        target_lang: str
    ) -> List[str]:
        """Translate a batch of texts.

        Args:
            texts: List of texts to translate.
            source_lang: Source language code.
            target_lang: Target language code.

        Returns:
            List of translated texts, aligned with ``texts`` (entries of a
            batch that failed are whatever ``process_batch`` reports for them).
        """
        # Prepare items
        items = [{"text": text} for text in texts]

        # Setup prompts
        system_prompt = (
            "You are a translator. Translate the provided texts "
            "maintaining special fields like <> and <#>."
        )

        template = (
            "Translate the following texts from {source_lang} to {target_lang}. "
            "Return translations as a JSON array of strings:"
            "\n\n{text}"
        )

        # Only the language fields are filled in here. The "{text}" placeholder
        # is substituted by itself so it survives formatting instead of
        # raising KeyError('text').
        request_template = template.format(
            source_lang=source_lang,
            target_lang=target_lang,
            text="{text}",
        )

        # Process batch
        return self.process_batch(
            items=items,
            system_prompt=system_prompt,
            template=request_template
        )
|
||||
|
||||
# Example usage:
|
||||
"""
|
||||
from services.llm.llm_factory import LLMFactory
|
||||
from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor
|
||||
|
||||
# Create LLM service
|
||||
llm_service = LLMFactory.create_service("openai")
|
||||
|
||||
# Setup batch processor with progress callback
|
||||
def progress_callback(message: str):
|
||||
print(message)
|
||||
|
||||
config = BatchConfig(
|
||||
batch_size=20,
|
||||
progress_callback=progress_callback
|
||||
)
|
||||
|
||||
# General batch processor
|
||||
processor = BatchProcessor(llm_service, config)
|
||||
|
||||
# Example batch process for custom task
|
||||
items = [
|
||||
{"text": "Hello", "context": "greeting"},
|
||||
{"text": "Goodbye", "context": "farewell"}
|
||||
]
|
||||
|
||||
system_prompt = "You are a helpful assistant."
|
||||
template = "Process these items considering their context: {items}"
|
||||
|
||||
results = processor.process_batch(
|
||||
items=items,
|
||||
system_prompt=system_prompt,
|
||||
template=template
|
||||
)
|
||||
|
||||
# Example translation batch
|
||||
translator = TranslationBatchProcessor(llm_service, config)
|
||||
texts = ["Hello world", "How are you?"]
|
||||
translations = translator.translate_batch(
|
||||
texts=texts,
|
||||
source_lang="en",
|
||||
target_lang="es"
|
||||
)
|
||||
"""
|
|
@ -0,0 +1,63 @@
|
|||
# services/llm/grok_service.py
|
||||
"""
|
||||
Grok service implementation
|
||||
"""
|
||||
from typing import Dict, List, Optional
|
||||
import json
|
||||
from .base import LLMService
|
||||
from config.api_keys import APIKeyManager
|
||||
|
||||
class GrokService(LLMService):
    """Placeholder LLM backend for Grok; no API call is implemented yet."""

    def __init__(self, model: str = "grok-1", temperature: float = 0.3):
        """Fetch the Grok API key and store the model settings.

        Raises:
            ValueError: If no Grok API key is available.
        """
        key = APIKeyManager.get_grok_key()
        if not key:
            raise ValueError("Grok API key not found. Please set up your API keys.")

        self.api_key = key
        self.model = model
        self.temperature = temperature

    def generate_text(self, prompt: str) -> str:
        """Generate text with the Grok API (not implemented yet).

        Currently always prints an error message and returns None.
        TODO: Implement this method when the Grok API is available.
        """
        try:
            # Placeholder until the Grok API can actually be called
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as err:
            print(f"Error in Grok API call: {err}")
        return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """Compute similarity scores with the Grok API (not implemented yet).

        Currently always prints an error message and returns None.
        TODO: Implement this method when the Grok API is available.
        """
        try:
            # Prompt and payload are prepared for the future API call
            instructions = (
                "Evaluate the semantic similarity between the following table of pairs of texts "
                "in json format on a scale from 0 to 1. Return the similarity scores for every "
                "row in JSON format as a list of numbers, without any additional text or formatting."
            )
            payload = json.dumps(texts_pairs)

            # Placeholder until the Grok API can actually be called
            raise NotImplementedError("Grok API is not implemented yet")
        except Exception as err:
            print(f"Error in Grok similarity calculation: {err}")
        return None
|
||||
|
||||
# Update config/api_keys.py to include Grok
|
||||
@classmethod
def get_grok_key(cls) -> Optional[str]:
    """Get the Grok API key from the environment or the stored configuration.

    NOTE(review): the comment preceding this snippet says it belongs in
    config/api_keys.py; it relies on ``cls._get_stored_key``, so it is
    presumably meant to be a method of ``APIKeyManager`` -- confirm and move.

    Returns:
        The value of ``GROK_API_KEY`` if set and non-empty, otherwise the
        result of ``cls._get_stored_key('grok')``.
    """
    # Local import: this file does not import ``os`` at module level.
    import os

    return (
        os.getenv('GROK_API_KEY') or
        cls._get_stored_key('grok')
    )
|
|
@ -0,0 +1,33 @@
|
|||
# services/llm/llm_factory.py
|
||||
"""
|
||||
Factory class for creating LLM services
|
||||
"""
|
||||
from typing import Optional
|
||||
from .openai_service import OpenAIService
|
||||
from .ollama_service import OllamaService
|
||||
from .grok_service import GrokService
|
||||
|
||||
class LLMFactory:
    """Builds LLM services from a short type name."""

    @staticmethod
    def create_service(service_type: str, **kwargs) -> Optional['LLMService']:
        """Instantiate the LLM service named by ``service_type``.

        Args:
            service_type: Case-insensitive service name ("openai", "ollama", "grok").
            **kwargs: Keyword arguments passed to the service constructor.

        Returns:
            The new service instance, or None (after printing a message) when
            ``service_type`` is not recognized.
        """
        registry = {
            "openai": OpenAIService,
            "ollama": OllamaService,
            "grok": GrokService
        }

        key = service_type.lower()
        if key not in registry:
            print(f"Unknown service type: {service_type}")
            return None
        return registry[key](**kwargs)
|
|
@ -0,0 +1,53 @@
|
|||
# services/llm/ollama_service.py
|
||||
"""
|
||||
Ollama service implementation
|
||||
"""
|
||||
import ollama
|
||||
import json
|
||||
from typing import Dict, List
|
||||
from .base import LLMService
|
||||
|
||||
class OllamaService(LLMService):
    """LLM backend that sends prompts to a model through ``ollama``."""

    def __init__(self, model: str = "llama3.1"):
        """Remember which Ollama model to use."""
        self.model = model

    def generate_text(self, prompt: str) -> str:
        """Generate text for ``prompt``.

        Returns:
            The model's response text, or None (after printing the error)
            when the call fails.
        """
        try:
            reply = ollama.generate(model=self.model, prompt=prompt)
            return reply["response"]
        except Exception as err:
            print(f"Error in Ollama API call: {err}")
            return None

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """Ask the model for similarity scores of the given text pairs.

        Args:
            texts_pairs: JSON-serializable description of the text pairs.

        Returns:
            The scores parsed from the response (either a JSON list or the
            "similarity_scores" entry of a JSON object), or None (after
            printing the error) when the call or the parsing fails.
        """
        instructions = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )
        payload = json.dumps(texts_pairs)
        prompt = f"{instructions}\n\n{payload}"

        try:
            reply = ollama.generate(model=self.model, prompt=prompt)

            try:
                parsed = json.loads(reply["response"].strip())
            except json.JSONDecodeError:
                raise ValueError("Could not decode response as JSON")

            if isinstance(parsed, list):
                return parsed
            if isinstance(parsed, dict) and "similarity_scores" in parsed:
                return parsed["similarity_scores"]
            raise ValueError("Unexpected response format")

        except Exception as err:
            print(f"Error in Ollama similarity calculation: {err}")
            return None
|
|
@ -0,0 +1,69 @@
|
|||
# services/llm/openai_service.py
|
||||
"""
|
||||
OpenAI service implementation
|
||||
"""
|
||||
from openai import OpenAI
|
||||
from typing import Dict, List
|
||||
import json
|
||||
from .base import LLMService
|
||||
from config.api_keys import APIKeyManager
|
||||
|
||||
class OpenAIService(LLMService):
    """LLM backend that uses the OpenAI chat completions API."""

    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0.3):
        """Create the OpenAI client.

        Args:
            model: Name of the chat model to use.
            temperature: Sampling temperature sent with every request.

        Raises:
            ValueError: If no OpenAI API key is available.
        """
        api_key = APIKeyManager.get_openai_key()
        if not api_key:
            raise ValueError("OpenAI API key not found. Please set up your API keys.")

        self.client = OpenAI(api_key=api_key)
        self.model = model
        self.temperature = temperature

    def generate_text(self, prompt: str) -> str:
        """Generate text for ``prompt``.

        Returns:
            The content of the first choice, or None (after printing the
            error) when the API call fails.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
                max_tokens=1500
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error in OpenAI API call: {e}")
            return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove surrounding quotes and a Markdown code fence from ``text``.

        Handles responses wrapped as ```` ```json ... ``` ```` or
        ```` ``` ... ``` ````. Unlike ``str.strip(chars)``, which removes any
        of the given characters, only the exact fence markers are removed, so
        payload characters such as a leading or trailing 'j', 's', 'o' or 'n'
        are never eaten.
        """
        cleaned = text.strip().strip("'").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
            # Optional language tag right after the opening fence
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def get_similarity_scores(self, texts_pairs: Dict[str, List[str]]) -> List[float]:
        """Ask the model for similarity scores of the given text pairs.

        Args:
            texts_pairs: JSON-serializable description of the text pairs.

        Returns:
            The scores parsed from the response (either a JSON list or the
            "similarity_scores" entry of a JSON object), or None (after
            printing the error) when the call or the parsing fails.
        """
        system_prompt = (
            "Evaluate the semantic similarity between the following table of pairs of texts in json format on a scale from 0 to 1. "
            "Return the similarity scores for every row in JSON format as a list of numbers, without any additional text or formatting."
        )

        request_payload = json.dumps(texts_pairs)

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": request_payload}
                ],
                temperature=self.temperature,
                max_tokens=1500
            )

            response_content = response.choices[0].message.content
            if response_content is None:
                raise ValueError("Empty response from the model")
            cleaned_response = self._strip_code_fence(response_content)

            try:
                scores = json.loads(cleaned_response)
            except json.JSONDecodeError:
                raise ValueError("Could not decode response as JSON")

            if isinstance(scores, dict) and "similarity_scores" in scores:
                return scores["similarity_scores"]
            if isinstance(scores, list):
                return scores
            raise ValueError("Unexpected response format")

        except Exception as e:
            print(f"Error in OpenAI similarity calculation: {e}")
            return None
|
|
@ -0,0 +1,19 @@
|
|||
# services/translation/base.py
|
||||
"""
|
||||
Base class for translation services
|
||||
"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, List, Dict
|
||||
|
||||
class TranslationService(ABC):
    """Interface that every translation backend has to implement."""

    @abstractmethod
    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """Translate one text.

        Args:
            text: Text to translate.
            target_language: Code of the language to translate into.
            source_language: Code of the language of ``text``, if known.

        Returns:
            The translated text.
        """

    @abstractmethod
    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """Translate several texts.

        Args:
            texts: Texts to translate.
            target_language: Code of the language to translate into.
            source_language: Code of the language of the texts, if known.

        Returns:
            The translated texts.
        """
|
|
@ -0,0 +1,77 @@
|
|||
# services/translation/google_translate.py
|
||||
"""
|
||||
Google Translation service implementation
|
||||
"""
|
||||
from typing import Optional, List
|
||||
import html
|
||||
from google.cloud import translate_v2 as translate
|
||||
from google.oauth2 import service_account
|
||||
from config.api_keys import APIKeyManager
|
||||
from .base import TranslationService
|
||||
|
||||
class GoogleTranslateService(TranslationService):
    """Translation backend that uses the Google Cloud Translation (v2) client."""

    def __init__(self, credentials_file: Optional[str] = None):
        """Initialize the Google Translate client.

        Args:
            credentials_file: Path to a Google service account credentials
                JSON file. If None, an API key must be available from
                ``APIKeyManager``.

        Raises:
            ValueError: If the credentials cannot be loaded, or if no
                credentials file is given and no Google API key is found.
        """
        if credentials_file:
            # Use service account credentials
            try:
                credentials = service_account.Credentials.from_service_account_file(credentials_file)
                self.client = translate.Client(credentials=credentials)
            except Exception as e:
                raise ValueError(f"Error initializing Google Translate with credentials: {e}") from e
        else:
            api_key = APIKeyManager.get_google_key()
            if not api_key:
                raise ValueError("Google API key not found. Please set up your API keys.")
            # NOTE(review): the API key is only checked for presence here; it
            # is not passed to the client, which is created with its default
            # credential lookup -- confirm this is the intended behavior.
            self.client = translate.Client()

    def translate_text(self, text: str, target_language: str, source_language: Optional[str] = None) -> str:
        """Translate a single text.

        Args:
            text: Text to translate.
            target_language: Target language code (e.g., 'es' for Spanish).
            source_language: Source language code, or None (passed through
                to the client as is).

        Returns:
            Translated text with HTML entities unescaped.

        Raises:
            ValueError: If the translation request fails.
        """
        try:
            result = self.client.translate(
                text,
                target_language=target_language,
                source_language=source_language
            )
            return html.unescape(result["translatedText"])
        except Exception as e:
            raise ValueError(f"Error in Google Translate: {e}") from e

    def translate_batch(self, texts: List[str], target_language: str, source_language: Optional[str] = None) -> List[str]:
        """Translate multiple texts in one request.

        Args:
            texts: List of texts to translate.
            target_language: Target language code (e.g., 'es' for Spanish).
            source_language: Source language code, or None (passed through
                to the client as is).

        Returns:
            List of translated texts with HTML entities unescaped. An empty
            input yields an empty list without calling the API.

        Raises:
            ValueError: If the translation request fails.
        """
        if not texts:
            # Nothing to translate: avoid a request with no content.
            return []

        try:
            results = self.client.translate(
                texts,
                target_language=target_language,
                source_language=source_language
            )
            return [html.unescape(result["translatedText"]) for result in results]
        except Exception as e:
            raise ValueError(f"Error in Google Translate batch: {e}") from e
|
|
@ -0,0 +1,32 @@
|
|||
# services/translation/translation_factory.py
|
||||
"""
|
||||
Factory class for creating translation services
|
||||
"""
|
||||
from typing import Optional
|
||||
from .google_translate import GoogleTranslateService
|
||||
|
||||
class TranslationFactory:
    """Factory class for creating translation service instances"""

    @staticmethod
    def create_service(service_type: str, **kwargs) -> 'TranslationService':
        """
        Create an instance of the specified translation service

        Args:
            service_type: Type of translation service ("google", etc.).
                Matching is case-insensitive.
            **kwargs: Additional arguments for service initialization

        Returns:
            TranslationService instance

        Raises:
            ValueError: If service_type is not a recognized service name.
        """
        services = {
            "google": GoogleTranslateService,
            # Add other translation services here
        }

        service_class = services.get(service_type.lower())
        if service_class is None:
            raise ValueError(f"Unknown translation service type: {service_type}")
        return service_class(**kwargs)
|
|
@ -0,0 +1,39 @@
|
|||
# utils/file_utils.py
|
||||
"""
|
||||
File handling utilities
|
||||
"""
|
||||
import os
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
import pandas as pd
|
||||
|
||||
def select_file(title="Select file", filetypes=None):
    """Show an "open file" dialog and return the chosen path.

    Args:
        title: Title of the dialog window.
        filetypes: List of ``(label, pattern)`` pairs offered in the dialog.
            Defaults to Excel files plus an "All files" entry.

    Returns:
        The selected file path, or None if the dialog was cancelled.
    """
    if filetypes is None:
        filetypes = [
            # Tk expects multiple patterns separated by spaces; the
            # semicolon form is not portable across platforms.
            ("Excel files", "*.xlsx *.xls"),
            ("All files", "*.*"),
        ]

    # A hidden root window is needed to host the dialog.
    root = tk.Tk()
    root.withdraw()
    try:
        file_path = filedialog.askopenfilename(
            parent=root,
            title=title,
            filetypes=filetypes,
        )
    finally:
        # Destroy the temporary root so repeated calls do not pile up
        # hidden Tk instances.
        root.destroy()

    return file_path if file_path else None
|
||||
|
||||
def select_directory(title="Select directory"):
    """Show a "choose directory" dialog and return the chosen path.

    Args:
        title: Title of the dialog window.

    Returns:
        The selected directory path, or None if the dialog was cancelled.
    """
    # A hidden root window is needed to host the dialog.
    root = tk.Tk()
    root.withdraw()
    try:
        dir_path = filedialog.askdirectory(parent=root, title=title)
    finally:
        # Destroy the temporary root so repeated calls do not pile up
        # hidden Tk instances.
        root.destroy()

    return dir_path if dir_path else None
|
||||
|
||||
def safe_read_excel(file_path, **kwargs):
    """Read an Excel file with pandas without letting errors propagate.

    Args:
        file_path: Path of the Excel file, passed straight to ``pd.read_excel``.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        Whatever ``pd.read_excel`` returns, or None if it raised. In the
        failure case the error is printed to standard output.
    """
    frame = None
    try:
        frame = pd.read_excel(file_path, **kwargs)
    except Exception as err:
        # Best effort by design: report the problem and fall through to None.
        print(f"Error reading Excel file: {err}")
    return frame
|
|
@ -0,0 +1,31 @@
|
|||
# utils/logger_utils.py
|
||||
"""
|
||||
Logging configuration and utilities
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
from config.settings import LOG_DIR
|
||||
|
||||
def setup_logger(name, log_file=None):
    """Return the logger called ``name``, configuring it on first use.

    A logger that already has handlers is returned untouched. Otherwise its
    level is set to INFO and a console handler is attached; when ``log_file``
    is given, a file handler writing to ``LOG_DIR/log_file`` is attached too
    (``LOG_DIR`` is created if missing).

    Args:
        name: Name of the logger to fetch.
        log_file: Optional file name, relative to ``LOG_DIR``.

    Returns:
        The ``logging.Logger`` instance for ``name``.
    """
    log = logging.getLogger(name)

    # Configured by an earlier call: do not attach handlers a second time.
    if log.handlers:
        return log

    log.setLevel(logging.INFO)

    if log_file:
        os.makedirs(LOG_DIR, exist_ok=True)
        target = os.path.join(LOG_DIR, log_file)
        to_file = logging.FileHandler(target)
        file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        to_file.setFormatter(file_format)
        log.addHandler(to_file)

    to_console = logging.StreamHandler()
    console_format = logging.Formatter('%(levelname)s: %(message)s')
    to_console.setFormatter(console_format)
    log.addHandler(to_console)

    return log
|
|
@ -0,0 +1,64 @@
|
|||
# utils/logging_manager.py
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
import tkinter as tk
|
||||
from queue import Queue, Empty
|
||||
from .output_redirector import OutputRedirector
|
||||
|
||||
class LoggingManager:
    """Owns the application logger and, optionally, mirrors it to a GUI.

    Log records go to a dated file under ``<work_dir>/logs``. After
    ``setup_gui_logging`` they are also pushed onto a queue that
    ``process_log_queue`` drains into a Tk text widget.
    """

    def __init__(self, work_dir: str):
        """Create the log directory and configure the shared logger.

        Args:
            work_dir: Base directory; logs are written to ``work_dir/logs``.
        """
        self.work_dir = work_dir
        self.log_dir = os.path.join(work_dir, "logs")
        os.makedirs(self.log_dir, exist_ok=True)

        self.logger = self._setup_logger()
        self.queue: Optional[Queue] = None
        self.text_widget: Optional[tk.Text] = None
        # Handler installed by setup_gui_logging, kept so it can be replaced.
        self._gui_handler: Optional[logging.Handler] = None

    def _setup_logger(self) -> logging.Logger:
        """Return the "app_logger" logger with a file handler for today's log.

        The logger is process-wide, so a handler is added only if none
        already writes to the same file. Otherwise every new manager would
        duplicate each log line and hold another open file handle.
        """
        logger = logging.getLogger("app_logger")
        logger.setLevel(logging.INFO)

        # File handler (FileHandler stores its target as an absolute path).
        log_file = os.path.abspath(
            os.path.join(self.log_dir, f"app_{datetime.now():%Y%m%d}.log")
        )
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler) and handler.baseFilename == log_file:
                return logger

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        file_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        return logger

    def setup_gui_logging(self, text_widget: tk.Text, queue: Queue):
        """Setup logging to GUI text widget

        Args:
            text_widget: Widget that ``process_log_queue`` writes into.
            queue: Queue that receives the formatted log messages.
        """
        self.text_widget = text_widget
        self.queue = queue

        # Replace a handler from an earlier call. Otherwise messages would be
        # emitted once per call, and the old queue would never be drained.
        previous = getattr(self, "_gui_handler", None)
        if previous is not None:
            self.logger.removeHandler(previous)

        # Add handler for GUI logging
        gui_handler = logging.StreamHandler(OutputRedirector(queue))
        gui_handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(message)s")
        gui_handler.setFormatter(formatter)

        self.logger.addHandler(gui_handler)
        self._gui_handler = gui_handler

    def process_log_queue(self):
        """Process pending log messages

        Drains the queue without blocking and appends each message to the
        text widget. Does nothing until ``setup_gui_logging`` has been called.
        """
        if self.queue and self.text_widget:
            while True:
                try:
                    message = self.queue.get_nowait()
                    self.text_widget.insert(tk.END, message)
                    self.text_widget.see(tk.END)
                    self.text_widget.update_idletasks()
                except Empty:
                    break

    def clear_output(self):
        """Clear the text widget"""
        if self.text_widget:
            self.text_widget.delete("1.0", tk.END)
            self.text_widget.update_idletasks()
|
|
@ -0,0 +1,17 @@
|
|||
# utils/output_redirector.py
|
||||
"""
|
||||
Output redirector for capturing stdout/stderr
|
||||
"""
|
||||
import sys
|
||||
from queue import Queue
|
||||
from typing import Optional
|
||||
|
||||
class OutputRedirector:
    """Minimal file-like object that forwards written text to a queue."""

    def __init__(self, queue: Queue):
        """Remember the queue that will receive every written string."""
        self.queue = queue

    def write(self, string: str):
        """Put ``string`` on the queue exactly as received."""
        self.queue.put(string)

    def flush(self):
        """Do nothing; present so the object can be used where a stream is expected."""
        return None
|
|
@ -0,0 +1,51 @@
|
|||
# utils/progress_bar.py
|
||||
"""
|
||||
Progress bar implementation
|
||||
"""
|
||||
import tkinter as tk
|
||||
from tkinter import ttk
|
||||
from typing import Optional, Callable
|
||||
import sys
|
||||
from queue import Queue
|
||||
import threading
|
||||
|
||||
class ProgressBar:
    """Text progress bar that prints dots as work advances.

    Output has the form ``<prefix> [......] <suffix>``: ``start`` writes the
    opening part, ``update``/``increment`` add dots, and ``finish`` pads the
    bar to ``max_points`` dots and closes it.
    """

    def __init__(self, total: int, prefix: str = "", suffix: str = "", max_points: int = 30):
        """
        Args:
            total: Number of work units that corresponds to a full bar.
            prefix: Text written before the opening bracket.
            suffix: Text written after the closing bracket.
            max_points: Number of dots in a full bar.
        """
        self.total = total
        self.prefix = prefix
        self.suffix = suffix
        self.max_points = max_points
        self.current = 0
        self.last_points = 0
        self.output_callback: Optional[Callable] = None

    def set_output_callback(self, callback: Callable[[str], None]):
        """Set callback function for output"""
        self.output_callback = callback

    def update(self, current: int):
        """Record progress and write any dots that became due.

        Args:
            current: Number of work units completed so far.
        """
        self.current = current
        # A total of zero means there is no work at all; treat the bar as
        # complete instead of dividing by zero.
        fraction = current / self.total if self.total else 1.0
        points = min(int(fraction * self.max_points), self.max_points)

        # Only ever add dots; the bar never moves backwards.
        if points > self.last_points:
            new_points = points - self.last_points
            self._write_output("." * new_points)
            self.last_points = points

    def increment(self):
        """Advance progress by one unit."""
        self.update(self.current + 1)

    def finish(self):
        """Fill the bar with the remaining dots and write the closing part."""
        remaining_points = self.max_points - self.last_points
        if remaining_points > 0:
            self._write_output("." * remaining_points)
        self._write_output(f"] {self.suffix}\n")

    def start(self):
        """Write the prefix and the opening bracket."""
        self._write_output(f"\r{self.prefix} [")

    def _write_output(self, text: str):
        """Send ``text`` to the callback if one is set, else print it unbuffered."""
        if self.output_callback:
            self.output_callback(text)
        else:
            print(text, end="", flush=True)
|
|
@ -0,0 +1,68 @@
|
|||
# utils/script_registry.py
|
||||
from typing import Dict, Callable, List, Optional
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
from pathlib import Path
|
||||
from config.profile_manager import Profile, ProfileManager
|
||||
|
||||
class ScriptRegistry:
    """Registry for script operations"""

    def __init__(self):
        # Operation name -> callable, and operation name -> description.
        self.operations: Dict[str, Callable] = {}
        self.descriptions: Dict[str, str] = {}

    def register(self, name: str, operation: Callable, description: str = ""):
        """Register a new operation

        Args:
            name: Key under which the operation is stored; an existing entry
                with the same name is replaced.
            operation: Callable to invoke for this operation.
            description: Free text describing the operation.
        """
        self.operations[name] = operation
        self.descriptions[name] = description

    def auto_discover(self, scripts_dir: str = "scripts"):
        """Auto-discover scripts in the scripts directory

        Every ``script_*.py`` file in ``scripts_dir`` (resolved relative to
        the parent of this module's directory) is imported as
        ``<scripts_dir>.<file stem>``. Modules exposing ``main`` are
        registered under the file stem without the ``script_`` prefix, with
        the module docstring as description. Import errors are printed and
        the file is skipped.

        Args:
            scripts_dir: Directory (and package) name holding the scripts.
        """
        scripts_path = Path(__file__).parent.parent / scripts_dir

        # Sort so the registration order, and therefore the order returned
        # by get_operations, does not depend on filesystem listing order.
        for file in sorted(scripts_path.glob("script_*.py")):
            module_name = f"{scripts_dir}.{file.stem}"
            try:
                module = importlib.import_module(module_name)

                # Look for main function and docstring
                if hasattr(module, 'main'):
                    name = file.stem.replace('script_', '')
                    description = module.__doc__ or ""
                    self.register(name, module.main, description)

            except Exception as e:
                # Best effort: one broken script must not stop discovery.
                print(f"Error loading script {file}: {e}")

    def get_operations(self) -> List[tuple]:
        """Get list of available operations

        Returns:
            ``(name, description)`` tuples in registration order.
        """
        return [(name, self.descriptions[name]) for name in self.operations]

    def run_operation(self, name: str, profile: Optional["Profile"] = None, **kwargs):
        """
        Run a registered operation

        Args:
            name: Name of the operation to run
            profile: Current profile instance (optional). Passed only to
                operations that declare a ``profile`` parameter.
            **kwargs: Additional arguments for the operation. Only those
                matching the operation's parameters are forwarded, unless
                the operation itself accepts ``**kwargs``, in which case all
                of them are forwarded.

        Returns:
            Whatever the operation returns.

        Raises:
            ValueError: If no operation is registered under ``name``.
        """
        if name not in self.operations:
            raise ValueError(f"Unknown operation: {name}")

        operation = self.operations[name]
        params = inspect.signature(operation).parameters

        accepts_any_keyword = any(
            param.kind is inspect.Parameter.VAR_KEYWORD
            for param in params.values()
        )
        if accepts_any_keyword:
            # The operation explicitly accepts arbitrary keywords.
            call_args = dict(kwargs)
        else:
            # Drop keywords the operation would reject.
            call_args = {key: value for key, value in kwargs.items() if key in params}

        # Check if operation accepts profile parameter
        if 'profile' in params:
            call_args['profile'] = profile

        return operation(**call_args)
|
Loading…
Reference in New Issue