# services/llm/batch_processor.py
"""
Batch processing service for LLM operations.
"""
from typing import List, Dict, Any, Optional, Callable
import json
from dataclasses import dataclass
import time

from .base import LLMService
from utils.progress_bar import ProgressBar
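
# Interfaces assumed from the two project imports above, inferred from how
# this module calls them (the real definitions live elsewhere; parameter
# names here are guesses):
#   LLMService.generate_text(system_prompt: str, user_prompt: str) -> str
#   ProgressBar(total, label, done_label) supporting set_output_callback(fn),
#   start(), update(current), finish()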


@dataclass
class BatchConfig:
    """Configuration for batch processing."""
    batch_size: int = 20
    max_retries: int = 3
    retry_delay: int = 3  # seconds to sleep between retry attempts
    progress_callback: Optional[Callable[[str], None]] = None


class BatchProcessor:
    """
    Handles batch processing for LLM operations.
    """

    def __init__(
        self,
        llm_service: LLMService,
        config: Optional[BatchConfig] = None
    ):
        self.llm_service = llm_service
        self.config = config or BatchConfig()

    def process_batch(
        self,
        items: List[Dict[str, Any]],
        system_prompt: str,
        template: str,
        output_processor: Optional[Callable[[Any], Any]] = None
    ) -> List[Any]:
        """
        Process items in batches, reusing the same system prompt and
        template so every batch gets consistent context.

        Args:
            items: List of dictionaries containing data to process
            system_prompt: System prompt for context
            template: Template string for formatting requests
            output_processor: Optional function applied to each parsed
                response item

        Returns:
            List of processed results, in input order. Items from a batch
            that still fails after max_retries appear as None.
        """
        results = []
        total_items = len(items)

        # Set up progress tracking
        progress = ProgressBar(
            total_items,
            "Processing batches:",
            "Complete"
        )
        if self.config.progress_callback:
            progress.set_output_callback(self.config.progress_callback)
        progress.start()

        # Process in batches
        for start_idx in range(0, total_items, self.config.batch_size):
            end_idx = min(start_idx + self.config.batch_size, total_items)
            batch_items = items[start_idx:end_idx]

            # Prepare batch request: items and template travel together
            # as a single JSON payload
            batch_data = {
                "items": batch_items,
                "template": template
            }
            request_payload = json.dumps(batch_data)

            # Process batch with retries
            for attempt in range(self.config.max_retries):
                try:
                    response = self.llm_service.generate_text(
                        system_prompt=system_prompt,
                        user_prompt=request_payload
                    )

                    # Parse and process response
                    batch_results = self._process_response(
                        response,
                        output_processor
                    )

                    # Guard against the model returning the wrong number
                    # of results for this batch
                    if len(batch_results) != len(batch_items):
                        raise ValueError(
                            "Response count doesn't match input count"
                        )

                    results.extend(batch_results)
                    break

                except Exception as e:
                    if attempt < self.config.max_retries - 1:
                        if self.config.progress_callback:
                            self.config.progress_callback(
                                f"Error in batch {start_idx}-{end_idx}: {e}. Retrying..."
                            )
                        time.sleep(self.config.retry_delay)
                    else:
                        if self.config.progress_callback:
                            self.config.progress_callback(
                                f"Error in batch {start_idx}-{end_idx}: {e}"
                            )
                        # On final retry failure, keep output aligned with
                        # input by adding a None placeholder per item
                        results.extend([None] * len(batch_items))

            # Update progress
            progress.update(end_idx)

        progress.finish()
        return results

    def _process_response(
        self,
        response: str,
        output_processor: Optional[Callable[[Any], Any]] = None
    ) -> List[Any]:
        """Parse an LLM response, optionally post-processing each item."""
        try:
            # Parse JSON response
            parsed = json.loads(response)

            # Apply custom processing if provided
            if output_processor:
                return [output_processor(item) for item in parsed]
            return parsed

        except json.JSONDecodeError as e:
            # Chain the original error so the offending payload stays visible
            raise ValueError("Failed to parse LLM response as JSON") from e


# Example specialized batch processor for translations
class TranslationBatchProcessor(BatchProcessor):
    """Specialized batch processor for translations"""

    def translate_batch(
        self,
        texts: List[str],
        source_lang: str,
        target_lang: str
    ) -> List[str]:
        """
        Translate a batch of texts.

        Args:
            texts: List of texts to translate
            source_lang: Source language code
            target_lang: Target language code

        Returns:
            List of translated texts (None for texts whose batch failed)
        """
        # Prepare items
        items = [{"text": text} for text in texts]

        # Set up prompts
        system_prompt = (
            "You are a translator. Translate the provided texts "
            "maintaining special fields like <> and <#>."
        )

        template = (
            "Translate the following texts from {source_lang} to {target_lang}. "
            "Return translations as a JSON array of strings:"
            # The doubled braces survive .format() below as a literal {text}
            # placeholder; a bare {text} would raise KeyError: 'text'.
            "\n\n{{text}}"
        )

        # Process batch
        results = self.process_batch(
            items=items,
            system_prompt=system_prompt,
            template=template.format(
                source_lang=source_lang,
                target_lang=target_lang
            )
        )

        return results


# Example usage:
"""
from services.llm.llm_factory import LLMFactory
from services.llm.batch_processor import BatchProcessor, BatchConfig, TranslationBatchProcessor

# Create LLM service
llm_service = LLMFactory.create_service("openai")

# Set up batch processor with a progress callback
def progress_callback(message: str):
    print(message)

config = BatchConfig(
    batch_size=20,
    progress_callback=progress_callback
)

# General batch processor
processor = BatchProcessor(llm_service, config)

# Example batch process for a custom task
items = [
    {"text": "Hello", "context": "greeting"},
    {"text": "Goodbye", "context": "farewell"}
]

system_prompt = "You are a helpful assistant."
template = "Process these items considering their context: {items}"

results = processor.process_batch(
    items=items,
    system_prompt=system_prompt,
    template=template
)
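
# The output_processor hook post-processes each item parsed from the model's
# JSON response. A minimal sketch (this lambda is illustrative, not part of
# the API):
processed = processor.process_batch(
    items=items,
    system_prompt=system_prompt,
    template=template,
    output_processor=lambda item: str(item).strip()
)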

# Example translation batch
translator = TranslationBatchProcessor(llm_service, config)
texts = ["Hello world", "How are you?"]
translations = translator.translate_batch(
    texts=texts,
    source_lang="en",
    target_lang="es"
)
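
# Batches that still fail after max_retries yield None entries, so guard
# before using the results (the empty-string fallback is just one option):
safe_translations = [t if t is not None else "" for t in translations]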
"""