# services/language/langid_service.py
"""
Language detection service using langid
"""
import logging
from typing import List, Optional, Set, Tuple

import langid
from langid.langid import LanguageIdentifier, model

from .base import LanguageDetectionService

logger = logging.getLogger(__name__)


class LangIdService(LanguageDetectionService):
    """Language detection backed by a private langid identifier.

    Each instance owns its own ``LanguageIdentifier`` instead of configuring
    langid's module-level identifier, so a language restriction applied here
    does not affect other instances or other users of ``langid`` in the
    same process.
    """

    # Stripped texts shorter than this are reported as "unknown" without
    # invoking the classifier.
    MIN_TEXT_LENGTH = 3

    def __init__(self, allowed_languages: Optional[Set[str]] = None):
        """
        Initialize langid service

        Args:
            allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'})
                               If None (or empty), all languages supported by langid
                               will be allowed
        """
        # norm_probs=False keeps the same scoring as the module-level
        # langid.classify(): scores are not normalised into [0, 1].
        self._identifier = LanguageIdentifier.from_modelstring(model, norm_probs=False)
        if allowed_languages:
            # Sorted only to make the configured order deterministic.
            self._identifier.set_languages(sorted(allowed_languages))
        self.allowed_languages = allowed_languages

    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Detect language of a text using langid

        Args:
            text: Text to analyze

        Returns:
            Tuple of (language_code, confidence_score). The score is langid's
            unnormalised score, not a probability. ("unknown", 0.0) is
            returned for empty or very short input and when detection fails.
        """
        try:
            if not text:
                return ("unknown", 0.0)
            stripped = text.strip()
            if len(stripped) < self.MIN_TEXT_LENGTH:
                return ("unknown", 0.0)
            lang, score = self._identifier.classify(stripped)
            # Coerce to a builtin float so the declared return type holds
            # even if the classifier hands back a numpy scalar.
            return (lang, float(score))
        except Exception:
            # Best-effort API: callers get the "unknown" sentinel rather than
            # an exception, but the failure and traceback are logged.
            logger.exception("Error in language detection")
            return ("unknown", 0.0)

    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Detect language of multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of tuples (language_code, confidence_score), one per input
            text and in the same order
        """
        return [self.detect_language(text) for text in texts]