MenuBase/services/language/langid_service.py

52 lines
1.6 KiB
Python
Raw Normal View History

2025-02-06 10:11:57 -03:00
# services/language/langid_service.py
"""
Language detection service using langid
"""
import logging
from typing import List, Tuple, Optional, Set

import langid

from .base import LanguageDetectionService
class LangIdService(LanguageDetectionService):
    """Language detection service backed by the ``langid`` library.

    Note:
        The language restriction is applied through ``langid.set_languages``,
        a module-level call, so it is shared by every ``LangIdService`` in the
        process: the most recently constructed instance determines the
        language set used by all of them.
    """

    # Stripped texts shorter than this are reported as "unknown" without
    # being classified; very short inputs give unreliable results.
    MIN_TEXT_LENGTH = 3

    _logger = logging.getLogger(__name__)

    def __init__(self, allowed_languages: Optional[Set[str]] = None):
        """
        Initialize langid service

        Args:
            allowed_languages: Set of allowed language codes (e.g., {'en', 'es', 'fr'})
                If None (or empty), all languages supported by langid will be allowed

        Raises:
            Propagates whatever ``langid.set_languages`` raises for codes it
            does not recognise (presumably ``ValueError`` -- confirm against
            the langid documentation).
        """
        # Always (re)configure langid. Passing None clears a restriction left
        # behind by a previously constructed service, so the "None means all
        # languages" contract holds regardless of construction order.
        langid.set_languages(list(allowed_languages) if allowed_languages else None)
        self.allowed_languages = allowed_languages

    def detect_language(self, text: str) -> Tuple[str, float]:
        """
        Detect language of a text using langid

        Args:
            text: Text to analyze

        Returns:
            Tuple of (language_code, confidence_score). ``("unknown", 0.0)``
            is returned when the text is empty, shorter than
            ``MIN_TEXT_LENGTH`` after stripping, or when classification fails.
            NOTE(review): the score is passed through from ``langid.classify``
            unchanged; with langid's default settings this is presumably an
            unnormalized log-probability rather than a 0-1 value -- confirm.
        """
        try:
            # Strip once and reuse; falsy input (None, "") counts as empty.
            stripped = text.strip() if text else ""
            if len(stripped) < self.MIN_TEXT_LENGTH:
                return ("unknown", 0.0)
            lang, score = langid.classify(stripped)
            # Coerce to a plain float so the result matches the annotation.
            return (lang, float(score))
        except Exception:
            # Best-effort API: never raise to the caller, but keep the
            # traceback in the logs instead of printing only the message.
            self._logger.exception("Error in language detection")
            return ("unknown", 0.0)

    def detect_batch(self, texts: List[str]) -> List[Tuple[str, float]]:
        """
        Detect language of multiple texts

        Args:
            texts: List of texts to analyze

        Returns:
            List of tuples (language_code, confidence_score), one per input
            text and in the same order, each produced by ``detect_language``.
        """
        return [self.detect_language(text) for text in texts]