# services/language/language_utils.py
"""
Utility functions for language detection and validation
"""
from typing import Dict, Set, Tuple


class LanguageUtils:
    """Lookup helpers for the language codes supported by the language services.

    All helpers are classmethods operating on ``LANGUAGE_CODES``; the class
    holds no instance state and does not need to be instantiated.
    """

    # Common language codes: short code -> (English language name, full locale code)
    LANGUAGE_CODES: Dict[str, Tuple[str, str]] = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }

    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Get full language name from a short code.

        Args:
            code: Short language code, e.g. ``'en'``.

        Returns:
            The language name (e.g. ``'English'``), or ``'Unknown'`` when the
            code is not a key of ``LANGUAGE_CODES``.
        """
        name, _full_code = cls.LANGUAGE_CODES.get(code, ('Unknown', ''))
        return name

    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Get full language code (e.g., 'en-GB' from 'en').

        Args:
            short_code: Short language code, e.g. ``'en'``.

        Returns:
            The full code registered for ``short_code``, or the lowercase
            string ``'unknown'`` when the code is not a key of
            ``LANGUAGE_CODES``.
        """
        _name, full_code = cls.LANGUAGE_CODES.get(short_code, ('Unknown', 'unknown'))
        return full_code

    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Get short language code (e.g., 'en' from 'en-GB').

        Args:
            full_code: A language code, with or without a ``-`` suffix.

        Returns:
            Everything before the first ``-``; the input unchanged when it
            contains no ``-``. The result is not checked against
            ``LANGUAGE_CODES``.
        """
        # partition() yields the whole string as the head when '-' is absent,
        # so no separate "contains '-'" branch is needed.
        return full_code.partition('-')[0]

    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Check if a language code is valid.

        Only the short part of the code (before the first ``-``) is checked,
        so both ``'en'`` and ``'en-GB'`` are accepted.

        Args:
            code: Short or full language code.

        Returns:
            ``True`` when the short code is a key of ``LANGUAGE_CODES``;
            ``False`` otherwise, including for non-string input such as
            ``None``.
        """
        # The sibling lookups fall back gracefully on None; do the same here
        # instead of raising TypeError from the string operations.
        if not isinstance(code, str):
            return False
        return cls.get_short_code(code) in cls.LANGUAGE_CODES

    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Get set of available language codes.

        Returns:
            A new set containing the short codes (keys of ``LANGUAGE_CODES``);
            mutating it does not affect the class-level table.
        """
        return set(cls.LANGUAGE_CODES)


# Example usage:
"""
from services.language.language_factory import LanguageFactory
from services.language.language_utils import LanguageUtils

# Create language detection service with specific languages
allowed_languages = LanguageUtils.get_available_languages()
detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)

# Detect language of a text
text = "Hello, how are you?"
lang, confidence = detector.detect_language(text)
print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
print(f"Confidence: {confidence}")

# Detect language of multiple texts
texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
results = detector.detect_batch(texts)
for text, (lang, confidence) in zip(texts, results):
    print(f"Text: {text}")
    print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
    print(f"Confidence: {confidence}")
"""