68 lines
2.3 KiB
Python
68 lines
2.3 KiB
Python
|
# services/language/language_utils.py
|
||
|
"""
|
||
|
Utility functions for language detection and validation
|
||
|
"""
|
||
|
from typing import Dict, Set
|
||
|
|
||
|
class LanguageUtils:
    """Lookup helpers for the language codes known to this module.

    Every method is a classmethod that reads from ``LANGUAGE_CODES``, so no
    instance is needed to use them.
    """

    # Maps a short language code to (English language name, region-qualified code).
    LANGUAGE_CODES: Dict[str, tuple] = {
        'it': ('Italian', 'it-IT'),
        'en': ('English', 'en-GB'),
        'pt': ('Portuguese', 'pt-PT'),
        'es': ('Spanish', 'es-ES'),
        'ru': ('Russian', 'ru-RU'),
        'fr': ('French', 'fr-FR'),
        'de': ('German', 'de-DE'),
        'tr': ('Turkish', 'tr-TR'),
    }

    @classmethod
    def get_language_name(cls, code: str) -> str:
        """Return the full language name for ``code``.

        Args:
            code: A short code ('en') or a region-qualified code ('en-GB').

        Returns:
            The language name, or 'Unknown' when the language is not in
            ``LANGUAGE_CODES``.
        """
        # Strip any region suffix first so this lookup agrees with
        # is_valid_language(), which accepts region-qualified codes too.
        name, _full_code = cls.LANGUAGE_CODES.get(
            cls.get_short_code(code), ('Unknown', '')
        )
        return name

    @classmethod
    def get_full_code(cls, short_code: str) -> str:
        """Return the region-qualified code for a short code.

        Args:
            short_code: A short language code such as 'en'.

        Returns:
            The region-qualified code (e.g. 'en-GB' for 'en'), or 'unknown'
            when ``short_code`` is not in ``LANGUAGE_CODES``.
        """
        _name, full_code = cls.LANGUAGE_CODES.get(
            short_code, ('Unknown', 'unknown')
        )
        return full_code

    @classmethod
    def get_short_code(cls, full_code: str) -> str:
        """Return the part of ``full_code`` before the first '-'.

        Args:
            full_code: A code such as 'en-GB'; a code without '-' is
                returned unchanged.

        Returns:
            The short language code (e.g. 'en' from 'en-GB').
        """
        # split() yields the whole string as its first item when there is no
        # '-', so no separate membership check is needed.
        return full_code.split('-', 1)[0]

    @classmethod
    def is_valid_language(cls, code: str) -> bool:
        """Report whether the language of ``code`` is in ``LANGUAGE_CODES``.

        Only the part before the first '-' is checked, so any region suffix
        on a known language is accepted.
        """
        return cls.get_short_code(code) in cls.LANGUAGE_CODES

    @classmethod
    def get_available_languages(cls) -> Set[str]:
        """Return a new set containing every short code in ``LANGUAGE_CODES``."""
        return set(cls.LANGUAGE_CODES)
|
||
|
|
||
|
# Example usage (kept inside a string literal so it does not run on import):
|
||
|
"""
|
||
|
from services.language.language_factory import LanguageFactory
|
||
|
from services.language.language_utils import LanguageUtils
|
||
|
|
||
|
# Create language detection service with specific languages
|
||
|
allowed_languages = LanguageUtils.get_available_languages()
|
||
|
detector = LanguageFactory.create_service("langid", allowed_languages=allowed_languages)
|
||
|
|
||
|
# Detect language of a text
|
||
|
text = "Hello, how are you?"
|
||
|
lang, confidence = detector.detect_language(text)
|
||
|
print(f"Detected language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||
|
print(f"Confidence: {confidence}")
|
||
|
|
||
|
# Detect language of multiple texts
|
||
|
texts = ["Hello, world!", "Hola mundo", "Bonjour le monde"]
|
||
|
results = detector.detect_batch(texts)
|
||
|
for text, (lang, confidence) in zip(texts, results):
|
||
|
print(f"Text: {text}")
|
||
|
print(f"Language: {LanguageUtils.get_language_name(lang)} ({lang})")
|
||
|
print(f"Confidence: {confidence}")
|
||
|
"""
|