2024-07-30 07:46:33 -03:00
|
|
|
import pandas as pd
|
|
|
|
import os
|
2024-09-27 11:08:13 -03:00
|
|
|
import PyLibrary.funciones_comunes as fc
|
2024-10-12 09:06:22 -03:00
|
|
|
from translation_config import TranslationConfig
|
2024-10-14 10:47:49 -03:00
|
|
|
import langid
|
|
|
|
from openpyxl import load_workbook
|
2024-10-15 10:00:51 -03:00
|
|
|
from openpyxl.styles import PatternFill, Alignment, Font
|
|
|
|
from collections import defaultdict
|
2024-07-30 07:46:33 -03:00
|
|
|
|
2024-10-12 09:06:22 -03:00
|
|
|
# Module-level logger; it stays None until run() assigns the configured logger.
|
|
|
|
logger = None
|
2024-10-08 11:58:04 -03:00
|
|
|
|
2024-10-12 09:06:22 -03:00
|
|
|
|
2024-10-14 10:47:49 -03:00
|
|
|
def configurar_detector_idiomas():
    """Restrict langid to the languages declared in ``fc.IDIOMAS``.

    Every value of ``fc.IDIOMAS`` is unpacked as a pair whose second item is
    a language code. Only the part of that code before the first ``-`` is
    passed on to ``langid.set_languages``.
    """
    prefijos = []
    for _nombre, codigo_completo in fc.IDIOMAS.values():
        # Keep only the text that precedes the first dash of the code.
        prefijo, _separador, _resto = codigo_completo.partition("-")
        prefijos.append(prefijo)

    langid.set_languages(prefijos)
|
|
|
|
|
|
|
|
|
|
|
|
def detectar_idioma(texto, tipo_PLC):
    """Return the language code langid assigns to *texto*, or ``"unknown"``.

    The text is first passed through ``fc.limpiar_texto(tipo_PLC, texto)``;
    classification runs on the cleaned result.

    Args:
        texto: Text to classify.
        tipo_PLC: PLC type identifier forwarded to ``fc.limpiar_texto``.

    Returns:
        The first element of ``langid.classify``'s result, or the string
        ``"unknown"`` when the cleaned text has fewer than three characters
        after stripping, or when classification raises an exception.
    """
    texto_limpio = fc.limpiar_texto(tipo_PLC, texto)

    # Do not attempt detection on very short texts.
    if len(texto_limpio.strip()) < 3:
        return "unknown"

    try:
        idioma, _ = langid.classify(texto_limpio)
    except Exception:
        # Detection is best effort: any classifier error maps to "unknown".
        # Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        return "unknown"
    return idioma
|
|
|
|
|
|
|
|
|
|
|
|
def obtener_nombre_idioma(codigo_corto):
    """Look up the language name whose code starts with *codigo_corto*.

    The values of ``fc.IDIOMAS`` are scanned in iteration order as
    ``(name, code)`` pairs. The name of the first pair whose code begins
    with *codigo_corto* is returned, or ``"Desconocido"`` if none matches.
    """
    coincidencias = (
        nombre
        for nombre, codigo in fc.IDIOMAS.values()
        if codigo.startswith(codigo_corto)
    )
    # The generator is lazy, so the scan stops at the first match.
    return next(coincidencias, "Desconocido")
|
|
|
|
|
|
|
|
|
2024-10-12 09:06:22 -03:00
|
|
|
def _marcar_idiomas_y_agrupar_textos(
    worksheet, tipo_plc, idioma_esperado, progress_bar
):
    """Flag column-B cells in an unexpected language and group rows by text.

    Returns a mapping from cell text to the list of row numbers holding it.
    Only texts that are unchanged by ``fc.limpiar_texto`` are grouped.
    """
    relleno = PatternFill(
        start_color="ADD8E6", end_color="ADD8E6", fill_type="solid"
    )
    texto_a_filas = defaultdict(list)

    # Row 1 is the header, so data starts at row 2.
    for row in range(2, worksheet.max_row + 1):
        celda = worksheet.cell(row=row, column=2)
        texto = celda.value
        if texto:
            texto_limpio = fc.limpiar_texto(tipo_plc, texto)

            # Only consider the text for duplicate detection when the
            # cleaned text is identical to the original.
            if texto == texto_limpio:
                texto_a_filas[texto].append(row)

            # Detect the language and flag the cell when it is not the
            # expected one.
            idioma_detectado = detectar_idioma(texto, tipo_plc)
            if idioma_detectado not in ("unknown", idioma_esperado):
                celda.fill = relleno
                nombre_idioma = obtener_nombre_idioma(idioma_detectado)
                worksheet.cell(row=row, column=3).value = nombre_idioma

        progress_bar.increment()

    return texto_a_filas


def _resaltar_duplicados(worksheet, texto_a_filas):
    """Set a bold font on column-B cells whose text occurs in several rows.

    Returns the number of cells that were set to bold.
    """
    negrita = Font(bold=True)
    celdas_duplicadas = 0
    for filas in texto_a_filas.values():
        if len(filas) > 1:
            for row in filas:
                worksheet.cell(row=row, column=2).font = negrita
            celdas_duplicadas += len(filas)
    return celdas_duplicadas


def exportar_para_traduccion(config: TranslationConfig):
    """Export the selected language column of the master file for translation.

    Reads the master file at ``config.get_master_path()`` and writes an Excel
    workbook to ``config.get_translate_path()`` containing three columns: the
    first column of the master file, the column named
    ``config.codigo_idioma_seleccionado``, and ``Idioma_Detectado``.

    In the exported sheet:
      * columns A and B get wrapped, top-aligned text and a width of 50;
      * column-B cells whose detected language differs from the expected one
        are filled with colour ``ADD8E6`` and the detected language name is
        written to column C;
      * column-B cells whose text appears in more than one row are set to
        bold.

    Args:
        config: Translation configuration providing the paths, the selected
            language column and the PLC type code.

    Returns:
        None. If the master file does not exist, a message is printed and
        nothing is exported.
    """
    master_path = config.get_master_path()
    if not os.path.exists(master_path):
        print("El archivo maestro no existe.")
        return

    configurar_detector_idiomas()

    df_maestro = fc.read_dataframe_with_cleanup_retries(master_path)

    columna_idioma = config.codigo_idioma_seleccionado
    primera_columna = df_maestro.columns[0]

    df_export = pd.DataFrame()
    df_export[primera_columna] = df_maestro[primera_columna]
    df_export[columna_idioma] = df_maestro[columna_idioma]
    df_export["Idioma_Detectado"] = ""

    ruta_export = config.get_translate_path()

    with pd.ExcelWriter(ruta_export, engine="openpyxl") as writer:
        df_export.to_excel(writer, index=False, sheet_name="Sheet1")
        worksheet = writer.sheets["Sheet1"]

        # Wrap and widen the two text columns.
        wrap_alignment = Alignment(wrap_text=True, vertical="top")
        for col in ("A", "B"):
            for cell in worksheet[col]:
                cell.alignment = wrap_alignment
            worksheet.column_dimensions[col].width = 50

        idioma_esperado = fc.idiomas_shortcodefromcode(columna_idioma)

        total_rows = worksheet.max_row - 1  # Exclude the header row.
        progress_bar = fc.ProgressBar(
            total_rows, prefix="Procesando filas:", suffix="Completado"
        )

        print("Iniciando procesamiento de filas...")

        texto_a_filas = _marcar_idiomas_y_agrupar_textos(
            worksheet, config.codigo_tipo_PLC, idioma_esperado, progress_bar
        )

        # Mark duplicated cells in bold.
        celdas_duplicadas = _resaltar_duplicados(worksheet, texto_a_filas)

        progress_bar.finish()

    # Reported once the writer context has closed.
    print(f"\nArchivo exportado para traducción: {ruta_export}")
    print("Las celdas con idioma incorrecto han sido marcadas en azul.")
    print(
        "Se ha añadido el nombre del idioma detectado cuando es diferente del esperado."
    )
    print(
        f"Se han marcado {celdas_duplicadas} celdas en negrita por tener texto duplicado en la columna del idioma seleccionado."
    )
|
2024-07-30 07:46:33 -03:00
|
|
|
|
2024-10-12 09:06:22 -03:00
|
|
|
|
|
|
|
def run(config: TranslationConfig):
    """Entry point for this step: set up logging, announce, then export.

    Assigns the module-level ``logger`` from
    ``fc.configurar_logger(config.work_dir)``, prints the name of this script
    file and calls ``exportar_para_traduccion(config)``.
    """
    global logger
    logger = fc.configurar_logger(config.work_dir)

    nombre_script = os.path.basename(__file__)
    mensaje_inicio = f"\rIniciando: {nombre_script}\r"
    print(mensaje_inicio)

    exportar_para_traduccion(config)
|
|
|
|
|
2024-08-01 12:57:04 -03:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # When executed directly, hand control to the translation steps menu.
    # The import is kept local so it only happens for direct execution.
    import menu_pasos_traduccion as menu_pasos

    menu_pasos.main()
|