Agregado segunda traduccion de Google para control
This commit is contained in:
parent
8994e2177c
commit
bed4ac84e6
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
||||||
|
|
||||||
|
def google_api_key():
|
||||||
|
return 'AIzaSyB1Yk1faZjLBQ5JjEWuRFQ2_XdRSB9aH7A'
|
|
@ -1,3 +1,3 @@
|
||||||
# Configura tu clave API de OpenAI
|
# Configura tu clave API de OpenAI
|
||||||
def api_key():
|
def openai_api_key():
|
||||||
return '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'
|
return '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'
|
|
@ -0,0 +1,13 @@
|
||||||
|
{
|
||||||
|
"type": "service_account",
|
||||||
|
"project_id": "translate-431108",
|
||||||
|
"private_key_id": "020c17463fbb2877ec3bfee0ebfe56873f85c40f",
|
||||||
|
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAGI6iumWcM3dK\nySVqPgO9kxYhy+FUy4D455cHLNAIsigIIA0HNhcRnEUQTxoMtcMYHNCk7fxlbNNp\nvNyWiNs0LAFbIrW+ahIOqiWD2DulKisw8fxPXJsoyoDbReAI1i9rtOzHnxLP7yvs\nPxxEB6EabTTGYlAf+BWgLZ7aNdiRqTKQDkAGLWfe+5yq2G3m8iVTpafsteovm/em\n//8UJojrm/GAkRnd8CIN+4MmsFdUT+jvEHz0KVFqx0qHymkYAA3+jHrh73qsqLsZ\nfiSuzLmcYoFTchuoIlO7I31btGxUQpAdRVzJ9HMyDrSJczplQPwDrL3KpCMjEJ/N\nWpBGI3y7AgMBAAECggEAFWRGsgwZglEK5kr1vdnlFxM494+EQCn0wAMjmEV0b0Yj\nsMVHniIhudt8p2nKUq2vvvt/0qC7/DF9GOfSwCLCbRIUyT4uyLh4L/uLokiOBSu9\nKT4GunHAx5nxdTmtTrPyDkeqHJAe4LACOf9MhFjofGW6kaMOHYPw0z3GqbHYXwlW\nUKdwBg8nCOylj5o6L+7yzdoMGI7eg0UmS9xS24D3RpkV15pntV/9gPDVBn4ARQK6\nz9oft76xOmaifAaYj/cgjcmXpU0y+tUCxG+Uyv6EzeOTsbD6Ux/sfWUJKkUqJw2l\nEy+b2NJwpj+VpQY+2BRe2DA7YZbJOi9BYUNR/k6wiQKBgQD4cFnHh1FevxrkiW5k\nLqijP0ndeiombJwNzmpdb3MJCCK+HTJ5zemMoOMMzfVEzIYp1GmGXzjQBkenK2Qg\n9yuoxkNndaKwx7YBw2EQnrz15ha9Frw/qeZouwwnEQwg8aFvylGu++4EJI/pXCJb\nRX5vzbqL5ujyP4Ri18YLRjaqOQKBgQDF8Tl6Qt8Dj+QL2G/9rsbERYTNyaxg1T7w\nbzJF7ZkWlxF5MPRyb0tnXcFHYl7H2bYNDIGw7cCktk88R9jmS6WFDjYAg6Y7U89u\nxo2oI5C4uXUDuTeTHE3bIl3bnxUu150MBSxW3L3fHjsLrKx7BoIqfThwyhnz8W1l\npfiWCXWukwKBgFbY/6mOllVmvQHzQRHoda9Lqoiv8hgok40IAYrUMQSRQYPwNPr7\njo9IltadEQaR8Lyispj0pKmj6Fx6ALC9dxey51E9gDgIOIn5QO9MboYPfxdu4TPo\nMtGdwavOe8zwaxznPxLKovPzzBMp0CDT9FUlN/c8K2az6ZWcFHFQhD+RAoGAXa5a\nHqQRelUagASylCSoy/F+9gpRvA6Q2Cg4aeIWhxt+WJYzywkjbctwk0FaTEmcoLZf\nlwYfG5VxEL2MSQpaTwu3wSyNexyiWvI7zuzuLI8Rc26wf75wlprAQxYaZ24CVNMJ\n9h5I+pULKu5RP3SIHGXhVXhBKjQEK4yOhyv492MCgYEAoraYhhHBL2jSVGIqgwn/\nkOGcBpgl4QinJfm0UPwvvwkx6tAJt9Ta3RH1XsVb55dVSgeAWgve0z5b8TpCg+UI\nT2/Y7jvAGRI9TMlEGpEI2l0vBx82H/fyjvseg34O0mOTaKfhUDv8v627LqlWwbjG\nwQy/HXN7M5OFm8wxzJophoY=\n-----END PRIVATE KEY-----\n",
|
||||||
|
"client_email": "tranlate@translate-431108.iam.gserviceaccount.com",
|
||||||
|
"client_id": "118043735176235215029",
|
||||||
|
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||||
|
"token_uri": "https://oauth2.googleapis.com/token",
|
||||||
|
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||||
|
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/tranlate%40translate-431108.iam.gserviceaccount.com",
|
||||||
|
"universe_domain": "googleapis.com"
|
||||||
|
}
|
|
@ -3,12 +3,17 @@ from openai import OpenAI
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from openai_api_key import api_key
|
from openai_api_key import openai_api_key
|
||||||
|
from google_api_key import google_api_key
|
||||||
from x2_master_export2translate import transformar_texto
|
from x2_master_export2translate import transformar_texto
|
||||||
import ollama
|
import ollama
|
||||||
import json
|
import json
|
||||||
|
from google.cloud import translate_v2 as translate
|
||||||
|
from google.oauth2 import service_account
|
||||||
|
import html
|
||||||
|
|
||||||
client = OpenAI(api_key=api_key())
|
client = OpenAI(api_key=openai_api_key())
|
||||||
|
GOOGLE_APPLICATION_CREDENTIALS ="translate-431108-020c17463fbb.json"
|
||||||
|
|
||||||
# Diccionario de idiomas
|
# Diccionario de idiomas
|
||||||
IDIOMAS = {
|
IDIOMAS = {
|
||||||
|
@ -20,10 +25,9 @@ IDIOMAS = {
|
||||||
6: ("German", "de-DE"),
|
6: ("German", "de-DE"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def configurar_logger():
|
def configurar_logger():
|
||||||
logger = logging.getLogger("translate_logger")
|
logger = logging.getLogger("translate_logger")
|
||||||
logger.setLevel(logging.DEBUG) # Cambiado a DEBUG para más información
|
logger.setLevel(logging.DEBUG)
|
||||||
os.makedirs(".\\data", exist_ok=True)
|
os.makedirs(".\\data", exist_ok=True)
|
||||||
fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8")
|
fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8")
|
||||||
fh.setLevel(logging.DEBUG)
|
fh.setLevel(logging.DEBUG)
|
||||||
|
@ -32,62 +36,50 @@ def configurar_logger():
|
||||||
logger.addHandler(fh)
|
logger.addHandler(fh)
|
||||||
return logger
|
return logger
|
||||||
|
|
||||||
|
def init_google_translate_client():
|
||||||
|
if os.path.exists(GOOGLE_APPLICATION_CREDENTIALS):
|
||||||
|
# Usar credenciales de cuenta de servicio
|
||||||
|
credentials = service_account.Credentials.from_service_account_file(
|
||||||
|
GOOGLE_APPLICATION_CREDENTIALS
|
||||||
|
)
|
||||||
|
return translate.Client(credentials=credentials)
|
||||||
|
else:
|
||||||
|
raise ValueError("No se han proporcionado credenciales válidas para Google Translate")
|
||||||
|
|
||||||
|
google_translate_client = init_google_translate_client()
|
||||||
|
|
||||||
|
def google_translate(text, target_language):
|
||||||
|
result = google_translate_client.translate(text, target_language=target_language)
|
||||||
|
translated_text = result['translatedText']
|
||||||
|
return html.unescape(translated_text)
|
||||||
|
|
||||||
logger = configurar_logger()
|
logger = configurar_logger()
|
||||||
|
|
||||||
|
|
||||||
def mostrar_idiomas():
|
def mostrar_idiomas():
|
||||||
print("Selecciona el idioma de destino:")
|
print("Selecciona el idioma de destino:")
|
||||||
for numero, (nombre, _) in IDIOMAS.items():
|
for numero, (nombre, _) in IDIOMAS.items():
|
||||||
print(f"{numero}: {nombre}")
|
print(f"{numero}: {nombre}")
|
||||||
|
|
||||||
|
|
||||||
def translate_text(text, source_lang, target_lang):
|
|
||||||
logger.info(
|
|
||||||
f"Solicitando traducción de {source_lang} a {target_lang} para el texto: {text}"
|
|
||||||
)
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": f"You are a translator."},
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"Translate the following text from {source_lang} to {target_lang} while preserving special fields like <> and <#>. This texts are for an HMI industrial machine: {text}",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
max_tokens=150,
|
|
||||||
temperature=0.3,
|
|
||||||
)
|
|
||||||
translated_text = response.choices[0].message.content.strip()
|
|
||||||
logger.info(f"Respuesta recibida: {translated_text}")
|
|
||||||
return translated_text
|
|
||||||
|
|
||||||
def read_system_prompt():
|
def read_system_prompt():
|
||||||
try:
|
try:
|
||||||
with open("/data/system_prompt.txt", "r", encoding="utf-8") as file:
|
with open(".\\data\\system_prompt.txt", "r", encoding="utf-8") as file:
|
||||||
return file.read().strip()
|
return file.read().strip()
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.")
|
logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.")
|
||||||
return "You are a translator."
|
return "You are a translator."
|
||||||
|
|
||||||
|
def translate_batch_openai(texts_dict, source_lang, target_lang):
|
||||||
def translate_batch_openai(batch_texts, source_lang, target_lang):
|
|
||||||
# Aquí se asume que esta función maneja una lista de textos y devuelve una lista de traducciones
|
|
||||||
translations = []
|
|
||||||
for text in batch_texts:
|
|
||||||
translation = translate_text(text, source_lang, target_lang)
|
|
||||||
translations.append(translation)
|
|
||||||
return translations
|
|
||||||
|
|
||||||
def translate_batch_openai(texts, source_lang, target_lang):
|
|
||||||
joined_text = "\n".join(texts)
|
|
||||||
system_prompt = read_system_prompt()
|
system_prompt = read_system_prompt()
|
||||||
|
texts_list = list(texts_dict.values())
|
||||||
|
joined_text = "\n".join(texts_list)
|
||||||
|
|
||||||
|
request_payload = json.dumps({"texts": texts_list, "source_lang": source_lang, "target_lang": target_lang})
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{joined_text}"
|
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{request_payload}"
|
||||||
)
|
)
|
||||||
request_payload = json.dumps({"texts": texts, "source_lang": source_lang, "target_lang": target_lang})
|
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model= "gpt-4o-mini", # "gpt-3.5-turbo",
|
model="gpt-4o-mini",
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": f"You are a translator.{system_prompt}."},
|
{"role": "system", "content": f"You are a translator.{system_prompt}."},
|
||||||
{"role": "user", "content": request_payload}
|
{"role": "user", "content": request_payload}
|
||||||
|
@ -98,11 +90,13 @@ def translate_batch_openai(texts, source_lang, target_lang):
|
||||||
response_payload = json.loads(response.choices[0].message.content.strip())
|
response_payload = json.loads(response.choices[0].message.content.strip())
|
||||||
translations = response_payload.get("texts", [])
|
translations = response_payload.get("texts", [])
|
||||||
logger.info(f"Respuestas recibidas:\n{translations}")
|
logger.info(f"Respuestas recibidas:\n{translations}")
|
||||||
if len(translations) != len(texts):
|
|
||||||
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
|
|
||||||
return translations
|
|
||||||
|
|
||||||
def translate_batch(texts, source_lang, target_lang):
|
if len(translations) != len(texts_list):
|
||||||
|
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
|
||||||
|
|
||||||
|
return dict(zip(texts_dict.keys(), translations))
|
||||||
|
|
||||||
|
def translate_batch_ollama(texts, source_lang, target_lang):
|
||||||
joined_text = "\n".join(texts)
|
joined_text = "\n".join(texts)
|
||||||
system_prompt = read_system_prompt()
|
system_prompt = read_system_prompt()
|
||||||
logger.info(
|
logger.info(
|
||||||
|
@ -125,72 +119,79 @@ def texto_requiere_traduccion(texto):
|
||||||
)
|
)
|
||||||
return requiere_traduccion
|
return requiere_traduccion
|
||||||
|
|
||||||
|
def main(file_path, target_lang_code, target_lang, traducir_todo, batch_size=10):
|
||||||
def main(file_path, target_lang_code,target_lang, traducir_todo, batch_size=10):
|
|
||||||
df = pd.read_excel(file_path)
|
df = pd.read_excel(file_path)
|
||||||
source_col = "it-IT"
|
source_col = "it-IT"
|
||||||
source_translated_col = target_lang_code
|
source_translated_col = target_lang_code
|
||||||
target_col = f"{target_lang_code} Translated"
|
target_col = f"{target_lang_code} Translated"
|
||||||
|
check_translate_col = f"{target_lang_code} CheckTranslate"
|
||||||
|
|
||||||
# Asegurarse de que la columna de destino existe
|
# Asegurarse de que la columna de destino existe
|
||||||
if target_col not in df.columns:
|
if target_col not in df.columns:
|
||||||
df[target_col] = None
|
df[target_col] = None
|
||||||
|
if check_translate_col not in df.columns:
|
||||||
|
df[check_translate_col] = None
|
||||||
|
|
||||||
texts_to_translate = []
|
texts_to_translate = {}
|
||||||
indices_to_translate = []
|
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
source_text = str(row[source_col])
|
source_text = str(row[source_col])
|
||||||
source_translated_text = str(row[source_translated_col]) if source_translated_col in df.columns else ""
|
source_translated_text = str(row[source_translated_col]) if source_translated_col in df.columns else ""
|
||||||
processed_text = transformar_texto(source_text)
|
processed_text = transformar_texto(source_text)
|
||||||
|
|
||||||
if traducir_todo:
|
if traducir_todo:
|
||||||
# Traducir todas las celdas del idioma de destino
|
|
||||||
if texto_requiere_traduccion(processed_text):
|
if texto_requiere_traduccion(processed_text):
|
||||||
texts_to_translate.append(processed_text)
|
texts_to_translate[source_text] = processed_text
|
||||||
indices_to_translate.append(index)
|
|
||||||
else:
|
else:
|
||||||
# Traducir solo las celdas vacías en el idioma de destino original
|
|
||||||
if pd.isna(row[source_translated_col]) or source_translated_text.strip() == "":
|
if pd.isna(row[source_translated_col]) or source_translated_text.strip() == "":
|
||||||
if texto_requiere_traduccion(processed_text):
|
if texto_requiere_traduccion(processed_text):
|
||||||
texts_to_translate.append(processed_text)
|
texts_to_translate[source_text] = processed_text
|
||||||
indices_to_translate.append(index)
|
|
||||||
|
|
||||||
num_texts = len(texts_to_translate)
|
num_texts = len(texts_to_translate)
|
||||||
logger.info(f"Número total de textos a traducir: {num_texts}")
|
logger.info(f"Número total de textos a traducir: {num_texts}")
|
||||||
print(f"Número total de textos a traducir: {num_texts}")
|
print(f"Número total de textos a traducir: {num_texts}")
|
||||||
|
|
||||||
translations = []
|
translations = {}
|
||||||
for start_idx in range(0, num_texts, batch_size):
|
for start_idx in range(0, num_texts, batch_size):
|
||||||
end_idx = min(start_idx + batch_size, num_texts)
|
end_idx = min(start_idx + batch_size, num_texts)
|
||||||
batch_texts = texts_to_translate[start_idx:end_idx]
|
batch_texts = dict(list(texts_to_translate.items())[start_idx:end_idx])
|
||||||
logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.")
|
logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.")
|
||||||
print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.")
|
print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.")
|
||||||
|
|
||||||
|
retries = 2 # Número de intentos totales (1 inicial + 1 reintento)
|
||||||
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang_code)
|
batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang)
|
||||||
translations.extend(batch_translations)
|
translations.update(batch_translations)
|
||||||
|
break # Si la traducción es exitosa, salimos del bucle de reintentos
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error en la traducción de celdas desde {start_idx} a {end_idx}: {e}")
|
if attempt < retries - 1: # Si no es el último intento
|
||||||
print(f"Error en la traducción de celdas desde {start_idx} a {end_idx}: {e}")
|
logger.warning(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
|
||||||
continue
|
print(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
|
||||||
|
else: # Si es el último intento
|
||||||
|
logger.error(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
|
||||||
|
print(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
|
||||||
|
|
||||||
logger.info(f"Número total de traducciones recibidas: {len(translations)}")
|
logger.info(f"Número total de traducciones recibidas: {len(translations)}")
|
||||||
|
|
||||||
if len(translations) != len(indices_to_translate):
|
# Actualizar el DataFrame con las traducciones
|
||||||
logger.warning(f"Desajuste entre el número de traducciones ({len(translations)}) y el número de índices ({len(indices_to_translate)})")
|
for index, row in df.iterrows():
|
||||||
|
source_text = str(row[source_col])
|
||||||
for i, index in enumerate(indices_to_translate):
|
if source_text in translations:
|
||||||
if i < len(translations):
|
df.at[index, target_col] = translations[source_text]
|
||||||
df.at[index, target_col] = translations[i]
|
# Realizar la traducción de verificación con Google Translate
|
||||||
else:
|
try:
|
||||||
logger.error(f"No hay traducción disponible para el índice {index}")
|
google_translation = google_translate(translations[source_text], 'it')
|
||||||
|
df.at[index, check_translate_col] = google_translation
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error en la traducción de Google para el texto '{source_text}': {e}")
|
||||||
|
df.at[index, check_translate_col] = "Error en la traducción"
|
||||||
|
|
||||||
output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx')
|
output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx')
|
||||||
df.to_excel(output_path, index=False)
|
df.to_excel(output_path, index=False)
|
||||||
logger.info(f"Archivo traducido guardado en: {output_path}")
|
logger.info(f"Archivo traducido guardado en: {output_path}")
|
||||||
print(f"Archivo traducido guardado en: {output_path}")
|
print(f"Archivo traducido guardado en: {output_path}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
batch_size = 20
|
batch_size = 20
|
||||||
translate_file = ".\\data\\2_master_export2translate.xlsx"
|
translate_file = ".\\data\\2_master_export2translate.xlsx"
|
||||||
|
@ -204,4 +205,4 @@ if __name__ == "__main__":
|
||||||
traducir_todo = (
|
traducir_todo = (
|
||||||
input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s"
|
input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s"
|
||||||
)
|
)
|
||||||
main(translate_file, target_lang_code,target_lang, traducir_todo, batch_size)
|
main(translate_file, target_lang_code, target_lang, traducir_todo, batch_size)
|
Loading…
Reference in New Issue