Agregado segunda traduccion de Google para control

This commit is contained in:
Miguel 2024-07-31 12:02:49 +02:00
parent 8994e2177c
commit bed4ac84e6
10 changed files with 1240 additions and 638 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

3
google_api_key.py Normal file
View File

@ -0,0 +1,3 @@
def google_api_key():
    """Return the Google API key used by the translation scripts.

    The key is read from the ``GOOGLE_API_KEY`` environment variable instead
    of being embedded in the source, so the credential is never committed to
    version control.

    Returns:
        str: The API key, with surrounding whitespace removed.

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is unset or empty.
    """
    # Imported locally so the function stays self-contained.
    import os

    key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not key:
        # Fail loudly: an empty key would only surface later as an opaque
        # authentication error from the remote service.
        raise RuntimeError(
            "GOOGLE_API_KEY environment variable is not set; "
            "export it before running the translation scripts."
        )
    return key

View File

@ -1,3 +1,3 @@
# Configura tu clave API de OpenAI # Configura tu clave API de OpenAI
def api_key(): def openai_api_key():
return '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' return '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'

View File

@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "translate-431108",
"private_key_id": "020c17463fbb2877ec3bfee0ebfe56873f85c40f",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAGI6iumWcM3dK\nySVqPgO9kxYhy+FUy4D455cHLNAIsigIIA0HNhcRnEUQTxoMtcMYHNCk7fxlbNNp\nvNyWiNs0LAFbIrW+ahIOqiWD2DulKisw8fxPXJsoyoDbReAI1i9rtOzHnxLP7yvs\nPxxEB6EabTTGYlAf+BWgLZ7aNdiRqTKQDkAGLWfe+5yq2G3m8iVTpafsteovm/em\n//8UJojrm/GAkRnd8CIN+4MmsFdUT+jvEHz0KVFqx0qHymkYAA3+jHrh73qsqLsZ\nfiSuzLmcYoFTchuoIlO7I31btGxUQpAdRVzJ9HMyDrSJczplQPwDrL3KpCMjEJ/N\nWpBGI3y7AgMBAAECggEAFWRGsgwZglEK5kr1vdnlFxM494+EQCn0wAMjmEV0b0Yj\nsMVHniIhudt8p2nKUq2vvvt/0qC7/DF9GOfSwCLCbRIUyT4uyLh4L/uLokiOBSu9\nKT4GunHAx5nxdTmtTrPyDkeqHJAe4LACOf9MhFjofGW6kaMOHYPw0z3GqbHYXwlW\nUKdwBg8nCOylj5o6L+7yzdoMGI7eg0UmS9xS24D3RpkV15pntV/9gPDVBn4ARQK6\nz9oft76xOmaifAaYj/cgjcmXpU0y+tUCxG+Uyv6EzeOTsbD6Ux/sfWUJKkUqJw2l\nEy+b2NJwpj+VpQY+2BRe2DA7YZbJOi9BYUNR/k6wiQKBgQD4cFnHh1FevxrkiW5k\nLqijP0ndeiombJwNzmpdb3MJCCK+HTJ5zemMoOMMzfVEzIYp1GmGXzjQBkenK2Qg\n9yuoxkNndaKwx7YBw2EQnrz15ha9Frw/qeZouwwnEQwg8aFvylGu++4EJI/pXCJb\nRX5vzbqL5ujyP4Ri18YLRjaqOQKBgQDF8Tl6Qt8Dj+QL2G/9rsbERYTNyaxg1T7w\nbzJF7ZkWlxF5MPRyb0tnXcFHYl7H2bYNDIGw7cCktk88R9jmS6WFDjYAg6Y7U89u\nxo2oI5C4uXUDuTeTHE3bIl3bnxUu150MBSxW3L3fHjsLrKx7BoIqfThwyhnz8W1l\npfiWCXWukwKBgFbY/6mOllVmvQHzQRHoda9Lqoiv8hgok40IAYrUMQSRQYPwNPr7\njo9IltadEQaR8Lyispj0pKmj6Fx6ALC9dxey51E9gDgIOIn5QO9MboYPfxdu4TPo\nMtGdwavOe8zwaxznPxLKovPzzBMp0CDT9FUlN/c8K2az6ZWcFHFQhD+RAoGAXa5a\nHqQRelUagASylCSoy/F+9gpRvA6Q2Cg4aeIWhxt+WJYzywkjbctwk0FaTEmcoLZf\nlwYfG5VxEL2MSQpaTwu3wSyNexyiWvI7zuzuLI8Rc26wf75wlprAQxYaZ24CVNMJ\n9h5I+pULKu5RP3SIHGXhVXhBKjQEK4yOhyv492MCgYEAoraYhhHBL2jSVGIqgwn/\nkOGcBpgl4QinJfm0UPwvvwkx6tAJt9Ta3RH1XsVb55dVSgeAWgve0z5b8TpCg+UI\nT2/Y7jvAGRI9TMlEGpEI2l0vBx82H/fyjvseg34O0mOTaKfhUDv8v627LqlWwbjG\nwQy/HXN7M5OFm8wxzJophoY=\n-----END PRIVATE KEY-----\n",
"client_email": "tranlate@translate-431108.iam.gserviceaccount.com",
"client_id": "118043735176235215029",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/tranlate%40translate-431108.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View File

@ -3,12 +3,17 @@ from openai import OpenAI
import os import os
import re import re
import logging import logging
from openai_api_key import api_key from openai_api_key import openai_api_key
from google_api_key import google_api_key
from x2_master_export2translate import transformar_texto from x2_master_export2translate import transformar_texto
import ollama import ollama
import json import json
from google.cloud import translate_v2 as translate
from google.oauth2 import service_account
import html
client = OpenAI(api_key=api_key()) client = OpenAI(api_key=openai_api_key())
GOOGLE_APPLICATION_CREDENTIALS ="translate-431108-020c17463fbb.json"
# Diccionario de idiomas # Diccionario de idiomas
IDIOMAS = { IDIOMAS = {
@ -20,10 +25,9 @@ IDIOMAS = {
6: ("German", "de-DE"), 6: ("German", "de-DE"),
} }
def configurar_logger(): def configurar_logger():
logger = logging.getLogger("translate_logger") logger = logging.getLogger("translate_logger")
logger.setLevel(logging.DEBUG) # Cambiado a DEBUG para más información logger.setLevel(logging.DEBUG)
os.makedirs(".\\data", exist_ok=True) os.makedirs(".\\data", exist_ok=True)
fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8") fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8")
fh.setLevel(logging.DEBUG) fh.setLevel(logging.DEBUG)
@ -32,62 +36,50 @@ def configurar_logger():
logger.addHandler(fh) logger.addHandler(fh)
return logger return logger
def init_google_translate_client():
    """Build the Google Translate client from the service-account key file.

    Returns:
        A ``translate.Client`` created with the credentials loaded from the
        file named by ``GOOGLE_APPLICATION_CREDENTIALS``.

    Raises:
        ValueError: If no file exists at that path.
    """
    # Guard clause: without the key file there is nothing to load.
    if not os.path.exists(GOOGLE_APPLICATION_CREDENTIALS):
        raise ValueError("No se han proporcionado credenciales válidas para Google Translate")

    # Use service-account credentials.
    service_credentials = service_account.Credentials.from_service_account_file(
        GOOGLE_APPLICATION_CREDENTIALS
    )
    return translate.Client(credentials=service_credentials)
google_translate_client = init_google_translate_client()
def google_translate(text, target_language):
    """Translate ``text`` with the module-level Google Translate client.

    Args:
        text: Text to translate.
        target_language: Value forwarded as the client's ``target_language``.

    Returns:
        The ``translatedText`` field of the response, with HTML entities
        unescaped.
    """
    response = google_translate_client.translate(text, target_language=target_language)
    # Decode HTML entities before handing the text back.
    return html.unescape(response['translatedText'])
logger = configurar_logger() logger = configurar_logger()
def mostrar_idiomas(): def mostrar_idiomas():
print("Selecciona el idioma de destino:") print("Selecciona el idioma de destino:")
for numero, (nombre, _) in IDIOMAS.items(): for numero, (nombre, _) in IDIOMAS.items():
print(f"{numero}: {nombre}") print(f"{numero}: {nombre}")
def translate_text(text, source_lang, target_lang):
    """Translate a single text through the OpenAI chat-completions client.

    The request and the reply are both written to the module logger.

    Args:
        text: Text to translate.
        source_lang: Source language name inserted into the prompt.
        target_lang: Target language name inserted into the prompt.

    Returns:
        The content of the first choice in the response, stripped of
        surrounding whitespace.
    """
    logger.info(
        f"Solicitando traducción de {source_lang} a {target_lang} para el texto: {text}"
    )

    # The conversation is assembled up front so the API call stays compact.
    system_message = {"role": "system", "content": "You are a translator."}
    user_message = {
        "role": "user",
        "content": f"Translate the following text from {source_lang} to {target_lang} while preserving special fields like <> and <#>. This texts are for an HMI industrial machine: {text}",
    }

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        max_tokens=150,
        temperature=0.3,
    )

    answer = completion.choices[0].message.content.strip()
    logger.info(f"Respuesta recibida: {answer}")
    return answer
def read_system_prompt(): def read_system_prompt():
try: try:
with open("/data/system_prompt.txt", "r", encoding="utf-8") as file: with open(".\\data\\system_prompt.txt", "r", encoding="utf-8") as file:
return file.read().strip() return file.read().strip()
except FileNotFoundError: except FileNotFoundError:
logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.") logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.")
return "You are a translator." return "You are a translator."
def translate_batch_openai(texts_dict, source_lang, target_lang):
def translate_batch_openai(batch_texts, source_lang, target_lang):
    """Translate every text in ``batch_texts``, one request per text.

    Args:
        batch_texts: Iterable of texts to translate.
        source_lang: Source language forwarded to ``translate_text``.
        target_lang: Target language forwarded to ``translate_text``.

    Returns:
        list: The translations, in the same order as ``batch_texts``.
    """
    # Each entry is sent separately through translate_text; order is preserved.
    return [translate_text(entry, source_lang, target_lang) for entry in batch_texts]
def translate_batch_openai(texts, source_lang, target_lang):
joined_text = "\n".join(texts)
system_prompt = read_system_prompt() system_prompt = read_system_prompt()
texts_list = list(texts_dict.values())
joined_text = "\n".join(texts_list)
request_payload = json.dumps({"texts": texts_list, "source_lang": source_lang, "target_lang": target_lang})
logger.info( logger.info(
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{joined_text}" f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{request_payload}"
) )
request_payload = json.dumps({"texts": texts, "source_lang": source_lang, "target_lang": target_lang})
response = client.chat.completions.create( response = client.chat.completions.create(
model= "gpt-4o-mini", # "gpt-3.5-turbo", model="gpt-4o-mini",
messages=[ messages=[
{"role": "system", "content": f"You are a translator.{system_prompt}."}, {"role": "system", "content": f"You are a translator.{system_prompt}."},
{"role": "user", "content": request_payload} {"role": "user", "content": request_payload}
@ -98,11 +90,13 @@ def translate_batch_openai(texts, source_lang, target_lang):
response_payload = json.loads(response.choices[0].message.content.strip()) response_payload = json.loads(response.choices[0].message.content.strip())
translations = response_payload.get("texts", []) translations = response_payload.get("texts", [])
logger.info(f"Respuestas recibidas:\n{translations}") logger.info(f"Respuestas recibidas:\n{translations}")
if len(translations) != len(texts):
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
return translations
def translate_batch(texts, source_lang, target_lang): if len(translations) != len(texts_list):
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
return dict(zip(texts_dict.keys(), translations))
def translate_batch_ollama(texts, source_lang, target_lang):
joined_text = "\n".join(texts) joined_text = "\n".join(texts)
system_prompt = read_system_prompt() system_prompt = read_system_prompt()
logger.info( logger.info(
@ -125,72 +119,79 @@ def texto_requiere_traduccion(texto):
) )
return requiere_traduccion return requiere_traduccion
def main(file_path, target_lang_code, target_lang, traducir_todo, batch_size=10):
def main(file_path, target_lang_code,target_lang, traducir_todo, batch_size=10):
df = pd.read_excel(file_path) df = pd.read_excel(file_path)
source_col = "it-IT" source_col = "it-IT"
source_translated_col = target_lang_code source_translated_col = target_lang_code
target_col = f"{target_lang_code} Translated" target_col = f"{target_lang_code} Translated"
check_translate_col = f"{target_lang_code} CheckTranslate"
# Asegurarse de que la columna de destino existe # Asegurarse de que la columna de destino existe
if target_col not in df.columns: if target_col not in df.columns:
df[target_col] = None df[target_col] = None
if check_translate_col not in df.columns:
df[check_translate_col] = None
texts_to_translate = [] texts_to_translate = {}
indices_to_translate = []
for index, row in df.iterrows(): for _, row in df.iterrows():
source_text = str(row[source_col]) source_text = str(row[source_col])
source_translated_text = str(row[source_translated_col]) if source_translated_col in df.columns else "" source_translated_text = str(row[source_translated_col]) if source_translated_col in df.columns else ""
processed_text = transformar_texto(source_text) processed_text = transformar_texto(source_text)
if traducir_todo: if traducir_todo:
# Traducir todas las celdas del idioma de destino
if texto_requiere_traduccion(processed_text): if texto_requiere_traduccion(processed_text):
texts_to_translate.append(processed_text) texts_to_translate[source_text] = processed_text
indices_to_translate.append(index)
else: else:
# Traducir solo las celdas vacías en el idioma de destino original
if pd.isna(row[source_translated_col]) or source_translated_text.strip() == "": if pd.isna(row[source_translated_col]) or source_translated_text.strip() == "":
if texto_requiere_traduccion(processed_text): if texto_requiere_traduccion(processed_text):
texts_to_translate.append(processed_text) texts_to_translate[source_text] = processed_text
indices_to_translate.append(index)
num_texts = len(texts_to_translate) num_texts = len(texts_to_translate)
logger.info(f"Número total de textos a traducir: {num_texts}") logger.info(f"Número total de textos a traducir: {num_texts}")
print(f"Número total de textos a traducir: {num_texts}") print(f"Número total de textos a traducir: {num_texts}")
translations = [] translations = {}
for start_idx in range(0, num_texts, batch_size): for start_idx in range(0, num_texts, batch_size):
end_idx = min(start_idx + batch_size, num_texts) end_idx = min(start_idx + batch_size, num_texts)
batch_texts = texts_to_translate[start_idx:end_idx] batch_texts = dict(list(texts_to_translate.items())[start_idx:end_idx])
logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.") logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.")
print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.") print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.")
retries = 2 # Número de intentos totales (1 inicial + 1 reintento)
for attempt in range(retries):
try: try:
batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang_code) batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang)
translations.extend(batch_translations) translations.update(batch_translations)
break # Si la traducción es exitosa, salimos del bucle de reintentos
except Exception as e: except Exception as e:
logger.error(f"Error en la traducción de celdas desde {start_idx} a {end_idx}: {e}") if attempt < retries - 1: # Si no es el último intento
print(f"Error en la traducción de celdas desde {start_idx} a {end_idx}: {e}") logger.warning(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
continue print(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
else: # Si es el último intento
logger.error(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
print(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
logger.info(f"Número total de traducciones recibidas: {len(translations)}") logger.info(f"Número total de traducciones recibidas: {len(translations)}")
if len(translations) != len(indices_to_translate): # Actualizar el DataFrame con las traducciones
logger.warning(f"Desajuste entre el número de traducciones ({len(translations)}) y el número de índices ({len(indices_to_translate)})") for index, row in df.iterrows():
source_text = str(row[source_col])
for i, index in enumerate(indices_to_translate): if source_text in translations:
if i < len(translations): df.at[index, target_col] = translations[source_text]
df.at[index, target_col] = translations[i] # Realizar la traducción de verificación con Google Translate
else: try:
logger.error(f"No hay traducción disponible para el índice {index}") google_translation = google_translate(translations[source_text], 'it')
df.at[index, check_translate_col] = google_translation
except Exception as e:
logger.error(f"Error en la traducción de Google para el texto '{source_text}': {e}")
df.at[index, check_translate_col] = "Error en la traducción"
output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx') output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx')
df.to_excel(output_path, index=False) df.to_excel(output_path, index=False)
logger.info(f"Archivo traducido guardado en: {output_path}") logger.info(f"Archivo traducido guardado en: {output_path}")
print(f"Archivo traducido guardado en: {output_path}") print(f"Archivo traducido guardado en: {output_path}")
if __name__ == "__main__": if __name__ == "__main__":
batch_size = 20 batch_size = 20
translate_file = ".\\data\\2_master_export2translate.xlsx" translate_file = ".\\data\\2_master_export2translate.xlsx"
@ -204,4 +205,4 @@ if __name__ == "__main__":
traducir_todo = ( traducir_todo = (
input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s" input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s"
) )
main(translate_file, target_lang_code,target_lang, traducir_todo, batch_size) main(translate_file, target_lang_code, target_lang, traducir_todo, batch_size)