Agregada segunda traducción de Google para control

This commit is contained in:
Miguel 2024-07-31 12:02:49 +02:00
parent 8994e2177c
commit bed4ac84e6
10 changed files with 1240 additions and 638 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

3
google_api_key.py Normal file
View File

@ -0,0 +1,3 @@
def google_api_key():
    """Return the Google API key from the ``GOOGLE_API_KEY`` environment variable.

    The key is intentionally not stored in source code: anything committed to
    the repository must be treated as compromised and rotated.

    Returns:
        str: the configured key, with surrounding whitespace removed.

    Raises:
        RuntimeError: if ``GOOGLE_API_KEY`` is unset or empty.
    """
    import os  # local import keeps this edit self-contained

    key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not key:
        raise RuntimeError(
            "GOOGLE_API_KEY is not set; export it in the environment "
            "instead of hard-coding the key."
        )
    return key

View File

@ -1,3 +1,3 @@
# OpenAI API key provider.
def openai_api_key():
    """Return the OpenAI API key from the ``OPENAI_API_KEY`` environment variable.

    The key is intentionally not stored in source code: anything committed to
    the repository must be treated as compromised and rotated.

    Returns:
        str: the configured key, with surrounding whitespace removed.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or empty.
    """
    import os  # local import keeps this edit self-contained

    key = os.environ.get("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it in the environment "
            "instead of hard-coding the key."
        )
    return key


# Backward-compatible alias for callers that still import the previous name.
api_key = openai_api_key

View File

@ -0,0 +1,13 @@
{
"type": "service_account",
"project_id": "translate-431108",
"private_key_id": "020c17463fbb2877ec3bfee0ebfe56873f85c40f",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAGI6iumWcM3dK\nySVqPgO9kxYhy+FUy4D455cHLNAIsigIIA0HNhcRnEUQTxoMtcMYHNCk7fxlbNNp\nvNyWiNs0LAFbIrW+ahIOqiWD2DulKisw8fxPXJsoyoDbReAI1i9rtOzHnxLP7yvs\nPxxEB6EabTTGYlAf+BWgLZ7aNdiRqTKQDkAGLWfe+5yq2G3m8iVTpafsteovm/em\n//8UJojrm/GAkRnd8CIN+4MmsFdUT+jvEHz0KVFqx0qHymkYAA3+jHrh73qsqLsZ\nfiSuzLmcYoFTchuoIlO7I31btGxUQpAdRVzJ9HMyDrSJczplQPwDrL3KpCMjEJ/N\nWpBGI3y7AgMBAAECggEAFWRGsgwZglEK5kr1vdnlFxM494+EQCn0wAMjmEV0b0Yj\nsMVHniIhudt8p2nKUq2vvvt/0qC7/DF9GOfSwCLCbRIUyT4uyLh4L/uLokiOBSu9\nKT4GunHAx5nxdTmtTrPyDkeqHJAe4LACOf9MhFjofGW6kaMOHYPw0z3GqbHYXwlW\nUKdwBg8nCOylj5o6L+7yzdoMGI7eg0UmS9xS24D3RpkV15pntV/9gPDVBn4ARQK6\nz9oft76xOmaifAaYj/cgjcmXpU0y+tUCxG+Uyv6EzeOTsbD6Ux/sfWUJKkUqJw2l\nEy+b2NJwpj+VpQY+2BRe2DA7YZbJOi9BYUNR/k6wiQKBgQD4cFnHh1FevxrkiW5k\nLqijP0ndeiombJwNzmpdb3MJCCK+HTJ5zemMoOMMzfVEzIYp1GmGXzjQBkenK2Qg\n9yuoxkNndaKwx7YBw2EQnrz15ha9Frw/qeZouwwnEQwg8aFvylGu++4EJI/pXCJb\nRX5vzbqL5ujyP4Ri18YLRjaqOQKBgQDF8Tl6Qt8Dj+QL2G/9rsbERYTNyaxg1T7w\nbzJF7ZkWlxF5MPRyb0tnXcFHYl7H2bYNDIGw7cCktk88R9jmS6WFDjYAg6Y7U89u\nxo2oI5C4uXUDuTeTHE3bIl3bnxUu150MBSxW3L3fHjsLrKx7BoIqfThwyhnz8W1l\npfiWCXWukwKBgFbY/6mOllVmvQHzQRHoda9Lqoiv8hgok40IAYrUMQSRQYPwNPr7\njo9IltadEQaR8Lyispj0pKmj6Fx6ALC9dxey51E9gDgIOIn5QO9MboYPfxdu4TPo\nMtGdwavOe8zwaxznPxLKovPzzBMp0CDT9FUlN/c8K2az6ZWcFHFQhD+RAoGAXa5a\nHqQRelUagASylCSoy/F+9gpRvA6Q2Cg4aeIWhxt+WJYzywkjbctwk0FaTEmcoLZf\nlwYfG5VxEL2MSQpaTwu3wSyNexyiWvI7zuzuLI8Rc26wf75wlprAQxYaZ24CVNMJ\n9h5I+pULKu5RP3SIHGXhVXhBKjQEK4yOhyv492MCgYEAoraYhhHBL2jSVGIqgwn/\nkOGcBpgl4QinJfm0UPwvvwkx6tAJt9Ta3RH1XsVb55dVSgeAWgve0z5b8TpCg+UI\nT2/Y7jvAGRI9TMlEGpEI2l0vBx82H/fyjvseg34O0mOTaKfhUDv8v627LqlWwbjG\nwQy/HXN7M5OFm8wxzJophoY=\n-----END PRIVATE KEY-----\n",
"client_email": "tranlate@translate-431108.iam.gserviceaccount.com",
"client_id": "118043735176235215029",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/tranlate%40translate-431108.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View File

@ -3,12 +3,17 @@ from openai import OpenAI
import os
import re
import logging
from openai_api_key import api_key
from openai_api_key import openai_api_key
from google_api_key import google_api_key
from x2_master_export2translate import transformar_texto
import ollama
import json
from google.cloud import translate_v2 as translate
from google.oauth2 import service_account
import html
# OpenAI client shared by the OpenAI-based translation functions in this module.
client = OpenAI(api_key=api_key())
client = OpenAI(api_key=openai_api_key())
# Path of the Google service-account JSON file loaded by init_google_translate_client().
GOOGLE_APPLICATION_CREDENTIALS ="translate-431108-020c17463fbb.json"
# Diccionario de idiomas
IDIOMAS = {
@ -20,10 +25,9 @@ IDIOMAS = {
6: ("German", "de-DE"),
}
def configurar_logger():
logger = logging.getLogger("translate_logger")
logger.setLevel(logging.DEBUG) # Cambiado a DEBUG para más información
logger.setLevel(logging.DEBUG)
os.makedirs(".\\data", exist_ok=True)
fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8")
fh.setLevel(logging.DEBUG)
@ -32,62 +36,50 @@ def configurar_logger():
logger.addHandler(fh)
return logger
def init_google_translate_client():
    """Build the Google Translate client from the service-account file.

    Returns:
        A ``translate.Client`` authenticated with the credentials loaded from
        the file named by ``GOOGLE_APPLICATION_CREDENTIALS``.

    Raises:
        ValueError: if that credentials file does not exist.
    """
    # Guard clause: without the credentials file there is nothing to build.
    if not os.path.exists(GOOGLE_APPLICATION_CREDENTIALS):
        raise ValueError("No se han proporcionado credenciales válidas para Google Translate")

    service_credentials = service_account.Credentials.from_service_account_file(
        GOOGLE_APPLICATION_CREDENTIALS
    )
    return translate.Client(credentials=service_credentials)
# Module-level Google Translate client used by google_translate(); built when the module is loaded.
google_translate_client = init_google_translate_client()
def google_translate(text, target_language):
    """Translate *text* with the module-level Google Translate client.

    Args:
        text: text to translate.
        target_language: language code passed through to the client.

    Returns:
        The ``translatedText`` field of the response with HTML entities
        unescaped.
    """
    response = google_translate_client.translate(text, target_language=target_language)
    return html.unescape(response['translatedText'])
# Module-level logger used throughout this file, obtained from configurar_logger().
logger = configurar_logger()
def mostrar_idiomas():
    """Print the numbered menu of target languages defined in ``IDIOMAS``."""
    print("Selecciona el idioma de destino:")
    # Each IDIOMAS value is a (name, code) pair; only the name is displayed.
    opciones = (
        f"{clave}: {etiqueta}" for clave, (etiqueta, _codigo) in IDIOMAS.items()
    )
    for opcion in opciones:
        print(opcion)
def translate_text(text, source_lang, target_lang):
    """Translate a single text through the OpenAI chat completions API.

    Args:
        text: text to translate.
        source_lang: source language name inserted into the prompt.
        target_lang: target language name inserted into the prompt.

    Returns:
        The stripped content of the first completion choice.
    """
    logger.info(
        f"Solicitando traducción de {source_lang} a {target_lang} para el texto: {text}"
    )

    user_prompt = f"Translate the following text from {source_lang} to {target_lang} while preserving special fields like <> and <#>. This texts are for an HMI industrial machine: {text}"
    chat_messages = [
        {"role": "system", "content": "You are a translator."},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
        max_tokens=150,
        temperature=0.3,
    )

    translated_text = completion.choices[0].message.content.strip()
    logger.info(f"Respuesta recibida: {translated_text}")
    return translated_text
def read_system_prompt():
    """Return the system prompt stored in ``data/system_prompt.txt``.

    The path is relative to the current working directory and is built with
    ``os.path.join`` so it resolves on every platform; a literal
    backslash-separated path only works on Windows.

    Returns:
        str: the stripped file content, or ``"You are a translator."`` when
        the file does not exist (a warning is logged in that case).
    """
    prompt_path = os.path.join("data", "system_prompt.txt")
    try:
        with open(prompt_path, "r", encoding="utf-8") as file:
            return file.read().strip()
    except FileNotFoundError:
        logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.")
        return "You are a translator."
def translate_batch_openai(batch_texts, source_lang, target_lang):
# Aquí se asume que esta función maneja una lista de textos y devuelve una lista de traducciones
translations = []
for text in batch_texts:
translation = translate_text(text, source_lang, target_lang)
translations.append(translation)
return translations
def translate_batch_openai(texts, source_lang, target_lang):
joined_text = "\n".join(texts)
def translate_batch_openai(texts_dict, source_lang, target_lang):
system_prompt = read_system_prompt()
texts_list = list(texts_dict.values())
joined_text = "\n".join(texts_list)
request_payload = json.dumps({"texts": texts_list, "source_lang": source_lang, "target_lang": target_lang})
logger.info(
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{joined_text}"
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{request_payload}"
)
request_payload = json.dumps({"texts": texts, "source_lang": source_lang, "target_lang": target_lang})
response = client.chat.completions.create(
model= "gpt-4o-mini", # "gpt-3.5-turbo",
model="gpt-4o-mini",
messages=[
{"role": "system", "content": f"You are a translator.{system_prompt}."},
{"role": "user", "content": request_payload}
@ -98,11 +90,13 @@ def translate_batch_openai(texts, source_lang, target_lang):
response_payload = json.loads(response.choices[0].message.content.strip())
translations = response_payload.get("texts", [])
logger.info(f"Respuestas recibidas:\n{translations}")
if len(translations) != len(texts):
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
return translations
def translate_batch(texts, source_lang, target_lang):
if len(translations) != len(texts_list):
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
return dict(zip(texts_dict.keys(), translations))
def translate_batch_ollama(texts, source_lang, target_lang):
joined_text = "\n".join(texts)
system_prompt = read_system_prompt()
logger.info(
@ -125,72 +119,79 @@ def texto_requiere_traduccion(texto):
)
return requiere_traduccion
def main(file_path, target_lang_code, target_lang, traducir_todo, batch_size=10):
    """Translate the Italian column of an Excel file and add a control back-translation.

    The workbook at *file_path* is read with pandas. Texts from the ``it-IT``
    column are translated in batches through ``translate_batch_openai`` and
    stored in ``"<target_lang_code> Translated"``. Each translation is then
    sent to ``google_translate`` with target ``'it'`` and the result is stored
    in ``"<target_lang_code> CheckTranslate"``. The result is saved as
    ``3_master_export2translate_translated.xlsx`` next to the input file.

    Args:
        file_path: path of the Excel file to read.
        target_lang_code: name of the column holding existing translations;
            also the prefix of the two output columns.
        target_lang: target language passed to ``translate_batch_openai``.
        traducir_todo: when true, every row is a candidate; otherwise only
            rows whose existing translation cell is empty or NaN.
        batch_size: number of texts sent per translation request.
    """
    df = pd.read_excel(file_path)

    source_col = "it-IT"
    source_translated_col = target_lang_code
    target_col = f"{target_lang_code} Translated"
    check_translate_col = f"{target_lang_code} CheckTranslate"

    # Make sure both output columns exist before writing into them.
    for output_col in (target_col, check_translate_col):
        if output_col not in df.columns:
            df[output_col] = None

    # Checked once: indexing a row by a missing column would raise KeyError.
    has_existing_col = source_translated_col in df.columns

    # Keyed by the original source text so identical cells share one translation.
    texts_to_translate = {}
    for _, row in df.iterrows():
        source_text = str(row[source_col])
        processed_text = transformar_texto(source_text)

        if traducir_todo or not has_existing_col:
            # With no existing-translation column every row counts as untranslated.
            pending = True
        else:
            existing = row[source_translated_col]
            pending = pd.isna(existing) or str(existing).strip() == ""

        if pending and texto_requiere_traduccion(processed_text):
            texts_to_translate[source_text] = processed_text

    # Materialised once so each batch is a plain slice.
    pending_items = list(texts_to_translate.items())
    num_texts = len(pending_items)
    logger.info(f"Número total de textos a traducir: {num_texts}")
    print(f"Número total de textos a traducir: {num_texts}")

    translations = {}
    retries = 2  # total attempts per batch (1 initial + 1 retry)
    for start_idx in range(0, num_texts, batch_size):
        end_idx = min(start_idx + batch_size, num_texts)
        batch_texts = dict(pending_items[start_idx:end_idx])
        logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.")
        print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.")

        for attempt in range(retries):
            try:
                batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang)
                translations.update(batch_translations)
                break  # success: stop retrying this batch
            except Exception as e:
                # Best effort: a failed batch is logged and skipped so the
                # remaining batches are still processed.
                if attempt < retries - 1:
                    logger.warning(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
                    print(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
                else:
                    logger.error(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
                    print(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")

    logger.info(f"Número total de traducciones recibidas: {len(translations)}")

    # Write the translations back and add the Google control translation.
    for index, row in df.iterrows():
        source_text = str(row[source_col])
        if source_text in translations:
            df.at[index, target_col] = translations[source_text]
            # Back-translate to Italian so the result can be compared with the source.
            try:
                google_translation = google_translate(translations[source_text], 'it')
                df.at[index, check_translate_col] = google_translation
            except Exception as e:
                logger.error(f"Error en la traducción de Google para el texto '{source_text}': {e}")
                df.at[index, check_translate_col] = "Error en la traducción"

    output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx')
    df.to_excel(output_path, index=False)
    logger.info(f"Archivo traducido guardado en: {output_path}")
    print(f"Archivo traducido guardado en: {output_path}")
if __name__ == "__main__":
batch_size = 20
translate_file = ".\\data\\2_master_export2translate.xlsx"
@ -204,4 +205,4 @@ if __name__ == "__main__":
traducir_todo = (
input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s"
)
main(translate_file, target_lang_code,target_lang, traducir_todo, batch_size)
main(translate_file, target_lang_code, target_lang, traducir_todo, batch_size)