Agregada segunda traducción de Google para control
This commit is contained in:
parent
8994e2177c
commit
bed4ac84e6
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
|||
|
||||
def google_api_key():
    """Return the Google API key taken from the ``GOOGLE_API_KEY`` environment variable.

    The key must not live in source control: a literal committed here is
    readable by anyone with access to the repository history. Any key that
    was previously committed should be revoked and replaced.

    Returns:
        str: the key with surrounding whitespace removed.

    Raises:
        RuntimeError: if ``GOOGLE_API_KEY`` is unset or blank.
    """
    import os  # local import: keeps this tiny module self-contained

    key = os.environ.get("GOOGLE_API_KEY", "").strip()
    if not key:
        raise RuntimeError("La variable de entorno GOOGLE_API_KEY no está definida")
    return key
|
|
@ -1,3 +1,3 @@
|
|||
# Configure your OpenAI API key through the OPENAI_API_KEY environment variable.
def openai_api_key():
    """Return the OpenAI API key taken from the ``OPENAI_API_KEY`` environment variable.

    The key must not live in source control: a literal committed here is
    readable by anyone with access to the repository history. Any key that
    was previously committed should be revoked and replaced.

    Returns:
        str: the key with surrounding whitespace removed.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or blank.
    """
    import os  # local import: keeps this tiny module self-contained

    key = os.environ.get("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("La variable de entorno OPENAI_API_KEY no está definida")
    return key


# Backward-compatible alias for callers that still import the previous name.
api_key = openai_api_key
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"type": "service_account",
|
||||
"project_id": "translate-431108",
|
||||
"private_key_id": "020c17463fbb2877ec3bfee0ebfe56873f85c40f",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAGI6iumWcM3dK\nySVqPgO9kxYhy+FUy4D455cHLNAIsigIIA0HNhcRnEUQTxoMtcMYHNCk7fxlbNNp\nvNyWiNs0LAFbIrW+ahIOqiWD2DulKisw8fxPXJsoyoDbReAI1i9rtOzHnxLP7yvs\nPxxEB6EabTTGYlAf+BWgLZ7aNdiRqTKQDkAGLWfe+5yq2G3m8iVTpafsteovm/em\n//8UJojrm/GAkRnd8CIN+4MmsFdUT+jvEHz0KVFqx0qHymkYAA3+jHrh73qsqLsZ\nfiSuzLmcYoFTchuoIlO7I31btGxUQpAdRVzJ9HMyDrSJczplQPwDrL3KpCMjEJ/N\nWpBGI3y7AgMBAAECggEAFWRGsgwZglEK5kr1vdnlFxM494+EQCn0wAMjmEV0b0Yj\nsMVHniIhudt8p2nKUq2vvvt/0qC7/DF9GOfSwCLCbRIUyT4uyLh4L/uLokiOBSu9\nKT4GunHAx5nxdTmtTrPyDkeqHJAe4LACOf9MhFjofGW6kaMOHYPw0z3GqbHYXwlW\nUKdwBg8nCOylj5o6L+7yzdoMGI7eg0UmS9xS24D3RpkV15pntV/9gPDVBn4ARQK6\nz9oft76xOmaifAaYj/cgjcmXpU0y+tUCxG+Uyv6EzeOTsbD6Ux/sfWUJKkUqJw2l\nEy+b2NJwpj+VpQY+2BRe2DA7YZbJOi9BYUNR/k6wiQKBgQD4cFnHh1FevxrkiW5k\nLqijP0ndeiombJwNzmpdb3MJCCK+HTJ5zemMoOMMzfVEzIYp1GmGXzjQBkenK2Qg\n9yuoxkNndaKwx7YBw2EQnrz15ha9Frw/qeZouwwnEQwg8aFvylGu++4EJI/pXCJb\nRX5vzbqL5ujyP4Ri18YLRjaqOQKBgQDF8Tl6Qt8Dj+QL2G/9rsbERYTNyaxg1T7w\nbzJF7ZkWlxF5MPRyb0tnXcFHYl7H2bYNDIGw7cCktk88R9jmS6WFDjYAg6Y7U89u\nxo2oI5C4uXUDuTeTHE3bIl3bnxUu150MBSxW3L3fHjsLrKx7BoIqfThwyhnz8W1l\npfiWCXWukwKBgFbY/6mOllVmvQHzQRHoda9Lqoiv8hgok40IAYrUMQSRQYPwNPr7\njo9IltadEQaR8Lyispj0pKmj6Fx6ALC9dxey51E9gDgIOIn5QO9MboYPfxdu4TPo\nMtGdwavOe8zwaxznPxLKovPzzBMp0CDT9FUlN/c8K2az6ZWcFHFQhD+RAoGAXa5a\nHqQRelUagASylCSoy/F+9gpRvA6Q2Cg4aeIWhxt+WJYzywkjbctwk0FaTEmcoLZf\nlwYfG5VxEL2MSQpaTwu3wSyNexyiWvI7zuzuLI8Rc26wf75wlprAQxYaZ24CVNMJ\n9h5I+pULKu5RP3SIHGXhVXhBKjQEK4yOhyv492MCgYEAoraYhhHBL2jSVGIqgwn/\nkOGcBpgl4QinJfm0UPwvvwkx6tAJt9Ta3RH1XsVb55dVSgeAWgve0z5b8TpCg+UI\nT2/Y7jvAGRI9TMlEGpEI2l0vBx82H/fyjvseg34O0mOTaKfhUDv8v627LqlWwbjG\nwQy/HXN7M5OFm8wxzJophoY=\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "tranlate@translate-431108.iam.gserviceaccount.com",
|
||||
"client_id": "118043735176235215029",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/tranlate%40translate-431108.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}
|
|
@ -3,12 +3,17 @@ from openai import OpenAI
|
|||
import os
|
||||
import re
|
||||
import logging
|
||||
from openai_api_key import api_key
|
||||
from openai_api_key import openai_api_key
|
||||
from google_api_key import google_api_key
|
||||
from x2_master_export2translate import transformar_texto
|
||||
import ollama
|
||||
import json
|
||||
from google.cloud import translate_v2 as translate
|
||||
from google.oauth2 import service_account
|
||||
import html
|
||||
|
||||
client = OpenAI(api_key=api_key())
|
||||
client = OpenAI(api_key=openai_api_key())
|
||||
GOOGLE_APPLICATION_CREDENTIALS ="translate-431108-020c17463fbb.json"
|
||||
|
||||
# Diccionario de idiomas
|
||||
IDIOMAS = {
|
||||
|
@ -20,10 +25,9 @@ IDIOMAS = {
|
|||
6: ("German", "de-DE"),
|
||||
}
|
||||
|
||||
|
||||
def configurar_logger():
|
||||
logger = logging.getLogger("translate_logger")
|
||||
logger.setLevel(logging.DEBUG) # Cambiado a DEBUG para más información
|
||||
logger.setLevel(logging.DEBUG)
|
||||
os.makedirs(".\\data", exist_ok=True)
|
||||
fh = logging.FileHandler(".\\data\\translate_log.log", encoding="utf-8")
|
||||
fh.setLevel(logging.DEBUG)
|
||||
|
@ -32,62 +36,50 @@ def configurar_logger():
|
|||
logger.addHandler(fh)
|
||||
return logger
|
||||
|
||||
def init_google_translate_client():
    """Create a Google Translate client from the service-account key file.

    The key file location is read from the module-level
    ``GOOGLE_APPLICATION_CREDENTIALS`` constant.

    Returns:
        A ``translate.Client`` built with the service-account credentials.

    Raises:
        ValueError: if the key file does not exist at that path.
    """
    if not os.path.exists(GOOGLE_APPLICATION_CREDENTIALS):
        raise ValueError("No se han proporcionado credenciales válidas para Google Translate")

    # Authenticate with the service-account credentials stored in the file.
    account_credentials = service_account.Credentials.from_service_account_file(
        GOOGLE_APPLICATION_CREDENTIALS
    )
    return translate.Client(credentials=account_credentials)
|
||||
|
||||
google_translate_client = init_google_translate_client()
|
||||
|
||||
def google_translate(text, target_language):
    """Translate ``text`` with the module-level Google Translate client.

    Args:
        text: the text to translate.
        target_language: language code passed through to the client.

    Returns:
        str: the ``translatedText`` field of the response with HTML entities
        decoded by ``html.unescape``.
    """
    response = google_translate_client.translate(text, target_language=target_language)
    return html.unescape(response['translatedText'])
|
||||
|
||||
logger = configurar_logger()
|
||||
|
||||
|
||||
def mostrar_idiomas():
    """Print the numbered menu of target languages defined in ``IDIOMAS``."""
    print("Selecciona el idioma de destino:")
    # Each IDIOMAS value is a (name, code) pair; only the name is displayed.
    opciones = (f"{numero}: {nombre}" for numero, (nombre, _codigo) in IDIOMAS.items())
    for opcion in opciones:
        print(opcion)
|
||||
|
||||
|
||||
def translate_text(text, source_lang, target_lang):
    """Translate one text through the OpenAI chat completions API.

    The request and the response are both written to the module logger.

    Args:
        text: the text to translate.
        source_lang: source language name inserted into the prompt.
        target_lang: target language name inserted into the prompt.

    Returns:
        str: the content of the first completion choice, stripped.
    """
    logger.info(
        f"Solicitando traducción de {source_lang} a {target_lang} para el texto: {text}"
    )

    user_prompt = (
        f"Translate the following text from {source_lang} to {target_lang} "
        "while preserving special fields like <> and <#>. "
        f"This texts are for an HMI industrial machine: {text}"
    )
    conversation = [
        {"role": "system", "content": "You are a translator."},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=150,
        temperature=0.3,
    )

    translated_text = completion.choices[0].message.content.strip()
    logger.info(f"Respuesta recibida: {translated_text}")
    return translated_text
|
||||
|
||||
def read_system_prompt():
    """Return the system prompt stored in ``data/system_prompt.txt``.

    The path is relative to the current working directory and is built with
    ``os.path.join`` so the same code works with either path separator
    (a literal ``.\\data\\...`` string only resolves on Windows).

    Returns:
        str: the stripped file content, or ``"You are a translator."`` when
        the file does not exist (a warning is logged in that case).
    """
    prompt_path = os.path.join(".", "data", "system_prompt.txt")
    try:
        with open(prompt_path, "r", encoding="utf-8") as file:
            return file.read().strip()
    except FileNotFoundError:
        logger.warning("Archivo system_prompt.txt no encontrado. Usando prompt por defecto.")
        return "You are a translator."
|
||||
|
||||
|
||||
def translate_batch_openai(batch_texts, source_lang, target_lang):
    """Translate each text of ``batch_texts`` with ``translate_text``.

    Args:
        batch_texts: iterable of texts to translate.
        source_lang: source language passed to ``translate_text``.
        target_lang: target language passed to ``translate_text``.

    Returns:
        list: the translations, in the same order as ``batch_texts``.
    """
    # One request per text; order is preserved.
    return [translate_text(item, source_lang, target_lang) for item in batch_texts]
|
||||
|
||||
def translate_batch_openai(texts, source_lang, target_lang):
|
||||
joined_text = "\n".join(texts)
|
||||
def translate_batch_openai(texts_dict, source_lang, target_lang):
|
||||
system_prompt = read_system_prompt()
|
||||
texts_list = list(texts_dict.values())
|
||||
joined_text = "\n".join(texts_list)
|
||||
|
||||
request_payload = json.dumps({"texts": texts_list, "source_lang": source_lang, "target_lang": target_lang})
|
||||
logger.info(
|
||||
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{joined_text}"
|
||||
f"Solicitando traducción de {source_lang} a {target_lang} para el lote de textos:\n{request_payload}"
|
||||
)
|
||||
request_payload = json.dumps({"texts": texts, "source_lang": source_lang, "target_lang": target_lang})
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model= "gpt-4o-mini", # "gpt-3.5-turbo",
|
||||
model="gpt-4o-mini",
|
||||
messages=[
|
||||
{"role": "system", "content": f"You are a translator.{system_prompt}."},
|
||||
{"role": "user", "content": request_payload}
|
||||
|
@ -98,11 +90,13 @@ def translate_batch_openai(texts, source_lang, target_lang):
|
|||
response_payload = json.loads(response.choices[0].message.content.strip())
|
||||
translations = response_payload.get("texts", [])
|
||||
logger.info(f"Respuestas recibidas:\n{translations}")
|
||||
if len(translations) != len(texts):
|
||||
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
|
||||
return translations
|
||||
|
||||
def translate_batch(texts, source_lang, target_lang):
|
||||
if len(translations) != len(texts_list):
|
||||
raise ValueError("La cantidad de traducciones recibidas no coincide con la cantidad de textos enviados.")
|
||||
|
||||
return dict(zip(texts_dict.keys(), translations))
|
||||
|
||||
def translate_batch_ollama(texts, source_lang, target_lang):
|
||||
joined_text = "\n".join(texts)
|
||||
system_prompt = read_system_prompt()
|
||||
logger.info(
|
||||
|
@ -125,72 +119,79 @@ def texto_requiere_traduccion(texto):
|
|||
)
|
||||
return requiere_traduccion
|
||||
|
||||
|
||||
def main(file_path, target_lang_code, target_lang, traducir_todo, batch_size=10):
    """Translate the ``it-IT`` column of an Excel file and add a Google check column.

    Rows are selected, translated in batches through ``translate_batch_openai``
    (one retry per batch), written to ``"<code> Translated"``, then translated
    back to Italian with ``google_translate`` into ``"<code> CheckTranslate"``.
    The result is saved as ``3_master_export2translate_translated.xlsx`` next
    to the input file.

    Args:
        file_path: path of the Excel workbook to read.
        target_lang_code: column name of the existing target-language text;
            also the prefix of the two output columns.
        target_lang: target language passed to ``translate_batch_openai``.
        traducir_todo: when true, every row whose text requires translation
            is translated; otherwise only rows whose existing target cell is
            empty (or whose target column is missing) are translated.
        batch_size: number of texts sent per translation request.
    """
    df = pd.read_excel(file_path)
    source_col = "it-IT"
    source_translated_col = target_lang_code
    target_col = f"{target_lang_code} Translated"
    check_translate_col = f"{target_lang_code} CheckTranslate"

    # Make sure both output columns exist before writing into them.
    for output_col in (target_col, check_translate_col):
        if output_col not in df.columns:
            df[output_col] = None

    # The existing-translation column may be absent from the workbook. Check
    # once so a missing column means "nothing translated yet", not a KeyError.
    has_existing_col = source_translated_col in df.columns

    # Maps the original source text to its pre-processed form; duplicate
    # source texts collapse into a single entry.
    texts_to_translate = {}

    for _, row in df.iterrows():
        source_text = str(row[source_col])
        processed_text = transformar_texto(source_text)

        if traducir_todo or not has_existing_col:
            needs_translation = True
        else:
            # Translate only cells that are empty in the original target column.
            existing = row[source_translated_col]
            needs_translation = pd.isna(existing) or str(existing).strip() == ""

        if needs_translation and texto_requiere_traduccion(processed_text):
            texts_to_translate[source_text] = processed_text

    # Materialize once so each batch is a cheap slice.
    pending = list(texts_to_translate.items())
    num_texts = len(pending)
    logger.info(f"Número total de textos a traducir: {num_texts}")
    print(f"Número total de textos a traducir: {num_texts}")

    translations = {}
    retries = 2  # Total attempts per batch (1 initial + 1 retry)
    for start_idx in range(0, num_texts, batch_size):
        end_idx = min(start_idx + batch_size, num_texts)
        batch_texts = dict(pending[start_idx:end_idx])
        logger.info(f"Traduciendo: celdas desde {start_idx} a {end_idx}.")
        print(f"Traduciendo : celdas desde: {start_idx} a :{end_idx}.")

        for attempt in range(retries):
            try:
                batch_translations = translate_batch_openai(batch_texts, 'Italian', target_lang)
                translations.update(batch_translations)
                break  # Success: stop retrying this batch
            except Exception as e:
                if attempt < retries - 1:  # Not the last attempt yet
                    logger.warning(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
                    print(f"Error en el intento {attempt + 1} de traducción de celdas desde {start_idx} a {end_idx}: {e}. Reintentando...")
                else:  # Last attempt: the batch is left untranslated
                    logger.error(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")
                    print(f"Error en todos los intentos de traducción de celdas desde {start_idx} a {end_idx}: {e}")

    logger.info(f"Número total de traducciones recibidas: {len(translations)}")

    # Successful back-translations keyed by source text, so rows that share
    # a source text do not trigger repeated Google requests.
    back_translations = {}

    # Update the DataFrame with the translations.
    for index, row in df.iterrows():
        source_text = str(row[source_col])
        if source_text not in translations:
            continue
        df.at[index, target_col] = translations[source_text]

        # Back-translate to Italian with Google Translate as a control.
        if source_text not in back_translations:
            try:
                back_translations[source_text] = google_translate(translations[source_text], 'it')
            except Exception as e:
                logger.error(f"Error en la traducción de Google para el texto '{source_text}': {e}")
                df.at[index, check_translate_col] = "Error en la traducción"
                continue
        df.at[index, check_translate_col] = back_translations[source_text]

    output_path = os.path.join(os.path.dirname(file_path), '3_master_export2translate_translated.xlsx')
    df.to_excel(output_path, index=False)
    logger.info(f"Archivo traducido guardado en: {output_path}")
    print(f"Archivo traducido guardado en: {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
batch_size = 20
|
||||
translate_file = ".\\data\\2_master_export2translate.xlsx"
|
||||
|
@ -204,4 +205,4 @@ if __name__ == "__main__":
|
|||
traducir_todo = (
|
||||
input("¿Desea traducir todas las celdas (s/n)? ").strip().lower() == "s"
|
||||
)
|
||||
main(translate_file, target_lang_code,target_lang, traducir_todo, batch_size)
|
||||
main(translate_file, target_lang_code, target_lang, traducir_todo, batch_size)
|
Loading…
Reference in New Issue