# ParamManagerScripts/backend/script_groups/EmailCrono/x1.py

"""
Script para desensamblar emails y generar un archivo Markdown con la
cronología de los mensajes.
"""

import os
import sys
from pathlib import Path
from utils.email_parser import procesar_eml
from utils.beautify import BeautifyProcessor

# Append the directory four levels above this file to sys.path. This has to
# run before the `backend` import below (hence the E402 suppression);
# presumably that directory is the project root containing `backend/` —
# confirm against the repository layout.
script_root = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
)
sys.path.append(script_root)
from backend.script_utils import load_configuration  # noqa: E402

# Force UTF-8 on standard output: this script prints accented characters and
# emoji, which would fail to encode on consoles using a narrower codec.
sys.stdout.reconfigure(encoding="utf-8")


def generar_indice(mensajes):
    """
    Build the Markdown index section that precedes the messages.

    The result is a "# Índice de Mensajes" heading, one line per message
    (whatever each message's ``get_index_entry()`` returns), and a closing
    horizontal rule.

    Args:
        mensajes: Iterable of message objects exposing ``get_index_entry()``,
            which must return a string.

    Returns:
        The index as a single string, ending with a blank line.
    """
    # Join once instead of growing the string with ``+=`` on every iteration.
    entradas = "".join(mensaje.get_index_entry() + "\n" for mensaje in mensajes)
    return "# Índice de Mensajes\n\n" + entradas + "\n---\n\n"


def _print_message_summary(msg, position, total, source_name):
    """Print the identifying fields of one message extracted from a file."""
    print("")
    print(f"--- Msg {position}/{total} from {source_name} ---")
    print(f"Remitente: {msg.remitente}")
    print(f"Fecha: {msg.fecha}")
    print(f"Subject: {msg.subject}")
    print(f"Hash: {msg.hash}")
    print(f"Adjuntos: {msg.adjuntos}")
    sys.stdout.flush()


def _print_duplicate_report(existing_msg, msg):
    """Print a side-by-side comparison of a rejected duplicate and the kept message."""
    print("  📋 Comparación de mensajes duplicados:")
    print("    Mensaje existente:")
    print(f"      - Remitente: {existing_msg.remitente}")
    print(f"      - Fecha: {existing_msg.fecha}")
    print(f"      - Subject: {existing_msg.subject}")
    print("    Mensaje nuevo (rechazado):")
    print(f"      - Remitente: {msg.remitente}")
    print(f"      - Fecha: {msg.fecha}")
    print(f"      - Subject: {msg.subject}")

    # Detailed hash debugging for the rejected message
    print("  🔍 Debug detallado del hash duplicado:")
    print(f"    Hash: {msg.hash}")
    msg.debug_hash_info()
    sys.stdout.flush()


def main() -> None:
    """
    Build a Markdown chronology from the .eml files of the input directory.

    Steps performed:
      1. Load the configuration with ``load_configuration()`` and read
         ``working_directory``, ``level2.attachments_dir``,
         ``level3.input_directory`` and ``level3.cronologia_file``.
      2. Create the output directories (working directory, attachments
         directory and its ``cronologia`` subdirectory).
      3. Run ``procesar_eml`` on every ``*.eml`` file of the input directory.
      4. Keep the first message seen for each ``msg.hash``, passing its
         content through ``BeautifyProcessor.process_text``; later messages
         with the same hash are reported and dropped.
      5. Sort the kept messages by ``fecha`` (newest first) and overwrite
         ``<working_directory>/<cronologia_file>.md`` with the index followed
         by each message's ``to_markdown()`` output.

    Progress and errors are reported on stdout, which is flushed after each
    group of prints. The function returns ``None`` in every case, including
    the early exits (no configuration, missing input directory, no .eml
    files, write failure).
    """
    # Load the environment configuration
    configs = load_configuration()
    if not configs:
        print(
            "Error: No se pudo cargar la configuración. " "Verificar script_config.json"
        )
        sys.stdout.flush()
        return

    print("✅ Configuración cargada exitosamente")
    sys.stdout.flush()

    # Results/output directory
    working_directory = configs.get("working_directory", ".")

    # Level 2 (group) settings
    group_config = configs.get("level2", {})
    attachments_dir = group_config.get("attachments_dir", "adjuntos")

    # Level 3 settings: INPUT directory (.eml files) and output file name
    work_config = configs.get("level3", {})
    input_dir = work_config.get("input_directory", ".")
    cronologia_file = work_config.get("cronologia_file", "emails")

    # Every output path lives under working_directory
    output_file = os.path.join(working_directory, cronologia_file + ".md")
    attachments_path = os.path.join(working_directory, attachments_dir)
    attachments_crono_path = os.path.join(attachments_path, "cronologia")

    # Debug prints
    print(f"Working/Output directory: {working_directory}")
    print(f"Input directory: {input_dir}")
    print(f"Output file: {output_file}")
    print(f"Attachments directory: {attachments_path}")
    sys.stdout.flush()

    # The beautify rules are looked up next to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    beautify_rules = os.path.join(script_dir, "config", "beautify_rules.json")
    beautifier = BeautifyProcessor(beautify_rules)
    print(f"Beautify rules file: {beautify_rules}")
    sys.stdout.flush()

    # Make sure the output directories exist
    os.makedirs(working_directory, exist_ok=True)
    os.makedirs(attachments_path, exist_ok=True)
    os.makedirs(attachments_crono_path, exist_ok=True)

    # Check that the input directory exists and has files
    input_path = Path(input_dir)
    if not input_path.exists():
        print(f"Error: Input directory {input_path} does not exist")
        sys.stdout.flush()
        return

    # glob() yields entries in filesystem order; sort them so that the message
    # kept among duplicates, and the order of messages sharing the same date,
    # are the same on every run.
    eml_files = sorted(input_path.glob("*.eml"))
    print(f"Found {len(eml_files)} .eml files")

    if not eml_files:
        print("⚠️ No se encontraron archivos .eml en el directorio")
        sys.stdout.flush()
        return

    # Always start a fresh chronology (an existing file is not loaded)
    mensajes = []
    # First message kept for each hash: O(1) duplicate detection plus direct
    # access to the message a duplicate collides with.
    mensajes_por_hash = {}
    print("Creando cronología nueva (archivo se sobrescribirá)")
    sys.stdout.flush()

    total_procesados = 0
    total_nuevos = 0
    mensajes_duplicados = 0

    for archivo in eml_files:
        print(f"\n{'='*60}")
        print(f"Processing file: {archivo}")
        sys.stdout.flush()
        nuevos_mensajes = procesar_eml(
            archivo, attachments_path, attachments_crono_path
        )
        print(f"Extracted {len(nuevos_mensajes)} messages from {archivo.name}")
        sys.stdout.flush()
        total_procesados += len(nuevos_mensajes)

        # Detect duplicates and beautify only the messages that are kept
        for posicion, msg in enumerate(nuevos_mensajes, start=1):
            _print_message_summary(
                msg, posicion, len(nuevos_mensajes), archivo.name
            )

            if msg.hash not in mensajes_por_hash:
                print("✓ NUEVO mensaje - Agregando a la cronología")
                sys.stdout.flush()
                msg.contenido = beautifier.process_text(msg.contenido)
                mensajes.append(msg)
                # Register the hash only after beautifying, in case the hash
                # is derived from the content (not visible from this file);
                # setdefault keeps the earliest message for a given hash.
                mensajes_por_hash.setdefault(msg.hash, msg)
                total_nuevos += 1
            else:
                print("⚠ DUPLICADO - Ya existe un mensaje con este hash")
                mensajes_duplicados += 1
                _print_duplicate_report(mensajes_por_hash[msg.hash], msg)

    print("\nEstadísticas de procesamiento:")
    print("- Total mensajes encontrados:", total_procesados)
    print("- Mensajes únicos añadidos:", total_nuevos)
    print("- Mensajes duplicados ignorados:", mensajes_duplicados)
    sys.stdout.flush()

    # Newest message first
    mensajes.sort(key=lambda x: x.fecha, reverse=True)

    indice = generar_indice(mensajes)

    print(f"\nWriting {len(mensajes)} messages to {output_file}")
    sys.stdout.flush()

    # Top-level boundary: any failure while rendering or writing is reported
    # on stdout instead of propagating as a traceback.
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            # Index first, then every message
            f.write(indice)
            for msg in mensajes:
                f.write(msg.to_markdown())

        print(f"✅ Cronología guardada exitosamente en: {output_file}")
        print(f"📊 Total de mensajes en la cronología: {len(mensajes)}")
        sys.stdout.flush()

    except Exception as e:
        print(f"❌ Error al guardar la cronología: {e}")
        sys.stdout.flush()


# Run the chronology generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()