# ParamManagerScripts/backend/script_groups/EmailCrono/models/mensaje_email.py

# models/mensaje_email.py
import re
import hashlib
from datetime import datetime
from email.utils import parseaddr, parsedate_to_datetime


class MensajeEmail:
    """One e-mail message normalized for inclusion in a Markdown chronology.

    On construction the sender is reduced to a clean display name, the date
    is turned into a ``datetime``, quoted-header lines and redundant
    whitespace are stripped from the body, and an MD5 fingerprint of the
    normalized fields is stored in ``hash``.

    Attributes:
        remitente: Cleaned, ASCII-only sender name.
        fecha: Message date (result of ``_estandarizar_fecha``).
        subject: Subject text, or ``"Sin Asunto"`` when none was given.
        contenido: Cleaned body text.
        adjuntos: List of attachment names (empty list when none).
        hash: Hex MD5 digest identifying the message.
    """

    # Prefixes of quoted header lines (Italian and English variants) that
    # are dropped from the body by _limpiar_contenido.
    _PREFIJOS_METADATOS = (
        "Da: ",
        "Inviato: ",
        "A: ",
        "From: ",
        "Sent: ",
        "To: ",
        "Oggetto: ",
    )

    def __init__(
        self,
        remitente,
        fecha,
        contenido,
        subject=None,
        adjuntos=None,
    ):
        """Normalize every field and compute the message hash.

        Args:
            remitente: Raw sender text (may contain "Da:"/"From:" headers).
            fecha: A date string in e-mail header format, or a date object
                exposing ``strftime``.
            contenido: Raw message body.
            subject: Message subject; falsy values become "Sin Asunto".
            adjuntos: Attachment names; falsy values become an empty list.
        """
        self.remitente = self._estandarizar_remitente(remitente)
        self.fecha = self._estandarizar_fecha(fecha)
        self.subject = subject if subject else "Sin Asunto"
        self.contenido = self._limpiar_contenido(contenido)
        self.adjuntos = adjuntos if adjuntos else []
        # Must run last: the hash reads the normalized fields set above.
        self.hash = self._generar_hash()

    def _formatear_subject_para_link(self, subject):
        """Format a subject for use as the display text of an Obsidian link.

        Characters other than word characters, whitespace and hyphens are
        removed, and each whitespace run becomes a single hyphen.

        Returns:
            The formatted subject, or "Sin-Asunto" when the subject is empty
            or nothing is left after removing special characters.
        """
        if not subject:
            return "Sin-Asunto"
        # Drop special characters, then collapse whitespace into hyphens.
        formatted = re.sub(r"[^\w\s-]", "", subject)
        formatted = re.sub(r"\s+", "-", formatted.strip())
        # A subject made only of punctuation would otherwise produce an
        # empty link alias.
        return formatted or "Sin-Asunto"

    def _limpiar_contenido(self, contenido):
        """Return the body without quoted-header lines or extra whitespace.

        Lines starting (after stripping) with one of ``_PREFIJOS_METADATOS``
        are removed, runs of spaces are collapsed, line endings are
        normalized to LF, and three or more consecutive newlines become two.

        Returns:
            The cleaned text, or "" when ``contenido`` is falsy.
        """
        if not contenido:
            return ""

        # Drop metadata lines and collapse repeated spaces inside each
        # remaining line, keeping the line structure intact.
        cleaned_lines = [
            re.sub(r" +", " ", line)
            for line in contenido.split("\n")
            if not line.strip().startswith(self._PREFIJOS_METADATOS)
        ]
        text = "\n".join(cleaned_lines)

        # Remove the specific newline + NBSP + newline combination.
        text = re.sub(r"\r?\n\xa0\r?\n", "\n", text)

        # Normalize CRLF first, then any remaining lone CR, to LF.
        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")

        # Collapse three or more newlines into exactly two.
        text = re.sub(r"\n{3,}", "\n\n", text)

        return text.strip()

    def to_markdown(self):
        """Render the message as a Markdown section.

        Layout: hash line, subject as a level-3 heading, date as a list
        item, the body, an optional attachment list, and a closing ``---``.
        """
        # The hash is emitted as a "+ " line rather than as a heading.
        hash_line = f"+ {self.hash}\n\n"
        subject_line = f"### {self.subject if self.subject else 'Sin Asunto'}\n\n"
        fecha_line = f"- {self.fecha.strftime('%d-%m-%Y')}\n\n"

        parts = [hash_line, subject_line, fecha_line, self.contenido, "\n\n"]

        if self.adjuntos:
            parts.append("### Adjuntos\n")
            parts.extend(f"- [[{adj}]]\n" for adj in self.adjuntos)
        parts.append("---\n\n")
        return "".join(parts)

    def get_index_entry(self):
        """Build the list entry for the index.

        The link targets the heading named after the raw subject inside the
        ``cronologia`` note and displays the hyphenated subject.
        """
        fecha_formato = self.fecha.strftime("%d-%m-%Y")
        subject_link = self._formatear_subject_para_link(self.subject)
        return (
            f"- {fecha_formato} - {self.remitente} - [[cronologia#"
            f"{self.subject}|{subject_link}]]"
        )

    def _estandarizar_remitente(self, remitente):
        """Extract a clean, ASCII-only sender name from raw sender text.

        Handles text that still carries "Da: ... Inviato:" or
        "From: ... Sent:" header fragments. When only an address is
        available, its local part is used as the name.

        Returns:
            The sender name, or "Remitente Desconocido" when ``remitente``
            is None or no name/address can be extracted.
        """
        if remitente is None:
            return "Remitente Desconocido"
        if not isinstance(remitente, str):
            # Non-string header values are handled through their text form
            # instead of failing on the substring checks below.
            remitente = str(remitente)

        # Keep only the text between the sender label and the next label.
        if "Da:" in remitente:
            remitente = remitente.split("Da:")[1].split("Inviato:")[0]
        elif "From:" in remitente:
            remitente = remitente.split("From:")[1].split("Sent:")[0]

        nombre, email = parseaddr(remitente)
        if not nombre and email:
            nombre = email.split("@")[0]
        elif not nombre and not email:
            # parseaddr found nothing: fall back to "Name <" style text.
            nombre_match = re.search(r"([A-Za-z\s]+)\s*<", remitente)
            if not nombre_match:
                return "Remitente Desconocido"
            nombre = nombre_match.group(1)

        # Remove a fixed set of punctuation characters and any non-ASCII
        # characters from the name.
        nombre = re.sub(r'[<>:"/\\|?*]', "", nombre.strip())
        nombre = nombre.encode("ascii", "ignore").decode("ascii")
        return nombre

    def _estandarizar_fecha(self, fecha):
        """Convert ``fecha`` into a date object usable by ``strftime``.

        Strings are parsed as e-mail date headers. ``None`` and unparseable
        strings fall back to the current local time so that the message can
        still be built. Any other value is returned unchanged.
        """
        if fecha is None:
            return datetime.now()
        if isinstance(fecha, str):
            try:
                return parsedate_to_datetime(fecha)
            except (TypeError, ValueError, IndexError, OverflowError):
                # Best effort: an invalid date header must not abort the
                # construction of the message.
                return datetime.now()
        return fecha

    def _generar_hash(self):
        """Generate the MD5 fingerprint that identifies this message.

        The digest covers the sender, the date truncated to minutes, the
        whitespace-normalized subject and the first 500 characters of the
        whitespace-normalized body, joined with "|".
        """
        # Collapse all whitespace so formatting differences do not change
        # the hash.
        contenido_hash = re.sub(r"\s+", " ", self.contenido).strip()
        subject_normalizado = re.sub(
            r"\s+", " ", self.subject if self.subject else ""
        ).strip()

        remitente_hash = self.remitente.strip()
        # Minute resolution tolerates small timestamp variations.
        fecha_hash = self.fecha.strftime("%Y%m%d%H%M")
        # Only the first 500 characters of the normalized body are used.
        contenido_recortado = contenido_hash[:500]

        texto_hash = "|".join(
            [remitente_hash, fecha_hash, subject_normalizado, contenido_recortado]
        )

        # Debug output is enabled temporarily by debug_hash_info().
        if getattr(self, "_debug_hash", False):
            print("      🔍 Debug Hash:")
            print("        - Remitente: '" + remitente_hash + "'")
            print("        - Fecha: '" + fecha_hash + "'")
            print("        - Subject: '" + subject_normalizado + "'")
            print("        - Contenido (500 chars): '" + contenido_recortado + "'")
            print("        - Texto completo hash: '" + texto_hash[:100] + "...'")

        return hashlib.md5(texto_hash.encode()).hexdigest()

    def debug_hash_info(self):
        """Recompute the hash while printing the fields it is built from.

        Returns:
            The recomputed hash. ``self.hash`` is not modified.
        """
        self._debug_hash = True
        try:
            return self._generar_hash()
        finally:
            # Always clear the flag, even if hashing raised.
            delattr(self, "_debug_hash")