From b0150a58dde5c4720f86ab884434c5c9767459ca Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 5 Feb 2025 12:14:36 +0100 Subject: [PATCH] Software Base --- TEST.eml | 1317 +++++++++++++++++ config.json | 6 + config/__pycache__/config.cpython-310.pyc | Bin 0 -> 1808 bytes config/config.py | 44 + main.py | 52 + .../__pycache__/mensaje_email.cpython-310.pyc | Bin 0 -> 2600 bytes models/mensaje_email.py | 81 + .../attachment_handler.cpython-310.pyc | Bin 0 -> 897 bytes .../__pycache__/email_parser.cpython-310.pyc | Bin 0 -> 3714 bytes .../markdown_handler.cpython-310.pyc | Bin 0 -> 1185 bytes utils/attachment_handler.py | 33 + utils/email_parser.py | 134 ++ utils/markdown_handler.py | 39 + 13 files changed, 1706 insertions(+) create mode 100644 TEST.eml create mode 100644 config.json create mode 100644 config/__pycache__/config.cpython-310.pyc create mode 100644 config/config.py create mode 100644 main.py create mode 100644 models/__pycache__/mensaje_email.cpython-310.pyc create mode 100644 models/mensaje_email.py create mode 100644 utils/__pycache__/attachment_handler.cpython-310.pyc create mode 100644 utils/__pycache__/email_parser.cpython-310.pyc create mode 100644 utils/__pycache__/markdown_handler.cpython-310.pyc create mode 100644 utils/attachment_handler.py create mode 100644 utils/email_parser.py create mode 100644 utils/markdown_handler.py diff --git a/TEST.eml b/TEST.eml new file mode 100644 index 0000000..e0bffa2 --- /dev/null +++ b/TEST.eml @@ -0,0 +1,1317 @@ +From: Miguel Angel Vera - Vetromeccanica S.r.l. +To: Miguel Angel Vera - Vetromeccanica S.r.l. +Subject: TEST Tables +Thread-Topic: TEST Tables +Thread-Index: AQHbd7PepN5tf2wDskaE081T3aMPKQ== +Date: Wed, 5 Feb 2025 09:53:55 +0000 +Message-ID: + +Content-Language: es-ES +X-MS-Has-Attach: +X-MS-TNEF-Correlator: +X-MS-Exchange-Organization-RecordReviewCfmType: 0 +msip_labels: +Content-Type: multipart/alternative; + boundary="_000_AS8PR08MB682139DC577AE00699D695149CF72AS8PR08MB6821eurp_" +MIME-Version: 1.0 + +--_000_AS8PR08MB682139DC577AE00699D695149CF72AS8PR08MB6821eurp_ +Content-Type: text/plain; charset="Windows-1252" +Content-Transfer-Encoding: quoted-printable + +Allego le email che riassumono il funzionamento del batch handling. Riporto= + sotto la parte principale. + + + +We confirm the following code sas per attached e-mail) are matching with wh= +at we agreed back in May therefore we can process P/N (customer article) b= +ut ALPLA should write in our supervision PLC a list where customer article = +is associated to AV, AV Desc, Product Family as agreed. + +This list will be constantly editable by ALPLA on a persistent memory as ag= +reed in order for Vetromeccanica to handle new customer articles except for= + new formats (new bottle shapes) which will require new recipes and commis= +sioning first. + +As per now we only have an excel list dated May 2020 which we cannot use fo= +r batch handling. + +ALPLA AV + +Blank Bottle P/N + +184 + +2638879 + +253 + +2688129 + +102 + +2638876 + + + + + + + + + + + +Agreed: + + * AV (must be forwared to Autefa, must be shown on your HMI=92s) + * AV Desc (must be shown on you HMI=92s) + * Product Family (must be used by Vetro to select right Recipe) + * Customer article Number (used by Vetro to find matching AV) + + + + + +HENKEL + +Alpla + +Vetromeccanica + +AUTEFA + +Before Changeover + +Sends IDH_BTL_NEXT number to be validated + + + + + + + +Data_To_EbConvey[38] + + + + + + + + + +Validates IDH_BTL_NEXT is valid number and send acknowledge + + + + + + + +Data_From_EbConvey[0].2 + + + + + +Step 1 + +Operator manually selects "Line Clearance" (?) and send signal "0" on "Calc= +ulatedBottlesRemainingToFill". + + + + + + + + + +Data_To_EbConvey[23] + + + + + + + +Step 2 + + + +Stops taking bottles out of trays and sends what is already on the tables a= +nd conveyors. + +Emptying Merger and Line + +Stops taking bottles out of trays and sends what is already on the tables a= +nd conveyors. + + + + + +N/A + +TG10 Send 0 in Bottles for Actual Batch + + + +Step 3 + +Operator verifies line is empty and sends "changeover request" signal after= + last bottle goes thorugh filler. + + + + + + + + + +Data_To_EbConvey[0].0 + + + + + + + +Step 4 + + + +Operator verifies line is empty and sends "line is busy with changeover", c= +onfirming on Popup screen + + + + + + + + + +Data_From_EbConvey[0].0 + + + + + +Step 5 + + + +Starts changeover + +Starts changeover + + + + + + + +Data_From_EbConvey[0].0 + +Data_From_EbConvey[0].0 + +Step 6 + + + + + +Ends changeover. Sends signal "Changeover is finished and ready. + +Ends changeover. Sends signal "Changeover is finished and ready. + + + + + + + +Data_From_EbConvey[0].1 + +Data_From_EbConvey[0].1 + + + + + + + + + + + + + + + + + +Step 7 + +Send Reset counters signal + +Reset Counters + + + + + +Data_To_EbConvey[0].1 + + + + + + + +Step 8 + +Send new value on "CalculatedBottlesRemainingToFill". + + + + + + + +Data_To_EbConvey[23] + + + + + + + +Step 9 + +Send Changeover Complete. To be considered "Production Ready" + +Starts conveying bottles. + +Finish Chanover Cycle + + + + + +Data_To_EbConvey[0].2 + + + + + + + + + + +--_000_AS8PR08MB682139DC577AE00699D695149CF72AS8PR08MB6821eurp_ +Content-Type: text/html; charset="Windows-1252" +Content-Transfer-Encoding: quoted-printable + + + + + + + +

+Allego le email che riassumono il funzionamento del batch h= +andling. Riporto sotto la parte principale.

+

+ 

+

+We confirm the following code sas per attached e-mail) a= +re matching with what we agreed back in May therefore we can process P/N (c= +ustomer article)  but ALPLA should + write in our supervision PLC a list where customer article is associated t= +o  AV, AV Desc, Product Family as agreed.

+

+This list will be constantly editable by ALPLA on a pers= +istent memory as agreed in order for Vetromeccanica to handle new customer = +articles except for new formats (new + bottle shapes) which will  require new recipes and commissioning firs= +t.

+

+As per now we only have an excel list dated May 2020 whi= +ch we cannot use for batch handling.

+ + + + + + + + + + + + + + + + + + + +
+

+ALPLA AV

+
+

+Blank Bottle P/N

+
+

+184

+
+

+2638879

+
+

+253

+
+

+2688129

+
+

+102

+
+

+2638876

+
+

+ 

+

+ 

+

+ 

+

+ 

+

+ 

+

+Agreed:

+
    +
  • +AV  (must be forwared to Autefa, must be shown on you= +r HMI=92s)
  • +AV Desc  (must be shown on you HMI=92s)
  • +Product Family (must be used by Vetro to select right Reci= +pe)
  • +Customer article Number (used by Vetro to find matching AV= +)
+

+ 

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ 

+
+

+HENKEL

+
+

+Alpla

+
+

+Vetromeccanica

+
+

+AUTEFA

+
+

+Before Changeover

+
+

+Sends IDH_BTL_NEXT number to be validated

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[38]= +

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Validates IDH_BTL_NEXT is valid number and se= +nd acknowledge

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_From_EbConvey[0].2

+
+

+ 

+
+

+ 

+
+

+Step 1

+
+

+Operator manually selects "Line Clearanc= +e" (?) and send signal "0" on "CalculatedBottlesRemaini= +ngToFill".

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[23]= +

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Step 2

+
+

+ 

+
+

+Stops taking bottles out of trays and sends w= +hat is already on the tables and conveyors.

+
+

+Emptying Merger and Line

+
+

+Stops taking bottles out of trays and sends w= +hat is already on the tables and conveyors.

+
+

+ 

+
+

+ 

+
+

+N/A

+
+

+TG10 Send 0 in Bottles for Actu= +al Batch

+
+

+ 

+
+

+Step 3

+
+

+Operator verifies line is empty and sends &qu= +ot;changeover request" signal after last bottle goes thorugh filler.

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[0].0

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Step 4

+
+

+ 

+
+

+Operator verifies line is empty and sends &qu= +ot;line is busy with changeover", confirming on Popup screen +

+

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_From_EbConvey[0].0

+
+

+ 

+
+

+ 

+
+

+Step 5

+
+

+ 

+
+ +

+Starts changeover

+
+

+Starts changeover

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_From_EbConvey[0].0

+
+

+Data_From_EbConvey[0].0

+
+

+Step 6

+
+

+ 

+
+

+ 

+
+

+Ends changeover. Sends signal "Changeove= +r is finished and ready.

+
+

+Ends changeover. Sends signal "Changeove= +r is finished and ready.

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Data_From_EbConvey[0].1

+
+

+Data_From_EbConvey[0].1

+
+

+ 

+
+

+ 

+
+ +

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+ +

+ 

+
+

+ 

+
+

+Step 7

+
+

+Send Reset counters signal

+
+ +

+Reset Counters

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[0].1

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Step 8

+
+

+Send new value on "CalculatedBottlesRema= +iningToFill".

+
+ +

+ 

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[23]= +

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+Step 9

+
+

+Send Changeover Complete. To be considered &q= +uot;Production Ready" 

+
+

+Starts conveying bottles.

+
+

+Finish Chanover Cycle

+
+

+ 

+
+

+ 

+
+

+Data_To_EbConvey[0].2

+
+

+ 

+
+

+ 

+
+

+ 

+
+

+ 

+
+
+
+ + + +--_000_AS8PR08MB682139DC577AE00699D695149CF72AS8PR08MB6821eurp_-- diff --git a/config.json b/config.json new file mode 100644 index 0000000..e8bedec --- /dev/null +++ b/config.json @@ -0,0 +1,6 @@ +{ + "input_dir": "D:\\Proyectos\\Scripts\\EmailCrono", + "output_dir": "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\04-InLavoro\\HENKEL\\93040 - HENKEL - BowlingGreen\\Description\\HENKEL - ALPLA - AUTEFA - Batch Data", + "cronologia_file": "cronologia.md", + "attachments_dir": "adjuntos" +} \ No newline at end of file diff --git a/config/__pycache__/config.cpython-310.pyc b/config/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b771230be7a4225f7384cf291ce9571a1ffd135b GIT binary patch literal 1808 zcmZ`(OK;mo5Z+m?Xj;}Iwv#^66b0P04U1YxPA!U}C}6Y+P(Uc)Lr}p0L2xLUfSGZBcHn8Si(AW@JZ==zRX*$Dc{C^n5?l!X|SW{q2FUmBIiqrO&T zXeqQu>heB*efE>ip2bm-_AWs^8HiVJQO*UESF4&l;kVcZX=qhfs! z`nFD`*rgMiXfBxa`8y<9Af*I;-M{Ltf&=Nn-G43)*TtyNV1rm21z}DXM5p1Z3Y9UlG8MD_}FU1oxe{;C2b_r}~f#YNqRJriTy4O;RM_e5G&m7O(0LNEn6F z@nn^|W2tX2mYX~L>lmtet%PC?#tpXKAhC2cQ%k=~ zf+(6LslLbGwmf%v_J6S+a;#fT!>PXEoLq3B0qXCI+m*n#4qqF+_Q?Vxw$HwsEIwy( zZ4_IbYJ)$f8ynuY4t0Id+!TJDe{g9yj)TC|gJ7KDbj0&U5ImWNqjE=c1KLj1%uW3% z3+v|#o^@Do-W6ecml?zTn={y+sDr1x77fR98m{JD>IH6$vI4wNF<~vT9pimd#`P_e L+;)Wr9yR~Jy+DO= literal 0 HcmV?d00001 diff --git a/config/config.py b/config/config.py new file mode 100644 index 0000000..162a8b1 --- /dev/null +++ b/config/config.py @@ -0,0 +1,44 @@ +# config/config.py +import json +import os + +class Config: + def __init__(self, config_file='config.json'): + self.config_file = config_file + self.config = self._load_config() + + def _load_config(self): + if not os.path.exists(self.config_file): + default_config = { + 'input_dir': '.', + 'output_dir': '.', + 'cronologia_file': 'cronologia.md', + 'attachments_dir': 'adjuntos' + } + self._save_config(default_config) + return default_config + + with open(self.config_file, 'r', encoding='utf-8') as f: + return json.load(f) + + def _save_config(self, config): + with open(self.config_file, 'w', encoding='utf-8') as f: + json.dump(config, f, indent=4) + + def get_input_dir(self): + return self.config.get('input_dir', '.') + + def get_output_dir(self): + return self.config.get('output_dir', '.') + + def get_cronologia_file(self): + return os.path.join( + self.get_output_dir(), + self.config.get('cronologia_file', 'cronologia.md') + ) + + def get_attachments_dir(self): + return os.path.join( + self.get_output_dir(), + self.config.get('attachments_dir', 'adjuntos') + ) \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..84ae2f5 --- /dev/null +++ b/main.py @@ -0,0 +1,52 @@ +# main.py +import os +from pathlib import Path +from utils.email_parser import procesar_eml +from utils.markdown_handler import cargar_cronologia_existente +from config.config import Config + +def main(): + config = Config() + + # Debug prints + print(f"Input directory: {config.get_input_dir()}") + print(f"Output directory: {config.get_output_dir()}") + print(f"Cronologia file: {config.get_cronologia_file()}") + print(f"Attachments directory: {config.get_attachments_dir()}") + + # Ensure directories exist + os.makedirs(config.get_output_dir(), exist_ok=True) + os.makedirs(config.get_attachments_dir(), exist_ok=True) + + # Check if input directory exists and has files + input_path = Path(config.get_input_dir()) + if not input_path.exists(): + print(f"Error: Input directory {input_path} does not exist") + return + + eml_files = list(input_path.glob('*.eml')) + print(f"Found {len(eml_files)} .eml files") + + # mensajes = cargar_cronologia_existente(config.get_cronologia_file()) + mensajes = [] + print(f"Loaded {len(mensajes)} existing messages") + mensajes_hash = {msg.hash for msg in mensajes} + + for archivo in eml_files: + print(f"Processing {archivo}") + nuevos_mensajes = procesar_eml(archivo, config.get_attachments_dir()) + for msg in nuevos_mensajes: + if msg.hash not in mensajes_hash: + mensajes.append(msg) + mensajes_hash.add(msg.hash) + + mensajes.sort(key=lambda x: x.fecha) + + output_file = config.get_cronologia_file() + print(f"Writing to {output_file}") + with open(output_file, 'w', encoding='utf-8') as f: + for msg in mensajes: + f.write(msg.to_markdown()) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/models/__pycache__/mensaje_email.cpython-310.pyc b/models/__pycache__/mensaje_email.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c6266f5e9a7214b9d37e41b826b9d7ae0c9d70f GIT binary patch literal 2600 zcmaJ?TW{Mo6eg*QWyQ|YG-;cn?Xq<#rmXHBx}if+bPamR_E2|N1`Jgk2&$>r$}Gtt zm0mRYDSg{dC<@rvQ~$tz3}5%uztE?hL)EF>4U<6fkUYG8=lhs7n=XOx?t?#je=HF4 zH&$jp11fhQ$=i@|!s&qcRY`pcbv9rL^BKi9V_+nvZzh&+QL;_A$*q@!TgS||Io%*q^Kw0Ar-h|T8d-_B*9!9AQ52E`?7!TTAduAoHak0CQL1M0^FI4gw;w92Dr)lR$wAc3)p9V1lVJ>VbTb&A#ynH?52h3H zgQGOhq-qAeD2)U-_CwiMCgz^4Oc@RKg#*VE7z?6_8N>n+i;%q`Vi}5eH~7uYPa=B( zD`v8@(-rYBmmPg+cSV+Foh0MYKz5SqHiHPS^V;x*a)KaE<2(p(1=)bCK^xSDv!j?422zr8%Y(0EYt<^5RL& zoKSJ;uvr>ofNMf2Ic#z^HW9Fqm0M$*TO44U7$d8+a0ZCjT3ZoCUC%Ded*RLXqW)w0 zG!F9&Ok)c&`+>+3EO!7oD9vB6tNy6hi*oH{chcXlUmd+;uIts5C5MAJha!jDQ!cl?&vT7a zm7T^>c-2dgv{UxC!6Fn0QxV4UDkLZu`JWK1~_ zVnjJB=_}9xXk$>CudNXS+YlE@#x1mTt8s%sJNB5|JWF(q99ZHz@Uo{hz|loKd9B1z z&TBpDumvskYCK@$wu{EbK0s|DEDI`owT$n2voRdmoK&~!j1c&0|fZ)*Mp`dGz>9dsrtP_S7SX8IEa(jTqjyyj~gt63kJ1 z@4?a)Xx}akxt+7I!5xr4h8z?3fmvq7z7%lTA<%E3#+GbsPYCb`a2{*5ov^XP8_3g= z0dkA&!T>tG#n>vGDW!z2TAERXr3v+p-YHIfxx3Z!!&cFedtZ4Sxp}Kt{duaD>-QoF zN|$v(^NPmqt=l(0|GLvDzu(*wt1!8;LfMUDWyifV6OmUJ=b%-fq>GE#Z^OeQx_xE! zM0PY3m#||^cSLCy^i0`2!qT%UBBF%(cA6zmfT5O_IKNR<0qeZm|0oDV4F+6?Bynno zo(H(k(FSwq5?Bp@9uiw-%NPV-%mu-`@}E2ja{+-Oc@hv;p^}3n=Jc3}mBPw_kjL~_ zkTI(N+!&h^aDNOfC5WfMk)Ea8SjH*7L|qh}TnMDOz^RpyX3soU9bG>S_!7)0OdYD* zkhH41ltDnFuESZ@|K}{4K6Ao?i{Yv|u<;xq59Af*bj&7|1w!!J7}4B>H)Af(E#m65F8$bgQ`cYz;s%m#TB$LOa$Jh zzy)VEp)XvkIg%gUBubG(T#2G#r6_UQLIz_y+6>y@l!tKFZ^PtYTgxi_ZVG Wf^u$Eo4t)hl!&*ZgLH{5vwr}yuxiTy literal 0 HcmV?d00001 diff --git a/models/mensaje_email.py b/models/mensaje_email.py new file mode 100644 index 0000000..870c2b8 --- /dev/null +++ b/models/mensaje_email.py @@ -0,0 +1,81 @@ +# models/mensaje_email.py +import re +import hashlib +from datetime import datetime +from email.utils import parseaddr, parsedate_to_datetime + +class MensajeEmail: + def __init__(self, remitente, fecha, contenido, subject=None, adjuntos=None): + self.remitente = self._estandarizar_remitente(remitente) + self.fecha = self._estandarizar_fecha(fecha) + self.subject = subject + self.contenido = self._limpiar_contenido(contenido) + self.adjuntos = adjuntos if adjuntos else [] + self.hash = self._generar_hash() + + def _limpiar_contenido(self, contenido): + if not contenido: + return "" + + # Eliminar líneas de metadatos + lines = contenido.split('\n') + cleaned_lines = [] + + for line in lines: + # Skip metadata lines + if line.strip().startswith(('Da: ', 'Inviato: ', 'A: ', 'From: ', 'Sent: ', 'To: ')) or line.strip().startswith('Oggetto: '): + continue + cleaned_lines.append(line) + + # Unir las líneas + text = '\n'.join(cleaned_lines) + + # Reemplazar 3 o más saltos de línea por dos + text = re.sub(r'\n{3,}', '\n\n', text) + + return text.strip() + + def to_markdown(self): + fecha_formato = self.fecha.strftime('%Y%m%d%H%M%S') + md = f"## {fecha_formato}|{self.remitente}\n\n" + if self.subject: + md += f"**Asunto**: {self.subject}\n\n" + md += self.contenido + "\n\n" + if self.adjuntos: + md += "### Adjuntos\n" + for adj in self.adjuntos: + md += f"- [[{adj}]]\n" + md += "---\n\n" + return md + + def _estandarizar_remitente(self, remitente): + if 'Da:' in remitente: + remitente = remitente.split('Da:')[1].split('Inviato:')[0] + elif 'From:' in remitente: + remitente = remitente.split('From:')[1].split('Sent:')[0] + + nombre, email = parseaddr(remitente) + if not nombre and email: + nombre = email.split('@')[0] + elif not nombre and not email: + nombre_match = re.search(r'([A-Za-z\s]+)\s*<', remitente) + if nombre_match: + nombre = nombre_match.group(1) + else: + return "Remitente Desconocido" + + nombre = re.sub(r'[<>:"/\\|?*]', '', nombre.strip()) + nombre = nombre.encode('ascii', 'ignore').decode('ascii') + return nombre + + def _estandarizar_fecha(self, fecha): + if isinstance(fecha, str): + try: + return parsedate_to_datetime(fecha) + except: + return datetime.now() + return fecha + + def _generar_hash(self): + texto = f"{self.remitente}{self.fecha.isoformat()}{self.contenido}" + return hashlib.md5(texto.encode()).hexdigest() \ No newline at end of file diff --git a/utils/__pycache__/attachment_handler.cpython-310.pyc b/utils/__pycache__/attachment_handler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a926e51c4aa48e4e60a775be238fe0790b742e03 GIT binary patch literal 897 zcmY+DPjAyO6u|wjY1(G(Ksyc&h)WR~0tq3FX&@$!Oh`jQgiMuNdu>XZM7F~=l$^#D z?E_3mtq=#klCPZh6_8*&XE4gM-ix2#^LsxhPPVerMj)#jU&dc4Lf@UUECigVp!zl# zfdo!ac=^O3u6+^`K?MDXLMrec>a%mmM7txm^RIMZ;)Cvh>bn@B63x*9Riq@^7ibR| zJSPGR@E5cqhECbMQDX2&MQhJ1=>n^wMAN3Af_=gjGyc3)vIU;Ep;E(iN-7#BFY@MOc%#oID;XOjj`Fg+RY692w$}7Ej_umN?K(Nf^x&ztie`K)M_jf-8 zZ@0ELZ{3Tc^2wcjiwCdz*m^>aG9fKd!*h70$dbchzh{H7G=qaUktv_KptMvMLrXGk zSNW^2Qjb&LbQ_GZuBwy!A zY~+!#jY9>Ke#bU)u8g#Sh}D3LX^|S}=B3$esAR}gVR+aZWvP*AEV4mb$oE;;cD*j= z79WJ&+O44^Bw9A)T87lM!SOew*5QOd9*fQBrOJ-w2pU9tBNgXHN6%+GPIgq5W>I0{ zL`U2hKAOy+&S1h*kw~?XAKTuz;7af-LeE{s2Vi{6V01|r2PDAGuHp3ypEA5k7!L3{ z_HpnlU~9O2!RYEwM%7K&)@RE7JF4f5yWRC_4Z37*K*0K2vrH6;+;-ndyIdb?LRIn` D4F>6) literal 0 HcmV?d00001 diff --git a/utils/__pycache__/email_parser.cpython-310.pyc b/utils/__pycache__/email_parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba11ac63c12462abd95131f056bab4004c1851cc GIT binary patch literal 3714 zcmZu!-H#kc5ucv#nVtQ(593@2<{%Mb^Km#NA;ED>h#e#Z*p?h&7@f76+n(E-^X$yJ zr`PA(%pju+!jZ6eNOH0p$@1i3k3NJj^Q+(n`EVLdX(2e${(E&X)Jo)iqW9 zSzYz3y1d!+HT-^j=?{bKl&1ZIK2H90e7uWR{04+;oTXYwy`E}A7BWV>k?KPuG!$>9 z=FkjH#k153E$FS(9y+0;WOnKfz0gyvw6Mbi z-bBB|TfB{anX_wJw=>6yw95}zZIALa>F;-0-FRLf06tTdk%Oe@fWEg`tuSRkQ zee=CI8p~vRoLz1lX;)f z0gisF=v_1et@t-IQY&Gt1HED;n=-D?*d^`8pDVr8@ny_dWt4{a%RsBlf%dUh8qj{H zvPuhDlUoCfuWYH`(+-%_OZ$M`)Wn73VYy?tw~#7b-0iQPQzaChqdTH9va?GK)&u1}2ha{VzUBImKtUPJrh zMUY>@iEec8q)a)EhKFQm^o(pxo3Wp2xVFxL`GHZpgIM-td`H$+Aw@E(t#o%> zNNDtpvo%X$;$g_MC z0h1P+F$F?zaj}ije(jwf#94f2Brbf74ovQ^-!(OxojII5-yvIp{u01g?Md#Kh-Vd|eNksCA@qG~c2O=L*e=W`=dh-LQx=rC= zQs{0c8Sh1DS{rGc)pj%*#Tl=?I2#XR0kEnKJfUdQ<{jGH+|83L3`9IiqkfFHg}3=x zyw@W~$OdqTQJTowFJvTSLF(>mXwoBvW&tD=4cgWe`CbvW9z9xb;k^c=p+&A!2m(5R zOreGTP8{)A^q{Qm1z&qb4FA9-Md(7Vd@*$C803|L{G>J)4w@!eT!hVj8b?{od+M`x zit*Mi+^{z3dtqs1b24|`dD5xqb&4$zpS291nYxMQvKH$wpLGs9`a07Om(32dnR}=+ z^YA&{*P+pA#3Natw~P+c@3xG9xrT3G4d!ATWXv~o7C`3e;w-l5ldnDck?qOnFQOHn z!j~RF{%@BmT>`du4X)p3Q?n#6H?AX9n)o{~n3lP(YQ@?fK#Tr6NK? zfFyWJ+NE`XRKU&30bIF&Ef0DCllUEacSgW^z4A+c>diE6-_*s=N`KcUXuZ#N8;}Px zZFGeooj305fN7^}ys1^rt@B570?hfvQ|2Vca{}2cg98Q}YGQsg= zl(Bz39_z@J5@`@;a!Zf5?pp&Y6WW(+U2F+@xZ2!C#MS62%TIjr!*3y6lI#Pdcbmuk zoW~PuEVo~ItG1ItmJ6yJ>{y6g6cbO4R}te!QbzrqAp*B+)t-oli3A_l)^^T)^k!BBBCenNFAk`UTTP8lziexC|1mwwrN#HWmZ;YBbpKI+~PJx z>cWSq%%u|d_W>fyomwb;Y($BHH9YA^myn?J&>JWUT`KrM9h7y}bFA?P*zOKHG@;ywaCsytnHWuzX zjAVZ&GaxCQ7t|qDzNo zwvdC6U*Lhu@J|;K`@bFQ8_fKV?U7u~=&pp8s=O3jgn{CZ%F6|e&=tfLklGpHpPV$= zQlT3fTgB^(YND~2q{=vDpKx^utV@fHqj*K|pte(C9zLUnZTuhNdOK0ZQ`kyD!&%ym n0z{qe#2t^zqS`%=)yHXkfeNgGY9kl7p}$AH;&u08@WTH9xl*;F literal 0 HcmV?d00001 diff --git a/utils/__pycache__/markdown_handler.cpython-310.pyc b/utils/__pycache__/markdown_handler.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f80d80427828c4cd1b898a730a91b8e8adddddd GIT binary patch literal 1185 zcmaJ>&5j&35VqaF)6=sFB!?vIA<}B3bd(*4h!8}pg(!=ZLlTe{31!sL$h5t)J@nt& zp8QPQ5>mo3LcGDWIPnO)ijQ#G3lD%0OSyN0_Jpk}SCy;EUzOcmk46!K@!RB=`R^`5 ze^}-GAfS8!B<}!Yh~WZFPggv}pp$}>WJ<8LIfYZYQx{~Hd2i6vV|X7W{xL+N#|bte zql&A%91U0)Va{9U zr?@4WtlbW^&bNTWh56dSs0wc*dHp%Y$j&D~+F5(d?QrXAcMm;%(0bZi`yG-WAKg;H zI%H?M3KoOTX?^VrrTul(q10)EHf$X=T*o?Ken;8?;EGtFoo7hoedeIUDQSmo%tAJJ zgLG_j!--Lc_t0zHMtW#1t)uaJq&9S@2inmQ8@>fh7#-cRl_l|GgT=~Qs)OxMZPR#_ z)hw^(D{p&yJBn7zB64i~kN-J2IRQYPR3dK@ zV#wqp<50Hv6^SA2|MeNWn|>|ompp^x()|p)luY{zzAx&kPM0b#WLi@3J*%HrvqM_3 zf{RJ>(nLE&E>%{S&8|fw?RlGM^!@k**PaNYarn|lu*8h!R9phuBeZeTu%K{L0q$$20h{#nh3nI3Y F{0kEVG=2a8 literal 0 HcmV?d00001 diff --git a/utils/attachment_handler.py b/utils/attachment_handler.py new file mode 100644 index 0000000..80679cb --- /dev/null +++ b/utils/attachment_handler.py @@ -0,0 +1,33 @@ +# utils/attachment_handler.py +import os +import hashlib +import re + +def guardar_adjunto(parte, dir_adjuntos): + nombre = parte.get_filename() + if not nombre: + return None + + nombre = re.sub(r'[<>:"/\\|?*]', '_', nombre) + ruta = os.path.join(dir_adjuntos, nombre) + + if os.path.exists(ruta): + contenido_nuevo = parte.get_payload(decode=True) + hash_nuevo = hashlib.md5(contenido_nuevo).hexdigest() + + with open(ruta, 'rb') as f: + hash_existente = hashlib.md5(f.read()).hexdigest() + + if hash_nuevo == hash_existente: + return ruta + + base, ext = os.path.splitext(nombre) + i = 1 + while os.path.exists(ruta): + ruta = os.path.join(dir_adjuntos, f"{base}_{i}{ext}") + i += 1 + + with open(ruta, 'wb') as f: + f.write(parte.get_payload(decode=True)) + + return ruta diff --git a/utils/email_parser.py b/utils/email_parser.py new file mode 100644 index 0000000..90798b3 --- /dev/null +++ b/utils/email_parser.py @@ -0,0 +1,134 @@ +# utils/email_parser.py +import email +from email import policy +from email.parser import BytesParser +from datetime import datetime +import re +from pathlib import Path +from bs4 import BeautifulSoup +from email.utils import parsedate_to_datetime +from models.mensaje_email import MensajeEmail +from utils.attachment_handler import guardar_adjunto + +def _html_a_markdown(html): + soup = BeautifulSoup(html, 'html.parser') + + # Convert tables, keeping all newlines + for table in soup.find_all('table'): + rows = table.find_all('tr') + + if rows: + markdown_table = [] + # Get maximum width for each column + max_widths = [] + for row in rows: + cells = row.find_all(['th', 'td']) + while len(max_widths) < len(cells): + max_widths.append(0) + for i, cell in enumerate(cells): + max_widths[i] = max(max_widths[i], len(cell.get_text().strip())) + + # Build table rows + header_row = rows[0].find_all(['th', 'td']) + header = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i]) + for i, cell in enumerate(header_row)) + ' |' + separator = '|' + '|'.join('-' * (width + 2) for width in max_widths) + '|' + + markdown_table.append(header) + markdown_table.append(separator) + + for row in rows[1:]: + cells = row.find_all(['td', 'th']) + row_text = '| ' + ' | '.join(cell.get_text().strip().ljust(max_widths[i]) + for i, cell in enumerate(cells)) + ' |' + markdown_table.append(row_text) + + # Join with newlines and replace + new_text = '\n' + '\n'.join(markdown_table) + table.replace_with(soup.new_string(new_text)) + + # Handle basic HTML elements + for br in soup.find_all('br'): + br.replace_with('\n') + + # Get text content + text = soup.get_text() + + # Only extract subject and remove basic email headers + lines = text.split('\n') + cleaned_lines = [] + subject = None + + for line in lines: + # Extract subject if present + if line.startswith('Oggetto: '): + subject = line[9:].strip() + continue + + # Skip only the most basic email headers + if line.startswith(('Da: ', 'Inviato: ', 'A: ', 'From: ', 'Sent: ', 'To: ')): + continue + + # Keep the line as is, with all its spacing + cleaned_lines.append(line) + + # Join lines preserving all newlines + text = '\n'.join(cleaned_lines) + + return subject, text + +def procesar_eml(ruta_archivo, dir_adjuntos): + with open(ruta_archivo, 'rb') as eml: + mensaje = BytesParser(policy=policy.default).parse(eml) + + remitente = mensaje.get('from', '') + fecha_str = mensaje.get('date', '') + fecha = _parsear_fecha(fecha_str) + + contenido = "" + subject = None + adjuntos = [] + + if mensaje.is_multipart(): + for parte in mensaje.walk(): + if parte.get_content_type() == "text/plain": + text = parte.get_payload(decode=True).decode(parte.get_content_charset() or 'utf-8', errors='ignore') + contenido += text + elif parte.get_content_type() == "text/html": + html_content = parte.get_payload(decode=True).decode(parte.get_content_charset() or 'utf-8', errors='ignore') + part_subject, text = _html_a_markdown(html_content) + if part_subject and not subject: + subject = part_subject + contenido += text + elif parte.get_content_disposition() == 'attachment': + ruta_adjunto = guardar_adjunto(parte, dir_adjuntos) + if ruta_adjunto: + adjuntos.append(Path(ruta_adjunto).name) + else: + if mensaje.get_content_type() == "text/html": + html_content = mensaje.get_payload(decode=True).decode(mensaje.get_content_charset() or 'utf-8', errors='ignore') + subject, contenido = _html_a_markdown(html_content) + else: + contenido = mensaje.get_payload(decode=True).decode(mensaje.get_content_charset() or 'utf-8', errors='ignore') + + return [MensajeEmail(remitente=remitente, fecha=fecha, contenido=contenido, subject=subject, adjuntos=adjuntos)] + +def _parsear_fecha(fecha_str): + try: + fecha = parsedate_to_datetime(fecha_str) + return fecha.replace(tzinfo=None) # Remove timezone info + except: + try: + fecha_match = re.search(r'venerd=EC (\d{1,2}) (\w+) (\d{4}) (\d{1,2}):(\d{2})', fecha_str) + if fecha_match: + dia, mes, año, hora, minuto = fecha_match.groups() + meses_it = { + 'gennaio': 1, 'febbraio': 2, 'marzo': 3, 'aprile': 4, + 'maggio': 5, 'giugno': 6, 'luglio': 7, 'agosto': 8, + 'settembre': 9, 'ottobre': 10, 'novembre': 11, 'dicembre': 12 + } + mes_num = meses_it.get(mes.lower(), 1) + return datetime(int(año), mes_num, int(dia), int(hora), int(minuto)) + except: + pass + return datetime.now() \ No newline at end of file diff --git a/utils/markdown_handler.py b/utils/markdown_handler.py new file mode 100644 index 0000000..2991e3d --- /dev/null +++ b/utils/markdown_handler.py @@ -0,0 +1,39 @@ +# utils/markdown_handler.py +import os +import re +from datetime import datetime +from models.mensaje_email import MensajeEmail + +def cargar_cronologia_existente(archivo): + mensajes = [] + if not os.path.exists(archivo): + return mensajes + + with open(archivo, 'r', encoding='utf-8') as f: + contenido = f.read() + + bloques = contenido.split('---\n\n') + for bloque in bloques: + if not bloque.strip(): + continue + + match = re.match(r'## (\d{14})\|(.*?)\n\n(.*)', bloque.strip(), re.DOTALL) + if match: + fecha_str, remitente, contenido = match.groups() + fecha = datetime.strptime(fecha_str, '%Y%m%d%H%M%S') + + adjuntos = [] + if '### Adjuntos' in contenido: + contenido_principal, lista_adjuntos = contenido.split('### Adjuntos') + adjuntos = [adj.strip()[2:-2] for adj in lista_adjuntos.strip().split('\n')] + contenido = contenido_principal.strip() + + mensajes.append(MensajeEmail( + remitente=remitente, + fecha=fecha, + contenido=contenido, + adjuntos=adjuntos + )) + + return mensajes +