From b9c3024e045543a61d175bfde9ee6c49294ed4d0 Mon Sep 17 00:00:00 2001 From: Miguel Date: Wed, 5 Feb 2025 12:58:41 +0100 Subject: [PATCH] Funcionando mejor --- config.json | 2 +- main.py | 21 +- .../__pycache__/mensaje_email.cpython-310.pyc | Bin 2632 -> 3051 bytes models/mensaje_email.py | 45 ++- .../__pycache__/email_parser.cpython-310.pyc | Bin 3865 -> 7200 bytes utils/email_parser.py | 362 ++++++++++++------ 6 files changed, 311 insertions(+), 119 deletions(-) diff --git a/config.json b/config.json index e8bedec..f9b1ad9 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "input_dir": "D:\\Proyectos\\Scripts\\EmailCrono", + "input_dir": "C:\\Trabajo\\VM\\40 - 93040 - HENKEL - NEXT2 Problem\\Reporte\\Emails", "output_dir": "C:\\Users\\migue\\OneDrive\\Miguel\\Obsidean\\Trabajo\\VM\\04-InLavoro\\HENKEL\\93040 - HENKEL - BowlingGreen\\Description\\HENKEL - ALPLA - AUTEFA - Batch Data", "cronologia_file": "cronologia.md", "attachments_dir": "adjuntos" diff --git a/main.py b/main.py index 84ae2f5..469ff41 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ from pathlib import Path from utils.email_parser import procesar_eml from utils.markdown_handler import cargar_cronologia_existente from config.config import Config +import hashlib def main(): config = Config() @@ -31,19 +32,35 @@ def main(): mensajes = [] print(f"Loaded {len(mensajes)} existing messages") mensajes_hash = {msg.hash for msg in mensajes} + + total_procesados = 0 + total_nuevos = 0 + mensajes_duplicados = 0 for archivo in eml_files: - print(f"Processing {archivo}") + print(f"\nProcessing {archivo}") nuevos_mensajes = procesar_eml(archivo, config.get_attachments_dir()) + total_procesados += len(nuevos_mensajes) + + # Verificar duplicados for msg in nuevos_mensajes: if msg.hash not in mensajes_hash: mensajes.append(msg) mensajes_hash.add(msg.hash) + total_nuevos += 1 + else: + mensajes_duplicados += 1 + print(f"\nEstadísticas de procesamiento:") + print(f"- Total mensajes encontrados: {total_procesados}") + print(f"- Mensajes únicos añadidos: {total_nuevos}") + print(f"- Mensajes duplicados ignorados: {mensajes_duplicados}") + + # Ordenar todos los mensajes por fecha mensajes.sort(key=lambda x: x.fecha) output_file = config.get_cronologia_file() - print(f"Writing to {output_file}") + print(f"\nWriting {len(mensajes)} messages to {output_file}") with open(output_file, 'w', encoding='utf-8') as f: for msg in mensajes: f.write(msg.to_markdown()) diff --git a/models/__pycache__/mensaje_email.cpython-310.pyc b/models/__pycache__/mensaje_email.cpython-310.pyc index fdaa767c42b6cccfa295d5080d464ed415cf7f06..e4a679143fc0fe235bde656de7cce2482cd5f854 100644 GIT binary patch delta 947 zcmYjP&ui2`6wdtEB%5v)TDO#1Thq!`mWnKjc<@l6f?gC#^`u0mER;22 z@lYuAW)5CFB=j$k-aL5e#ecv{KoC57R4>JYZ+1)R4D-#r_r7^E^S=37`!!PyY}?YH z{aioZemD84`T-9OFm)JfFv5=jF$yU|8!%@Y!#PUuww90>Khj>$Cp4xR%C!tLlB00` z!lF5n3ww4%GRjI!k7-VlQcSW^T*?vB_H2eTJ%&ArKH?Y*$2xHIW~^rhGvOed=%aOc zydKdkHZi6_1diR%8kO`mdANd|l>#|~YD=!RL~Gw|jur!KTW81~=q>T~?yckuTUNtN zkI392mb@Tgzg_5Vug3)|=x*SP!jQbzaXsEJ(l2D$+N^jT?h4Mf(jVl;(gGBNB8n_R zk%_NijH+~C%P5;vO=+4Pu=@6l->7=o=I0rimoeM8uywDj}bE70D z!7WV{OP?D}w3;3m-_b_unQOxmcrR4wfe~pLVhF0F?c$tdB-avj$wy@kB2*yIVH{P= zlH$RvN(zd^I7Uj113as3(}XZ1Ge;WGsGM1#-9)bCD68b!5Grf-kKkO(Id6}-;L>&a z!r5`d9q073@V&t4x!~g+r^`j??sBK?hAs;nE`aDdUeIm(!u9;q6X7uKcy6~Bgcm9I z`rPpu7m@$Q_gpc~=|ZY#$S))4?u<9@C-8!p3sXu+@|3Tnh26w}j z^NOP6lC}a>emS8)6~b&K5`pZx9e)7N-mLNtSJw~1G0BSfel*^YHp{iSSCBlM0;m}a#H1F& n$U^j=WloWFy}XVZ>2dkN^pa_twTda#A7~ik$J|eUmtXt`+?@1b delta 518 zcmX|8O=}ZD7@l`NcE)WsVy!mS#)5~!LP`sMTs$aLdgx87ml6*NvnDAvo6fEhbeEn4 z&kpLL2(ySkK=kg_KcL`(zrd>}z4%Td&hR|%^9(O9Gtc~7{<73eqbMY}UhTdc1dR{P zH}p6_D-hd=>V&Lo(A3Vvn!qQ zHx0vB+RyA_E9gxo$ynF?ARUcsYLc2|kNa_8|FCtt$=6m}_y`UhLdHSSRZ5|Ox`|s+ zJKzQ~hlo|;9}b2{tQX{5V|%-`<Tq8MG8_abn^%0fj=b@9 z;E!auHi}k~(GV5KhSZ&Uc4g8%G{XHLh*N(fxHRy?2pwnoBVTY|bbY=tie=Jcw1M)^ zL{1X8fG53)Wc*B~?P`0Pa6ZOquV|UJ!MM(iJ8Z%-#@R>{$EGd2`kppC$<2wLu?-{B zxy9`}`p4ReW@^&heuHTfNM+oT7OzM6fanqnRiqaO_(mKxg zLamLz4_9Rv#6dfV+CzUkN_^htL3kZWX2v7XEHrAx&0UV35|N{8dJ}Twz;_3;5P0- zW&FO9)ukI!_)5@zO6pn2xh;bmRQ7q2wt2V~2po5i7NW)xQLmoA8uWrv zf}~@Os~`4Bw8^>lxVkc0-N4bJ=)9#?L8-Db+6bts`ZgpOtgW4E=d5;_x;X3}Eb?t0+!K%}`Dg>jJE#cOUSYipz(B;g`iLMcDl z#aQ9@WOq7zO(cUZ&Gwa8>A??%?#~NeW6O)l{9Cv3b#_1O0CNY3QqyH{Qa#{T+NC(a`P$) z#k?3GtzM$OoGR=qzM$dOM@fvV_D+xnF;^lo-=Z4pV$eR<9uZz)RI+{l9=($4-oJ&? z6OZI2w5lWdGeFr7YZG=`!P_ptY>#~ewcOM>0P7yR+|dOczxZ7w9X;2Vzb#Iqc#nx^ zsagq9$*+384`{kE6mPtahLjFozjaC*pYEpX$tdF8^m;h#k|r#lkCL7rrEe@B3<3Ta ze!PvvXjX5g)EvTa8dfaRp#{}ivtOI@#p#-kdAuUd;O)qFA43J9ky3?ULNPL^0zlj5 z`igd!jh|N++2H1$K0zB@YHlBtD@9r2x^rhoC&=H`c~xEef1Bvzf5ZjXd2J8Sof+x3 zKh`!+_qB=H*M6*J24LXTiIrLCQRi-7M=x6%x1n^>$m|_VIwMm`N^ZDH>Of^jRZ9rJrHjg$pGk49bPP z1%btPy1ZU|=b;skmn*8TIkaL!&85D5^Orcs8qT)PZEo*YGn4kUhux8B+&m5Q>2Ci9 z=CcX&IU^fyDVLjt!#b=ZE}`!)I5{T;uPj-hP!MN{jty4J*FhGkj-TaYg1G2 z-eucY%iS(vwVX@N_BgtmEN=Xwz8F+GC@g9mN{DYlK3C zHNXBQ5`{G_m_(*bCFm3x!Clf@b^>yKggfH*(EXl~J7mYn0w@g1t!QJE%6o=*9?dxm zbF&vj(LLtX2L6riR>H=-cUZGSj~qUF^f9u2@eU^-u> zrO9sO21Jva8%Y>@ZaE1g1?8#5!B&^JMBEoG>OM=!93^C{l{r^H-Z|!(v@y?2!STfl zX!J~xY^7dvD#C7|mj-G*i*zCte@Jt@N`0$Tp|r@W6?UbIe#(mF)ii)=!zc^SL1m$9 z$7$%BlsrS@8@(v-+Cgr7(?1gJfzwli29EncM!j@=UPq4G z3T#{^D@O#Ev+KHe4P^r;YDzORrM07rZ)PTX)nN1VU0YT%D}!xPi2KxLUG;J{sv9-* ze68$Z9`vYZ3O%vb(K$c*t#iIxj5soH@c6t%AJ~uj_Gdt38}?&Xx?GnHvaVD2s}C!q zFxZ&bH$ld^%9R6Vj^9yG31@_E5a?d%XKjvxHXs>3SuYtR?b4Y91B|6mq&=ZrvB70E z!-M7^NK?Nbd`qnLUVZV!ar}kpEVIau0$!lR6 z#vtQyFCf01N;q^k;EnNop}h8@8vysa!aj}SIXUZl@E$-J$4s66){iKyF5H_ zmDAiA6wY+&Ik*kDL6~!ax*3DCFVy`}P;`Jt`C|9bF5sX}-R_@YR7wD*<4!n??t}w9 zb)d`o!=~P1`YpTW{9jRHo6Rqw_0WB~5z1xz2t$u(FT4TyJbTCAXtXJC0!6%|2wqI})yo=M669%E9{2iYQcNCntlUi2Wtpd($z}efpGC>?HW}C0XHu{wH2jl+p zVBC+V<0kd-dIrjh(NbbAtmtO0NM${9cNpkSW75p*G|3tQ&&1A}BF&nUO8g#1IT*E* zRnwhvBnYkebJcnttyQ%CwRAI(Vz?bMfVD<8Lr@x@#h#nka}7QI8RZ#3a4TzxUxCE7 zK&oarh`RXI6UWZTxpKzYY!;)P-Fdl?&F-*`V>CCY_V)ml$G86jsy?Tv_9Azsp3Oai zN)@SUOy+LRZGMldoD@8M`q7Dr|G^i*eNbod`JuC@;4D7JS>T!2v0d~UenB$IDj?W! zcvR|u953LD=j9Tvpp{`<=J4hnL|bT&8CX`4(q2KeUn+Ool8b!ix9yfjJ=$-lHNI3_ z$VaeVD*1sAf2O_d!^0=nKZRdkUW@ht)EBqXq*#R_OSwrwNAWIn`8(q(ao2AVdAT~S zsWMnp7>hqft5@@-^n2@AP>xNy;N#O@GX)A-v2@ff_eJKw%Bvzk{DBC3Zmk7~o8{F5 zhd?czJEbK`r!Z08PvU+DQ*}<{2CPZmEMjK}hN{U5680z9qasOOec)dY%3zMyDB_3Z zt1?XDJ?i>CB@{su#4H2>v-l8+w|LlK@h}}GDPkuG2fEa-7J@?h1AIu7Yy~2BV6s#= zNf3$)!USO>$xZ5Y2(2ix)tMLM8!3TVA@)U?DjO&+buSdKP5(#m2h17)d<2o5v z+;W6O5D^C@yu-*;76x?uxOR@leWD!uq@#jjl;RtdoTNHBu6?p>`!14>>s1gUi~TTx2NkTY3MyIf zVaACEA4=dRc=o{W_fc*4!x19$o*j+)5xx8UB$bH=heaarO1f4_5XPl~6DOY*RSpk( zs?@0oa(cyENI)5a+y;b!9OQOiz==vlk6Ov+6tw%?04?;4IN8F$kZM0c;?;|LH1K6_ z-7|RTdj{yDq>uLR|3d<|ED=7o9L6JT4Wuv)(rzd{B9&eEf{**T90QNRu}Dv&phI_? zaA9<%>vN!eq~G?JE$caE{;yr7dO;&PCxyZyI&jY|s5CAm_)9@;=mW$i68b4Yu1Ddj z0u0YsOldJx3d$kmDibBa&0_EBb^(2l0p!JLj>9Q54_@Ash*X2f%;T|1>WyG>%1g RTF$)FaIQE{JFfeK{|CGK7D503 delta 2339 zcmZ8j&2Jk;6rY)0+q>(vy|&XNPSPk_!T#*)1i5#LmQ7&t7%Be#92NfqiM7g0oaBi=NN{B{%Y$uKWNbOJ!D(dq#0QAG)r@^7AXD@m94G6HhUxB zqrs!V<4q8R1PT$wl=O)|;1#M4`NV+dSRk5QCq5Q9BsA5>=h2NvJ`n_VS|9t0P}omZ zqG*9!PP7AU~8|eI=E)Tl2P(xK1GQ60J1BGLX6x}B!u)Q&}o1sDVL5BN| z905dmWSqvn6U|->W=h!AZ>UPuP#3BU$j?AA4JdC~Wcm;y=%?ahABc;c^0Oiv8t^)# z<~Ptl9sHcgg{GAG78E~EkzWuNgf{6HMKR3K(LQE>iy|Ex1PygN2PUgeW92v~UjUzn zJc}{x(>RhcPAZ}BMM zaaQj{iN^Ap$3a6*&PKQ%sf%u-v5jq`>D;Px>y+2L?I5L`N$Yb}x8>gIu=%6GmGtMY zPQoE&!p2~U60sqZq5+wb+(976g6HYu7c9BWOHza=~=w=)$=4Dgm*Ln(I)PRamJ4sSDC19@C5TWh%kj=chY zkvgbgquz48U^6q`i&V)E(uot3IYm}Qo?US&O^2<}cDI!QsaFGH5)+%)AX&x0DpBFl za28wGz}AjMW-!?)s1{CQZHHiWXNnjEG(=kL$xQIIFu9vmOsvf*h5{g1149sELm}7% znMT-YIBij0h`o<|7jmAQgU9803Hg%d-xJoPl5$QFlgOfrz}LB1*bTvhM9bMkdQ}W^kACn_vM!fAboO;VP9L}A^TGMUuV0v`M zdR@9}4qaYr@wQhU2`(09Xrw^bMcE28f#*7`vg9t-9NVmWmF8N5*Fnr9wd*uiY)j6P z#deDWUWMQ8xOQ4fI?nAz+o9|Xpxed$HUI>eoShDS9G&Vh`E$sQNs^R-s4b`IvbfbP z+hl9psW@!0R{yMRo3zdNMmOu;JhRA%noZWx*ik)PQ4E1-Zda59>G&PrtG~z|H|fr zoe8Uo$qRDVR0sQMkbw>Kt%avaQg%J;WapAU&9pTQZ9h#$2)p$CgYg1R9tLoBDCn2Q z%x5X!pUGBbM}i+pU(6i^7GAp>CFP4*tVPKxgQ?~**~&1;+g+EjgyfZai`z=8-7OpJ zq69b_+?#m!z)SP_JZSEt;E#zDCnQ@g+6lMm)Ei9x)v(Q&KN(*G##D(SvDpW~>B;J~ rc&@NZQXqZzRc$!ZBW