From c783fff0409b8688c05745e633c26109776cd74b Mon Sep 17 00:00:00 2001 From: AdrianoDev Date: Sun, 26 Apr 2026 11:13:11 +0200 Subject: [PATCH] feat(mcp-docugen): output Word (.docx) via Pandoc con reference Tielogic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aggiunge la generazione di documenti Word coerenti con l'identità visiva Tielogic, in parallelo al render PDF già esistente. Il flusso completo è ora `bullet input → Markdown formattato → PDF e/o DOCX` in una singola chiamata MCP. - docx_renderer.py: subprocess Pandoc che legge il Markdown da stdin, emette il binario .docx su stdout. Strippa il YAML frontmatter e i blocchi `", re.DOTALL | re.IGNORECASE) +_FRONTMATTER_DELIM = "---" + + +class DocxRenderError(Exception): + pass + + +@dataclass(frozen=True) +class DocxRenderResult: + docx_bytes: bytes + size_bytes: int + + +def _strip_style_blocks(markdown_text: str) -> str: + """Remove `` blocks: they're meaningless in DOCX and + Pandoc would otherwise embed them as raw text.""" + return _STYLE_BLOCK_RE.sub("", markdown_text) + + +def _strip_frontmatter(markdown_text: str) -> str: + """Remove the YAML frontmatter so it doesn't appear as a body table in + the DOCX. Frontmatter values were meant for the PDF renderer.""" + if not markdown_text.startswith(_FRONTMATTER_DELIM): + return markdown_text + end_marker = f"\n{_FRONTMATTER_DELIM}\n" + idx = markdown_text.find(end_marker, len(_FRONTMATTER_DELIM)) + if idx == -1: + return markdown_text + return markdown_text[idx + len(end_marker) :].lstrip() + + +def _preprocess(markdown_text: str) -> str: + return _strip_style_blocks(_strip_frontmatter(markdown_text)) + + +async def render_markdown_to_docx( + markdown_text: str, reference_doc: Path | None = None +) -> DocxRenderResult: + """Convert Markdown to a DOCX file via Pandoc subprocess. + + Pandoc reads from stdin and writes the binary DOCX on stdout, so no + intermediate temp file is needed. The optional `reference_doc` is a + `.docx` whose styles (heading colors, fonts, header/footer, page size) + Pandoc will inherit — this is the path to add Tielogic branding to the + Word output later. + """ + if not markdown_text.strip(): + raise DocxRenderError("empty markdown input") + + cleaned = _preprocess(markdown_text) + if not cleaned.strip(): + raise DocxRenderError("nothing to render after stripping frontmatter/style") + + args = [ + "pandoc", + "-f", + "markdown+raw_html-implicit_figures", + "-t", + "docx", + "-o", + "-", + ] + if reference_doc is not None and reference_doc.is_file(): + args[5:5] = ["--reference-doc", str(reference_doc)] + + proc = await asyncio.create_subprocess_exec( + *args, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await proc.communicate(cleaned.encode("utf-8")) + if proc.returncode != 0: + raise DocxRenderError( + f"pandoc exit {proc.returncode}: {stderr.decode('utf-8', errors='replace')}" + ) + + return DocxRenderResult(docx_bytes=stdout, size_bytes=len(stdout)) diff --git a/services/mcp-docugen/src/mcp_docugen/main.py b/services/mcp-docugen/src/mcp_docugen/main.py index b032079..b5f5ce9 100644 --- a/services/mcp-docugen/src/mcp_docugen/main.py +++ b/services/mcp-docugen/src/mcp_docugen/main.py @@ -51,7 +51,9 @@ async def build_app(settings: Settings | None = None) -> FastAPI: inline_stylesheet_path=settings.inline_stylesheet_path, ) - mcp = build_mcp_server(template_store, renderer) + mcp = build_mcp_server( + template_store, renderer, docx_reference_path=settings.docx_reference_path + ) mcp_asgi = mcp.streamable_http_app() @asynccontextmanager diff --git a/services/mcp-docugen/src/mcp_docugen/mcp_tools.py b/services/mcp-docugen/src/mcp_docugen/mcp_tools.py index 562b8b7..c3b9899 100644 --- a/services/mcp-docugen/src/mcp_docugen/mcp_tools.py +++ b/services/mcp-docugen/src/mcp_docugen/mcp_tools.py @@ -1,10 +1,12 @@ from __future__ import annotations import base64 +from pathlib import Path from typing import Literal from mcp.server.fastmcp import FastMCP +from mcp_docugen.docx_renderer import render_markdown_to_docx from mcp_docugen.models import TemplateFrontmatter from mcp_docugen.pdf_renderer import render_markdown_to_pdf from mcp_docugen.renderer import Renderer @@ -12,9 +14,16 @@ from mcp_docugen.template_store import TemplateStore def build_mcp_server( - template_store: TemplateStore, renderer: Renderer + template_store: TemplateStore, + renderer: Renderer, + docx_reference_path: Path | None = None, ) -> FastMCP: mcp = FastMCP("mcp-docugen") + docx_ref = ( + docx_reference_path + if docx_reference_path is not None and docx_reference_path.is_file() + else None + ) @mcp.tool() async def template_create( @@ -74,14 +83,15 @@ def build_mcp_server( content_md: str, variables: dict, instructions: str | None = None, - output_format: Literal["md", "pdf", "both"] = "md", + output_format: Literal["md", "pdf", "docx", "all"] = "md", ) -> dict: """Generate a document from a template, content, and variables. output_format: - - "md" → returns the generated Markdown only (default) - - "pdf" → also renders the Markdown to PDF (base64 encoded) - - "both" → same as "pdf" (kept for symmetry; PDF includes the MD) + - "md" → returns the generated Markdown only (default) + - "pdf" → also renders the Markdown to PDF (base64 encoded) + - "docx" → also renders the Markdown to a Word DOCX (base64) + - "all" → emits both PDF and DOCX alongside the Markdown """ result = await renderer.generate( template_name=template_name, @@ -91,11 +101,21 @@ def build_mcp_server( ) out = result.model_dump(mode="json") - if output_format in ("pdf", "both"): + want_pdf = output_format in ("pdf", "all") + want_docx = output_format in ("docx", "all") + + if want_pdf: pdf = await render_markdown_to_pdf(result.markdown) out["pdf_b64"] = base64.b64encode(pdf.pdf_bytes).decode("ascii") out["pdf_size_bytes"] = pdf.size_bytes + if want_docx: + docx = await render_markdown_to_docx( + result.markdown, reference_doc=docx_ref + ) + out["docx_b64"] = base64.b64encode(docx.docx_bytes).decode("ascii") + out["docx_size_bytes"] = docx.size_bytes + return out @mcp.tool() @@ -112,6 +132,22 @@ def build_mcp_server( "size_bytes": pdf.size_bytes, } + @mcp.tool() + async def document_to_docx(markdown: str) -> dict: + """Convert an arbitrary Markdown document into a Word DOCX file. + + YAML frontmatter and inline `\nPost" + assert "\n" + "After" + ) + cleaned = _strip_style_blocks(md) + assert "body { color: red; }\n\n" + "# Body\n\nContent.\n" + ) + out = _preprocess(md) + assert "foo: bar" not in out + assert "\n\n" + "# Hello\n\n" + "| A | B |\n|---|---|\n| 1 | 2 |\n\n" + "**bold** and *italic*.\n" + ) + result = await render_markdown_to_docx(md) + # DOCX is a ZIP archive; signature: PK\x03\x04 + assert result.docx_bytes.startswith(b"PK\x03\x04") + assert result.size_bytes > 1000 + + +async def test_render_empty_markdown_raises(): + with pytest.raises(DocxRenderError): + await render_markdown_to_docx("") + with pytest.raises(DocxRenderError): + await render_markdown_to_docx(" \n\n ") + + +async def test_render_only_frontmatter_and_style_raises(): + md = "---\nfoo: bar\n---\n\n\n\n \n" + with pytest.raises(DocxRenderError): + await render_markdown_to_docx(md) diff --git a/themes/tielogic-reference.docx b/themes/tielogic-reference.docx new file mode 100644 index 0000000000000000000000000000000000000000..b2f9815819e6e0acead4974f0e794e79328015d3 GIT binary patch literal 10286 zcmZ`<1yEeuvc+8ncb7nLcXxLW?(P=cHMqOGJHZ`-2iM>hoFIV^{@mRAUT*mRPt}>J zIaRCc?C##X*IC_)GT;#CARr(xAaqh0?;I-!&YeL)Kq#R>K+u49bwup#Tukj;^i@3_ zOr3S<0<@z*5f1bUf7j4&7)!f5@zfa0o(p%lrr z5vQlOJ}{9{HfGZoAU@<(Nv&|=)WbwKr88G1`Oxd_LbGRX%3^v!lN9KoDYuBb#AZRa zL(vHv5nxyBEm{Y-e4{xJ{((xfxIa1@Zr=}T(9s7LuQ1fw6uVorwy7Ey&4`3$Sywed-Is-Wwc0wzO7IPxQ6?W?CB zFjI*9CF_HW*3Mw%G7qJN%}mQ-6I6qzbYMvUx%3J8d(ZN+<*8fc_mBMAC%#yQb&UNh zQpA%)tkYLR{8kyhlUeAPMq0hM44caCABlgg>tJz~>z;yE$bBM?8Vd)Hh5Kl1KU94C zBDq!Ph4FZiU@BP2(CsART(!w{(#BvAE>!FJBvE79_A#iXAcXl@yl6q~9z+R0ZF)r@ zYIq0mrLfpe_wuooqw71A;vX?86BC9w2gYU|3 z)fe)pmXAVbEIl@>O780{C~w}Z;-*L*hp8rg3oghvmq!jCy4#a0H!2_&u$|bf<8=-j zTgIA0(ObABILJk?h)J_LW8G`c3|Q@a!yQJj=2q7f?RH3g8G+J+D$L$aws?uzC~~h> z0|5`G@OAy2a@d`0Uwmc>?Q=Tx+b{A3H&pw9l*LNLUvH9sZt}l3+C{)?Vw!84F*lk& zx=WC0=;)t)F?Jcn&r-BQi;Zba_XhdXEubdy;wHe<{mU%?pj%ArjTN2j9h?~q9UT73 z+ic}=`2j{i=RHlqdW4K;g_AA`U)lng_GIN;iP$Z~!tUdmiSOe*U{UkT@(B&96coo=~PkQ<` z_>e?~yn>5AORlc#l+9M;%-$q)AT)Pta}1Y)WsER7lK(p;);D@cE{&ioPGMADdg)}x zrKqtDO34t7Y$XUkX7ciU

#&<-wNE{P$TOs6H5Q<0;zD?kNPwl%7rXnOj=V1#wDj zFQ4wHf9_}VO=CJodGM^^?BdKMY%k2Mvo1s+@D-b)+z@O@;@ei8sKb7Fa|QWlL{t*J z-cSJ}q5_Nv%D*FGZ0}_HS4fHzbghFJ(L*opX~^$!n_UR{N}@mL*)paX8@rQm*@{d; zOL4FH^;lG9&vb0SjNEg@9;T^Q7Fs0`Iq2bbiHfBZLa=fOpPZrk%zym`T79HDM7KTM zm&ta{cb_F7wpCuS1>TK7lvTrT2 zdV2M187`hH%IEnRCqZYW9Z*dy{1-PG`;9j!)rLI8Ms$3{J2Mgp zI(LH=b}b+HJ(58jO>$7JacXgW>LE8U^EOEw6&xx~v^j*r0>d=AccV2(j1|UOIZ3)) z3FFVHiA1%lJ3S@}ch50I8|R5j^C#{tV|srs#KNeuV+b4sB#7wm{BXB-GGPF&#?=;B zasFDyS^An&CLs#Jju=*xwUMXPc8wIp%nm?jei!NW#`tNc(=|n2H&i1@bR88` ziCNCPW!jV1H4EY?xL_}7h#Yl<7Yg069$w~+>DF*BqZDesgh=U{9Wp^=;HVu2^EUv_ zfhQ;>M=>aMzYjIoNF)d>3f5Kx(O`tCaUfoSCAF(CI0CI9p+H}>JbdzXZnWk_l;)gb zj112icvEB17>o-{Iy{vRMVF(0tRg_k8xpURm0wsS`!RDT&u7z0D}|~A(m8i}$gCR; zOK0c~e^0O8Z#q}RsCbSI&iqcKSHwtXin3RUbkKv1n_0bU&T@))_fw1 zIb_*cDfpJV7m9;PhAWWy0Dg{>#rk((&7AQ6n7 zjcr|4qALhq3vXIVd?-yGW>pKvnj+*dWFtS4q;N6~k1xjYq)eSo#RySU7Bn-x8}m0RhIxSssHFYexznuIzB?46#yd5(iio%iHOkF8nHTXXo<`Qeh(>9H?N+di4n}?7^3^X&MN84cCtkVL`8{>`n5eb`=71rL_QRdpsk4y@^}{adXXBDW>lTMkA^YLYk33>BLDsJH=E?651jqpl;eoP` z0U&G!8W`RZJ>p%y2e-8cY$R#Z`J9e!MGxocwqw)Bht*&H-1A=RlH8fVn$Qgk0)h%W zyngL@Ga$)SH8isMRTR#Wbrt?83X*c2&gh|u+siu=ZpR^09=e)1auzbH%9eKO92j(LxQ|kjS^b* z52+^Q3bh&&+2+uBkCvzrSTqavw$8M5c4}=?Vwh5r_bbPgw4&HX zTh{B3ai*;;gVahH8MSn}BEp>GEl;k>*ekAk6NAT_btIATRE4^UX4ZA+-2D!t=Jd@w z_|$gQoY0@j&LX~Y`s!E?S<2_G^4OH((BqV@zAq@grR75LVbtXvCIdO?b1*!(?c*(Y zc4w5}V?<6)WW?+H&Ncz9MJ28l(ull=*qb>oNE5e-V*9$t#Nm)&d+(vOp zEuY9BU8)}4tLiGQ$xok$934FJ|5`~a8TH>4^!mTy}|RpJeiyUF|}VNJKexWeIrYolSj zehKP0Z=j;PAcI_OS*sK;=r{>UI`K$c7=rVZ@OhAVb_XN}luySnz^$x4P$FpFs20*8 zB^lxooax6bvKQ859&*&p&4`D6EN-MLKTo}SUwA<*igj_ktao@}kR|YZ0zh9FOHFcx z^z)<&5g!>odrH*|(kQ}~8mwDMA-&+^r%Oty+>up>Ux7ENO%~9qF+)hzD!GYb-F`V5 z^yJ-sSjyR3`ms>tQJJCKiSUT%-GIFu6jvdYQvBR3Krp2-X|D)A^aUN2BzA~c!D!3iXfhVc^WEBFM?+*C zVR=%PmoSzb`Y(*?jTCN2I*!X61r3`9hd&B2^zU_^sp$_lsHuZM7A0z&dK;yyv5gR< zrP+fZC%q6B54B?or*CZXh8eM1(A8 zgx%(ukf(y+-?7>ZiR#g*B2EFA@9L-U6d6ppq>X%2(+p{Sj%TXsa%E^>yUZG5TprE4 z`qB}F<%c4CM`R3xuI|6q))tiQeTV+j6HK2rk3xXiY76tPY_)T>H8ORwv@`!JTZd96 zfiJcQBNtw9NlzJg98+XES^tAx&b^VA5b`o4!avZf7JJZF|Du&#k*eFqsPn zpVF6<$}6-@Msrd2l$7R8Ddce;SMBvxnzVYV=w)WF!IKT{h=t4cDMZYxGbx5EI8|~g zwKfz%yN&5QYYb@=9+bx$;$4N310|;%1SJDK^11UU)MQar5vARyCt3V;BiO3r(szX8s>)ThD$*}Zkqv;&?uGC*3V}_qfRo*WqLiy|9r~O+7 ziXhw>xI&HXAFN2=GwXZ8_UhX=bMNn{JG;5tZBXH-P%OZiEi%*8H1p+YzXTe&l|V=s z%Wa#jmkpwaY|Xp`cY$kcFSavB6l}U!NzV*oKw&sH4a&LHt>fxZ&(LcRUNH;DO(?y0 zqmlkvSpM=sd~XrD{0on0bV zP=xzLVvRRKf*kj2*chx*u%qR0E@Mo(PVMDnw7>q(Zb1V_};850o4L|u8VHHJu_YEn zKd@n*-}M&!#KL2;RPBJGaV4oGQmVb=+LJq{;Y@d3m`P|4d|t)+3L|JKs)(#%Y1m5DBn)GvRNYsn+Tjd3`{+b zR%}@GLmK34He(74)utSl=o%&q4lxN@IUP0vukJ92U0;}t;A7=n%n5n$>YZb=W0yll z=*4Z0>rJC<@4tb};x!avP5QNX7#6W2?zxk_z-3HeI|yOro~yA>jUA8580x}Nd`M;) zsJzMFU2D1ZGtzkD_e!@2!B?Hi0v^#L!0Rs|?$;6RZfd0RuQb<<;XwixQqiZZekTPR zDnR5lirf)tQ7@=$19^&#{a!O_%(SqO5ZBE&aTRuki$S<}+ED3pXFAAWPzk&e=}r5y zbMj1X8t`MIAPHn@CXzn&yudZ-DQdb9W!8x;#QiowgU7)mxgG2`&4&1?;oyM!(`;@F zAJxre;;*vd7j2G6t=8@+CmJ7?4NoTF%uiOFXSAV-^eTFIZt?!yv9}y1)z84)YR&@! zg8ZM=IJfeU?nz%`$+sgN(7s66BUg}k)6B}Q;$9#+`g{y%1}^e z!_8}%bOt<04x?HLdnKa97R^>ic0JAjS{$Wu^4`dYUohbLC&#!E=Evps0GYZgi$Ur@ z@pDPxC@E?tf}vd;BnuPUTERVHmuXpl<24CTN}@MH!8XGI#07~k0n!H7#SXwi0RVQjPc?) zZlr{UI@C$z#{?tEGfAr8?8+cG-~xhzDGbKejZ<3IKJgkl5^7PGQh8$3D20}JMoUtH zq~_M$z-(}tM%Qn&81D?65eTIbKW5Dx%t#Q$f9=mB#;S^kf6*J*A_=sUe{rU~Oa2+c=%PnU2mcB=~8EuUq1r2YmVUWfcX-(QQDwa!;r)#;I z1*TCp;uBV_H82Y>B6yT2uRjyg@diUaYpHBvQQXD!h)bCw!I34z++CfKLHs}}P&w1+ zZ|0VOT!1RRh$^4or%4Wh^S%si(4c=f9vhc#L|X{C-8bmbufa81HQbU{US|W(^O6R! zDe?+TR7)r!D~XPtiilIv(sW}8#p2lScT!c-jERAY&%q(>Vi83TckxS`EL^J;6u+<- zJ<5!q6(AE460bEZR`D@L>c+8~;JUdFR{zMFnlr&L$>ljBbA#=dkSVFJaBp5Xb&I$} zS(4?nxeSF=yToG!n+n=wPj&t{^7b4x4ru~Fm_<@0*!Dsrh)oUnIVQ-%8sz2yRSmY< zR|~S5i%^7R7N?gbRN>!rZ9Iij!~pj>9;c{~MH1t$yYHRi^fH9rdqS0ieV)ZBDg{4o z4$GDhfkrT2YVJLq+rCm3tbWp)wWN6v2^epa*_`UH&uU`vbdYMaHV;()>Y^%sz^{~R zC<`u|&wp7yczz}^G%9n0&+{0@sR!OM@g)8*H(c=+og)7Q9yOb7iwJkuM`q41)dkhe zR@Xah!8;?C;s%LPW)!mk#k4D!ZI(4tt}oE7E2NfF=wD*Iho%&m+fd4wFw4>%t6=0V$VxRjgUdbtdddL?o8_cd*+ViLC z8k+X@sTtXo=Z_c@D~S1?ooYI0A2=|c>D>JRV>YQ)@dx@T~ z!KHJsh-ix7JHApUz7{uujYS5t88<4O&LGC6Xbz*l<#QhKfth=R14?WZvk~ zQ-bt-d%;?n>=OLy^pA{G_x7l~*`M>Q0OunI5Z}!Q__7F5QX>jxo)9BwGMQRe{cTiK zY|f8t+1)|=PGM%pu&8e_WM(N%7Lr61C6Q%?eB(zvf_HVC6I1JvUCm-;3c+jr&U<7N zqa-J#?oF+V!QNjqxD5u;`@d(n-OT>c9Pmc3q9bVB<;lw+@UQrxsUfafjAh%xJadY%r6)4OqD^uj>Bg_+BhE@P$MJ5(EVQ zKb7+DUe3QY&|iI>=J|1DpeEwK?UPHI*acCs7^89|o_*Z^VF$d(p-pzoG(n}<-X2}L zY%U~|Ha!=gpE+Q%1&iQ2gs~2VN`@#C1u?u@ON0Ee#80Qjk~N!p&!CvXjv@BymciN1=Vxf=9)MT6=WmH%uqrhte=ypAZCcL30>CF;;6m1Kjsi`$X@|N#V z3GO8K$r26`M&GD3(5P6snF>oy$swkFrWUuOr=U&i-zf*@K}pGBOxl`;7QN5VbKpUX zrfb@Uzq|0manE5I&!fGW$;DYVJfGh4_S@BM@AHo)) zfurzEkf`F@Sr^8`20$oc7bTh%9E;h6Omrn7^#~)%R;eZ?~_<~<-xkFA35MR<&HWP$Zjtoe!#mf*# zpt?Pq6jQp~a!k**H>)sX&sb*?-KPywQvwsB$x?MQa5$46Ct$rT_?UM<-7~24U9w?B z9LiYQSHTjU43`l8aMrBYz3(zGCvTrS3cR_tP7YHP~y>znD9tgau)e@sw~s~2nMOnr@wuLF)?DG8&r zn0BV~HaMw(QQsVFY@LsS#Vzb`tw73|#5R6%kBPd^ARK^sy?tjw{T!JE&~v7&@S0mCuzfIUCWhSS%j*?1 z2d9_gvFBU`LSD}Hm&6D+#NhyWud9tiKfXtZAK&-Zu#kJINF1^+*J5*jK8|CtN#>K~ z`rSUt?n5`x5SWKk9;8RRylLE?-ylxFUO zCavnVpCS&RG(c7Z_Y4~wHW1az-mJv#B5Ek_rQEw^tRQx$qTu^95u>Yd(xfT7njFloUR7Y*C= zCuPO`C}6Bdarthv2s&Jm@e<`w{4*VUWtz9{T~Y?zGD%|@oGBO)c0LDjF}F)5;WrU% z3bPJJ{F5!@?*UbsFmC0HYeJRQlmz3!y>0wA?u@AB@2*-M0U1#gJ}jm;-Fn9}C^7mA zHDoK*+d39Y4LtlW2eUkjWvwQ((bmTGd`n zrlds;L=+@TuvlhX6iNwvkV+GiP_8vPPqzBn3>!MkdJiQQSoDO)&hgL5O9p zh2Q8$4((}bZ_GZ+m_DmW6v^vuemOc;j`FCa-zlZy^&8X`>D^G-0lHsPr=z}X_OpVT zi?_ntFDV)Y)MI8Iq!KFw^yA)yDt_#Kl)Tm`o~#ExIfGsnV0C`H3KzofZuA2`$@wy< zBtYdEw5S^#1UuPNFHd4DLz?>X&@P`WnkNXdZs3rNyUlV$`Nu!7k+nubAB^Hyd+v`` zC)7KO%H!vvw(}aalq1a68`KHBc*>|d(@Uvc9spZ4pg^(h|DOf|7L;Efe_%cRKWBqp z8+bjc@;4j=#2>T>XyRXkE3bjChdF)&JAhC4-;Vb`qaCl&uLlo)qve3TivObjGluXQ z{<^{b8(swDYQN$CZMVNR@Vc@6+kh>wHvg|S_iOm;9@%fWA>9AfF?)@F-I)1}r$zWL z{$KRxHU4$e;5U8-@gMxZTL`b=ua)-S@KvOL;D6}uuko)%-{1IH;9Soy@BLc>ehq)E zr~QUI1E(eaFLmv;h1VL^Zwo@ez47-0{8!O>4Sr4Qe}k>@{sI3#B>%Og*X;GTr77V1 w!mr2p+tUB$vae0OriH&v^#Y0FFH^5cqM{5WFjxN?r2>E;0Cx=n`CtG32iei%*8l(j literal 0 HcmV?d00001