diff --git a/README.md b/README.md
index a6465ea..2298636 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Due pezzi, stesso repo:
| Servizio | Stato | Funzione |
|---|---|---|
-| `mcp-docugen` | Implementato, 83 test verde, deploy Docker via gateway Caddy (porta 8090), **7 tool MCP** esposti (CRUD template + `document_generate` + `document_to_pdf`), template seed versionati, CSS Tielogic iniettato inline, **render PDF server-side** via Chromium/Playwright | Genera Markdown formale da template + LLM (OpenRouter) e converte in PDF. Vedi [`docs/mcp-docugen-design.md`](docs/mcp-docugen-design.md) + [`docs/mcp-docugen-implementation.md`](docs/mcp-docugen-implementation.md). |
+| `mcp-docugen` | Implementato, 92 test verdi, deploy Docker via gateway Caddy (porta 8090), **8 tool MCP** esposti (CRUD template + `document_generate` + `document_to_pdf` + `document_to_docx`), template seed versionati, CSS Tielogic iniettato inline, render server-side **PDF** via Chromium/Playwright e **DOCX** via Pandoc con reference `tielogic-reference.docx` | Genera Markdown formale da template + LLM (OpenRouter) e converte in PDF o Word. Vedi [`docs/mcp-docugen-design.md`](docs/mcp-docugen-design.md) + [`docs/mcp-docugen-implementation.md`](docs/mcp-docugen-implementation.md). |
| `mcp-convert` | Da progettare | Conversione Markdown → PDF / DOCX / HTML (pandoc/typst backend). |
| `mcp-inbox` | Da progettare | Ingest da Telegram (+ STT opzionale via Whisper) verso draft inbox consumati da Claude Code desktop. |
@@ -96,6 +96,8 @@ Conversione Markdown→PDF: tre strade, in ordine di comodità.
Il CSS Tielogic non viene mai referenziato come path esterno nel Markdown prodotto dal servizio: il `Renderer` lo legge da `themes/tielogic.css` (copiato nell'immagine Docker in `/app/themes/`) e lo inietta come blocco `", re.DOTALL | re.IGNORECASE)
+_FRONTMATTER_DELIM = "---"
+
+
+class DocxRenderError(Exception):
+    """Raised when a Markdown document cannot be rendered to DOCX."""
+
+
+@dataclass(frozen=True)
+class DocxRenderResult:
+ """Immutable result of a Markdown-to-DOCX conversion."""
+ # Raw bytes of the generated DOCX file.
+ docx_bytes: bytes
+ # Payload size in bytes; render_markdown_to_docx sets it to len(docx_bytes).
+ size_bytes: int
+
+
+def _strip_style_blocks(markdown_text: str) -> str:
+    """Drop every inline `<style>` block matched by `_STYLE_BLOCK_RE`.
+
+    Stylesheets are meaningless in a DOCX, and Pandoc would otherwise
+    embed them as raw text.
+    """
+    without_styles = _STYLE_BLOCK_RE.sub("", markdown_text)
+    return without_styles
+
+
+def _strip_frontmatter(markdown_text: str) -> str:
+    """Remove a leading YAML frontmatter block from the Markdown.
+
+    The frontmatter must not appear as a body table in the DOCX; its
+    values were meant for the PDF renderer.
+
+    A frontmatter block is recognised only when the very first line is
+    exactly the delimiter (trailing whitespace / CR tolerated) and a later
+    line is again exactly the delimiter. The closing delimiter may be the
+    last line of the input even without a trailing newline.
+
+    Returns the text after the closing delimiter with leading whitespace
+    removed, or the input unchanged when no complete frontmatter block is
+    found.
+    """
+    lines = markdown_text.split("\n")
+    # Compare whole lines: a prefix test would also accept `----` or
+    # `---text`, which are not frontmatter openers.
+    if lines[0].rstrip() != _FRONTMATTER_DELIM:
+        return markdown_text
+    for idx, line in enumerate(lines[1:], start=1):
+        if line.rstrip() == _FRONTMATTER_DELIM:
+            return "\n".join(lines[idx + 1 :]).lstrip()
+    # Opening delimiter without a closing one: leave the text untouched.
+    return markdown_text
+
+
+def _preprocess(markdown_text: str) -> str:
+    """Return the Markdown with frontmatter removed first, then style blocks."""
+    body = _strip_frontmatter(markdown_text)
+    return _strip_style_blocks(body)
+
+
+async def render_markdown_to_docx(
+    markdown_text: str, reference_doc: Path | None = None
+) -> DocxRenderResult:
+    """Convert Markdown to a DOCX file via a Pandoc subprocess.
+
+    Pandoc reads the Markdown from stdin and writes the binary DOCX to
+    stdout (`-o -`), so no intermediate temp file is needed.
+
+    Args:
+        markdown_text: Markdown source. Frontmatter and style blocks are
+            stripped by `_preprocess` before conversion.
+        reference_doc: Optional `.docx` whose styles (heading colors,
+            fonts, header/footer, page size) Pandoc will inherit. It is
+            ignored when it is None or does not point to an existing file.
+
+    Returns:
+        A `DocxRenderResult` with the DOCX bytes and their length.
+
+    Raises:
+        DocxRenderError: if the input is empty (before or after
+            preprocessing), if Pandoc cannot be started, if it exits with
+            a non-zero status, or if it produces no output.
+    """
+    if not markdown_text.strip():
+        raise DocxRenderError("empty markdown input")
+
+    cleaned = _preprocess(markdown_text)
+    if not cleaned.strip():
+        raise DocxRenderError("nothing to render after stripping frontmatter/style")
+
+    # Build the command in its final order instead of splicing by index,
+    # so adding or removing an option cannot misplace `--reference-doc`.
+    args = ["pandoc", "-f", "markdown+raw_html-implicit_figures", "-t", "docx"]
+    if reference_doc is not None and reference_doc.is_file():
+        args += ["--reference-doc", str(reference_doc)]
+    args += ["-o", "-"]
+
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *args,
+            stdin=asyncio.subprocess.PIPE,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    except OSError as exc:
+        # Missing or non-executable pandoc binary: report it through the
+        # module's own error type rather than leaking a raw OSError.
+        raise DocxRenderError(f"cannot start pandoc: {exc}") from exc
+
+    stdout, stderr = await proc.communicate(cleaned.encode("utf-8"))
+    if proc.returncode != 0:
+        raise DocxRenderError(
+            f"pandoc exit {proc.returncode}: {stderr.decode('utf-8', errors='replace')}"
+        )
+    if not stdout:
+        # A zero-byte payload is never a valid DOCX, even on exit code 0.
+        raise DocxRenderError("pandoc produced no output")
+
+    return DocxRenderResult(docx_bytes=stdout, size_bytes=len(stdout))
diff --git a/services/mcp-docugen/src/mcp_docugen/main.py b/services/mcp-docugen/src/mcp_docugen/main.py
index b032079..b5f5ce9 100644
--- a/services/mcp-docugen/src/mcp_docugen/main.py
+++ b/services/mcp-docugen/src/mcp_docugen/main.py
@@ -51,7 +51,9 @@ async def build_app(settings: Settings | None = None) -> FastAPI:
inline_stylesheet_path=settings.inline_stylesheet_path,
)
- mcp = build_mcp_server(template_store, renderer)
+ mcp = build_mcp_server(
+ template_store, renderer, docx_reference_path=settings.docx_reference_path
+ )
mcp_asgi = mcp.streamable_http_app()
@asynccontextmanager
diff --git a/services/mcp-docugen/src/mcp_docugen/mcp_tools.py b/services/mcp-docugen/src/mcp_docugen/mcp_tools.py
index 562b8b7..c3b9899 100644
--- a/services/mcp-docugen/src/mcp_docugen/mcp_tools.py
+++ b/services/mcp-docugen/src/mcp_docugen/mcp_tools.py
@@ -1,10 +1,12 @@
from __future__ import annotations
import base64
+from pathlib import Path
from typing import Literal
from mcp.server.fastmcp import FastMCP
+from mcp_docugen.docx_renderer import render_markdown_to_docx
from mcp_docugen.models import TemplateFrontmatter
from mcp_docugen.pdf_renderer import render_markdown_to_pdf
from mcp_docugen.renderer import Renderer
@@ -12,9 +14,16 @@ from mcp_docugen.template_store import TemplateStore
def build_mcp_server(
- template_store: TemplateStore, renderer: Renderer
+ template_store: TemplateStore,
+ renderer: Renderer,
+ docx_reference_path: Path | None = None,
) -> FastMCP:
mcp = FastMCP("mcp-docugen")
+ docx_ref = (
+ docx_reference_path
+ if docx_reference_path is not None and docx_reference_path.is_file()
+ else None
+ )
@mcp.tool()
async def template_create(
@@ -74,14 +83,15 @@ def build_mcp_server(
content_md: str,
variables: dict,
instructions: str | None = None,
- output_format: Literal["md", "pdf", "both"] = "md",
+ output_format: Literal["md", "pdf", "docx", "all"] = "md",
) -> dict:
"""Generate a document from a template, content, and variables.
output_format:
- - "md" → returns the generated Markdown only (default)
- - "pdf" → also renders the Markdown to PDF (base64 encoded)
- - "both" → same as "pdf" (kept for symmetry; PDF includes the MD)
+ - "md" → returns the generated Markdown only (default)
+ - "pdf" → also renders the Markdown to PDF (base64 encoded)
+ - "docx" → also renders the Markdown to a Word DOCX (base64)
+ - "all" → emits both PDF and DOCX alongside the Markdown
"""
result = await renderer.generate(
template_name=template_name,
@@ -91,11 +101,21 @@ def build_mcp_server(
)
out = result.model_dump(mode="json")
- if output_format in ("pdf", "both"):
+ want_pdf = output_format in ("pdf", "all")
+ want_docx = output_format in ("docx", "all")
+
+ if want_pdf:
pdf = await render_markdown_to_pdf(result.markdown)
out["pdf_b64"] = base64.b64encode(pdf.pdf_bytes).decode("ascii")
out["pdf_size_bytes"] = pdf.size_bytes
+ if want_docx:
+ docx = await render_markdown_to_docx(
+ result.markdown, reference_doc=docx_ref
+ )
+ out["docx_b64"] = base64.b64encode(docx.docx_bytes).decode("ascii")
+ out["docx_size_bytes"] = docx.size_bytes
+
return out
@mcp.tool()
@@ -112,6 +132,22 @@ def build_mcp_server(
"size_bytes": pdf.size_bytes,
}
+ @mcp.tool()
+ async def document_to_docx(markdown: str) -> dict:
+ """Convert an arbitrary Markdown document into a Word DOCX file.
+
+ YAML frontmatter and inline `\nPost"
+ assert "\n"
+ "After"
+ )
+ cleaned = _strip_style_blocks(md)
+ assert "\n\n"
+ "# Body\n\nContent.\n"
+ )
+ out = _preprocess(md)
+ assert "foo: bar" not in out
+ assert "\n\n"
+ "# Hello\n\n"
+ "| A | B |\n|---|---|\n| 1 | 2 |\n\n"
+ "**bold** and *italic*.\n"
+ )
+ result = await render_markdown_to_docx(md)
+ # DOCX is a ZIP archive; signature: PK\x03\x04
+ assert result.docx_bytes.startswith(b"PK\x03\x04")
+ assert result.size_bytes > 1000
+
+
+async def test_render_empty_markdown_raises():
+    """Empty and whitespace-only inputs are both rejected."""
+    for blank in ("", " \n\n "):
+        with pytest.raises(DocxRenderError):
+            await render_markdown_to_docx(blank)
+
+
+async def test_render_only_frontmatter_and_style_raises():
+    """Input that is empty once preprocessing is done must be rejected."""
+    source = "---\nfoo: bar\n---\n\n\n\n \n"
+    with pytest.raises(DocxRenderError):
+        await render_markdown_to_docx(source)
diff --git a/themes/tielogic-reference.docx b/themes/tielogic-reference.docx
new file mode 100644
index 0000000..b2f9815
Binary files /dev/null and b/themes/tielogic-reference.docx differ