feat(mcp-docugen): output Word (.docx) via Pandoc con reference Tielogic
Aggiunge la generazione di documenti Word coerenti con l'identità
visiva Tielogic, in parallelo al render PDF già esistente. Il flusso
completo è ora `bullet input → Markdown formattato → PDF e/o DOCX`
in una singola chiamata MCP.
- docx_renderer.py: subprocess Pandoc che legge il Markdown da stdin,
emette il binario .docx su stdout. Strippa il YAML frontmatter e i
blocchi `<style>` (presenti per il PDF, irrilevanti in DOCX) prima
della conversione.
- mcp_tools.py: nuovo tool `document_to_docx(markdown)` che ritorna
`{docx_b64, size_bytes}`; `document_generate` esteso con
`output_format ∈ {md, pdf, docx, all}`. La firma di
`build_mcp_server` accetta ora `docx_reference_path` opzionale.
- config.py: `Settings.docx_reference_path` (default
/app/themes/tielogic-reference.docx).
- main.py: passa la nuova setting a `build_mcp_server`.
- mcp-docugen.Dockerfile: installazione di pandoc accanto alle libs
Chromium.
- themes/tielogic-reference.docx: reference Word (10 KB) con stili
Tielogic — heading colors blu/dark, font Inter, dimensioni allineate
al CSS web. Generato da `scripts/build-reference-docx.py` che parte
dal reference.docx di default di Pandoc e riscrive `word/styles.xml`
con regex sui blocchi `<w:style>`. Pandoc lo applica in automatico
agli output DOCX prodotti dal servizio.
- 9 nuovi test unit per docx_renderer (strip frontmatter/style,
preprocess combinato, error empty input, smoke skippato in
ambienti senza Pandoc): 92 test totali.
Smoke E2E via MCP: una sola chiamata `document_generate` con
`output_format=all` produce MD (14 KB), PDF (137 KB, 4 pagine A4) e
DOCX (12.7 KB) coerenti tra loro.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Executable
+164
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Costruisce themes/tielogic-reference.docx applicando l'identità Tielogic
|
||||
al reference.docx di default di Pandoc.
|
||||
|
||||
Pandoc usa il reference.docx come template di stili (Heading1, Heading2,
|
||||
Normal, Table, ...) per l'output `-t docx`. Questo script:
|
||||
|
||||
1. estrae il reference.docx di default di Pandoc
|
||||
2. modifica word/styles.xml: font Inter, colori Tielogic blu (#2767d8),
|
||||
dimensioni e attributi paragrafo coerenti col theme CSS
|
||||
3. modifica word/header*.xml e word/footer*.xml con testo Tielogic generico
|
||||
4. riscrive lo zip in themes/tielogic-reference.docx
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
# Directory two levels above this file (the parent of the script's folder).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Where the generated reference document is written unless --output is given.
DEFAULT_OUTPUT = REPO_ROOT.joinpath("themes", "tielogic-reference.docx")

# Hex RGB values without the leading '#', in the form used by <w:color w:val>.
TIELOGIC_BLUE = "2767D8"
TIELOGIC_DARK = "0D1B2A"

# Font family names.
INTER = "Inter"
INTER_MONO = "JetBrains Mono"
|
||||
|
||||
|
||||
def get_pandoc_default_reference() -> bytes:
    """Return the raw bytes of Pandoc's built-in ``reference.docx``.

    The file is obtained by running
    ``pandoc --print-default-data-file reference.docx`` and capturing stdout.

    Terminates the process through ``sys.exit`` (with an explanatory message)
    when no ``pandoc`` executable is found on PATH. Because the subprocess is
    run with ``check=True``, a non-zero pandoc exit status surfaces as
    ``subprocess.CalledProcessError``.
    """
    if not shutil.which("pandoc"):
        sys.exit("pandoc not found in PATH; install pandoc and rerun")

    command = ["pandoc", "--print-default-data-file", "reference.docx"]
    completed = subprocess.run(command, check=True, capture_output=True)
    return completed.stdout
|
||||
|
||||
|
||||
def patch_styles_xml(xml: str) -> str:
    """Return the text of ``word/styles.xml`` with Tielogic styling applied.

    The work is done with regular expressions on the XML text, so no docx or
    XML library is needed. Two passes are made:

    1. every self-closing ``<w:rFonts .../>`` element that carries attributes
       is rewritten by ``_patch_rfonts``;
    2. the ``Heading1``, ``Heading2``, ``Heading3``, ``Title`` and
       ``Subtitle`` styles each receive an explicit run-property block via
       ``_override_style``.
    """
    # (style id, size, color, caps, bold). The size is written verbatim to
    # w:sz / w:szCs, which OOXML expresses in half-points.
    overrides = (
        ("Heading1", "44", TIELOGIC_DARK, False, True),
        ("Heading2", "28", TIELOGIC_BLUE, True, True),
        ("Heading3", "23", TIELOGIC_DARK, False, True),
        ("Title", "52", TIELOGIC_DARK, True, True),
        ("Subtitle", "26", "5A6478", True, False),
    )

    # Pass 1: font families on every rFonts element in the document.
    patched = re.sub(
        r'<w:rFonts\s+[^/]*?/>',
        lambda hit: _patch_rfonts(hit.group(0)),
        xml,
    )

    # Pass 2: per-style overrides, applied in table order.
    for style_id, size, color, caps, bold in overrides:
        patched = _override_style(
            patched, style_id, size=size, color=color, caps=caps, bold=bold
        )
    return patched
|
||||
|
||||
|
||||
def _patch_rfonts(tag: str) -> str:
    """Return a self-closing ``<w:rFonts .../>`` tag with its fonts set to Inter.

    * ``w:ascii`` and ``w:hAnsi`` are always set to Inter: an existing value
      is replaced, a missing one is appended just before ``/>``. Each
      attribute is checked on its own, so a tag that has only one of the two
      never ends up with a duplicated attribute (which would be malformed XML).
    * ``w:cs`` is set to Inter only when the tag already carries it.
    * ``w:asciiTheme`` / ``w:hAnsiTheme`` are removed, and ``w:cstheme`` is
      removed when ``w:cs`` is being set: in OOXML a theme-font attribute
      takes precedence over the explicit font name for the same slot, so
      leaving it in place would make the Inter value ineffective.
    * All other attributes (e.g. the eastAsia ones) are left untouched.
    """
    patched = tag

    # Drop the theme bindings that would override the names written below.
    for theme_attr in ("asciiTheme", "hAnsiTheme"):
        patched = re.sub(rf'\s+w:{theme_attr}="[^"]*"', "", patched)

    if re.search(r'w:cs="[^"]*"', patched):
        patched = re.sub(r'\s+w:cstheme="[^"]*"', "", patched)
        patched = re.sub(r'w:cs="[^"]*"', f'w:cs="{INTER}"', patched)

    # Replace the Latin slots that exist and remember the ones that do not.
    missing = []
    for attr in ("ascii", "hAnsi"):
        attr_pattern = rf'w:{attr}="[^"]*"'
        if re.search(attr_pattern, patched):
            patched = re.sub(attr_pattern, f'w:{attr}="{INTER}"', patched)
        else:
            missing.append(attr)

    if missing:
        extra = "".join(f' w:{attr}="{INTER}"' for attr in missing)
        # Insert before the closing "/>"; a callable keeps the replacement
        # literal regardless of its content.
        patched = re.sub(r'\s*/>\s*$', lambda _m: extra + "/>", patched, count=1)

    return patched
|
||||
|
||||
|
||||
def _place_style_rpr(body: str, rpr_block: str) -> str:
    """Put *rpr_block* into the inner XML of a ``<w:style>`` at a valid spot."""
    # An rPr already present (expanded or self-closing) is replaced in place.
    existing = re.search(r"<w:rPr\s*/>|<w:rPr>.*?</w:rPr>", body, flags=re.DOTALL)
    if existing:
        return body[:existing.start()] + rpr_block + body[existing.end():]

    # Otherwise rPr goes right after pPr (expanded or self-closing).
    ppr = re.search(r"<w:pPr\s*/>|</w:pPr>", body)
    if ppr:
        return body[:ppr.end()] + rpr_block + body[ppr.end():]

    # No pPr either: in a style element rPr must follow name/basedOn/next/...
    # and precede the table-property children, so insert before the first of
    # those, or at the very end when there are none.
    table_child = re.search(r"<w:(?:tblPr|trPr|tcPr|tblStylePr)\b", body)
    if table_child:
        cut = table_child.start()
        return body[:cut] + rpr_block + body[cut:]
    return body + rpr_block


def _override_style(xml: str, style_id: str, *, size: str, color: str, caps: bool, bold: bool) -> str:
    """Force an explicit ``<w:rPr>`` block onto the style named *style_id*.

    Every ``<w:style ... w:styleId="<style_id>" ...> ... </w:style>`` element
    found in *xml* gets a run-property block containing, in this order: Inter
    for the ascii/hAnsi/cs fonts, ``<w:b/><w:bCs/>`` when *bold*,
    ``<w:caps/>`` when *caps*, the given *color* and the given *size* (written
    to both ``w:sz`` and ``w:szCs``).

    If the style already has an rPr (``<w:rPr>...</w:rPr>`` or ``<w:rPr/>``)
    the first one is replaced wholesale; otherwise the block is inserted after
    the style's pPr, or, when there is no pPr, after the leading metadata
    children so that the element order stays schema-valid.

    Returns *xml* unchanged when no style with that id is present.
    """
    style_re = re.compile(
        r'(<w:style\s+[^>]*w:styleId="' + re.escape(style_id) + r'"[^>]*>)(.*?)(</w:style>)',
        re.DOTALL,
    )

    # The block does not depend on the match, so build it once.
    rpr_parts = [
        "<w:rPr>",
        f'<w:rFonts w:ascii="{INTER}" w:hAnsi="{INTER}" w:cs="{INTER}"/>',
    ]
    if bold:
        rpr_parts.append("<w:b/><w:bCs/>")
    if caps:
        rpr_parts.append("<w:caps/>")
    rpr_parts += [
        f'<w:color w:val="{color}"/>',
        f'<w:sz w:val="{size}"/>',
        f'<w:szCs w:val="{size}"/>',
        "</w:rPr>",
    ]
    rpr_block = "".join(rpr_parts)

    def repl(m: re.Match[str]) -> str:
        head, body, tail = m.group(1), m.group(2), m.group(3)
        return head + _place_style_rpr(body, rpr_block) + tail

    return style_re.sub(repl, xml)
|
||||
|
||||
|
||||
def patch_header_footer_xml(name: str, xml: str) -> str:
    """Replace the visible text of a header/footer part with the Tielogic tagline.

    The first ``<w:t>`` text run in *xml* receives the tagline and every
    following text run is emptied, so the tagline appears exactly once however
    many runs the part was split into. Run and paragraph formatting nodes
    around the text are kept; attributes on the ``<w:t>`` element itself
    (such as ``xml:space``) are dropped.

    Field instructions (``<w:instrText>``, ``w:instr`` attributes) are not
    touched, but a cached field result stored in a ``<w:t>`` is treated like
    any other text run.

    *name* is the archive member name. Headers and footers currently get the
    same text, so it does not influence the result; the parameter is kept for
    callers.

    Returns *xml* unchanged when it contains no text runs.
    """
    replacement = "Tielogic — Soluzioni Software Industriali"
    # Holds the text still to be placed; emptied by the first matching run.
    pending = [replacement]

    def fill(_match: re.Match[str]) -> str:
        text = pending.pop() if pending else ""
        return f"<w:t>{text}</w:t>"

    # "(?:\s[^>]*)?" restricts the match to the <w:t> element itself rather
    # than any element whose name merely starts with "t".
    return re.sub(r"<w:t(?:\s[^>]*)?>[^<]*</w:t>", fill, xml)
|
||||
|
||||
|
||||
def main() -> None:
    """Build the Tielogic reference.docx and write it to disk.

    Parses ``--output`` (default: ``DEFAULT_OUTPUT``), creates the destination
    directory if needed, fetches Pandoc's default reference document, copies
    every archive member into a new in-memory zip (patching
    ``word/styles.xml`` and any ``word/header*.xml`` / ``word/footer*.xml``
    on the way), then writes the result and prints its path and size.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    output = cli.parse_args().output.resolve()

    output.parent.mkdir(parents=True, exist_ok=True)
    reference_bytes = get_pandoc_default_reference()

    rebuilt = io.BytesIO()
    with zipfile.ZipFile(io.BytesIO(reference_bytes), "r") as zin, \
            zipfile.ZipFile(rebuilt, "w", zipfile.ZIP_DEFLATED) as zout:
        for member in zin.infolist():
            member_name = member.filename
            payload = zin.read(member_name)
            if member_name == "word/styles.xml":
                payload = patch_styles_xml(payload.decode("utf-8")).encode("utf-8")
            elif re.match(r"word/(header|footer)\d*\.xml$", member_name):
                patched_text = patch_header_footer_xml(member_name, payload.decode("utf-8"))
                payload = patched_text.encode("utf-8")
            # Passing the original ZipInfo carries the member's metadata over.
            zout.writestr(member, payload)

    output.write_bytes(rebuilt.getvalue())
    print(f"OK: {output} ({output.stat().st_size} bytes)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user