c783fff040
Aggiunge la generazione di documenti Word coerenti con l'identità
visiva Tielogic, in parallelo al render PDF già esistente. Il flusso
completo è ora `bullet input → Markdown formattato → PDF e/o DOCX`
in una singola chiamata MCP.
- docx_renderer.py: subprocess Pandoc che legge il Markdown da stdin,
emette il binario .docx su stdout. Strippa il YAML frontmatter e i
blocchi `<style>` (presenti per il PDF, irrilevanti in DOCX) prima
della conversione.
- mcp_tools.py: nuovo tool `document_to_docx(markdown)` che ritorna
`{docx_b64, size_bytes}`; `document_generate` esteso con
`output_format ∈ {md, pdf, docx, all}`. La firma di
`build_mcp_server` accetta ora `docx_reference_path` opzionale.
- config.py: `Settings.docx_reference_path` (default
/app/themes/tielogic-reference.docx).
- main.py: passa la nuova setting a `build_mcp_server`.
- mcp-docugen.Dockerfile: installazione di pandoc accanto alle libs
Chromium.
- themes/tielogic-reference.docx: reference Word (10 KB) con stili
Tielogic — heading colors blu/dark, font Inter, dimensioni allineate
al CSS web. Generato da `scripts/build-reference-docx.py` che parte
dal reference.docx di default di Pandoc e riscrive `word/styles.xml`
con regex sui blocchi `<w:style>`. Pandoc lo applica in automatico
agli output DOCX prodotti dal servizio.
- 9 nuovi test unit per docx_renderer (strip frontmatter/style,
preprocess combinato, error empty input, smoke skippato in
ambienti senza Pandoc): 92 test totali.
Smoke E2E via MCP: una sola chiamata `document_generate` con
`output_format=all` produce MD (14 KB), PDF (137 KB, 4 pagine A4) e
DOCX (12.7 KB) coerenti tra loro.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
165 lines
6.0 KiB
Python
Executable File
165 lines
6.0 KiB
Python
Executable File
#!/usr/bin/env python3
"""Build themes/tielogic-reference.docx by applying the Tielogic identity
to Pandoc's default reference.docx.

Pandoc uses reference.docx as a style template (Heading1, Heading2,
Normal, Table, ...) for `-t docx` output. This script:

  1. extracts Pandoc's default reference.docx
  2. edits word/styles.xml: Inter font, Tielogic blue (#2767d8),
     sizes and paragraph attributes consistent with the CSS theme
  3. edits word/header*.xml and word/footer*.xml with generic Tielogic text
  4. rewrites the zip to themes/tielogic-reference.docx
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import io
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
# Two levels up from this file's resolved path (the script's parent's parent).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Where the generated reference document is written unless --output is given.
DEFAULT_OUTPUT = REPO_ROOT / "themes" / "tielogic-reference.docx"

# Colours as hex RGB strings without a leading '#'; they are interpolated
# verbatim into <w:color w:val="..."/>.
TIELOGIC_BLUE = "2767D8"
TIELOGIC_DARK = "0D1B2A"
# Typeface name written into the rFonts attributes.
INTER = "Inter"
# NOTE(review): not referenced anywhere in the visible part of this script.
INTER_MONO = "JetBrains Mono"
|
|
|
|
|
|
def get_pandoc_default_reference() -> bytes:
    """Return the bytes of the reference.docx bundled with Pandoc.

    Runs ``pandoc --print-default-data-file reference.docx`` and returns
    whatever the process wrote to stdout.

    Raises:
        SystemExit: if no ``pandoc`` executable is found on PATH.
        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
    """
    pandoc_missing = shutil.which("pandoc") is None
    if pandoc_missing:
        sys.exit("pandoc not found in PATH; install pandoc and rerun")

    command = ["pandoc", "--print-default-data-file", "reference.docx"]
    completed = subprocess.run(command, capture_output=True, check=True)
    return completed.stdout
|
|
|
|
|
|
def patch_styles_xml(xml: str) -> str:
    """Return *xml* (the text of word/styles.xml) with Tielogic styling applied.

    Two passes are made, both with regular expressions so that no XML or
    python-docx dependency is needed:

    1. every self-closing ``<w:rFonts .../>`` tag is rewritten by
       ``_patch_rfonts``;
    2. the run properties of the Heading1, Heading2, Heading3, Title and
       Subtitle styles are overridden by ``_override_style``.
    """

    def retag(match: re.Match[str]) -> str:
        return _patch_rfonts(match.group(0))

    # Pass 1: font names on every rFonts tag in the document.
    patched = re.sub(r'<w:rFonts\s+[^/]*?/>', retag, xml)

    # Pass 2: (style id, w:sz value, colour, all-caps, bold), applied in order.
    style_specs = (
        ("Heading1", "44", TIELOGIC_DARK, False, True),
        ("Heading2", "28", TIELOGIC_BLUE, True, True),
        ("Heading3", "23", TIELOGIC_DARK, False, True),
        ("Title", "52", TIELOGIC_DARK, True, True),
        ("Subtitle", "26", "5A6478", True, False),
    )
    for style_id, size, color, caps, bold in style_specs:
        patched = _override_style(
            patched, style_id, size=size, color=color, caps=caps, bold=bold
        )

    return patched
|
|
|
|
|
|
def _patch_rfonts(tag: str) -> str:
    """Return a single ``<w:rFonts .../>`` tag rewritten to use Inter.

    * ``w:ascii`` and ``w:hAnsi`` are always set to Inter: an existing value
      is overwritten, a missing attribute is appended before the closing
      ``/>``. Each attribute is checked on its own, so a tag that carries
      only one of the two never ends up with a duplicated attribute.
    * ``w:cs`` is set to Inter only when the tag already declares it.
    * For every attribute set here, the matching theme attribute
      (``w:asciiTheme``, ``w:hAnsiTheme``, ``w:cstheme``) is removed. Per
      ECMA-376 a theme attribute takes precedence over the explicit font
      name, so leaving it in place would make the override ineffective.
    * All other attributes (e.g. ``w:eastAsia``) are left untouched.
    """
    new = tag
    for attr, theme_attr, required in (
        ("ascii", "asciiTheme", True),
        ("hAnsi", "hAnsiTheme", True),
        ("cs", "cstheme", False),
    ):
        new, hits = re.subn(rf'w:{attr}="[^"]*"', f'w:{attr}="{INTER}"', new)
        if hits == 0 and required:
            # Attribute absent: append it just before the tag terminator.
            new = new.replace("/>", f' w:{attr}="{INTER}"/>', 1)
            hits = 1
        if hits:
            # Drop the theme reference that would otherwise win over the
            # explicit name we have just written.
            new = re.sub(rf'\s+w:{theme_attr}="[^"]*"', "", new)
    return new
|
|
|
|
|
|
def _override_style(xml: str, style_id: str, *, size: str, color: str, caps: bool, bold: bool) -> str:
    """Force explicit run properties onto the style with the given styleId.

    Every ``<w:style ... w:styleId="<style_id>" ...>...</w:style>`` element in
    *xml* gets a fresh ``<w:rPr>`` block that sets the Inter font, optional
    bold and all-caps, the colour and the size.

    Args:
        xml: text of word/styles.xml.
        style_id: value of the ``w:styleId`` attribute to look for.
        size: value written to ``w:sz`` and ``w:szCs``.
        color: value written to ``w:color`` (hex RGB without '#').
        caps: emit ``<w:caps/>`` when true.
        bold: emit ``<w:b/><w:bCs/>`` when true.

    Returns:
        The patched XML; unchanged if no style with that id exists.

    Placement of the new block inside the style:
      * an existing ``<w:rPr>...</w:rPr>`` (or empty ``<w:rPr/>``) is
        replaced, so its previous content is discarded;
      * otherwise it goes right after ``</w:pPr>`` when a ``<w:pPr>`` is
        present;
      * otherwise it is placed after the style's existing children (before
        any table-property element), never before ``<w:name>``: the schema
        defines the children of a style as an ordered sequence in which
        ``rPr`` comes after the name/metadata elements and ``pPr``.
    """
    style_re = re.compile(
        r'(<w:style\s+[^>]*w:styleId="' + re.escape(style_id) + r'"[^>]*>)(.*?)(</w:style>)',
        re.DOTALL,
    )

    rpr_block = "".join(
        (
            "<w:rPr>",
            f'<w:rFonts w:ascii="{INTER}" w:hAnsi="{INTER}" w:cs="{INTER}"/>',
            "<w:b/><w:bCs/>" if bold else "",
            "<w:caps/>" if caps else "",
            f'<w:color w:val="{color}"/>',
            f'<w:sz w:val="{size}"/>',
            f'<w:szCs w:val="{size}"/>',
            "</w:rPr>",
        )
    )

    def repl(m: re.Match[str]) -> str:
        head, body, tail = m.group(1), m.group(2), m.group(3)

        if "<w:rPr>" in body:
            # Callable replacement: the block is inserted literally, with no
            # backslash / group-reference interpretation.
            body = re.sub(
                r"<w:rPr>.*?</w:rPr>",
                lambda _m: rpr_block,
                body,
                count=1,
                flags=re.DOTALL,
            )
        elif "<w:rPr/>" in body:
            # Empty run-properties element: replace it rather than adding a
            # second rPr next to it.
            body = body.replace("<w:rPr/>", rpr_block, 1)
        elif "<w:pPr>" in body:
            body = body.replace("</w:pPr>", "</w:pPr>" + rpr_block, 1)
        else:
            # Keep schema order: after name/basedOn/... and before any
            # table-related property element.
            table_part = re.search(r"<w:(?:tblPr|trPr|tcPr|tblStylePr)\b", body)
            cut = table_part.start() if table_part else len(body)
            body = body[:cut] + rpr_block + body[cut:]
        return head + body + tail

    return style_re.sub(repl, xml)
|
|
|
|
|
|
def patch_header_footer_xml(name: str, xml: str) -> str:
    """Replace the visible text of a header/footer part with Tielogic text.

    Every ``<w:t ...>text</w:t>`` element in *xml* is rewritten to
    ``<w:t>LABEL</w:t>``; attributes on the opening tag are dropped and all
    other markup is left as it is. No field codes are added.

    Args:
        name: archive member name; selects the header label when it contains
            "header", the footer label otherwise.
        xml: text of the header/footer part.

    Returns:
        The patched XML text.
    """
    # Header and footer currently share the same wording; they are kept as
    # two names so either can be changed independently.
    header_text = "Tielogic — Soluzioni Software Industriali"
    footer_text = "Tielogic — Soluzioni Software Industriali"
    label = header_text if "header" in name else footer_text

    text_run = re.compile(r"<w:t[^>]*>[^<]*</w:t>")
    return text_run.sub(f"<w:t>{label}</w:t>", xml)
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: build the Tielogic reference.docx.

    Accepts an optional ``--output`` path (default ``DEFAULT_OUTPUT``),
    fetches Pandoc's default reference.docx, copies every archive member
    into a new in-memory zip while patching ``word/styles.xml`` and any
    ``word/header*.xml`` / ``word/footer*.xml`` part, writes the result to
    the output path and prints a one-line summary.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    output = cli.parse_args().output.resolve()

    output.parent.mkdir(parents=True, exist_ok=True)
    reference = get_pandoc_default_reference()

    patched = io.BytesIO()
    with zipfile.ZipFile(io.BytesIO(reference), "r") as source, zipfile.ZipFile(
        patched, "w", zipfile.ZIP_DEFLATED
    ) as target:
        for entry in source.infolist():
            payload = source.read(entry.filename)
            if entry.filename == "word/styles.xml":
                payload = patch_styles_xml(payload.decode("utf-8")).encode("utf-8")
            elif re.match(r"word/(header|footer)\d*\.xml$", entry.filename):
                payload = patch_header_footer_xml(
                    entry.filename, payload.decode("utf-8")
                ).encode("utf-8")
            # Passing the original ZipInfo carries the member's metadata
            # over to the new archive.
            target.writestr(entry, payload)

    output.write_bytes(patched.getvalue())
    print(f"OK: {output} ({output.stat().st_size} bytes)")
|
|
|
|
|
|
# Run the builder only when this file is executed as a script.
if __name__ == "__main__":
    main()
|