ArcaSuite/scripts/build-reference-docx.py

#!/usr/bin/env python3
"""Costruisce themes/tielogic-reference.docx applicando l'identità Tielogic
al reference.docx di default di Pandoc.

Pandoc usa il reference.docx come template di stili (Heading1, Heading2,
Normal, Table, ...) per l'output `-t docx`. Questo script:

  1. estrae il reference.docx di default di Pandoc
  2. modifica word/styles.xml: font Inter, colori Tielogic blu (#2767d8),
     dimensioni e attributi paragrafo coerenti col theme CSS
  3. modifica word/header*.xml e word/footer*.xml con testo Tielogic generico
  4. riscrive lo zip in themes/tielogic-reference.docx
"""

from __future__ import annotations

import argparse
import io
import re
import shutil
import subprocess
import sys
import zipfile
from pathlib import Path

# Parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Destination used when --output is not given on the command line.
DEFAULT_OUTPUT = REPO_ROOT / "themes" / "tielogic-reference.docx"

# Brand colours as hex RGB strings without a leading '#'; they are written
# verbatim into <w:color w:val="..."/> elements.
TIELOGIC_BLUE = "2767D8"
TIELOGIC_DARK = "0D1B2A"
# Font family written into w:rFonts attributes.
INTER = "Inter"
# Monospace family; not referenced by any code visible in this file.
INTER_MONO = "JetBrains Mono"


def get_pandoc_default_reference() -> bytes:
    """Return the raw bytes of Pandoc's built-in ``reference.docx``.

    The document is obtained by running
    ``pandoc --print-default-data-file reference.docx`` and capturing its
    standard output.

    Raises:
        SystemExit: if no ``pandoc`` executable is found on ``PATH``.
        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
    """
    pandoc_path = shutil.which("pandoc")
    if pandoc_path is None:
        sys.exit("pandoc not found in PATH; install pandoc and rerun")

    command = ["pandoc", "--print-default-data-file", "reference.docx"]
    # stdout carries the binary docx; check=True turns a failed run into an
    # exception instead of silently returning a truncated/empty payload.
    completed = subprocess.run(command, capture_output=True, check=True)
    return completed.stdout


def patch_styles_xml(xml: str) -> str:
    """Return *xml* (the text of ``word/styles.xml``) with Tielogic styling.

    Two regex-based passes are applied, so no python-docx dependency is
    needed:

    1. every self-closing ``<w:rFonts .../>`` element is rewritten by
       ``_patch_rfonts``;
    2. the Heading1, Heading2, Heading3, Title and Subtitle styles each get
       an explicit run-property override through ``_override_style``.

    NOTE(review): pass 1 rewrites *every* rFonts element it matches, which
    presumably includes those of code/verbatim styles — confirm that is
    intended.
    """
    # Pass 1: font override on all matched rFonts elements.
    rfonts_tag = re.compile(r'<w:rFonts\s+[^/]*?/>')
    patched = rfonts_tag.sub(lambda hit: _patch_rfonts(hit.group(0)), xml)

    # Pass 2: per-style size / colour / caps / bold overrides.
    style_overrides = (
        ("Heading1", dict(size="44", color=TIELOGIC_DARK, caps=False, bold=True)),
        ("Heading2", dict(size="28", color=TIELOGIC_BLUE, caps=True, bold=True)),
        ("Heading3", dict(size="23", color=TIELOGIC_DARK, caps=False, bold=True)),
        ("Title", dict(size="52", color=TIELOGIC_DARK, caps=True, bold=True)),
        ("Subtitle", dict(size="26", color="5A6478", caps=True, bold=False)),
    )
    for style_id, overrides in style_overrides:
        patched = _override_style(patched, style_id, **overrides)

    return patched


def _patch_rfonts(tag: str) -> str:
    """Replace ascii/hAnsi font names with Inter, preserve other attrs."""
    new = re.sub(r'w:ascii="[^"]*"', f'w:ascii="{INTER}"', tag)
    new = re.sub(r'w:hAnsi="[^"]*"', f'w:hAnsi="{INTER}"', new)
    new = re.sub(r'w:cs="[^"]*"', f'w:cs="{INTER}"', new)
    if 'w:ascii=' not in new:
        new = new.replace('/>', f' w:ascii="{INTER}" w:hAnsi="{INTER}"/>')
    return new


def _override_style(xml: str, style_id: str, *, size: str, color: str, caps: bool, bold: bool) -> str:
    """Inject explicit rPr override for a named style, replacing any existing
    color/sz/caps/font directive within that style's <w:rPr> block."""
    pattern = re.compile(
        r'(<w:style\s+[^>]*w:styleId="' + re.escape(style_id) + r'"[^>]*>)(.*?)(</w:style>)',
        re.DOTALL,
    )

    def repl(m: re.Match[str]) -> str:
        head, body, tail = m.group(1), m.group(2), m.group(3)

        rpr_block = (
            "<w:rPr>"
            f'<w:rFonts w:ascii="{INTER}" w:hAnsi="{INTER}" w:cs="{INTER}"/>'
            + (f'<w:b/><w:bCs/>' if bold else "")
            + (f'<w:caps/>' if caps else "")
            + f'<w:color w:val="{color}"/>'
            f'<w:sz w:val="{size}"/>'
            f'<w:szCs w:val="{size}"/>'
            "</w:rPr>"
        )

        if "<w:rPr>" in body:
            body = re.sub(r"<w:rPr>.*?</w:rPr>", rpr_block, body, count=1, flags=re.DOTALL)
        else:
            # insert after pPr if present, else right after style head
            if "<w:pPr>" in body:
                body = re.sub(r"(</w:pPr>)", r"\1" + rpr_block, body, count=1)
            else:
                body = rpr_block + body
        return head + body + tail

    return pattern.sub(repl, xml)


def patch_header_footer_xml(name: str, xml: str) -> str:
    """Put the generic Tielogic running text into a header/footer part.

    The first ``<w:t>`` text node of *xml* receives the Tielogic text and
    every following ``<w:t>`` node is emptied, so a sentence that the source
    document split across several runs does not end up showing the text
    once per run. Run/paragraph formatting nodes are left in place, and
    field instructions (``<w:instrText>``) are not matched, so page-number
    fields keep their field code.

    Args:
        name: archive member name (e.g. ``word/header1.xml``). Headers and
            footers currently receive the same text, so the value does not
            influence the result; it is kept for interface compatibility.
        xml: text of the header/footer part.

    Returns:
        The patched XML; unchanged when it contains no ``<w:t>`` node.
    """
    replacement = "Tielogic — Soluzioni Software Industriali"
    first_seen = False

    def repl(_m: re.Match) -> str:
        nonlocal first_seen
        if first_seen:
            return "<w:t></w:t>"
        first_seen = True
        return f"<w:t>{replacement}</w:t>"

    # `(?:\s[^>]*)?` restricts the match to the w:t element itself (with or
    # without attributes) instead of any element whose name starts with "w:t".
    return re.sub(r"<w:t(?:\s[^>]*)?>[^<]*</w:t>", repl, xml)


def main() -> None:
    """Command-line entry point: build the Tielogic reference docx.

    Reads the optional ``--output`` path (default ``DEFAULT_OUTPUT``),
    fetches Pandoc's default reference.docx, copies every archive member
    into a new in-memory zip — patching ``word/styles.xml`` and any
    ``word/header*.xml`` / ``word/footer*.xml`` on the way — and writes the
    result to the output path, creating its parent directory if needed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    options = cli.parse_args()
    target = options.output.resolve()

    target.parent.mkdir(parents=True, exist_ok=True)
    reference = io.BytesIO(get_pandoc_default_reference())

    header_footer = re.compile(r"word/(header|footer)\d*\.xml$")
    repacked = io.BytesIO()
    with zipfile.ZipFile(reference, "r") as archive_in, \
            zipfile.ZipFile(repacked, "w", zipfile.ZIP_DEFLATED) as archive_out:
        for member in archive_in.infolist():
            member_name = member.filename
            payload = archive_in.read(member_name)
            if member_name == "word/styles.xml":
                styled = patch_styles_xml(payload.decode("utf-8"))
                payload = styled.encode("utf-8")
            elif header_footer.match(member_name):
                branded = patch_header_footer_xml(member_name, payload.decode("utf-8"))
                payload = branded.encode("utf-8")
            # Passing the original ZipInfo keeps the member's name and metadata.
            archive_out.writestr(member, payload)

    target.write_bytes(repacked.getvalue())
    print(f"OK: {target} ({target.stat().st_size} bytes)")


# Run the build only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()