# Source: CheddahBot/cheddahbot/docx_export.py (78 lines, 2.1 KiB, Python)

"""Convert plain-text press releases to formatted .docx files."""
from __future__ import annotations
import logging
from pathlib import Path
from docx import Document
from docx.shared import Pt
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Standard PR format: headline and body use the same typeface and differ in
# point size (the headline run is additionally set bold where it is created).
_HEADLINE_FONT = "Times New Roman"
_HEADLINE_SIZE = Pt(16)  # headline run size, in points
_BODY_FONT = "Times New Roman"
_BODY_SIZE = Pt(12)  # body run size, in points; also used for the "Normal" style
def text_to_docx(text: str, output_path: Path) -> Path:
    """Convert a plain-text press release into a formatted .docx file.

    Layout:
    - First non-blank line → headline (bold, 16pt Times New Roman)
    - Remaining lines → body paragraphs (12pt Times New Roman)
    - Blank lines in the source start new paragraphs; consecutive non-blank
      lines are stripped and joined with a single space into one paragraph.

    If ``text`` is empty or whitespace-only, a document with no paragraphs
    added is saved. In every case the parent directory of ``output_path`` is
    created first if it does not exist.

    Returns the output path.
    """
    doc = Document()

    # Set default font for the document
    normal_style = doc.styles["Normal"]
    normal_style.font.name = _BODY_FONT
    normal_style.font.size = _BODY_SIZE

    # strip() removes leading/trailing blank lines, so when any lines remain
    # the first one is guaranteed to be non-blank.
    lines = text.strip().splitlines()
    if lines:
        # First non-blank line is the headline
        h_run = doc.add_paragraph().add_run(lines[0].strip())
        h_run.bold = True
        h_run.font.name = _HEADLINE_FONT
        h_run.font.size = _HEADLINE_SIZE

        # Group remaining lines into paragraphs (split on blank lines)
        current_para_lines: list[str] = []
        for line in lines[1:]:
            stripped = line.strip()
            if stripped:
                current_para_lines.append(stripped)
            elif current_para_lines:
                _add_body_paragraph(doc, " ".join(current_para_lines))
                current_para_lines = []
        # Flush any remaining lines
        if current_para_lines:
            _add_body_paragraph(doc, " ".join(current_para_lines))

    # Single save path for both empty and non-empty input: the directory must
    # exist before saving, otherwise the save fails for a fresh output folder.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(output_path))
    log.info("Saved .docx: %s", output_path)
    return output_path
def _add_body_paragraph(doc: Document, text: str) -> None:
    """Append *text* to *doc* as a single paragraph in the body font and size."""
    # One run holds the whole paragraph text; its font carries the formatting.
    body_font = doc.add_paragraph().add_run(text).font
    body_font.name = _BODY_FONT
    body_font.size = _BODY_SIZE