"""Convert plain-text press releases to formatted .docx files.""" from __future__ import annotations import logging from pathlib import Path from docx import Document from docx.shared import Pt log = logging.getLogger(__name__) # Standard PR format _HEADLINE_FONT = "Times New Roman" _HEADLINE_SIZE = Pt(16) _BODY_FONT = "Times New Roman" _BODY_SIZE = Pt(12) def text_to_docx(text: str, output_path: Path) -> Path: """Convert a plain-text press release into a formatted .docx file. Layout: - First non-blank line → headline (bold, 16pt Times New Roman) - Remaining lines → body paragraphs (12pt Times New Roman) - Blank lines in the source start new paragraphs. Returns the output path. """ doc = Document() # Set default font for the document style = doc.styles["Normal"] style.font.name = _BODY_FONT style.font.size = _BODY_SIZE lines = text.strip().splitlines() if not lines: doc.save(str(output_path)) return output_path # First non-blank line is the headline headline = lines[0].strip() h_para = doc.add_paragraph() h_run = h_para.add_run(headline) h_run.bold = True h_run.font.name = _HEADLINE_FONT h_run.font.size = _HEADLINE_SIZE # Group remaining lines into paragraphs (split on blank lines) body_lines = lines[1:] current_para_lines: list[str] = [] for line in body_lines: if line.strip() == "": if current_para_lines: _add_body_paragraph(doc, " ".join(current_para_lines)) current_para_lines = [] else: current_para_lines.append(line.strip()) # Flush any remaining lines if current_para_lines: _add_body_paragraph(doc, " ".join(current_para_lines)) output_path.parent.mkdir(parents=True, exist_ok=True) doc.save(str(output_path)) log.info("Saved .docx: %s", output_path) return output_path def _add_body_paragraph(doc: Document, text: str) -> None: """Add a body paragraph with standard PR formatting.""" para = doc.add_paragraph() run = para.add_run(text) run.font.name = _BODY_FONT run.font.size = _BODY_SIZE