78 lines
2.1 KiB
Python
78 lines
2.1 KiB
Python
"""Convert plain-text press releases to formatted .docx files."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from docx import Document
|
|
from docx.shared import Pt
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# Standard press-release formatting: font family and point size for headline and body
|
|
_HEADLINE_FONT = "Times New Roman"
|
|
_HEADLINE_SIZE = Pt(16)
|
|
_BODY_FONT = "Times New Roman"
|
|
_BODY_SIZE = Pt(12)
|
|
|
|
|
|
def text_to_docx(text: str, output_path: Path) -> Path:
    """Convert a plain-text press release into a formatted .docx file.

    Layout:
      - First non-blank line → headline (bold, 16pt Times New Roman)
      - Remaining lines → body paragraphs (12pt Times New Roman)
      - Blank lines in the source start new paragraphs.

    Consecutive non-blank lines are joined with a single space into one
    paragraph. Empty or whitespace-only ``text`` produces a document that
    contains no paragraphs.

    Missing parent directories of ``output_path`` are created before
    saving, for empty and non-empty input alike.

    Args:
        text: The raw press-release text.
        output_path: Destination of the .docx file.

    Returns the output path.
    """
    doc = Document()

    # Set default font for the document
    style = doc.styles["Normal"]
    style.font.name = _BODY_FONT
    style.font.size = _BODY_SIZE

    # Stripping first guarantees that lines[0], when present, is non-blank.
    lines = text.strip().splitlines()

    if lines:
        # First non-blank line is the headline
        headline = lines[0].strip()
        h_run = doc.add_paragraph().add_run(headline)
        h_run.bold = True
        h_run.font.name = _HEADLINE_FONT
        h_run.font.size = _HEADLINE_SIZE

        # Group remaining lines into paragraphs (split on blank lines)
        current_para_lines: list[str] = []
        for line in lines[1:]:
            stripped = line.strip()
            if stripped:
                current_para_lines.append(stripped)
            elif current_para_lines:
                _add_body_paragraph(doc, " ".join(current_para_lines))
                current_para_lines = []

        # Flush any remaining lines
        if current_para_lines:
            _add_body_paragraph(doc, " ".join(current_para_lines))

    # Single save path for both empty and non-empty input, so the parent
    # directory is always created before writing.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    doc.save(str(output_path))
    log.info("Saved .docx: %s", output_path)
    return output_path
|
|
|
|
|
|
def _add_body_paragraph(doc: Document, text: str) -> None:
    """Append ``text`` to ``doc`` as one body paragraph.

    The text is placed in a single run of a new paragraph, and that run's
    font name and size are set explicitly to the module's body-font
    constants.
    """
    body_run = doc.add_paragraph().add_run(text)
    body_font = body_run.font
    body_font.name = _BODY_FONT
    body_font.size = _BODY_SIZE
|