100 word limit

This commit is contained in:
Yanxin Lu
2026-02-22 14:29:01 -08:00
parent ef92820954
commit d4a4768e18
2 changed files with 30 additions and 6 deletions

View File

@@ -5,9 +5,13 @@ Recommended: run via ./run.sh, which uses `uv` to handle dependencies
automatically (no manual venv or pip install needed).
When an `ollama` key is present in config.json, each newly fetched article is
automatically summarized and the result is stored in the database. Ollama
latency provides natural rate limiting between HTTP requests; when Ollama is
not configured, a 1-second sleep is used instead.
automatically summarized and the result is stored in the database. Summaries
are truncated at the last sentence boundary within 100 words to keep them
concise. Ollama latency provides natural rate limiting between HTTP requests;
when Ollama is not configured, a 1-second sleep is used instead.
The log file (news_digest.log) is cleared at the start of each fetch cycle
and appended to during the run via run.sh.
Uses a requests.Session with automatic retries and browser-like headers to
handle transient HTTP errors (429/5xx). A configurable per-feed article cap
@@ -224,6 +228,23 @@ def get_recent_articles(conn: sqlite3.Connection, hours: int) -> list[dict]:
return [dict(r) for r in rows]
def _truncate_summary(text: str, max_words: int = 100) -> str:
"""Truncate summary at the last sentence boundary within max_words."""
words = text.split()
if len(words) <= max_words:
return text
truncated = " ".join(words[:max_words])
# Find the last sentence-ending punctuation
last_period = -1
for ch in (".", "", "!", "", "?", ""):
idx = truncated.rfind(ch)
if idx > last_period:
last_period = idx
if last_period > 0:
return truncated[: last_period + 1]
return truncated + "..."
def generate_summary(title: str, description: str | None, content: str | None, model: str, prompt: str) -> str | None:
try:
import ollama as ollama_lib
@@ -242,7 +263,8 @@ def generate_summary(title: str, description: str | None, content: str | None, m
model=model,
messages=[{"role": "user", "content": user_message}],
)
return response["message"]["content"]
summary = response["message"]["content"]
return _truncate_summary(summary)
except Exception as e:
logger.warning("Ollama error for '%s': %s", title, e)
return None