email-processor: fix concurrency bugs and several other issues

- Add fcntl file locking around read-modify-write cycles on both
  decision_history.json and pending_emails.json to prevent data
  corruption from parallel processes
- Pass --page-size 500 to himalaya envelope list to avoid silently
  missing emails beyond the default first page
- Use ollama.Client(host=...) so the config.json host setting is
  actually respected
- Fall back to sender-only matching in compute_confidence when the
  LLM returns no valid taxonomy tags, instead of always returning 50%
- Fix _format_address to return an empty string instead of the
  literal "None" or "[]" for missing address fields
This commit is contained in:
Yanxin Lu
2026-03-20 18:58:13 -07:00
parent 4e3c6acab6
commit 71672b31ca
3 changed files with 78 additions and 59 deletions

View File

@@ -187,12 +187,15 @@ def classify_email(email_data, config):
import ollama
prompt = _build_prompt(email_data, config)
model = config.get("ollama", {}).get("model", "kamekichi128/qwen3-4b-instruct-2507:latest")
ollama_config = config.get("ollama", {})
model = ollama_config.get("model", "kamekichi128/qwen3-4b-instruct-2507:latest")
host = ollama_config.get("host")
client = ollama.Client(host=host) if host else ollama.Client()
start_time = time.time()
try:
# Low temperature for consistent classification
response = ollama.generate(model=model, prompt=prompt, options={"temperature": 0.1})
response = client.generate(model=model, prompt=prompt, options={"temperature": 0.1})
output = response["response"]
action, tags, summary, reason = _parse_response(output)
except Exception as e: