diff --git a/scripts/email_processor/README.md b/scripts/email_processor/README.md index 9cc5f42..3f21dc8 100644 --- a/scripts/email_processor/README.md +++ b/scripts/email_processor/README.md @@ -31,7 +31,7 @@ The system separates **classification** (what the LLM does) from **confidence** 1. **Cron runs `scan`.** For each email, the LLM suggests an action and assigns tags from a fixed taxonomy. Since there's no history yet, `compute_confidence` returns 50% (below the 85% threshold), so everything gets queued. -2. **You run `review list`.** It prints what's pending: +2. **You run `review list`.** It prints what's pending. Item numbers are stable within a scan cycle — they don't shift when earlier items are resolved: ``` 1. [msg_f1d43ea3] Subject: New jobs matching your profile From: LinkedIn @@ -43,10 +43,10 @@ The system separates **classification** (what the LLM does) from **confidence** Suggested: archive (50%) ``` -3. **You act on them.** Either individually or in bulk: +3. **You act on them.** Either individually or in bulk. Numbers stay stable — after deleting item 1, item 2 is still 2: ```bash ./email-processor.sh review 1 delete # agree with suggestion - ./email-processor.sh review 2 archive # agree with suggestion + ./email-processor.sh review 2 archive # still #2, not renumbered ./email-processor.sh review accept # accept all suggestions at once ``` Each command executes via himalaya and appends to `decision_history.json` with tags. 
@@ -88,7 +88,8 @@ chmod +x email-processor.sh # --- Review --- ./email-processor.sh review list # show pending queue -./email-processor.sh review 1 delete # delete email #1 +./email-processor.sh review 1 delete # delete item #1 +./email-processor.sh review 3 archive # #3 is still #3 even after #1 was deleted ./email-processor.sh review msg_f1d43ea3 archive # archive by ID ./email-processor.sh review all delete # delete all pending ./email-processor.sh review accept # accept all suggestions @@ -114,13 +115,13 @@ Or call Python directly: `python main.py scan --dry-run` The LLM assigns 3-5 tags from this fixed list to each email: ``` -receipt, invoice, payment, billing, shipping, delivery, -promotion, discount, marketing, newsletter, notification, -security, social, reminder, confirmation, update, alert, +receipt, billing, shipping, +promotion, newsletter, security, social, +reminder, confirmation, alert, personal, account, subscription, travel ``` -Tags serve one purpose: making signature matching work for confidence computation. They need to be specific enough to distinguish different email types from the same sender that you'd treat differently (e.g., `[account, security]` for a password reset vs `[promotion, marketing]` for a promo, both from the same service). +Tags serve one purpose: making signature matching work for confidence computation. They need to be specific enough to distinguish different email types from the same sender that you'd treat differently (e.g., `[account, security]` for a password reset vs `[promotion]` for a promo, both from the same service). ### Refining the Tag Taxonomy @@ -303,6 +304,10 @@ Tags are defined in `classifier.py` as `TAG_TAXONOMY` — a manually curated lis The `keep` action is a deliberate no-op — it leaves the email unread in the inbox, meaning it needs human attention. This is distinct from `mark_read`, which dismisses low-priority emails without moving them. 
+### Stable item numbers during review + +Each pending item gets a `scan_index` assigned sequentially during `scan`. These numbers are stable within a scan cycle — resolving item 1 doesn't renumber item 2 to 1. This matters when an agent (like OpenClaw) issues multiple `review <number> <action>` commands in sequence: without stable indices, the queue renumbers after each action, causing later commands to target the wrong emails. Indices restart at 1 on a new `scan` only if nothing is still pending; otherwise new items continue from the highest pending number (done items from the previous cycle are cleared at scan start). + ### Fail-safe classification If the LLM call fails (Ollama down, model not loaded, timeout), the classifier returns `action="keep"` with empty tags. Empty tags produce 50% confidence (below threshold), so the email gets queued for manual review rather than being auto-acted upon. The system never auto-trashes an email it couldn't classify. diff --git a/scripts/email_processor/classifier.py b/scripts/email_processor/classifier.py index 8abde77..d9de378 100644 --- a/scripts/email_processor/classifier.py +++ b/scripts/email_processor/classifier.py @@ -28,11 +28,11 @@ import decision_store LOGS_DIR = Path(__file__).parent / "logs" TAG_TAXONOMY = [ - "receipt", "invoice", "payment", "billing", - "shipping", "delivery", - "promotion", "discount", "marketing", "newsletter", - "notification", "security", "social", - "reminder", "confirmation", "update", "alert", + "receipt", "billing", + "shipping", + "promotion", "newsletter", + "security", "social", + "reminder", "confirmation", "alert", "personal", "account", "subscription", "travel", ] diff --git a/scripts/email_processor/main.py b/scripts/email_processor/main.py index 30fa977..afaf45d 100644 --- a/scripts/email_processor/main.py +++ b/scripts/email_processor/main.py @@ -237,6 +237,9 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t Stores the classifier's suggestion, computed confidence, and tags alongside the email metadata so the user can see what the model thought. 
+ Each item gets a stable scan_index (assigned sequentially within a scan + cycle) so that review commands can reference items by number without + indices shifting after earlier items are resolved. """ pending = load_pending() @@ -245,6 +248,14 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t key = f"{eid}_{email_data['subject']}" msg_id = f"msg_{hashlib.md5(key.encode()).hexdigest()[:8]}" + # Assign the next scan_index: max of existing pending items + 1 + existing_indices = [ + v.get("scan_index", 0) + for v in pending.values() + if v.get("status") == "pending" + ] + next_index = max(existing_indices, default=0) + 1 + pending[msg_id] = { "envelope_id": eid, "subject": email_data["subject"], @@ -258,6 +269,7 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t "email_date": email_data.get("date", ""), "status": "pending", "found_at": datetime.now().isoformat(), + "scan_index": next_index, } save_pending(pending) return msg_id @@ -298,6 +310,13 @@ def cmd_scan(config, recent=None, dry_run=False): print(f"Email Processor - {mode}") print("=" * 50) + # Clear done items from previous scan cycles so new items get + # scan_index values starting from 1. 
+ pending = load_pending() + cleared = {k: v for k, v in pending.items() if v.get("status") != "done"} + if len(cleared) < len(pending): + save_pending(cleared) + LOGS_DIR.mkdir(exist_ok=True) log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log" @@ -412,10 +431,10 @@ def cmd_scan(config, recent=None, dry_run=False): # --------------------------------------------------------------------------- def _get_pending_items(): - """Return only pending (not done) items, sorted by found_at.""" + """Return only pending (not done) items, sorted by scan_index.""" pending = load_pending() items = {k: v for k, v in pending.items() if v.get("status") == "pending"} - sorted_items = sorted(items.items(), key=lambda x: x[1].get("found_at", "")) + sorted_items = sorted(items.items(), key=lambda x: x[1].get("scan_index", 0)) return sorted_items @@ -434,12 +453,13 @@ def cmd_review_list(): print(f"Pending emails: {len(sorted_items)}") print("=" * 60) - for i, (msg_id, data) in enumerate(sorted_items, 1): + for msg_id, data in sorted_items: + num = data.get("scan_index", "?") suggested = data.get("suggested_action", "?") conf = data.get("confidence", "?") tags = data.get("tags", []) tags_str = ", ".join(tags) if tags else "(none)" - print(f"\n {i}. [{msg_id}]") + print(f"\n {num}. [{msg_id}]") print(f" Subject: {data.get('subject', 'N/A')[:55]}") print(f" From: {data.get('sender', 'N/A')[:55]}") print(f" To: {data.get('recipient', 'N/A')[:40]}") @@ -556,18 +576,21 @@ def cmd_review_accept(): def _resolve_target(selector, sorted_items): - """Resolve a selector (number or msg_id) to a (msg_id, data) tuple. + """Resolve a selector (scan_index number or msg_id) to a (msg_id, data) tuple. + When given a number, looks up the pending item whose scan_index matches + (stable across deletions). When given a string, looks up by msg_id. Returns None and prints an error if the selector is invalid. 
""" - # Try as 1-based index + # Try as scan_index number try: - idx = int(selector) - 1 - if 0 <= idx < len(sorted_items): - return sorted_items[idx] - else: - print(f"Invalid number. Range: 1-{len(sorted_items)}") - return None + idx = int(selector) + for msg_id, data in sorted_items: + if data.get("scan_index") == idx: + return (msg_id, data) + valid = [str(d.get("scan_index")) for _, d in sorted_items] + print(f"No item with number {idx}. Valid numbers: {', '.join(valid)}") + return None except ValueError: pass