From 361e983b0f2bb1d9fae612ac8f714266680c9e96 Mon Sep 17 00:00:00 2001
From: Yanxin Lu <ylu@meta.com>
Date: Thu, 5 Mar 2026 15:02:49 -0800
Subject: [PATCH] Stable review indices and deduplicate tag taxonomy

Review items now get a stable scan_index assigned during scan, so
sequential review commands don't target wrong emails after earlier
items are resolved. Each new scan clears resolved items and numbers
new ones after the highest index still pending (from 1 if none remain).

Deduplicate tag taxonomy from 21 to 14 tags: drop invoice/payment
(covered by billing), delivery (covered by shipping), discount/marketing
(covered by promotion), and generic notification/update tags.
---
 scripts/email_processor/README.md     | 21 +++++++-----
 scripts/email_processor/classifier.py | 10 +++---
 scripts/email_processor/main.py       | 47 ++++++++++++++++++++-------
 3 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/scripts/email_processor/README.md b/scripts/email_processor/README.md
index 9cc5f42..3f21dc8 100644
--- a/scripts/email_processor/README.md
+++ b/scripts/email_processor/README.md
@@ -31,7 +31,7 @@ The system separates **classification** (what the LLM does) from **confidence**
 
 1. **Cron runs `scan`.** For each email, the LLM suggests an action and assigns tags from a fixed taxonomy. Since there's no history yet, `compute_confidence` returns 50% (below the 85% threshold), so everything gets queued.
 
-2. **You run `review list`.** It prints what's pending:
+2. **You run `review list`.** It prints what's pending. Item numbers are stable within a scan cycle — they don't shift when earlier items are resolved:
    ```
      1. [msg_f1d43ea3]  Subject: New jobs matching your profile
         From: LinkedIn
@@ -43,10 +43,10 @@ The system separates **classification** (what the LLM does) from **confidence**
         Suggested: archive (50%)
    ```
 
-3. **You act on them.** Either individually or in bulk:
+3. **You act on them.** Either individually or in bulk. Numbers stay stable — after deleting item 1, item 2 is still 2:
    ```bash
    ./email-processor.sh review 1 delete     # agree with suggestion
-   ./email-processor.sh review 2 archive    # agree with suggestion
+   ./email-processor.sh review 2 archive    # still #2, not renumbered
    ./email-processor.sh review accept       # accept all suggestions at once
    ```
    Each command executes via himalaya and appends to `decision_history.json` with tags.
@@ -88,7 +88,8 @@ chmod +x email-processor.sh
 
 # --- Review ---
 ./email-processor.sh review list                  # show pending queue
-./email-processor.sh review 1 delete              # delete email #1
+./email-processor.sh review 1 delete              # delete item #1
+./email-processor.sh review 3 archive             # #3 is still #3 even after #1 was deleted
 ./email-processor.sh review msg_f1d43ea3 archive  # archive by ID
 ./email-processor.sh review all delete            # delete all pending
 ./email-processor.sh review accept                # accept all suggestions
@@ -114,13 +115,13 @@ Or call Python directly: `python main.py scan --dry-run`
 The LLM assigns 3-5 tags from this fixed list to each email:
 
 ```
-receipt, invoice, payment, billing, shipping, delivery,
-promotion, discount, marketing, newsletter, notification,
-security, social, reminder, confirmation, update, alert,
+receipt, billing, shipping,
+promotion, newsletter, security, social,
+reminder, confirmation, alert,
 personal, account, subscription, travel
 ```
 
-Tags serve one purpose: making signature matching work for confidence computation. They need to be specific enough to distinguish different email types from the same sender that you'd treat differently (e.g., `[account, security]` for a password reset vs `[promotion, marketing]` for a promo, both from the same service).
+Tags serve one purpose: making signature matching work for confidence computation. They need to be specific enough to distinguish different email types from the same sender that you'd treat differently (e.g., `[account, security]` for a password reset vs `[promotion]` for a promo, both from the same service).
 
 ### Refining the Tag Taxonomy
 
@@ -303,6 +304,10 @@ Tags are defined in `classifier.py` as `TAG_TAXONOMY` — a manually curated lis
 
 The `keep` action is a deliberate no-op — it leaves the email unread in the inbox, meaning it needs human attention. This is distinct from `mark_read`, which dismisses low-priority emails without moving them.
 
+### Stable item numbers during review
+
+Each pending item gets a `scan_index` assigned sequentially during `scan`. These numbers are stable within a scan cycle — resolving item 1 doesn't renumber item 2 to 1. This matters when an agent (like OpenClaw) issues multiple `review <n> <action>` commands in sequence: without stable indices, the queue renumbers after each action, causing later commands to target the wrong emails. Each new `scan` first clears resolved (done) items from the previous cycle, then numbers new items after the highest `scan_index` still pending — so numbering restarts at 1 only when nothing from an earlier scan is still pending.
+
 ### Fail-safe classification
 
 If the LLM call fails (Ollama down, model not loaded, timeout), the classifier returns `action="keep"` with empty tags. Empty tags produce 50% confidence (below threshold), so the email gets queued for manual review rather than being auto-acted upon. The system never auto-trashes an email it couldn't classify.
diff --git a/scripts/email_processor/classifier.py b/scripts/email_processor/classifier.py
index 8abde77..d9de378 100644
--- a/scripts/email_processor/classifier.py
+++ b/scripts/email_processor/classifier.py
@@ -28,11 +28,11 @@ import decision_store
 LOGS_DIR = Path(__file__).parent / "logs"
 
 TAG_TAXONOMY = [
-    "receipt", "invoice", "payment", "billing",
-    "shipping", "delivery",
-    "promotion", "discount", "marketing", "newsletter",
-    "notification", "security", "social",
-    "reminder", "confirmation", "update", "alert",
+    "receipt", "billing",
+    "shipping",
+    "promotion", "newsletter",
+    "security", "social",
+    "reminder", "confirmation", "alert",
     "personal", "account", "subscription", "travel",
 ]
 
diff --git a/scripts/email_processor/main.py b/scripts/email_processor/main.py
index 30fa977..afaf45d 100644
--- a/scripts/email_processor/main.py
+++ b/scripts/email_processor/main.py
@@ -237,6 +237,9 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t
 
     Stores the classifier's suggestion, computed confidence, and tags
     alongside the email metadata so the user can see what the model thought.
+    Each item gets a stable scan_index (assigned sequentially within a scan
+    cycle) so that review commands can reference items by number without
+    indices shifting after earlier items are resolved.
     """
     pending = load_pending()
 
@@ -245,6 +248,14 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t
     key = f"{eid}_{email_data['subject']}"
     msg_id = f"msg_{hashlib.md5(key.encode()).hexdigest()[:8]}"
 
+    # Assign the next scan_index: max of existing pending items + 1
+    existing_indices = [
+        v.get("scan_index", 0)
+        for v in pending.values()
+        if v.get("status") == "pending"
+    ]
+    next_index = max(existing_indices, default=0) + 1
+
     pending[msg_id] = {
         "envelope_id": eid,
         "subject": email_data["subject"],
@@ -258,6 +269,7 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t
         "email_date": email_data.get("date", ""),
         "status": "pending",
         "found_at": datetime.now().isoformat(),
+        "scan_index": next_index,
     }
     save_pending(pending)
     return msg_id
@@ -298,6 +310,13 @@ def cmd_scan(config, recent=None, dry_run=False):
     print(f"Email Processor - {mode}")
     print("=" * 50)
 
+    # Drop resolved ("done") items left over from earlier scan cycles.
+    # New items are numbered after the highest scan_index still pending.
+    pending = load_pending()
+    cleared = {k: v for k, v in pending.items() if v.get("status") != "done"}
+    if len(cleared) < len(pending):
+        save_pending(cleared)
+
     LOGS_DIR.mkdir(exist_ok=True)
     log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log"
 
@@ -412,10 +431,10 @@ def cmd_scan(config, recent=None, dry_run=False):
 # ---------------------------------------------------------------------------
 
 def _get_pending_items():
-    """Return only pending (not done) items, sorted by found_at."""
+    """Return only pending (not done) items, sorted by scan_index."""
     pending = load_pending()
     items = {k: v for k, v in pending.items() if v.get("status") == "pending"}
-    sorted_items = sorted(items.items(), key=lambda x: x[1].get("found_at", ""))
+    sorted_items = sorted(items.items(), key=lambda x: x[1].get("scan_index", 0))
     return sorted_items
 
 
@@ -434,12 +453,13 @@ def cmd_review_list():
     print(f"Pending emails: {len(sorted_items)}")
     print("=" * 60)
 
-    for i, (msg_id, data) in enumerate(sorted_items, 1):
+    for msg_id, data in sorted_items:
+        num = data.get("scan_index", "?")
         suggested = data.get("suggested_action", "?")
         conf = data.get("confidence", "?")
         tags = data.get("tags", [])
         tags_str = ", ".join(tags) if tags else "(none)"
-        print(f"\n  {i}. [{msg_id}]")
+        print(f"\n  {num}. [{msg_id}]")
         print(f"     Subject: {data.get('subject', 'N/A')[:55]}")
         print(f"     From: {data.get('sender', 'N/A')[:55]}")
         print(f"     To: {data.get('recipient', 'N/A')[:40]}")
@@ -556,18 +576,21 @@ def cmd_review_accept():
 
 
 def _resolve_target(selector, sorted_items):
-    """Resolve a selector (number or msg_id) to a (msg_id, data) tuple.
+    """Resolve a selector (scan_index number or msg_id) to a (msg_id, data) tuple.
 
+    When given a number, looks up the pending item whose scan_index matches
+    (stable across deletions). When given a string, looks up by msg_id.
     Returns None and prints an error if the selector is invalid.
     """
-    # Try as 1-based index
+    # Try as scan_index number
     try:
-        idx = int(selector) - 1
-        if 0 <= idx < len(sorted_items):
-            return sorted_items[idx]
-        else:
-            print(f"Invalid number. Range: 1-{len(sorted_items)}")
-            return None
+        idx = int(selector)
+        for msg_id, data in sorted_items:
+            if data.get("scan_index") == idx:
+                return (msg_id, data)
+        valid = [str(d.get("scan_index")) for _, d in sorted_items]
+        print(f"No item with number {idx}. Valid numbers: {', '.join(valid)}")
+        return None
     except ValueError:
         pass