Remove scan_index, use envelope_id (IMAP UID) as single identifier

scan_index confused the OpenClaw agent, which would sometimes reference emails by scan_index and sometimes by envelope_id. Since himalaya's envelope ID is an IMAP UID (stable and never recycled within a mailbox), it works as the sole identifier for review commands.
2026-03-07 22:01:02 -08:00
parent 2c00649488
commit 3c54098b1d
3 changed files with 37 additions and 53 deletions
--- a/scripts/email_processor/main.py
+++ b/scripts/email_processor/main.py
@@ -15,7 +15,7 @@ Subcommands:
    python main.py scan --dry-run                    # classify only, no changes
    python main.py scan --recent 7 --dry-run         # combine both
    python main.py review list                       # print pending queue
-    python main.py review <num-or-id> <action>       # act on one email
+    python main.py review <envelope_id> <action>     # act on one email
    python main.py review all <action>               # act on all pending
    python main.py review accept                     # accept all suggestions
    python main.py stats                             # show decision history
@@ -222,9 +222,7 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t

    Stores the classifier's suggestion, computed confidence, and tags
    alongside the email metadata so the user can see what the model thought.
-    Each item gets a stable scan_index (assigned sequentially within a scan
-    cycle) so that review commands can reference items by number without
-    indices shifting after earlier items are resolved.
+    Uses envelope_id as the primary identifier for review commands.
    """
    pending = load_pending()

@@ -233,14 +231,6 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t
    key = f"{eid}_{email_data['subject']}"
    msg_id = f"msg_{hashlib.md5(key.encode()).hexdigest()[:8]}"

-    # Assign the next scan_index: max of existing pending items + 1
-    existing_indices = [
-        v.get("scan_index", 0)
-        for v in pending.values()
-        if v.get("status") == "pending"
-    ]
-    next_index = max(existing_indices, default=0) + 1
-
    pending[msg_id] = {
        "envelope_id": eid,
        "subject": email_data["subject"],
@@ -254,7 +244,6 @@ def add_to_pending(email_data, summary, reason, action_suggestion, confidence, t
        "email_date": email_data.get("date", ""),
        "status": "pending",
        "found_at": datetime.now().isoformat(),
-        "scan_index": next_index,
    }
    save_pending(pending)
    return msg_id
@@ -295,8 +284,7 @@ def cmd_scan(config, recent=None, dry_run=False):
    print(f"Email Processor - {mode}")
    print("=" * 50)

-    # Clear done items from previous scan cycles so new items get
-    # scan_index values starting from 1.
+    # Clear done items from previous scan cycles
    pending = load_pending()
    cleared = {k: v for k, v in pending.items() if v.get("status") != "done"}
    if len(cleared) < len(pending):
@@ -418,17 +406,17 @@ def cmd_scan(config, recent=None, dry_run=False):
 # ---------------------------------------------------------------------------

 def _get_pending_items():
-    """Return only pending (not done) items, sorted by scan_index."""
+    """Return only pending (not done) items, sorted by envelope_id."""
    pending = load_pending()
    items = {k: v for k, v in pending.items() if v.get("status") == "pending"}
-    sorted_items = sorted(items.items(), key=lambda x: x[1].get("scan_index", 0))
+    sorted_items = sorted(items.items(), key=lambda x: int(x[1].get("envelope_id", 0)))
    return sorted_items


 def cmd_review_list():
    """Print the pending queue and exit.

-    Shows each email with its number, ID, subject, sender, summary,
+    Shows each email with its envelope ID, subject, sender, summary,
    and the classifier's suggested action with confidence.
    """
    sorted_items = _get_pending_items()
@@ -441,12 +429,12 @@ def cmd_review_list():
    print("=" * 60)

    for msg_id, data in sorted_items:
-        num = data.get("scan_index", "?")
+        eid = data.get("envelope_id", "?")
        suggested = data.get("suggested_action", "?")
        conf = data.get("confidence", "?")
        tags = data.get("tags", [])
        tags_str = ", ".join(tags) if tags else "(none)"
-        print(f"\n  {num}. [{msg_id}]")
+        print(f"\n  [{eid}] {msg_id}")
        print(f"     Subject: {data.get('subject', 'N/A')[:55]}")
        print(f"     From: {data.get('sender', 'N/A')[:55]}")
        print(f"     To: {data.get('recipient', 'N/A')[:40]}")
@@ -456,7 +444,7 @@ def cmd_review_list():

    print(f"\n{'=' * 60}")
    print("Usage:")
-    print("  python main.py review <number> <action>")
+    print("  python main.py review <envelope_id> <action>")
    print("  python main.py review all <action>")
    print("  python main.py review accept")
    print("Actions: delete / archive / keep / mark_read / label:<name>")
@@ -466,7 +454,7 @@ def cmd_review_act(selector, action):
    """Execute an action on one or more pending emails.

    Args:
-        selector: a scan_index number, a msg_id string, or "all".
+        selector: an envelope_id, a msg_id string, or "all".
        action:   one of delete/archive/keep/mark_read/label:<name>.
    """
    # Validate action
@@ -573,23 +561,15 @@ def cmd_review_accept():


 def _resolve_target(selector, sorted_items):
-    """Resolve a selector (scan_index number or msg_id) to a (msg_id, data) tuple.
+    """Resolve a selector (envelope_id or msg_id) to a (msg_id, data) tuple.

-    When given a number, looks up the pending item whose scan_index matches
-    (stable across deletions). When given a string, looks up by msg_id.
+    Looks up by envelope_id first, then by msg_id string.
    Returns None and prints an error if the selector is invalid.
    """
-    # Try as scan_index number
-    try:
-        idx = int(selector)
-        for msg_id, data in sorted_items:
-            if data.get("scan_index") == idx:
-                return (msg_id, data)
-        valid = [str(d.get("scan_index")) for _, d in sorted_items]
-        print(f"No item with number {idx}. Valid numbers: {', '.join(valid)}")
-        return None
-    except ValueError:
-        pass
+    # Try as envelope_id
+    for msg_id, data in sorted_items:
+        if data.get("envelope_id") == selector:
+            return (msg_id, data)

    # Try as msg_id
    for msg_id, data in sorted_items:
@@ -597,6 +577,8 @@ def _resolve_target(selector, sorted_items):
            return (msg_id, data)

    print(f"Not found: {selector}")
+    valid = [d.get("envelope_id") for _, d in sorted_items]
+    print(f"Valid envelope IDs: {', '.join(valid)}")
    return None


@@ -705,7 +687,7 @@ if __name__ == "__main__":
        else:
            print("Usage:")
            print("  python main.py review list")
-            print("  python main.py review <number-or-id> <action>")
+            print("  python main.py review <envelope_id> <action>")
            print("  python main.py review all <action>")
            print("  python main.py review accept")
            sys.exit(1)