Clean up stale comments, dead code, and code quality issues
- Remove dead code: unused PENDING_FILE, _extract_domain(), sender_domain field, imap_uid fallback, check_unseen_only config key - Fix stale comments: removed tag references in README and docstrings, top_domains -> top_senders, 1-based number -> scan_index number - Make _extract_email_address public (used by 3 modules) - Extract _format_address helper to deduplicate from/to parsing - Batch pending queue disk I/O in review act/accept (load once, save once) - Reuse cleared pending dict in scan instead of redundant disk load - Track envelope IDs during scan loop to catch duplicates - Fix default confidence_threshold 75 -> 85 to match config and docs - Update get_relevant_examples default n=10 -> n=5 to match caller - Add graceful error for --recent with non-numeric value
This commit is contained in:
@@ -121,6 +121,20 @@ def read_message(envelope_id):
|
||||
return _himalaya("message", "read", "--preview", "--no-headers", str(envelope_id))
|
||||
|
||||
|
||||
def _format_address(addr_field):
|
||||
"""Format a himalaya address field (dict, list, or string) into a display string."""
|
||||
if isinstance(addr_field, dict):
|
||||
name = addr_field.get("name", "")
|
||||
addr = addr_field.get("addr", "")
|
||||
return f"{name} <{addr}>" if name else addr
|
||||
elif isinstance(addr_field, list) and addr_field:
|
||||
first = addr_field[0]
|
||||
name = first.get("name", "")
|
||||
addr = first.get("addr", "")
|
||||
return f"{name} <{addr}>" if name else addr
|
||||
return str(addr_field)
|
||||
|
||||
|
||||
def build_email_data(envelope, body, config):
|
||||
"""Build the email_data dict expected by classifier and decision_store.
|
||||
|
||||
@@ -129,40 +143,11 @@ def build_email_data(envelope, body, config):
|
||||
"""
|
||||
max_body = config.get("rules", {}).get("max_body_length", 1000)
|
||||
|
||||
# himalaya envelope JSON uses "from" as a nested object or string
|
||||
sender = envelope.get("from", {})
|
||||
if isinstance(sender, dict):
|
||||
# Format: {"name": "Display Name", "addr": "user@example.com"}
|
||||
name = sender.get("name", "")
|
||||
addr = sender.get("addr", "")
|
||||
sender_str = f"{name} <{addr}>" if name else addr
|
||||
elif isinstance(sender, list) and sender:
|
||||
first = sender[0]
|
||||
name = first.get("name", "")
|
||||
addr = first.get("addr", "")
|
||||
sender_str = f"{name} <{addr}>" if name else addr
|
||||
else:
|
||||
sender_str = str(sender)
|
||||
|
||||
# Same for "to"
|
||||
to = envelope.get("to", {})
|
||||
if isinstance(to, dict):
|
||||
name = to.get("name", "")
|
||||
addr = to.get("addr", "")
|
||||
to_str = f"{name} <{addr}>" if name else addr
|
||||
elif isinstance(to, list) and to:
|
||||
first = to[0]
|
||||
name = first.get("name", "")
|
||||
addr = first.get("addr", "")
|
||||
to_str = f"{name} <{addr}>" if name else addr
|
||||
else:
|
||||
to_str = str(to)
|
||||
|
||||
return {
|
||||
"id": str(envelope.get("id", "")),
|
||||
"subject": envelope.get("subject", "(No Subject)"),
|
||||
"sender": sender_str,
|
||||
"recipient": to_str,
|
||||
"sender": _format_address(envelope.get("from", {})),
|
||||
"recipient": _format_address(envelope.get("to", {})),
|
||||
"date": envelope.get("date", ""),
|
||||
"body": body[:max_body],
|
||||
}
|
||||
@@ -322,7 +307,7 @@ def cmd_scan(config, recent=None, dry_run=False):
|
||||
|
||||
# Load automation threshold
|
||||
automation = config.get("automation", {})
|
||||
confidence_threshold = automation.get("confidence_threshold", 75)
|
||||
confidence_threshold = automation.get("confidence_threshold", 85)
|
||||
|
||||
# Fetch envelopes via himalaya
|
||||
if recent:
|
||||
@@ -340,9 +325,8 @@ def cmd_scan(config, recent=None, dry_run=False):
|
||||
queued = 0
|
||||
skipped = 0
|
||||
|
||||
# Load pending queue once to skip already-queued emails
|
||||
pending = load_pending()
|
||||
pending_eids = {v.get("envelope_id") for v in pending.values() if v.get("status") == "pending"}
|
||||
# Reuse the cleared pending dict from above to skip already-queued emails
|
||||
pending_eids = {v.get("envelope_id") for v in cleared.values() if v.get("status") == "pending"}
|
||||
|
||||
for envelope in envelopes:
|
||||
eid = envelope.get("id", "?")
|
||||
@@ -353,6 +337,9 @@ def cmd_scan(config, recent=None, dry_run=False):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# Track this eid so duplicates within the same envelope list are caught
|
||||
pending_eids.add(str(eid))
|
||||
|
||||
print(f"[{eid}] ", end="", flush=True)
|
||||
|
||||
# Read message body without marking as seen
|
||||
@@ -370,7 +357,7 @@ def cmd_scan(config, recent=None, dry_run=False):
|
||||
)
|
||||
|
||||
# Compute confidence from decision history
|
||||
sender_email = decision_store._extract_email_address(email_data.get("sender", ""))
|
||||
sender_email = decision_store.extract_email_address(email_data.get("sender", ""))
|
||||
confidence = decision_store.compute_confidence(sender_email, action, tags)
|
||||
|
||||
tags_str = ", ".join(tags) if tags else "(none)"
|
||||
@@ -479,7 +466,7 @@ def cmd_review_act(selector, action):
|
||||
"""Execute an action on one or more pending emails.
|
||||
|
||||
Args:
|
||||
selector: a 1-based number, a msg_id string, or "all".
|
||||
selector: a scan_index number, a msg_id string, or "all".
|
||||
action: one of delete/archive/keep/mark_read/label:<name>.
|
||||
"""
|
||||
# Validate action
|
||||
@@ -507,8 +494,11 @@ def cmd_review_act(selector, action):
|
||||
log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log"
|
||||
|
||||
# Execute action on each target
|
||||
pending = load_pending()
|
||||
pending_dirty = False
|
||||
|
||||
for msg_id, data in targets:
|
||||
eid = data.get("envelope_id") or data.get("imap_uid")
|
||||
eid = data.get("envelope_id")
|
||||
if not eid:
|
||||
print(f" {msg_id}: No envelope ID, skipping")
|
||||
continue
|
||||
@@ -519,11 +509,10 @@ def cmd_review_act(selector, action):
|
||||
decision_store.record_decision(data, action, source="user", tags=data.get("tags", []))
|
||||
|
||||
# Mark as done in pending queue
|
||||
pending = load_pending()
|
||||
pending[msg_id]["status"] = "done"
|
||||
pending[msg_id]["action"] = action
|
||||
pending[msg_id]["processed_at"] = datetime.now().isoformat()
|
||||
save_pending(pending)
|
||||
pending_dirty = True
|
||||
|
||||
log_result(log_file, data, f"REVIEW:{action}", data.get("reason", ""))
|
||||
print(f" {msg_id}: {action} -> OK ({data['subject'][:40]})")
|
||||
@@ -531,6 +520,9 @@ def cmd_review_act(selector, action):
|
||||
log_result(log_file, data, f"REVIEW_FAILED:{action}", data.get("reason", ""))
|
||||
print(f" {msg_id}: {action} -> FAILED")
|
||||
|
||||
if pending_dirty:
|
||||
save_pending(pending)
|
||||
|
||||
|
||||
def cmd_review_accept():
|
||||
"""Accept all classifier suggestions for pending emails.
|
||||
@@ -547,13 +539,16 @@ def cmd_review_accept():
|
||||
LOGS_DIR.mkdir(exist_ok=True)
|
||||
log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log"
|
||||
|
||||
pending = load_pending()
|
||||
pending_dirty = False
|
||||
|
||||
for msg_id, data in sorted_items:
|
||||
action = data.get("suggested_action")
|
||||
if not action:
|
||||
print(f" {msg_id}: No suggestion, skipping")
|
||||
continue
|
||||
|
||||
eid = data.get("envelope_id") or data.get("imap_uid")
|
||||
eid = data.get("envelope_id")
|
||||
if not eid:
|
||||
print(f" {msg_id}: No envelope ID, skipping")
|
||||
continue
|
||||
@@ -562,11 +557,10 @@ def cmd_review_accept():
|
||||
if success:
|
||||
decision_store.record_decision(data, action, source="user", tags=data.get("tags", []))
|
||||
|
||||
pending = load_pending()
|
||||
pending[msg_id]["status"] = "done"
|
||||
pending[msg_id]["action"] = action
|
||||
pending[msg_id]["processed_at"] = datetime.now().isoformat()
|
||||
save_pending(pending)
|
||||
pending_dirty = True
|
||||
|
||||
log_result(log_file, data, f"ACCEPT:{action}", data.get("reason", ""))
|
||||
print(f" {msg_id}: {action} -> OK ({data['subject'][:40]})")
|
||||
@@ -574,6 +568,9 @@ def cmd_review_accept():
|
||||
log_result(log_file, data, f"ACCEPT_FAILED:{action}", data.get("reason", ""))
|
||||
print(f" {msg_id}: {action} -> FAILED")
|
||||
|
||||
if pending_dirty:
|
||||
save_pending(pending)
|
||||
|
||||
|
||||
def _resolve_target(selector, sorted_items):
|
||||
"""Resolve a selector (scan_index number or msg_id) to a (msg_id, data) tuple.
|
||||
@@ -611,7 +608,7 @@ def cmd_stats():
|
||||
"""Print a summary of the decision history.
|
||||
|
||||
Shows total decisions, user vs. auto breakdown, action distribution,
|
||||
top sender domains, and custom labels.
|
||||
top senders, and custom labels.
|
||||
"""
|
||||
stats = decision_store.get_all_stats()
|
||||
|
||||
@@ -672,7 +669,11 @@ if __name__ == "__main__":
|
||||
i = 0
|
||||
while i < len(args):
|
||||
if args[i] == "--recent" and i + 1 < len(args):
|
||||
recent = int(args[i + 1])
|
||||
try:
|
||||
recent = int(args[i + 1])
|
||||
except ValueError:
|
||||
print(f"--recent requires a number, got: {args[i + 1]}")
|
||||
sys.exit(1)
|
||||
i += 2
|
||||
elif args[i] == "--dry-run":
|
||||
dry_run = True
|
||||
|
||||
Reference in New Issue
Block a user