Clean up stale comments, dead code, and code quality issues
- Remove dead code: unused PENDING_FILE, _extract_domain(), sender_domain field, imap_uid fallback, check_unseen_only config key
- Fix stale comments: removed tag references in README and docstrings, top_domains -> top_senders, 1-based number -> scan_index number
- Make _extract_email_address public (used by 3 modules)
- Extract _format_address helper to deduplicate from/to parsing
- Batch pending queue disk I/O in review act/accept (load once, save once)
- Reuse cleared pending dict in scan instead of redundant disk load
- Track envelope IDs during scan loop to catch duplicates
- Fix default confidence_threshold 75 -> 85 to match config and docs
- Update get_relevant_examples default n=10 -> n=5 to match caller
- Add graceful error for --recent with non-numeric value
This commit is contained in:
@@ -25,7 +25,6 @@ from collections import Counter
|
||||
SCRIPT_DIR = Path(__file__).parent
|
||||
DATA_DIR = SCRIPT_DIR / "data"
|
||||
HISTORY_FILE = DATA_DIR / "decision_history.json"
|
||||
PENDING_FILE = DATA_DIR / "pending_emails.json"
|
||||
|
||||
# Stop-words excluded from subject keyword matching to reduce noise.
|
||||
_STOP_WORDS = {"re", "fwd", "the", "a", "an", "is", "to", "for", "and", "or", "your", "you"}
|
||||
@@ -50,18 +49,7 @@ def _save_history(history):
|
||||
json.dump(history, f, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
def _extract_domain(sender):
|
||||
"""Extract the domain part from a sender string.
|
||||
|
||||
Handles formats like:
|
||||
"Display Name <user@example.com>"
|
||||
user@example.com
|
||||
"""
|
||||
match = re.search(r"[\w.+-]+@([\w.-]+)", sender)
|
||||
return match.group(1).lower() if match else ""
|
||||
|
||||
|
||||
def _extract_email_address(sender):
|
||||
def extract_email_address(sender):
|
||||
"""Extract the full email address from a sender string."""
|
||||
match = re.search(r"([\w.+-]+@[\w.-]+)", sender)
|
||||
return match.group(1).lower() if match else sender.lower()
|
||||
@@ -85,7 +73,6 @@ def record_decision(email_data, action, source="user", tags=None):
|
||||
entry = {
|
||||
"timestamp": datetime.now().isoformat(timespec="seconds"),
|
||||
"sender": email_data.get("sender", ""),
|
||||
"sender_domain": _extract_domain(email_data.get("sender", "")),
|
||||
"recipient": email_data.get("recipient", ""),
|
||||
"subject": email_data.get("subject", ""),
|
||||
"summary": email_data.get("summary", ""),
|
||||
@@ -98,7 +85,7 @@ def record_decision(email_data, action, source="user", tags=None):
|
||||
return entry
|
||||
|
||||
|
||||
def get_relevant_examples(email_data, n=10):
|
||||
def get_relevant_examples(email_data, n=5):
|
||||
"""Find the N most relevant past decisions for a given email.
|
||||
|
||||
Relevance is scored by two signals:
|
||||
@@ -112,7 +99,7 @@ def get_relevant_examples(email_data, n=10):
|
||||
if not history:
|
||||
return []
|
||||
|
||||
target_email = _extract_email_address(email_data.get("sender", ""))
|
||||
target_email = extract_email_address(email_data.get("sender", ""))
|
||||
target_words = (
|
||||
set(re.findall(r"\w+", email_data.get("subject", "").lower())) - _STOP_WORDS
|
||||
)
|
||||
@@ -122,7 +109,7 @@ def get_relevant_examples(email_data, n=10):
|
||||
score = 0
|
||||
|
||||
# Signal 1: sender email match
|
||||
if target_email and _extract_email_address(entry.get("sender", "")) == target_email:
|
||||
if target_email and extract_email_address(entry.get("sender", "")) == target_email:
|
||||
score += 3
|
||||
|
||||
# Signal 2: subject keyword overlap
|
||||
@@ -146,7 +133,7 @@ def get_sender_stats(sender_email):
|
||||
history = _load_history()
|
||||
actions = Counter()
|
||||
for entry in history:
|
||||
if _extract_email_address(entry.get("sender", "")) == sender_email:
|
||||
if extract_email_address(entry.get("sender", "")) == sender_email:
|
||||
actions[entry["action"]] += 1
|
||||
return dict(actions)
|
||||
|
||||
@@ -171,7 +158,7 @@ def compute_confidence(sender_email, action, tags):
|
||||
# Find past decisions with same sender and sufficient tag overlap
|
||||
matches = []
|
||||
for entry in history:
|
||||
entry_email = _extract_email_address(entry.get("sender", ""))
|
||||
entry_email = extract_email_address(entry.get("sender", ""))
|
||||
if entry_email != sender_email:
|
||||
continue
|
||||
|
||||
@@ -216,7 +203,7 @@ def get_known_labels():
|
||||
def get_all_stats():
|
||||
"""Compute aggregate statistics across the full decision history.
|
||||
|
||||
Returns a dict with keys: total, by_action, by_source, top_domains.
|
||||
Returns a dict with keys: total, by_action, by_source, top_senders.
|
||||
Returns None if history is empty.
|
||||
"""
|
||||
history = _load_history()
|
||||
@@ -228,7 +215,7 @@ def get_all_stats():
|
||||
by_source = Counter(e["source"] for e in history)
|
||||
|
||||
# Top 10 sender addresses by decision count
|
||||
sender_counts = Counter(_extract_email_address(e.get("sender", "")) for e in history)
|
||||
sender_counts = Counter(extract_email_address(e.get("sender", "")) for e in history)
|
||||
top_senders = sender_counts.most_common(10)
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user