From b14a93866ee5f9735a9c1b7c5c5d1abcfbd77fb3 Mon Sep 17 00:00:00 2001 From: Yanxin Lu Date: Thu, 26 Feb 2026 20:54:07 -0800 Subject: [PATCH] email processor --- scripts/email_processor/.gitignore | 3 + scripts/email_processor/README.md | 233 +++++ scripts/email_processor/classifier.py | 191 ++++ scripts/email_processor/config.json | 12 +- .../email_processor/data/pending_emails.json | 52 - scripts/email_processor/decision_store.py | 253 +++++ scripts/email_processor/email-processor.sh | 27 + scripts/email_processor/logs/2026-02-15.log | 50 - scripts/email_processor/logs/2026-02-18.log | 29 - scripts/email_processor/main.py | 887 +++++++++++++----- scripts/email_processor/move_ad_to_trash.py | 28 - scripts/email_processor/process_queue.py | 214 ----- scripts/email_processor/test_single.py | 38 - scripts/email_processor/venv/bin/python | 2 +- scripts/email_processor/venv/bin/python3 | 2 +- scripts/email_processor/venv/bin/python3.12 | 1 - scripts/email_processor/venv/lib64 | 1 - scripts/email_processor/venv/pyvenv.cfg | 8 +- 18 files changed, 1365 insertions(+), 666 deletions(-) create mode 100644 scripts/email_processor/.gitignore create mode 100644 scripts/email_processor/README.md create mode 100644 scripts/email_processor/classifier.py delete mode 100644 scripts/email_processor/data/pending_emails.json create mode 100644 scripts/email_processor/decision_store.py create mode 100755 scripts/email_processor/email-processor.sh delete mode 100644 scripts/email_processor/logs/2026-02-15.log delete mode 100644 scripts/email_processor/logs/2026-02-18.log delete mode 100644 scripts/email_processor/move_ad_to_trash.py delete mode 100644 scripts/email_processor/process_queue.py delete mode 100644 scripts/email_processor/test_single.py delete mode 120000 scripts/email_processor/venv/bin/python3.12 delete mode 120000 scripts/email_processor/venv/lib64 diff --git a/scripts/email_processor/.gitignore b/scripts/email_processor/.gitignore new file mode 100644 index 
0000000..2706474 --- /dev/null +++ b/scripts/email_processor/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +venv diff --git a/scripts/email_processor/README.md b/scripts/email_processor/README.md new file mode 100644 index 0000000..bfa64c5 --- /dev/null +++ b/scripts/email_processor/README.md @@ -0,0 +1,233 @@ +# Email Processor + +Learning-based mailbox cleanup using Himalaya (IMAP) + Ollama (local LLM). Classifies emails, learns from your decisions over time, and gradually automates common actions. + +## Prerequisites + +- **Himalaya** — CLI email client, handles IMAP connection and auth. +- **Ollama** — local LLM server. +- **Python 3.8+** + +```bash +# Install himalaya (macOS) +brew install himalaya + +# Configure himalaya for your IMAP account (first time only) +himalaya account list # should show your account after setup + +# Install and start Ollama, pull the model +brew install ollama +ollama pull kamekichi128/qwen3-4b-instruct-2507:latest + +# Set up Python venv and install dependencies +cd scripts/email_processor +python3 -m venv venv +source venv/bin/activate +pip install ollama +``` + +## How It Works + +The system has two phases: a **learning phase** where it builds up knowledge from your decisions, and a **steady state** where it handles most emails automatically. + +### Learning Phase (first ~20 decisions) + +The confidence threshold is automatically raised to 95%. Most emails get queued. + +1. **Cron runs `scan`.** For each unseen email, the classifier uses Qwen3's general knowledge (no history yet) to suggest an action. Most come back at 60-80% confidence — below the 95% threshold — so they get saved to `pending_emails.json` with the suggestion attached. A few obvious spam emails might hit 95%+ and get auto-deleted. + +2. **You run `review list`.** It prints what's pending: + ``` + 1. [msg_f1d43ea3] Subject: New jobs matching your profile + From: LinkedIn Suggested: delete (82%) + 2. 
[msg_60c56a87] Subject: Your order shipped + From: Amazon Suggested: archive (78%) + 3. [msg_ebd24205] Subject: Meeting tomorrow at 3pm + From: Coworker Suggested: keep (70%) + ``` + +3. **You act on them.** Either individually or in bulk: + ```bash + ./email-processor.sh review 1 delete # agree with suggestion + ./email-processor.sh review 2 archive # agree with suggestion + ./email-processor.sh review accept # accept all suggestions at once + ``` + Each command executes via himalaya, appends to `decision_history.json`, and marks the pending entry as done. + +4. **Next scan is smarter.** The classifier now has few-shot examples in the prompt: + ``` + History for linkedin.com: delete 2 times + --- Past decisions --- + From: LinkedIn | Subject: New jobs matching your profile -> delete + From: Amazon | Subject: Your package delivered -> archive + --- + ``` + Confidence scores climb. You keep reviewing. History grows. + +### Steady State (20+ decisions) + +The threshold drops to the configured 75%. The classifier has rich context. + +- **Repeat senders** (LinkedIn, Amazon, Uber) get auto-acted at 85-95% confidence during `scan`. They never touch the pending queue. +- **New or ambiguous senders** may fall below 75% and get queued. +- **You occasionally run `review list`** to handle stragglers — each decision further improves future classifications. +- **`stats` shows your automation rate** climbing: 60%, 70%, 80%+. + +The pending queue shrinks over time. It's not a backlog — it's an ever-narrowing set of emails the system hasn't learned to handle yet. + +## Usage + +All commands are non-interactive — they take arguments, act, and exit. Compatible with cron/OpenClaw. 
+ +```bash +# Make the entry script executable (first time) +chmod +x email-processor.sh + +# --- Scan --- +./email-processor.sh scan # classify unseen emails +./email-processor.sh scan --recent 30 # classify last 30 days +./email-processor.sh scan --dry-run # classify only, no changes +./email-processor.sh scan --recent 7 --dry-run # combine both + +# --- Review --- +./email-processor.sh review list # show pending queue +./email-processor.sh review 1 delete # delete email #1 +./email-processor.sh review msg_f1d43ea3 archive # archive by ID +./email-processor.sh review all delete # delete all pending +./email-processor.sh review accept # accept all suggestions + +# --- Other --- +./email-processor.sh stats # show decision history +./email-processor.sh migrate # import old decisions +``` + +Or call Python directly: `python main.py scan --dry-run` + +## Actions + +| Action | Effect | +|---|---| +| `delete` | Move to Trash (`himalaya message delete`) | +| `archive` | Move to Archive folder | +| `keep` | Leave unread in inbox (no changes) | +| `mark_read` | Add `\Seen` flag, stays in inbox | +| `label:<name>` | Move to named folder (created if needed) | + +## Auto-Action Criteria + +Scan auto-acts when the classifier's confidence meets the threshold. During the learning phase (fewer than `bootstrap_min_decisions` total decisions, default 20), a higher threshold of 95% is used automatically. Once enough history accumulates, the configured `confidence_threshold` (default 75%) takes over. + +This means on day one, only very obvious emails (spam, clear promotions) get auto-acted. As you review emails and build history, the system gradually handles more on its own. + +## Configuration + +`config.json` — only Ollama and automation settings. IMAP auth is managed by himalaya's own config.
+ +```json +{ + "ollama": { + "host": "http://localhost:11434", + "model": "kamekichi128/qwen3-4b-instruct-2507:latest" + }, + "rules": { + "max_body_length": 1000 + }, + "automation": { + "confidence_threshold": 75, + "bootstrap_min_decisions": 20 + } +} +``` + +| Key | Description | +|---|---| +| `ollama.host` | Ollama server URL. Default `http://localhost:11434`. | +| `ollama.model` | Ollama model to use for classification. | +| `rules.max_body_length` | Max characters of email body sent to the LLM. Longer bodies are truncated. Keeps prompt size and latency down. | +| `automation.confidence_threshold` | Minimum confidence (0-100) for auto-action in steady state. Emails below this get queued for review. Lower = more automation, higher = more manual review. | +| `automation.bootstrap_min_decisions` | Number of decisions needed before leaving the learning phase. During the learning phase, the threshold is raised to 95% regardless of `confidence_threshold`. Set to 0 to skip the learning phase entirely. | + +## Testing + +```bash +# 1. Verify himalaya can reach your mailbox +himalaya envelope list --page-size 3 + +# 2. Verify Ollama is running with the model +ollama list # should show kamekichi128/qwen3-4b-instruct-2507:latest + +# 3. Dry run — classify recent emails without touching anything +./email-processor.sh scan --recent 7 --dry-run + +# 4. Live run — classify and act (auto-act or queue) +./email-processor.sh scan --recent 7 + +# 5. Check what got queued +./email-processor.sh review list + +# 6. Act on a queued email to seed decision history +./email-processor.sh review 1 delete + +# 7. 
Check that the decision was recorded +./email-processor.sh stats +``` + +## File Structure + +``` +email_processor/ + main.py # Entry point — scan/review/stats/migrate subcommands + classifier.py # LLM prompt builder + response parser + decision_store.py # Decision history storage + few-shot retrieval + config.json # Ollama + automation settings + email-processor.sh # Shell wrapper (activates venv, forwards args) + data/ + pending_emails.json # Queue of emails awaiting review + decision_history.json # Past decisions (few-shot learning data) + logs/ + YYYY-MM-DD.log # Daily processing logs +``` + +## Design Decisions + +### Himalaya instead of raw IMAP + +All IMAP operations go through the `himalaya` CLI via subprocess calls. This means: +- No IMAP credentials stored in config.json — himalaya manages its own auth. +- No connection management, reconnect logic, or SSL setup in Python. +- Each action is a single himalaya command (e.g., `himalaya message delete 42`). + +The tradeoff is a subprocess spawn per operation, but for email volumes (tens per run, not thousands) this is negligible. + +### Non-interactive design + +Every command takes its full input as arguments, acts, and exits. No `input()` calls, no interactive loops. This makes the system compatible with cron/OpenClaw and composable with other scripts. The pending queue on disk (`pending_emails.json`) is the shared state between scan and review invocations. + +### decision_history.json as the "database" + +`data/decision_history.json` is the only persistent state that matters for learning. It's a flat JSON array — every decision (user or auto) is appended as an entry. The classifier reads the whole file on each email to find relevant few-shot examples via relevance scoring. + +The pending queue (`pending_emails.json`) is transient — emails pass through it and get marked "done". Logs are for debugging. The decision history is what the system learns from. 
+ +A flat JSON file works fine for hundreds or low thousands of decisions. SQLite would make sense if the history grows past ~10k entries and the linear scan becomes noticeable, or if concurrent writes from multiple processes become necessary. Neither applies at current scale. + +### Few-shot learning via relevance scoring + +Rather than sending the entire decision history to the LLM, `decision_store.get_relevant_examples()` scores each past decision against the current email using three signals: +- Exact sender domain match (+3 points) +- Recipient address match (+2 points) +- Subject keyword overlap (+1 per shared word, stop-words excluded) + +The top 5 most relevant examples are injected into the prompt as few-shot demonstrations. This keeps the prompt small while giving the model the most useful context. + +### Conservative auto-action + +Auto-action uses a single confidence threshold with an adaptive learning phase. When the decision history has fewer than `bootstrap_min_decisions` (default 20) entries, the threshold is raised to 95% — only very obvious classifications get auto-acted. Once enough history accumulates, the configured `confidence_threshold` (default 75%) takes over. This lets the system start working from day one while being cautious until it has enough examples to learn from. + +### `keep` means unread + +The `keep` action is a deliberate no-op — it leaves the email unread in the inbox, meaning it needs human attention. This is distinct from `mark_read`, which dismisses low-priority emails without moving them. During scan, queued emails are marked as read to prevent re-processing, but that's a scan-level concern separate from the `keep` action itself. + +### Fail-safe classification + +If the LLM call fails (Ollama down, model not loaded, timeout), the classifier returns `action="keep"` with `confidence=0`. This guarantees the email gets queued for manual review rather than being auto-acted upon. 
#!/usr/bin/env python3
"""
Classifier - LLM-based email classification with learning.

This module builds a rich prompt for the local Ollama model (Qwen3) that
includes few-shot examples from past user decisions, per-sender statistics,
and a list of known labels.  The model returns a structured response with
an action, confidence score, summary, and reason.

The prompt structure:
  1. System instructions (action definitions)
  2. Known labels (so the model reuses them)
  3. Sender statistics ("History for linkedin.com: delete 8 times, keep 2 times")
  4. Few-shot examples (top 5 most relevant past decisions)
  5. The email to classify (subject, sender, recipient, body preview)
  6. Output format specification
"""

import re
import time
from datetime import datetime
from pathlib import Path

import decision_store

LOGS_DIR = Path(__file__).parent / "logs"

# Model used when config.json does not name one.
_DEFAULT_MODEL = "kamekichi128/qwen3-4b-instruct-2507:latest"

# Number of past decisions injected into the prompt as few-shot examples.
_MAX_EXAMPLES = 5

# Actions the parser accepts verbatim; "label:<name>" is validated separately.
_VALID_ACTIONS = frozenset({"delete", "archive", "keep", "mark_read"})

# First (optionally signed / decimal) number in a "Confidence:" value.
_NUMBER_RE = re.compile(r"-?\d+(?:\.\d+)?")


def _build_prompt(email_data, config):
    """Assemble the full classification prompt with learning context.

    The prompt is built in sections, each providing different context to
    help the model make better decisions.  Sections are omitted when there
    is no relevant data (e.g., no history yet for a new sender).

    Args:
        email_data: dict with subject, sender, recipient, body keys
            (missing keys are tolerated).
        config: full config dict; only rules.max_body_length is read here.

    Returns:
        The prompt as a single newline-joined string.
    """
    max_body = config.get("rules", {}).get("max_body_length", 1000)

    # Gather learning context from decision history
    examples = decision_store.get_relevant_examples(email_data, n=_MAX_EXAMPLES)
    sender_domain = decision_store._extract_domain(email_data.get("sender", ""))
    sender_stats = decision_store.get_sender_stats(sender_domain) if sender_domain else {}
    known_labels = decision_store.get_known_labels()

    # /no_think disables Qwen3's chain-of-thought, giving faster + shorter output
    parts = ["/no_think\n"]

    # Section 1: Action definitions
    parts.append(
        "You are an email classifier. Classify the email into one of these actions:\n"
        "- delete: Spam, ads, promotions, unwanted notifications\n"
        "- archive: Informational emails worth keeping but not needing attention "
        "(receipts, shipping updates, automated confirmations)\n"
        "- keep: Important emails that need attention or action (left unread in inbox)\n"
        "- mark_read: Low-priority, leave in inbox but mark as read\n"
        "- label:<name>: Categorize with a specific label\n"
    )

    # Section 2: Known labels (helps model reuse instead of inventing)
    if known_labels:
        parts.append(f"\nLabels used before: {', '.join(sorted(known_labels))}\n")

    # Section 3: Sender statistics (strong signal for repeat senders)
    if sender_stats:
        stats_str = ", ".join(
            f"{action} {count} times" for action, count in sender_stats.items()
        )
        parts.append(f"\nHistory for {sender_domain}: {stats_str}\n")

    # Section 4: Few-shot examples.  History entries are read with .get()
    # because hand-edited or migrated entries may lack some fields; an entry
    # without an action teaches the model nothing, so it is skipped.
    usable = [ex for ex in examples[:_MAX_EXAMPLES] if ex.get("action")]
    if usable:
        parts.append("\n--- Past decisions (learn from these) ---")
        for ex in usable:
            parts.append(
                f"From: {(ex.get('sender') or '')[:60]} | "
                f"To: {(ex.get('recipient') or '')[:40]} | "
                f"Subject: {(ex.get('subject') or '')[:60]} -> {ex['action']}"
            )
        parts.append("--- End examples ---\n")

    # Section 5: The email being classified (body truncated to bound prompt size)
    body_preview = (email_data.get("body") or "")[:max_body]
    parts.append(
        f"Now classify this email:\n"
        f"Subject: {email_data.get('subject', '(No Subject)')}\n"
        f"From: {email_data.get('sender', '(Unknown)')}\n"
        f"To: {email_data.get('recipient', '(Unknown)')}\n"
        f"Body: {body_preview}\n"
    )

    # Section 6: Required output format
    parts.append(
        "Respond in this exact format (nothing else):\n"
        "Action: [delete|archive|keep|mark_read|label:<name>]\n"
        "Confidence: [0-100]\n"
        "Summary: [one sentence summary of the email]\n"
        "Reason: [brief explanation for your classification]"
    )

    return "\n".join(parts)


def _log_llm(prompt, output, email_data, action, confidence, duration):
    """Append the full LLM prompt and response to logs/llm_YYYY-MM-DD.log.

    Creates the logs directory on first use.  The entry records the subject
    and sender (each truncated to 60 characters), the parsed result, the
    elapsed time in seconds, and the verbatim prompt and response.
    """
    LOGS_DIR.mkdir(exist_ok=True)
    now = datetime.now()
    log_file = LOGS_DIR / f"llm_{now.strftime('%Y-%m-%d')}.log"
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    subject = (email_data.get("subject") or "(No Subject)")[:60]
    sender = (email_data.get("sender") or "(Unknown)")[:60]

    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{'=' * 70}\n")
        f.write(f"[{timestamp}] {subject}\n")
        f.write(f"From: {sender} | Result: {action} @ {confidence}% | {duration:.1f}s\n")
        f.write(f"{'-' * 70}\n")
        f.write(f"PROMPT:\n{prompt}\n")
        f.write(f"{'-' * 70}\n")
        f.write(f"RESPONSE:\n{output}\n")
        f.write(f"{'=' * 70}\n\n")


def _parse_response(output):
    """Parse the model's text response into structured fields.

    Expected format (one per line):
        Action: delete
        Confidence: 92
        Summary: Promotional offer from retailer
        Reason: Clearly a marketing email with discount offer

    Field names are matched case-insensitively and only the text after the
    first colon is taken as the value, so a value that itself contains
    "Action:" or similar is left intact.  Surrounding brackets/quotes around
    the action (e.g. "[delete]") are tolerated.  A label action must carry a
    non-empty name and is normalised to "label:<name>" (lower-cased, no
    space after the colon).  Confidence accepts forms such as "92", "92%",
    "92.5" or "85/100" (first number wins) and is clamped to 0-100.

    Falls back to safe defaults (keep, 50% confidence) on parse failure.

    Returns:
        Tuple of (action, confidence, summary, reason).
    """
    action = "keep"
    confidence = 50
    summary = "No summary"
    reason = "Unknown"

    for line in output.strip().splitlines():
        key, sep, value = line.strip().partition(":")
        if not sep:
            continue
        key = key.strip().lower()
        value = value.strip()

        if key == "action":
            candidate = value.strip(" \t[]*`\"'.").lower()
            if candidate in _VALID_ACTIONS:
                action = candidate
            elif candidate.startswith("label:"):
                label_name = candidate[len("label:"):].strip()
                # "label:" with no name would target a folder called "".
                if label_name:
                    action = f"label:{label_name}"
        elif key == "confidence":
            match = _NUMBER_RE.search(value)
            if match:
                confidence = max(0, min(100, int(float(match.group()))))
            else:
                confidence = 50
        elif key == "summary":
            summary = value[:200]
        elif key == "reason":
            reason = value

    return action, confidence, summary, reason


def classify_email(email_data, config):
    """Classify an email using the local LLM with few-shot learning context.

    Connects to the Ollama server named by config["ollama"]["host"] (the
    client library's default is used when the key is absent), sends the
    assembled prompt, and parses the response.  On any error during the
    request or parsing, falls back to "keep" with 0% confidence so the email
    gets queued for manual review rather than auto-acted upon.

    Args:
        email_data: dict with subject, sender, recipient, body keys.
        config: full config dict (reads ollama.host, ollama.model and
            rules.max_body_length).

    Returns:
        Tuple of (action, confidence, summary, reason, duration_seconds).
    """
    import ollama

    prompt = _build_prompt(email_data, config)
    ollama_cfg = config.get("ollama", {})
    model = ollama_cfg.get("model", _DEFAULT_MODEL)
    host = ollama_cfg.get("host")

    # monotonic() is immune to wall-clock adjustments while timing the call
    start_time = time.monotonic()
    try:
        client = ollama.Client(host=host)
        # Low temperature for consistent classification
        response = client.generate(model=model, prompt=prompt, options={"temperature": 0.1})
        output = response["response"]
        action, confidence, summary, reason = _parse_response(output)
    except Exception as e:
        # On failure, default to "keep" with 0 confidence -> always queued
        output = f"ERROR: {e}"
        action = "keep"
        confidence = 0
        summary = "Classification failed"
        reason = f"error - {str(e)[:100]}"

    duration = time.monotonic() - start_time
    _log_llm(prompt, output, email_data, action, confidence, duration)
    return action, confidence, summary, reason, duration
to Argo, Los Angeles, CA and left near the front door or porch.", - "email_date": "Wed, 18 Feb 2026 04:15:24 +0000", - "status": "pending", - "found_at": "2026-02-18T16:18:42.347538" - }, - "msg_60c56a87": { - "imap_uid": "3", - "subject": "=?UTF-8?b?5L2V5LiN5ruh6Laz6Ieq5bex55qE5Y+j6IW55LmL5qyy?=", - "sender": "\"Uber Eats - uber(a)uber.com\" ", - "recipient": "uber@ylu17.com", - "summary": "Uber Eats has sent a notification that the user's order is ready for pickup.", - "email_date": "Wed, 18 Feb 2026 11:36:59 +0000", - "status": "pending", - "found_at": "2026-02-18T08:05:56.594842" - }, - "msg_ebd24205": { - "imap_uid": "4", - "subject": "Your order has been shipped (or closed if combined/delivered).", - "sender": "\"cd(a)woodenswords.com\"\r\n ", - "recipient": "mail@luyx.org", - "summary": "This email confirms that your order has been shipped or closed (if combined/delivered).", - "email_date": "Wed, 18 Feb 2026 16:07:58 +0000", - "status": "pending", - "found_at": "2026-02-18T12:01:19.048091" - }, - "msg_fa73b3bd": { - "imap_uid": "6", - "subject": "=?UTF-8?Q?Yanxin,_I=E2=80=99m_still_waiting_for_your_response?=", - "sender": "\"Arslan (via LinkedIn) - messages-noreply(a)linkedin.com\"\r\n ", - "recipient": "Yanxin Lu ", - "summary": "Arslan Ahmed, a Senior AI | ML | Full Stack Engineer from Ilford, invited you to connect on February 11, 2026 at 10:08 PM and is waiting for your response.", - "email_date": "Wed, 18 Feb 2026 18:53:45 +0000 (UTC)", - "status": "pending", - "found_at": "2026-02-18T12:04:34.602407" - }, - "msg_59f23736": { - "imap_uid": "1", - "subject": "New Software Engineer jobs that match your profile", - "sender": "\"LinkedIn - jobs-noreply(a)linkedin.com\"\r\n ", - "recipient": "Yanxin Lu ", - "summary": "LinkedIn has notified the user of new software engineering jobs that match their profile and includes a link to update their top card.", - "email_date": "Wed, 18 Feb 2026 02:07:12 +0000 (UTC)", - "status": "pending", - "found_at": 
"2026-02-18T16:16:00.784822" - } -} \ No newline at end of file diff --git a/scripts/email_processor/decision_store.py b/scripts/email_processor/decision_store.py new file mode 100644 index 0000000..203734e --- /dev/null +++ b/scripts/email_processor/decision_store.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python3 +""" +Decision Store - Manages decision history for learning-based email classification. + +This module persists every user and auto-made decision to a flat JSON file +(data/decision_history.json). Past decisions serve as few-shot examples +that are injected into the LLM prompt by classifier.py, enabling the +system to learn from user behavior over time. + +Storage format: a JSON array of decision entries, each containing sender, +recipient, subject, summary, action taken, and whether it was a user or +auto decision. +""" + +import json +import re +from datetime import datetime +from pathlib import Path +from collections import Counter + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +SCRIPT_DIR = Path(__file__).parent +DATA_DIR = SCRIPT_DIR / "data" +HISTORY_FILE = DATA_DIR / "decision_history.json" +PENDING_FILE = DATA_DIR / "pending_emails.json" + +# Stop-words excluded from subject keyword matching to reduce noise. 
_STOP_WORDS = {"re", "fwd", "the", "a", "an", "is", "to", "for", "and", "or", "your", "you"}


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

def _load_history():
    """Load the full decision history list from disk.

    Returns an empty list when the history file does not exist yet.  A file
    that exists but is not valid JSON raises (json.JSONDecodeError) instead
    of being treated as empty, so a later save cannot silently overwrite it.
    """
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return []


def _save_history(history):
    """Write the full decision history list to disk atomically.

    The data is written to a sibling "<name>.tmp" file and then renamed over
    the real file, so an interrupted write never leaves a truncated
    decision_history.json behind.
    """
    DATA_DIR.mkdir(exist_ok=True)
    tmp_file = HISTORY_FILE.with_name(HISTORY_FILE.name + ".tmp")
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(history, f, indent=2, ensure_ascii=False)
        tmp_file.replace(HISTORY_FILE)
    except Exception:
        # Don't leave a stale temp file around; the old history is untouched.
        if tmp_file.exists():
            tmp_file.unlink()
        raise


def _extract_domain(sender):
    """Extract the domain part from a sender string.

    Handles formats like:
        "Display Name <user@example.com>"
        user@example.com

    Returns the lower-cased domain, or "" when no address is found.
    """
    match = re.search(r"[\w.+-]+@([\w.-]+)", sender or "")
    return match.group(1).lower() if match else ""


def _extract_email_address(sender):
    """Extract the full email address from a sender string.

    Returns the first address found, lower-cased; when the string contains
    no address, the whole string lower-cased is returned instead.
    """
    sender = sender or ""
    match = re.search(r"([\w.+-]+@[\w.-]+)", sender)
    return match.group(1).lower() if match else sender.lower()


def _recipient_addresses(recipient):
    """Return the set of lower-cased addresses found in a recipient string.

    Falls back to the stripped, lower-cased string itself when it contains
    no address (e.g. a bare display name), and to an empty set when blank.
    """
    recipient = recipient or ""
    found = {addr.lower() for addr in re.findall(r"[\w.+-]+@[\w.-]+", recipient)}
    if found:
        return found
    bare = recipient.strip().lower()
    return {bare} if bare else set()


def _subject_keywords(subject):
    """Return the set of lower-cased subject words with stop-words removed."""
    return set(re.findall(r"\w+", (subject or "").lower())) - _STOP_WORDS


def _make_entry(email_data, action, source):
    """Build one history entry dict (timestamped now) without persisting it."""
    return {
        "timestamp": datetime.now().isoformat(timespec="seconds"),
        "sender": email_data.get("sender", ""),
        "sender_domain": _extract_domain(email_data.get("sender", "")),
        "recipient": email_data.get("recipient", ""),
        "subject": email_data.get("subject", ""),
        "summary": email_data.get("summary", ""),
        "action": action,
        "source": source,
    }


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def record_decision(email_data, action, source="user"):
    """Append a decision to the history file.

    Args:
        email_data: dict with keys: sender, recipient, subject, summary.
        action: one of "delete", "archive", "keep", "mark_read",
                or "label:<name>".
        source: "user" (manual review) or "auto" (high-confidence).

    Returns:
        The entry dict that was appended.
    """
    history = _load_history()
    entry = _make_entry(email_data, action, source)
    history.append(entry)
    _save_history(history)
    return entry


def get_relevant_examples(email_data, n=10):
    """Find the N most relevant past decisions for a given email.

    Relevance is scored by three signals:
      - Exact sender domain match: +3 points
      - Recipient address match:   +2 points
      - Subject keyword overlap:   +1 point per shared word

    Recipients are compared by extracted email address, so
    "Name <me@example.com>" and "me@example.com" match in either direction
    and a multi-recipient header matches on any of its addresses.

    Only entries with score > 0 are considered.  Results are returned
    sorted by descending relevance; ties keep history (oldest-first) order.
    """
    history = _load_history()
    if not history:
        return []

    target_domain = _extract_domain(email_data.get("sender", ""))
    target_recipients = _recipient_addresses(email_data.get("recipient", ""))
    target_words = _subject_keywords(email_data.get("subject", ""))

    scored = []
    for entry in history:
        score = 0

        # Signal 1: sender domain match
        if target_domain and entry.get("sender_domain", "") == target_domain:
            score += 3

        # Signal 2: recipient address match
        if target_recipients and (
            target_recipients & _recipient_addresses(entry.get("recipient", ""))
        ):
            score += 2

        # Signal 3: subject keyword overlap
        score += len(target_words & _subject_keywords(entry.get("subject", "")))

        if score > 0:
            scored.append((score, entry))

    # list.sort is stable, so equal scores stay in history order.
    scored.sort(key=lambda item: item[0], reverse=True)
    return [entry for _, entry in scored[:n]]


def get_sender_stats(sender_domain):
    """Get action distribution for a sender domain.

    Returns a dict like {"delete": 5, "keep": 2, "archive": 1}.
    Entries that carry no action are ignored.
    """
    actions = Counter(
        entry["action"]
        for entry in _load_history()
        if entry.get("sender_domain", "") == sender_domain and entry.get("action")
    )
    return dict(actions)


def get_sender_history_count(sender_domain):
    """Count total past decisions for a sender domain.

    Used by the scan command to decide whether there is enough history
    to trust auto-actions for this sender.
    """
    history = _load_history()
    return sum(1 for e in history if e.get("sender_domain", "") == sender_domain)


def get_known_labels():
    """Return the set of all label names used in past "label:<name>" decisions.

    These are offered to the LLM so it can reuse existing labels rather
    than inventing new ones.
    """
    prefix = "label:"
    return {
        action[len(prefix):]
        for action in (entry.get("action", "") for entry in _load_history())
        if action.startswith(prefix)
    }


def get_all_stats():
    """Compute aggregate statistics across the full decision history.

    Returns a dict with keys: total, by_action, by_source, top_domains
    (a list of up to 10 (domain, count) tuples).  Entries missing an action
    or source are counted under "unknown".
    Returns None if history is empty.
    """
    history = _load_history()
    if not history:
        return None

    by_action = Counter(e.get("action", "unknown") for e in history)
    by_source = Counter(e.get("source", "unknown") for e in history)

    # Top 10 sender domains by decision count
    domain_counts = Counter(e.get("sender_domain", "") for e in history)

    return {
        "total": len(history),
        "by_action": dict(by_action),
        "by_source": dict(by_source),
        "top_domains": domain_counts.most_common(10),
    }


# ---------------------------------------------------------------------------
# Migration
# ---------------------------------------------------------------------------

def migrate_pending():
    """One-time migration: import 'done' entries from pending_emails.json.

    Converts old-style action names ("archived" -> "archive", etc.) and
    records them as user decisions in the history file.

    NOT idempotent: every run appends the qualifying entries again, so
    running it twice creates duplicates.  Run it once only.

    All migrated entries are appended with a single load/save of the
    history file.

    Returns:
        Number of decisions migrated (0 when there is nothing to migrate).
    """
    if not PENDING_FILE.exists():
        print("No pending_emails.json found, nothing to migrate.")
        return 0

    with open(PENDING_FILE, "r", encoding="utf-8") as f:
        pending = json.load(f)

    # Map old action names to new ones
    action_map = {
        "archived": "archive",
        "kept": "keep",
        "deleted": "delete",
    }

    new_entries = []
    for data in pending.values():
        if data.get("status") != "done":
            continue
        old_action = data.get("action", "")
        action = action_map.get(old_action, old_action)
        if not action:
            continue

        email_data = {
            "sender": data.get("sender", ""),
            "recipient": data.get("recipient", ""),
            "subject": data.get("subject", ""),
            "summary": data.get("summary", ""),
        }
        new_entries.append(_make_entry(email_data, action, "user"))

    if new_entries:
        history = _load_history()
        history.extend(new_entries)
        _save_history(history)

    migrated = len(new_entries)
    print(f"Migrated {migrated} decisions from pending_emails.json")
    return migrated
+ +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +# Activate the virtualenv if it exists +if [ -d "$SCRIPT_DIR/venv" ]; then + source "$SCRIPT_DIR/venv/bin/activate" +fi + +exec python3 "$SCRIPT_DIR/main.py" "$@" diff --git a/scripts/email_processor/logs/2026-02-15.log b/scripts/email_processor/logs/2026-02-15.log deleted file mode 100644 index ab9beaf..0000000 --- a/scripts/email_processor/logs/2026-02-15.log +++ /dev/null @@ -1,50 +0,0 @@ -[2026-02-15 21:14:02] KEPT: Please confirm your mailbox youlu@luyanxin.com - From: "noreply@simplelogin.io" - Analysis: KEEP: Legitimate service confirmation email for mailbox addition (not promotional) - -[2026-02-15 21:15:04] KEPT: =?utf-8?B?RndkOiBHZXQgMTAlIG9mZiB5b3VyIG5leHQgb3JkZXIg4pyF?= - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: error - HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=60) - -[2026-02-15 21:15:37] KEPT: - =?utf-8?B?RndkOiDigJxzb2Z0d2FyZSBlbmdpbmVlcuKAnTogTWljcm9 - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: LinkedIn job alert notification for subscribed job search (not promotional) - -[2026-02-15 21:15:52] KEPT: Fwd: Your receipt from OpenRouter, Inc #2231-9732 - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: This is a legitimate receipt for a payment made to OpenRouter, Inc (a known AI service provider), not promotional content. 
- -[2026-02-15 21:16:10] KEPT: Fwd: Your ChatGPT code is 217237 - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: Legitimate security verification code from OpenAI (standard login confirmation) - -[2026-02-15 22:49:44] KEPT (69.0s): =?UTF-8?B?5rWL6K+V6YKu5Lu2?= - From: Yanxin Lu - Analysis: KEEP: Test email for delivery verification - - From: Yanxin Lu - Analysis: KEEP: Test email for delivery verification - -[2026-02-15 22:57:03] MOVED_TO_TRASH (68.5s): =?utf-8?B?RndkOiBHZXQgMTAlIG9mZiB5b3VyIG5leHQgb3JkZXIg4pyF?= - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: AD: Forwarded Uber promotional offer - - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: AD: Forwarded Uber promotional offer - -[2026-02-15 23:00:09] KEPT (120.1s): Fwd: Your ChatGPT code is 217237 - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: error - HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=120) - - From: "Yanxin Lu - crac1017(a)hotmail.com" - - Analysis: KEEP: error - HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=120) - diff --git a/scripts/email_processor/logs/2026-02-18.log b/scripts/email_processor/logs/2026-02-18.log deleted file mode 100644 index 5ab4664..0000000 --- a/scripts/email_processor/logs/2026-02-18.log +++ /dev/null @@ -1,29 +0,0 @@ -[2026-02-18 08:04:26] ADDED_TO_PENDING (msg_f1d43ea3) (108.6s): Delivered: "Voikinfo Bottom Gusset Bags..." - From: "Amazon.com - order-update(a)amazon.com" - - Analysis: KEEP: Standard delivery confirmation from Amazon - -[2026-02-18 08:05:56] ADDED_TO_PENDING (msg_60c56a87) (88.0s): =?UTF-8?b?5L2V5LiN5ruh6Laz6Ieq5bex55qE5Y+j6IW55LmL5qyy?= - From: "Uber Eats - uber(a)uber.com" - Analysis: KEEP: The decoded subject line "Your Uber Eats order is ready!" indicates a transactional order update, not an advertisement. 
- -[2026-02-18 12:01:19] ADDED_TO_PENDING (msg_ebd24205) (66.7s): Your order has been shipped (or closed if combined/delivered - From: "cd(a)woodenswords.com" - - Analysis: KEEP: System-generated shipping update notification from an e-commerce store, not promotional content. - -[2026-02-18 12:03:36] MOVED_TO_TRASH (133.4s): =?UTF-8?Q?=E2=80=9Csoftware_engineer=E2=80=9D:_Snap_Inc._-_S - From: "LinkedIn Job Alerts - jobalerts-noreply(a)linkedin.com" - - Analysis: AD: This email is a promotional job alert notification from LinkedIn's service for users who have set up job preferences. - -[2026-02-18 12:04:34] ADDED_TO_PENDING (msg_fa73b3bd) (57.3s): =?UTF-8?Q?Yanxin,_I=E2=80=99m_still_waiting_for_your_respons - From: "Arslan (via LinkedIn) - messages-noreply(a)linkedin.com" - - Analysis: KEEP: This is a standard LinkedIn connection request notification with no promotional content, discounts, or advertisements—only a reminder of an existing invitation. - -[2026-02-18 16:18:42] ADDED_TO_PENDING (msg_f1d43ea3) (102.1s): Delivered: "Voikinfo Bottom Gusset Bags..." - From: "Amazon.com - order-update(a)amazon.com" - - Analysis: KEEP: Standard delivery confirmation from Amazon, not a promotional message. - diff --git a/scripts/email_processor/main.py b/scripts/email_processor/main.py index 9589b15..2d9aa23 100644 --- a/scripts/email_processor/main.py +++ b/scripts/email_processor/main.py @@ -1,297 +1,704 @@ #!/usr/bin/env python3 """ -Email Processor - Auto filter ads using local Qwen3 -Moves ad emails to Trash folder (not permanently deleted) +Email Processor - Learning-based mailbox cleanup using Himalaya + Ollama. + +Uses himalaya CLI for all IMAP operations (no raw imaplib, no stored +credentials). Uses a local Qwen3 model via Ollama for classification, +with few-shot learning from past user decisions. + +All commands are non-interactive — they take arguments, mutate files on +disk, and exit. Suitable for cron (OpenClaw) and scripting. 
+ +Subcommands: + python main.py scan # classify unseen emails + python main.py scan --recent 30 # classify last 30 days + python main.py scan --dry-run # classify only, no changes + python main.py scan --recent 7 --dry-run # combine both + python main.py review list # print pending queue + python main.py review <num-or-id> <action> # act on one email + python main.py review all <action> # act on all pending + python main.py review accept # accept all suggestions + python main.py stats # show decision history + python main.py migrate # import old decisions + +Action mapping (what each classification does to the email): + delete -> himalaya message delete <id> (moves to Trash) + archive -> himalaya message move Archive <id> + keep -> no-op (leave unread in inbox) + mark_read -> himalaya flag add <id> seen + label:X -> himalaya message move <X> <id> """ import json -import imaplib -import email -import os +import subprocess +import hashlib import sys -from datetime import datetime +from datetime import datetime, timedelta from pathlib import Path -# Config +import classifier +import decision_store + +# --------------------------------------------------------------------------- +# Paths — all relative to the script's own directory +# --------------------------------------------------------------------------- + SCRIPT_DIR = Path(__file__).parent CONFIG_FILE = SCRIPT_DIR / "config.json" LOGS_DIR = SCRIPT_DIR / "logs" DATA_DIR = SCRIPT_DIR / "data" PENDING_FILE = DATA_DIR / "pending_emails.json" + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + def load_config(): - """Load configuration""" + """Load config.json from the script directory. + + Only ollama, rules, and automation settings are needed — himalaya + manages its own IMAP config separately.
+ """ with open(CONFIG_FILE) as f: return json.load(f) -def connect_imap(config): - """Connect to IMAP server""" - imap_config = config['imap'] - mail = imaplib.IMAP4_SSL(imap_config['host'], imap_config['port']) - mail.login(imap_config['email'], imap_config['password']) - return mail -def get_unseen_emails(mail): - """Get list of unseen email IDs""" - mail.select('INBOX') - _, search_data = mail.search(None, 'UNSEEN') - email_ids = search_data[0].split() - return email_ids +# --------------------------------------------------------------------------- +# Himalaya CLI wrappers +# +# All IMAP operations go through himalaya, which handles connection, +# auth, and protocol details. We call it as a subprocess and parse +# its JSON output. +# --------------------------------------------------------------------------- -def fetch_email(mail, email_id): - """Fetch email content""" - _, msg_data = mail.fetch(email_id, '(RFC822)') - raw_email = msg_data[0][1] - msg = email.message_from_bytes(raw_email) - - # Extract subject - subject = msg['Subject'] or '(No Subject)' - - # Extract sender - sender = msg['From'] or '(Unknown)' - - # Extract recipient - recipient = msg['To'] or '(Unknown)' - - # Extract date - date = msg['Date'] or datetime.now().isoformat() - - # Extract body - body = "" - if msg.is_multipart(): - for part in msg.walk(): - if part.get_content_type() == "text/plain": - try: - body = part.get_payload(decode=True).decode('utf-8', errors='ignore') - break - except: - pass +def _himalaya(*args): + """Run a himalaya command and return its stdout. + + Raises subprocess.CalledProcessError on failure. 
+ """ + result = subprocess.run( + ["himalaya", *args], + capture_output=True, text=True, check=True, + ) + return result.stdout + + +def _himalaya_json(*args): + """Run a himalaya command with JSON output and return parsed result.""" + return json.loads(_himalaya("-o", "json", *args)) + + +# --------------------------------------------------------------------------- +# Email fetching via himalaya +# --------------------------------------------------------------------------- + +def get_unseen_envelopes(): + """Fetch envelope metadata for all unseen emails in INBOX. + + Returns a list of envelope dicts from himalaya's JSON output. + Each has keys like: id, subject, from, to, date, flags. + """ + return _himalaya_json("envelope", "list", "not", "flag", "seen") + + +def get_recent_envelopes(days): + """Fetch envelope metadata for all emails from the last N days. + + Includes both read and unread emails — useful for testing and + bulk-classifying historical mail. + """ + since = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d") + return _himalaya_json("envelope", "list", "after", since) + + +def read_message(envelope_id): + """Read the full message body without marking it as seen. + + The --preview flag prevents himalaya from adding the \\Seen flag, + so the email stays unread for the actual action to handle. + """ + # Read plain text, no headers, without marking as seen + return _himalaya("message", "read", "--preview", "--no-headers", str(envelope_id)) + + +def build_email_data(envelope, body, config): + """Build the email_data dict expected by classifier and decision_store. + + Combines envelope metadata (from himalaya envelope list) with the + message body (from himalaya message read). 
+ """ + max_body = config.get("rules", {}).get("max_body_length", 1000) + + # himalaya envelope JSON uses "from" as a nested object or string + sender = envelope.get("from", {}) + if isinstance(sender, dict): + # Format: {"name": "Display Name", "addr": "user@example.com"} + name = sender.get("name", "") + addr = sender.get("addr", "") + sender_str = f"{name} <{addr}>" if name else addr + elif isinstance(sender, list) and sender: + first = sender[0] + name = first.get("name", "") + addr = first.get("addr", "") + sender_str = f"{name} <{addr}>" if name else addr else: - try: - body = msg.get_payload(decode=True).decode('utf-8', errors='ignore') - except: - pass - + sender_str = str(sender) + + # Same for "to" + to = envelope.get("to", {}) + if isinstance(to, dict): + name = to.get("name", "") + addr = to.get("addr", "") + to_str = f"{name} <{addr}>" if name else addr + elif isinstance(to, list) and to: + first = to[0] + name = first.get("name", "") + addr = first.get("addr", "") + to_str = f"{name} <{addr}>" if name else addr + else: + to_str = str(to) + return { - 'id': email_id, - 'subject': subject, - 'sender': sender, - 'recipient': recipient, - 'date': date, - 'body': body[:300] # Limit body length + "id": str(envelope.get("id", "")), + "subject": envelope.get("subject", "(No Subject)"), + "sender": sender_str, + "recipient": to_str, + "date": envelope.get("date", ""), + "body": body[:max_body], } -def analyze_with_qwen3(email_data, config): - """Analyze email with local Qwen3 using official library""" - import ollama - import time - - prompt = f"""/no_think -Analyze this email and provide two pieces of information: +# --------------------------------------------------------------------------- +# IMAP actions via himalaya +# +# Each function executes one himalaya command. Returns True on success. +# On failure, prints the error and returns False. +# --------------------------------------------------------------------------- -1. 
Is this an advertisement/promotional email? -2. Summarize the email in one sentence +def execute_action(envelope_id, action): + """Dispatch an action string to the appropriate himalaya command. -Email details: -Subject: {email_data['subject']} -Sender: {email_data['sender']} -Body: {email_data['body'][:300]} + Action mapping: + "delete" -> himalaya message delete <id> + "archive" -> himalaya message move Archive <id> + "keep" -> no-op (leave unread in inbox) + "mark_read" -> himalaya flag add <id> seen + "label:X" -> himalaya message move <X> <id> -Respond in this exact format: -IsAD: [YES or NO] -Summary: [one sentence summary] -Reason: [brief explanation] -""" - - start_time = time.time() - model = config['ollama'].get('model', 'qwen3:4b') - + Returns True on success, False on failure. + """ + eid = str(envelope_id) try: - response = ollama.generate(model=model, prompt=prompt, options={'temperature': 0.1}) - output = response['response'] - - # Parse output - is_ad = False - summary = "No summary" - reason = "Unknown" - - for line in output.strip().split('\n'): - if line.startswith('IsAD:'): - is_ad = 'YES' in line.upper() - elif line.startswith('Summary:'): - summary = line.replace('Summary:', '').strip()[:200] - elif line.startswith('Reason:'): - reason = line.replace('Reason:', '').strip() - - if is_ad: - result = f"AD: {reason}" + if action == "delete": + _himalaya("message", "delete", eid) + elif action == "archive": + _himalaya("message", "move", "Archive", eid) + elif action == "keep": + pass # leave unread in inbox — no IMAP changes + elif action == "mark_read": + _himalaya("flag", "add", eid, "seen") + elif action.startswith("label:"): + folder = action[6:] + _himalaya("message", "move", folder, eid) else: - result = f"KEEP: {reason}" - - except Exception as e: - result = f"KEEP: error - {str(e)[:100]}" - summary = "Analysis failed" - is_ad = False - - duration = time.time() - start_time - return result, summary, is_ad, duration - -def move_to_trash(mail, email_id): - """Move
email to Trash folder""" - # Copy to Trash - result = mail.copy(email_id, 'Trash') - if result[0] == 'OK': - # Mark original as deleted - mail.store(email_id, '+FLAGS', '\\Deleted') + print(f" Unknown action: {action}") + return False return True - return False + except subprocess.CalledProcessError as e: + print(f" Himalaya error: {e.stderr.strip()}") + return False -def log_result(log_file, email_data, analysis, action, duration=None): - """Log processing result with Qwen3 duration""" - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - duration_str = f" ({duration:.1f}s)" if duration else "" - with open(log_file, 'a') as f: - f.write(f"[{timestamp}] {action}{duration_str}: {email_data['subject'][:60]}\n") - f.write(f" From: {email_data['sender']}\n") - f.write(f" Analysis: {analysis}\n\n") + +# --------------------------------------------------------------------------- +# Pending queue — emails awaiting manual review +# +# Stored as a JSON dict in data/pending_emails.json, keyed by msg_id. +# Each entry tracks the envelope ID (for himalaya), classifier suggestion, +# and status (pending/done). 
+# --------------------------------------------------------------------------- def load_pending(): - """Load pending emails from JSON file""" + """Load the pending queue from disk.""" if not PENDING_FILE.exists(): return {} - with open(PENDING_FILE, 'r', encoding='utf-8') as f: + with open(PENDING_FILE, "r", encoding="utf-8") as f: return json.load(f) + def save_pending(pending): - """Save pending emails to JSON file""" + """Write the pending queue to disk.""" DATA_DIR.mkdir(exist_ok=True) - with open(PENDING_FILE, 'w', encoding='utf-8') as f: + with open(PENDING_FILE, "w", encoding="utf-8") as f: json.dump(pending, f, indent=2, ensure_ascii=False) -def add_to_pending(email_data, summary, imap_uid, recipient): - """Add email to pending queue""" + +def add_to_pending(email_data, summary, reason, action_suggestion, confidence): + """Add an email to the pending queue for manual review. + + Stores the classifier's suggestion and confidence alongside the + email metadata so the user can see what the model thought. 
+ """ pending = load_pending() - - # Generate unique ID - import hashlib - msg_id = f"msg_{hashlib.md5(f'{imap_uid}_{email_data['subject']}'.encode()).hexdigest()[:8]}" - - # Extract date from email - email_date = email_data.get('date', datetime.now().isoformat()) - + + # Generate a stable ID from envelope ID + subject + eid = str(email_data["id"]) + key = f"{eid}_{email_data['subject']}" + msg_id = f"msg_{hashlib.md5(key.encode()).hexdigest()[:8]}" + pending[msg_id] = { - "imap_uid": str(imap_uid), - "subject": email_data['subject'], - "sender": email_data['sender'], - "recipient": recipient, + "envelope_id": eid, + "subject": email_data["subject"], + "sender": email_data["sender"], + "recipient": email_data.get("recipient", ""), "summary": summary, - "email_date": email_date, + "reason": reason, + "suggested_action": action_suggestion, + "confidence": confidence, + "email_date": email_data.get("date", ""), "status": "pending", - "found_at": datetime.now().isoformat() + "found_at": datetime.now().isoformat(), } - save_pending(pending) return msg_id -def main(): - """Main processing function""" - print("📧 Email Processor Starting...") - - # Load config - config = load_config() - - # Setup logging + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- + +def log_result(log_file, email_data, action, detail, duration=None): + """Append a one-line log entry for a processed email.""" + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + dur = f" ({duration:.1f}s)" if duration else "" + with open(log_file, "a") as f: + f.write(f"[{timestamp}] {action}{dur}: {email_data['subject'][:60]}\n") + f.write(f" From: {email_data['sender']}\n") + f.write(f" Detail: {detail}\n\n") + + +# --------------------------------------------------------------------------- +# Subcommand: scan +# --------------------------------------------------------------------------- + 
+def cmd_scan(config, recent=None, dry_run=False): + """Fetch emails, classify each one, then auto-act or queue. + + Auto-action is based on a single confidence threshold. When the + decision history has fewer than 20 entries, a higher threshold (95%) + is used to be conservative during the learning phase. Once enough + history accumulates, the configured threshold takes over. + + Args: + config: full config dict. + recent: if set, fetch emails from last N days (not just unseen). + dry_run: if True, classify and print but skip all actions. + """ + mode = "DRY RUN" if dry_run else "Scan" + print(f"Email Processor - {mode}") + print("=" * 50) + LOGS_DIR.mkdir(exist_ok=True) log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log" - - try: - # Connect to IMAP - print("Connecting to IMAP...") - mail = connect_imap(config) - print("✅ Connected") - - # Get unseen emails - email_ids = get_unseen_emails(mail) - print(f"Found {len(email_ids)} unread emails") - - if not email_ids: - print("No new emails to process") - mail.logout() - return - - # Process each email - processed = 0 - moved_to_trash = 0 - added_to_pending = 0 - - for email_id in email_ids: - print(f"\nProcessing email {email_id.decode()}...") - - # Fetch email - email_data = fetch_email(mail, email_id) - print(f" Subject: {email_data['subject'][:50]}") - - # Analyze with Qwen3 (one call for both ad detection and summary) - analysis, summary, is_ad, duration = analyze_with_qwen3(email_data, config) - print(f" Analysis: {analysis[:100]}") - print(f" Summary: {summary[:60]}...") - print(f" Qwen3 time: {duration:.1f}s") - - # Check if analysis was successful (not an error) - if 'error -' in analysis.lower(): - # Analysis failed - keep email unread for retry - print(f" -> Analysis failed, keeping unread for retry") - log_result(log_file, email_data, analysis, "FAILED_RETRY", duration) - # Don't increment processed count - will retry next time - continue - - # Analysis successful - determine action - if 
is_ad: - print(" -> Moving to Trash") - if move_to_trash(mail, email_id): - log_result(log_file, email_data, analysis, "MOVED_TO_TRASH", duration) - moved_to_trash += 1 - else: - log_result(log_file, email_data, analysis, "MOVE_FAILED", duration) + + # Load automation threshold + automation = config.get("automation", {}) + configured_threshold = automation.get("confidence_threshold", 75) + + # Adaptive threshold: be conservative when history is thin + stats = decision_store.get_all_stats() + total_decisions = stats["total"] if stats else 0 + bootstrap_min = automation.get("bootstrap_min_decisions", 20) + if total_decisions < bootstrap_min: + confidence_threshold = 95 + print(f"Learning phase ({total_decisions}/{bootstrap_min} decisions) — threshold: 95%\n") + else: + confidence_threshold = configured_threshold + + # Fetch envelopes via himalaya + if recent: + envelopes = get_recent_envelopes(recent) + print(f"Found {len(envelopes)} emails from last {recent} days\n") + else: + envelopes = get_unseen_envelopes() + print(f"Found {len(envelopes)} unread emails\n") + + if not envelopes: + print("No new emails to process.") + return + + auto_acted = 0 + queued = 0 + + for envelope in envelopes: + eid = envelope.get("id", "?") + print(f"[{eid}] ", end="", flush=True) + + # Read message body without marking as seen + try: + body = read_message(eid) + except subprocess.CalledProcessError: + body = "" + + email_data = build_email_data(envelope, body, config) + print(f"{email_data['subject'][:55]}") + + # Run the LLM classifier (includes few-shot examples from history) + action, confidence, summary, reason, duration = classifier.classify_email( + email_data, config + ) + + print(f" -> {action} (confidence: {confidence}%, {duration:.1f}s)") + print(f" {reason[:80]}") + + # Auto-act if confidence meets threshold + can_auto = confidence >= confidence_threshold + + if dry_run: + # Dry run: log what would happen, touch nothing + log_result(log_file, email_data, 
f"DRYRUN:{action}@{confidence}%", reason, duration) + if can_auto: + print(f" -> Would AUTO-execute: {action}") + auto_acted += 1 else: - # Non-ad email - add to pending queue - print(" -> Adding to pending queue") - - # Add to pending - msg_internal_id = add_to_pending( - email_data, - summary, - email_id.decode(), - email_data.get('recipient', 'youlu@luyanxin.com') + print(f" -> Would queue for review") + queued += 1 + elif can_auto: + # Auto-execute the action via himalaya + success = execute_action(eid, action) + if success: + decision_store.record_decision( + {**email_data, "summary": summary}, action, source="auto" ) - - # Mark as read (so it won't be processed again) - mail.store(email_id, '+FLAGS', '\\Seen') - - log_result(log_file, email_data, analysis, f"ADDED_TO_PENDING ({msg_internal_id})", duration) - added_to_pending += 1 - - processed += 1 - - # Expunge deleted emails - mail.expunge() - mail.logout() - - # Summary - print(f"\n{'='*50}") - print(f"Total emails checked: {len(email_ids)}") - print(f"Successfully processed: {processed} emails") - print(f" - Moved to trash (ads): {moved_to_trash}") - print(f" - Added to pending queue: {added_to_pending}") - print(f"Failed (will retry next time): {len(email_ids) - processed}") - print(f"\n📁 Pending queue: {PENDING_FILE}") - print(f"📝 Log: {log_file}") - print(f"\n💡 Run 'python process_queue.py' to view and process pending emails") - - except Exception as e: - print(f"❌ Error: {e}") + log_result(log_file, email_data, f"AUTO:{action}", reason, duration) + print(f" ** AUTO-executed: {action}") + auto_acted += 1 + else: + # Himalaya action failed — fall back to queuing + log_result(log_file, email_data, "AUTO_FAILED", reason, duration) + print(f" !! 
Auto-action failed, queuing instead") + add_to_pending(email_data, summary, reason, action, confidence) + queued += 1 + else: + # Not enough confidence or history — queue for manual review + add_to_pending(email_data, summary, reason, action, confidence) + # Mark as read to prevent re-processing on next scan + if not dry_run: + try: + _himalaya("flag", "add", str(eid), "seen") + except subprocess.CalledProcessError: + pass + log_result(log_file, email_data, f"QUEUED:{action}@{confidence}%", reason, duration) + print(f" -> Queued (confidence {confidence}% < {confidence_threshold}%)") + queued += 1 + + # Print run summary + print(f"\n{'=' * 50}") + print(f"Processed: {len(envelopes)} emails") + print(f" Auto-acted: {auto_acted}") + print(f" Queued for review: {queued}") + print(f"\nRun 'python main.py review list' to see pending emails") + + +# --------------------------------------------------------------------------- +# Subcommand: review +# +# Non-interactive: each invocation takes arguments, acts, and exits. +# No input() calls. Compatible with cron and scripting. +# --------------------------------------------------------------------------- + +def _get_pending_items(): + """Return only pending (not done) items, sorted by found_at.""" + pending = load_pending() + items = {k: v for k, v in pending.items() if v.get("status") == "pending"} + sorted_items = sorted(items.items(), key=lambda x: x[1].get("found_at", "")) + return sorted_items + + +def cmd_review_list(): + """Print the pending queue and exit. + + Shows each email with its number, ID, subject, sender, summary, + and the classifier's suggested action with confidence. + """ + sorted_items = _get_pending_items() + + if not sorted_items: + print("No pending emails to review.") + return + + print(f"Pending emails: {len(sorted_items)}") + print("=" * 60) + + for i, (msg_id, data) in enumerate(sorted_items, 1): + suggested = data.get("suggested_action", "?") + conf = data.get("confidence", "?") + print(f"\n {i}. 
[{msg_id}]") + print(f" Subject: {data.get('subject', 'N/A')[:55]}") + print(f" From: {data.get('sender', 'N/A')[:55]}") + print(f" To: {data.get('recipient', 'N/A')[:40]}") + print(f" Summary: {data.get('summary', 'N/A')[:70]}") + print(f" Suggested: {suggested} ({conf}% confidence)") + + print(f"\n{'=' * 60}") + print("Usage:") + print(" python main.py review ") + print(" python main.py review all ") + print(" python main.py review accept") + print("Actions: delete / archive / keep / mark_read / label:") + + +def cmd_review_act(selector, action): + """Execute an action on one or more pending emails. + + Args: + selector: a 1-based number, a msg_id string, or "all". + action: one of delete/archive/keep/mark_read/label:. + """ + # Validate action + valid_actions = {"delete", "archive", "keep", "mark_read"} + if action not in valid_actions and not action.startswith("label:"): + print(f"Invalid action: {action}") + print(f"Valid: {', '.join(sorted(valid_actions))}, label:") sys.exit(1) + sorted_items = _get_pending_items() + if not sorted_items: + print("No pending emails to review.") + return + + # Resolve targets + if selector == "all": + targets = sorted_items + else: + target = _resolve_target(selector, sorted_items) + if target is None: + sys.exit(1) + targets = [target] + + LOGS_DIR.mkdir(exist_ok=True) + log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log" + + # Execute action on each target + for msg_id, data in targets: + eid = data.get("envelope_id") or data.get("imap_uid") + if not eid: + print(f" {msg_id}: No envelope ID, skipping") + continue + + success = execute_action(eid, action) + if success: + # Record decision for future learning + decision_store.record_decision(data, action, source="user") + + # Mark as done in pending queue + pending = load_pending() + pending[msg_id]["status"] = "done" + pending[msg_id]["action"] = action + pending[msg_id]["processed_at"] = datetime.now().isoformat() + save_pending(pending) + + 
log_result(log_file, data, f"REVIEW:{action}", data.get("reason", "")) + print(f" {msg_id}: {action} -> OK ({data['subject'][:40]})") + else: + log_result(log_file, data, f"REVIEW_FAILED:{action}", data.get("reason", "")) + print(f" {msg_id}: {action} -> FAILED") + + +def cmd_review_accept(): + """Accept all classifier suggestions for pending emails. + + For each pending email, executes the suggested_action that the + classifier assigned during scan. Records each as a "user" decision + since the user explicitly chose to accept. + """ + sorted_items = _get_pending_items() + if not sorted_items: + print("No pending emails to review.") + return + + LOGS_DIR.mkdir(exist_ok=True) + log_file = LOGS_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.log" + + for msg_id, data in sorted_items: + action = data.get("suggested_action") + if not action: + print(f" {msg_id}: No suggestion, skipping") + continue + + eid = data.get("envelope_id") or data.get("imap_uid") + if not eid: + print(f" {msg_id}: No envelope ID, skipping") + continue + + success = execute_action(eid, action) + if success: + decision_store.record_decision(data, action, source="user") + + pending = load_pending() + pending[msg_id]["status"] = "done" + pending[msg_id]["action"] = action + pending[msg_id]["processed_at"] = datetime.now().isoformat() + save_pending(pending) + + log_result(log_file, data, f"ACCEPT:{action}", data.get("reason", "")) + print(f" {msg_id}: {action} -> OK ({data['subject'][:40]})") + else: + log_result(log_file, data, f"ACCEPT_FAILED:{action}", data.get("reason", "")) + print(f" {msg_id}: {action} -> FAILED") + + +def _resolve_target(selector, sorted_items): + """Resolve a selector (number or msg_id) to a (msg_id, data) tuple. + + Returns None and prints an error if the selector is invalid. + """ + # Try as 1-based index + try: + idx = int(selector) - 1 + if 0 <= idx < len(sorted_items): + return sorted_items[idx] + else: + print(f"Invalid number. 
Range: 1-{len(sorted_items)}") + return None + except ValueError: + pass + + # Try as msg_id + for msg_id, data in sorted_items: + if msg_id == selector: + return (msg_id, data) + + print(f"Not found: {selector}") + return None + + +# --------------------------------------------------------------------------- +# Subcommand: stats +# --------------------------------------------------------------------------- + +def cmd_stats(): + """Print a summary of the decision history. + + Shows total decisions, user vs. auto breakdown, action distribution, + top sender domains, and custom labels. + """ + stats = decision_store.get_all_stats() + + if not stats: + print("No decision history yet.") + print("Run 'python main.py scan' and 'python main.py review' to build history.") + return + + print("Decision History Stats") + print("=" * 50) + print(f"Total decisions: {stats['total']}") + + # User vs. auto breakdown + print(f"\nBy source:") + for source, count in sorted(stats["by_source"].items()): + pct = count / stats["total"] * 100 + print(f" {source}: {count} ({pct:.0f}%)") + + auto = stats["by_source"].get("auto", 0) + if stats["total"] > 0: + print(f" Automation rate: {auto / stats['total'] * 100:.0f}%") + + # Action distribution + print(f"\nBy action:") + for action, count in sorted(stats["by_action"].items(), key=lambda x: -x[1]): + print(f" {action}: {count}") + + # Top sender domains with per-domain action counts + print(f"\nTop sender domains:") + for domain, count in stats["top_domains"]: + domain_stats = decision_store.get_sender_stats(domain) + detail = ", ".join( + f"{a}:{c}" for a, c in sorted(domain_stats.items(), key=lambda x: -x[1]) + ) + print(f" {domain}: {count} ({detail})") + + # Custom labels + labels = decision_store.get_known_labels() + if labels: + print(f"\nKnown labels: {', '.join(sorted(labels))}") + + +# --------------------------------------------------------------------------- +# Subcommand: migrate +# 
# ---------------------------------------------------------------------------
# Subcommand: migrate
# ---------------------------------------------------------------------------

def cmd_migrate():
    """Import old pending_emails.json 'done' entries into decision history.

    Run once after upgrading from the old system. Delegates to
    ``decision_store.migrate_pending()``, which is documented as converting
    old action names (archived/kept/deleted) to new ones
    (archive/keep/delete).
    """
    decision_store.migrate_pending()


# ---------------------------------------------------------------------------
# Entry point & argument parsing
#
# Simple hand-rolled parser — no external dependencies. Supports:
#   main.py [subcommand] [--recent N] [--dry-run] [review-args...]
# ---------------------------------------------------------------------------

def _parse_cli_args(argv):
    """Parse command-line arguments (program name already removed).

    The first positional argument is the subcommand; every later positional
    argument is collected for the subcommand. Flags may appear anywhere.

    Returns:
        A ``(subcommand, recent, dry_run, extra_args)`` tuple. ``subcommand``
        defaults to ``"scan"`` when no positional argument is given,
        ``recent`` is an int or None, ``dry_run`` is a bool, and
        ``extra_args`` is a list of strings.

    Raises:
        SystemExit: with code 1, after printing a message, for an unknown
            flag or a missing / non-integer ``--recent`` value.
    """
    subcommand = None  # None means "not given yet", distinct from an explicit "scan"
    recent = None
    dry_run = False
    extra_args = []

    remaining = iter(argv)
    for arg in remaining:
        if arg == "--recent":
            value = next(remaining, None)
            if value is None:
                print("--recent requires a number")
                sys.exit(1)
            try:
                recent = int(value)
            except ValueError:
                print(f"Invalid value for --recent: {value}")
                sys.exit(1)
        elif arg == "--dry-run":
            dry_run = True
        elif arg.startswith("--"):
            print(f"Unknown flag: {arg}")
            sys.exit(1)
        elif subcommand is None:
            # First positional arg is the subcommand
            subcommand = arg
        else:
            # Remaining positional args go to the subcommand
            extra_args.append(arg)

    if subcommand is None:
        subcommand = "scan"
    return subcommand, recent, dry_run, extra_args


if __name__ == "__main__":
    subcommand, recent, dry_run, extra_args = _parse_cli_args(sys.argv[1:])

    config = load_config()

    if subcommand == "scan":
        cmd_scan(config, recent=recent, dry_run=dry_run)

    elif subcommand == "review":
        if not extra_args or extra_args[0] == "list":
            cmd_review_list()
        elif extra_args[0] == "accept":
            cmd_review_accept()
        elif len(extra_args) == 2:
            cmd_review_act(extra_args[0], extra_args[1])
        else:
            print("Usage:")
            print(" python main.py review list")
            print(" python main.py review NUMBER_OR_ID ACTION")
            print(" python main.py review all ACTION")
            print(" python main.py review accept")
            sys.exit(1)

    elif subcommand == "stats":
        cmd_stats()

    elif subcommand == "migrate":
        cmd_migrate()

    else:
        print(f"Unknown subcommand: {subcommand}")
        print("Usage: python main.py [scan|review|stats|migrate] [--recent N] [--dry-run]")
        sys.exit(1)
save_pending(pending): - """Save pending emails to JSON file""" - DATA_FILE.parent.mkdir(exist_ok=True) - with open(DATA_FILE, 'w', encoding='utf-8') as f: - json.dump(pending, f, indent=2, ensure_ascii=False) - -def connect_imap(config): - """Connect to IMAP server""" - mail = imaplib.IMAP4_SSL(config['imap']['host'], config['imap']['port']) - mail.login(config['imap']['email'], config['imap']['password']) - return mail - -def show_pending_list(): - """Display all pending emails""" - pending = load_pending() - - if not pending: - print("📭 没有待处理的邮件") - return - - print(f"\n📧 待处理邮件列表 ({len(pending)} 封)") - print("=" * 60) - - # Sort by email_date - sorted_items = sorted( - pending.items(), - key=lambda x: x[1].get('email_date', '') - ) - - for msg_id, data in sorted_items: - if data.get('status') == 'pending': - print(f"\n🆔 {msg_id}") - print(f" 主题: {data.get('subject', 'N/A')[:50]}") - print(f" 发件人: {data.get('sender', 'N/A')}") - print(f" 收件人: {data.get('recipient', 'N/A')}") - print(f" 时间: {data.get('email_date', 'N/A')}") - print(f" 摘要: {data.get('summary', 'N/A')[:80]}") - - print("\n" + "=" * 60) - print("\n可用指令:") - print(" • 归档 [ID] - 移动到 Archive 文件夹") - print(" • 保留 [ID] - 标记已读,留在收件箱") - print(" • 删除 [ID] - 移动到 Trash") - print(" • 全部处理 - 列出所有并批量操作") - -def archive_email(config, msg_id): - """Archive a specific email by ID""" - pending = load_pending() - - if msg_id not in pending: - print(f"❌ 未找到邮件 ID: {msg_id}") - return False - - email_data = pending[msg_id] - uid = email_data.get('imap_uid') - - if not uid: - print(f"❌ 邮件 {msg_id} 没有 UID") - return False - - try: - mail = connect_imap(config) - mail.select('INBOX') - - # Copy to Archive - result = mail.copy(uid, 'Archive') - if result[0] == 'OK': - # Mark original as deleted - mail.store(uid, '+FLAGS', '\\Deleted') - mail.expunge() - - # Update status - pending[msg_id]['status'] = 'done' - pending[msg_id]['action'] = 'archived' - pending[msg_id]['processed_at'] = datetime.now().isoformat() - 
save_pending(pending) - - print(f"✅ 已归档: {email_data.get('subject', 'N/A')[:40]}") - return True - else: - print(f"❌ 归档失败: {result}") - return False - - except Exception as e: - print(f"❌ 错误: {e}") - return False - finally: - try: - mail.logout() - except: - pass - -def keep_email(config, msg_id): - """Keep email in inbox, mark as read""" - pending = load_pending() - - if msg_id not in pending: - print(f"❌ 未找到邮件 ID: {msg_id}") - return False - - email_data = pending[msg_id] - uid = email_data.get('imap_uid') - - if not uid: - print(f"❌ 邮件 {msg_id} 没有 UID") - return False - - try: - mail = connect_imap(config) - mail.select('INBOX') - - # Mark as read (Seen) - mail.store(uid, '+FLAGS', '\\Seen') - - # Update status - pending[msg_id]['status'] = 'done' - pending[msg_id]['action'] = 'kept' - pending[msg_id]['processed_at'] = datetime.now().isoformat() - save_pending(pending) - - print(f"✅ 已保留: {email_data.get('subject', 'N/A')[:40]}") - return True - - except Exception as e: - print(f"❌ 错误: {e}") - return False - finally: - try: - mail.logout() - except: - pass - -def delete_email(config, msg_id): - """Move email to Trash""" - pending = load_pending() - - if msg_id not in pending: - print(f"❌ 未找到邮件 ID: {msg_id}") - return False - - email_data = pending[msg_id] - uid = email_data.get('imap_uid') - - if not uid: - print(f"❌ 邮件 {msg_id} 没有 UID") - return False - - try: - mail = connect_imap(config) - mail.select('INBOX') - - # Copy to Trash - result = mail.copy(uid, 'Trash') - if result[0] == 'OK': - mail.store(uid, '+FLAGS', '\\Deleted') - mail.expunge() - - # Update status - pending[msg_id]['status'] = 'done' - pending[msg_id]['action'] = 'deleted' - pending[msg_id]['processed_at'] = datetime.now().isoformat() - save_pending(pending) - - print(f"✅ 已删除: {email_data.get('subject', 'N/A')[:40]}") - return True - else: - print(f"❌ 删除失败: {result}") - return False - - except Exception as e: - print(f"❌ 错误: {e}") - return False - finally: - try: - mail.logout() - except: - 
pass - -def main(): - """Main function - show pending list""" - import json - - # Load config - config_file = Path(__file__).parent / "config.json" - with open(config_file) as f: - config = json.load(f) - - show_pending_list() - -if __name__ == "__main__": - main() diff --git a/scripts/email_processor/test_single.py b/scripts/email_processor/test_single.py deleted file mode 100644 index f329f33..0000000 --- a/scripts/email_processor/test_single.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -"""Test single email analysis""" -import requests -import json - -email_data = { - "subject": "Fwd: Get 10% off your next order 🎉", - "sender": "crac1017@hotmail.com", - "body": "Get 10% off your next order! Limited time offer. Shop now and save!" -} - -prompt = f"""Analyze this email and determine if it's an advertisement/promotional email. - -Subject: {email_data['subject']} -Sender: {email_data['sender']} -Body preview: {email_data['body'][:200]} - -Is this an advertisement or promotional email? 
Answer with ONLY: -- "AD: [brief reason]" if it's an ad/promo -- "KEEP: [brief reason]" if it's important/legitimate - -Be conservative - only mark as AD if clearly promotional.""" - -print("Sending to Qwen3...") -try: - response = requests.post( - "http://localhost:11434/api/generate", - json={ - "model": "qwen3:4b", - "prompt": prompt, - "stream": False - }, - timeout=120 - ) - result = response.json() - print(f"Result: {result.get('response', 'error')}") -except Exception as e: - print(f"Error: {e}") diff --git a/scripts/email_processor/venv/bin/python b/scripts/email_processor/venv/bin/python index b8a0adb..b4d7bb3 120000 --- a/scripts/email_processor/venv/bin/python +++ b/scripts/email_processor/venv/bin/python @@ -1 +1 @@ -python3 \ No newline at end of file +python3.13 \ No newline at end of file diff --git a/scripts/email_processor/venv/bin/python3 b/scripts/email_processor/venv/bin/python3 index ae65fda..b4d7bb3 120000 --- a/scripts/email_processor/venv/bin/python3 +++ b/scripts/email_processor/venv/bin/python3 @@ -1 +1 @@ -/usr/bin/python3 \ No newline at end of file +python3.13 \ No newline at end of file diff --git a/scripts/email_processor/venv/bin/python3.12 b/scripts/email_processor/venv/bin/python3.12 deleted file mode 120000 index b8a0adb..0000000 --- a/scripts/email_processor/venv/bin/python3.12 +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/scripts/email_processor/venv/lib64 b/scripts/email_processor/venv/lib64 deleted file mode 120000 index 7951405..0000000 --- a/scripts/email_processor/venv/lib64 +++ /dev/null @@ -1 +0,0 @@ -lib \ No newline at end of file diff --git a/scripts/email_processor/venv/pyvenv.cfg b/scripts/email_processor/venv/pyvenv.cfg index 225770c..edc7cdd 100644 --- a/scripts/email_processor/venv/pyvenv.cfg +++ b/scripts/email_processor/venv/pyvenv.cfg @@ -1,5 +1,5 @@ -home = /usr/bin +home = /opt/homebrew/opt/python@3.13/bin include-system-site-packages = false -version = 3.12.3 -executable = 
/usr/bin/python3.12 -command = /usr/bin/python3 -m venv /home/lyx/.openclaw/workspace/scripts/email_processor/venv +version = 3.13.0 +executable = /opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/bin/python3.13 +command = /opt/homebrew/opt/python@3.13/bin/python3.13 -m venv /Users/ylu/Documents/me/youlu-openclaw-workspace/scripts/email_processor/venv