"""Tests for notesearch core functionality.""" import hashlib import json from pathlib import Path from typing import Any from unittest.mock import patch import pytest from llama_index.core.base.embeddings.base import BaseEmbedding from notesearch.core import FALLBACK_EMBEDDING_MODEL, METADATA_FILE, build_index, search class FakeEmbedding(BaseEmbedding): """Deterministic embedding model for testing.""" model_name: str = "test-model" def _get_text_embedding(self, text: str) -> list[float]: h = hashlib.md5(text.encode()).digest() return [b / 255.0 for b in h] * 48 # 768-dim def _get_query_embedding(self, query: str) -> list[float]: return self._get_text_embedding(query) async def _aget_text_embedding(self, text: str) -> list[float]: return self._get_text_embedding(text) async def _aget_query_embedding(self, query: str) -> list[float]: return self._get_text_embedding(query) def _mock_embed_model(*args: Any, **kwargs: Any) -> FakeEmbedding: return FakeEmbedding() @pytest.fixture def sample_vault(tmp_path: Path) -> Path: """Create a temporary vault with sample markdown files.""" vault = tmp_path / "vault" vault.mkdir() (vault / "health").mkdir() (vault / "health" / "allergy.md").write_text( "# Allergy Treatment\n\n" "Started allergy shots in March 2026.\n" "Weekly schedule: Tuesday and Thursday.\n" "Clinic is at 123 Main St.\n" ) (vault / "work").mkdir() (vault / "work" / "project-alpha.md").write_text( "# Project Alpha\n\n" "## Goals\n" "Launch the new API by Q2.\n" "Migrate all users to v2 endpoints.\n\n" "## Status\n" "Backend is 80% done. Frontend blocked on design review.\n" ) (vault / "recipes.md").write_text( "# Favorite Recipes\n\n" "## Pasta Carbonara\n" "Eggs, pecorino, guanciale, black pepper.\n" "Cook pasta al dente, mix off heat.\n" ) return vault @pytest.fixture def empty_vault(tmp_path: Path) -> Path: """Create an empty vault directory.""" vault = tmp_path / "empty_vault" vault.mkdir() return vault class TestBuildIndex: def test_missing_vault(self, tmp_path: Path) -> None: with pytest.raises(FileNotFoundError, match="Vault not found"): build_index(vault_path=str(tmp_path / "nonexistent")) def test_empty_vault(self, empty_vault: Path) -> None: with pytest.raises(ValueError, match="No markdown files found"): build_index(vault_path=str(empty_vault)) @patch("notesearch.core._get_embed_model", _mock_embed_model) def test_builds_index(self, sample_vault: Path, tmp_path: Path) -> None: index_dir = tmp_path / "index" idx_path = build_index( vault_path=str(sample_vault), index_dir=str(index_dir), ) assert idx_path == index_dir assert idx_path.exists() assert (idx_path / METADATA_FILE).exists() meta = json.loads((idx_path / METADATA_FILE).read_text()) assert meta["vault_path"] == str(sample_vault) assert "model" in meta @patch("notesearch.core._get_embed_model", _mock_embed_model) def test_index_stores_model_metadata(self, sample_vault: Path, tmp_path: Path) -> None: index_dir = tmp_path / "index" build_index( vault_path=str(sample_vault), index_dir=str(index_dir), model="custom-model", ) meta = json.loads((index_dir / METADATA_FILE).read_text()) assert meta["model"] == "custom-model" class TestSearch: def test_missing_index(self, tmp_path: Path) -> None: with pytest.raises(FileNotFoundError, match="Index not found"): search("test query", vault_path=str(tmp_path)) @patch("notesearch.core._get_embed_model", _mock_embed_model) def test_search_returns_results(self, sample_vault: Path, tmp_path: Path) -> None: index_dir = tmp_path / "index" build_index(vault_path=str(sample_vault), index_dir=str(index_dir)) results = search( "allergy shots", vault_path=str(sample_vault), index_dir=str(index_dir), top_k=3, ) assert len(results) > 0 assert all("score" in r for r in results) assert all("file" in r for r in results) assert all("text" in r for r in results) @patch("notesearch.core._get_embed_model", _mock_embed_model) def test_search_respects_top_k(self, sample_vault: Path, tmp_path: Path) -> None: index_dir = tmp_path / "index" build_index(vault_path=str(sample_vault), index_dir=str(index_dir)) results = search( "anything", vault_path=str(sample_vault), index_dir=str(index_dir), top_k=1, ) assert len(results) == 1