# File-viewer metadata: 153 lines, 4.8 KiB, Python
"""Tests for notesearch core functionality."""
|
|
|
|
import hashlib
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from llama_index.core.base.embeddings.base import BaseEmbedding
|
|
from notesearch.core import FALLBACK_EMBEDDING_MODEL, METADATA_FILE, build_index, search
|
|
|
|
|
|
class FakeEmbedding(BaseEmbedding):
    """Deterministic stand-in embedding model for tests.

    Vectors are derived from the MD5 digest of the input, so identical text
    always yields an identical vector.
    """

    model_name: str = "test-model"

    def _get_text_embedding(self, text: str) -> list[float]:
        """Return a 768-value vector built from the MD5 digest of *text*."""
        digest = hashlib.md5(text.encode()).digest()
        # 16 digest bytes scaled by 1/255, then tiled 48 times -> 768 values.
        scaled = [byte / 255.0 for byte in digest]
        return scaled * 48

    def _get_query_embedding(self, query: str) -> list[float]:
        """Embed a query the same way as document text."""
        return self._get_text_embedding(query)

    async def _aget_text_embedding(self, text: str) -> list[float]:
        """Async variant that delegates to the synchronous text embedding."""
        return self._get_text_embedding(text)

    async def _aget_query_embedding(self, query: str) -> list[float]:
        """Async query variant that delegates to the synchronous text embedding."""
        return self._get_text_embedding(query)
|
|
|
|
|
|
def _mock_embed_model(*args: Any, **kwargs: Any) -> FakeEmbedding:
    """Return a fresh ``FakeEmbedding``, ignoring every argument.

    The tests in this module pass this function to ``patch`` as the
    replacement for ``notesearch.core._get_embed_model``.
    """
    fake_model = FakeEmbedding()
    return fake_model
|
|
|
|
|
|
@pytest.fixture
def sample_vault(tmp_path: Path) -> Path:
    """Build a temporary vault holding three sample markdown notes.

    Layout created under ``tmp_path / "vault"``:
    ``health/allergy.md``, ``work/project-alpha.md`` and ``recipes.md``.
    """
    # Each entry maps the path parts (relative to the vault) to the note body.
    notes: dict[tuple[str, ...], str] = {
        ("health", "allergy.md"): (
            "# Allergy Treatment\n\n"
            "Started allergy shots in March 2026.\n"
            "Weekly schedule: Tuesday and Thursday.\n"
            "Clinic is at 123 Main St.\n"
        ),
        ("work", "project-alpha.md"): (
            "# Project Alpha\n\n"
            "## Goals\n"
            "Launch the new API by Q2.\n"
            "Migrate all users to v2 endpoints.\n\n"
            "## Status\n"
            "Backend is 80% done. Frontend blocked on design review.\n"
        ),
        ("recipes.md",): (
            "# Favorite Recipes\n\n"
            "## Pasta Carbonara\n"
            "Eggs, pecorino, guanciale, black pepper.\n"
            "Cook pasta al dente, mix off heat.\n"
        ),
    }

    root = tmp_path / "vault"
    root.mkdir()
    for folder in ("health", "work"):
        (root / folder).mkdir()

    for parts, body in notes.items():
        root.joinpath(*parts).write_text(body)

    return root
|
|
|
|
|
|
@pytest.fixture
def empty_vault(tmp_path: Path) -> Path:
    """Create and return an empty ``empty_vault`` directory under ``tmp_path``."""
    root = tmp_path.joinpath("empty_vault")
    root.mkdir()
    return root
|
|
|
|
|
|
class TestBuildIndex:
    """Tests for ``build_index``."""

    def test_missing_vault(self, tmp_path: Path) -> None:
        """A vault path that does not exist raises ``FileNotFoundError``."""
        absent_vault = tmp_path / "nonexistent"
        with pytest.raises(FileNotFoundError, match="Vault not found"):
            build_index(vault_path=str(absent_vault))

    def test_empty_vault(self, empty_vault: Path) -> None:
        """A vault directory with no files raises ``ValueError``."""
        with pytest.raises(ValueError, match="No markdown files found"):
            build_index(vault_path=str(empty_vault))

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_builds_index(self, sample_vault: Path, tmp_path: Path) -> None:
        """Building returns the index directory and writes a metadata file."""
        target_dir = tmp_path / "index"
        built_path = build_index(
            vault_path=str(sample_vault),
            index_dir=str(target_dir),
        )

        assert built_path == target_dir
        assert built_path.exists()
        metadata_path = built_path / METADATA_FILE
        assert metadata_path.exists()

        metadata = json.loads(metadata_path.read_text())
        assert metadata["vault_path"] == str(sample_vault)
        assert "model" in metadata

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_index_stores_model_metadata(self, sample_vault: Path, tmp_path: Path) -> None:
        """The ``model`` argument is recorded verbatim in the metadata file."""
        target_dir = tmp_path / "index"
        build_index(
            vault_path=str(sample_vault),
            index_dir=str(target_dir),
            model="custom-model",
        )

        raw_metadata = (target_dir / METADATA_FILE).read_text()
        metadata = json.loads(raw_metadata)
        assert metadata["model"] == "custom-model"
|
|
|
|
|
|
class TestSearch:
    """Tests for ``search``."""

    def test_missing_index(self, tmp_path: Path) -> None:
        """Searching before an index was built raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError, match="Index not found"):
            search("test query", vault_path=str(tmp_path))

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_search_returns_results(self, sample_vault: Path, tmp_path: Path) -> None:
        """Every returned hit exposes ``score``, ``file`` and ``text`` keys."""
        vault_arg = str(sample_vault)
        index_arg = str(tmp_path / "index")
        build_index(vault_path=vault_arg, index_dir=index_arg)

        results = search(
            "allergy shots",
            vault_path=vault_arg,
            index_dir=index_arg,
            top_k=3,
        )

        assert len(results) > 0
        # Checked key by key, in the same order as the expected result shape.
        for key in ("score", "file", "text"):
            assert all(key in hit for hit in results)

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_search_respects_top_k(self, sample_vault: Path, tmp_path: Path) -> None:
        """With ``top_k=1`` exactly one result comes back."""
        vault_arg = str(sample_vault)
        index_arg = str(tmp_path / "index")
        build_index(vault_path=vault_arg, index_dir=index_arg)

        results = search(
            "anything",
            vault_path=vault_arg,
            index_dir=index_arg,
            top_k=1,
        )

        assert len(results) == 1
|