Files
youlu-openclaw-workspace/skills/notesearch/tests/test_core.py
2026-04-03 15:44:25 -07:00

153 lines
4.8 KiB
Python

"""Tests for notesearch core functionality."""
import hashlib
import json
from pathlib import Path
from typing import Any
from unittest.mock import patch
import pytest
from llama_index.core.base.embeddings.base import BaseEmbedding
from notesearch.core import FALLBACK_EMBEDDING_MODEL, METADATA_FILE, build_index, search
class FakeEmbedding(BaseEmbedding):
    """Deterministic embedding model for testing.

    Every text is mapped to a vector derived from its MD5 digest, so equal
    inputs always produce equal vectors and no real model is needed.
    """

    model_name: str = "test-model"

    def _get_text_embedding(self, text: str) -> list[float]:
        """Return a 768-value vector derived from the MD5 digest of *text*."""
        # MD5 is used purely as a stable fingerprint, not for security;
        # flagging that keeps the call usable on FIPS-restricted builds.
        digest = hashlib.md5(text.encode(), usedforsecurity=False).digest()
        # 16 digest bytes scaled to [0, 1], tiled 48 times -> 768 values.
        return [byte / 255.0 for byte in digest] * 48

    def _get_query_embedding(self, query: str) -> list[float]:
        """Embed a query exactly like document text."""
        return self._get_text_embedding(query)

    async def _aget_text_embedding(self, text: str) -> list[float]:
        """Async variant; delegates to the synchronous text embedding."""
        return self._get_text_embedding(text)

    async def _aget_query_embedding(self, query: str) -> list[float]:
        """Async variant; delegates to the synchronous text embedding."""
        return self._get_text_embedding(query)
def _mock_embed_model(*args: Any, **kwargs: Any) -> FakeEmbedding:
    """Return a fresh FakeEmbedding, ignoring every argument.

    Used as the replacement object when patching
    ``notesearch.core._get_embed_model`` in the tests below.
    """
    fake_model = FakeEmbedding()
    return fake_model
@pytest.fixture
def sample_vault(tmp_path: Path) -> Path:
    """Build a temporary vault populated with three markdown notes.

    Layout: ``health/allergy.md``, ``work/project-alpha.md`` and a
    top-level ``recipes.md``. Returns the vault root.
    """
    # Path components (relative to the vault root) -> note contents.
    notes = {
        ("health", "allergy.md"): (
            "# Allergy Treatment\n\n"
            "Started allergy shots in March 2026.\n"
            "Weekly schedule: Tuesday and Thursday.\n"
            "Clinic is at 123 Main St.\n"
        ),
        ("work", "project-alpha.md"): (
            "# Project Alpha\n\n"
            "## Goals\n"
            "Launch the new API by Q2.\n"
            "Migrate all users to v2 endpoints.\n\n"
            "## Status\n"
            "Backend is 80% done. Frontend blocked on design review.\n"
        ),
        ("recipes.md",): (
            "# Favorite Recipes\n\n"
            "## Pasta Carbonara\n"
            "Eggs, pecorino, guanciale, black pepper.\n"
            "Cook pasta al dente, mix off heat.\n"
        ),
    }

    root = tmp_path / "vault"
    root.mkdir()
    for parts, body in notes.items():
        note = root.joinpath(*parts)
        # For the top-level note the parent is the root, which already exists.
        note.parent.mkdir(exist_ok=True)
        note.write_text(body)
    return root
@pytest.fixture
def empty_vault(tmp_path: Path) -> Path:
    """Provide a vault directory that exists but contains no files."""
    vault_dir = tmp_path.joinpath("empty_vault")
    vault_dir.mkdir()
    return vault_dir
class TestBuildIndex:
    """Tests for build_index: input validation and written output."""

    def test_missing_vault(self, tmp_path: Path) -> None:
        """A vault path that does not exist raises FileNotFoundError."""
        absent_vault = tmp_path / "nonexistent"
        with pytest.raises(FileNotFoundError, match="Vault not found"):
            build_index(vault_path=str(absent_vault))

    def test_empty_vault(self, empty_vault: Path) -> None:
        """A vault without markdown files raises ValueError."""
        vault_arg = str(empty_vault)
        with pytest.raises(ValueError, match="No markdown files found"):
            build_index(vault_path=vault_arg)

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_builds_index(self, sample_vault: Path, tmp_path: Path) -> None:
        """Building returns the index directory and writes its metadata file."""
        target_dir = tmp_path / "index"
        built = build_index(vault_path=str(sample_vault), index_dir=str(target_dir))

        assert built == target_dir
        assert built.exists()

        metadata_file = built / METADATA_FILE
        assert metadata_file.exists()

        recorded = json.loads(metadata_file.read_text())
        assert recorded["vault_path"] == str(sample_vault)
        assert "model" in recorded

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_index_stores_model_metadata(self, sample_vault: Path, tmp_path: Path) -> None:
        """An explicitly requested model name is stored in the metadata."""
        target_dir = tmp_path / "index"
        build_index(
            vault_path=str(sample_vault),
            index_dir=str(target_dir),
            model="custom-model",
        )

        metadata_text = (target_dir / METADATA_FILE).read_text()
        recorded = json.loads(metadata_text)
        assert recorded["model"] == "custom-model"
class TestSearch:
    """Tests for search: missing-index handling, result shape and top_k."""

    def test_missing_index(self, tmp_path: Path) -> None:
        """Searching when no index has been built raises FileNotFoundError."""
        # Point index_dir at a path under tmp_path that is guaranteed not to
        # exist, so the outcome never depends on whichever default index
        # location search() would otherwise resolve.
        with pytest.raises(FileNotFoundError, match="Index not found"):
            search(
                "test query",
                vault_path=str(tmp_path),
                index_dir=str(tmp_path / "index"),
            )

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_search_returns_results(self, sample_vault: Path, tmp_path: Path) -> None:
        """A search over a built index yields hits with the expected keys."""
        index_dir = tmp_path / "index"
        build_index(vault_path=str(sample_vault), index_dir=str(index_dir))

        results = search(
            "allergy shots",
            vault_path=str(sample_vault),
            index_dir=str(index_dir),
            top_k=3,
        )

        assert len(results) > 0
        assert all("score" in r for r in results)
        assert all("file" in r for r in results)
        assert all("text" in r for r in results)

    @patch("notesearch.core._get_embed_model", _mock_embed_model)
    def test_search_respects_top_k(self, sample_vault: Path, tmp_path: Path) -> None:
        """With top_k=1 exactly one result is returned."""
        index_dir = tmp_path / "index"
        build_index(vault_path=str(sample_vault), index_dir=str(index_dir))

        results = search(
            "anything",
            vault_path=str(sample_vault),
            index_dir=str(index_dir),
            top_k=1,
        )

        assert len(results) == 1