Files
clice/tests/integration/test_persistent_cache.py
ykiko 3838bedcbf feat: persistent PCH/PCM cache across sessions (#391)
## Summary

PCH and PCM artifacts are now cached to disk at
`.clice/cache/{pch/,pcm/}` with content-addressed filenames, so they
survive server restarts. Dependency metadata is persisted in
`cache.json` (using eventide serde) with a shared path table for
deduplication.

### Key changes

- **protocol.h**: `output_path` field on `BuildPCHParams` /
`BuildPCMParams` so master specifies where workers write
- **stateless_worker.cpp**: Atomic write via `.tmp` + `fs::rename`;
`CompilationUnit` destroyed before rename to flush the file to disk;
fallback to temp file when `output_path` is empty (unit tests)
- **master_server.h**: `PCMState` struct, `pcm_states` map,
`load_cache()` / `save_cache()` / `cleanup_cache()` methods
- **master_server.cpp**: Cache lifecycle — load from `cache.json` on
startup, save after each PCH/PCM build and on exit; deterministic path
computation (`xxh3` preamble hash for PCH, module name + source path
hash for PCM); stale files (>7 days) cleaned on startup; `cache.json`
uses shared path table to avoid redundant storage of header paths across
entries
- **filesystem.h**: `fs::rename()` helper; `ThreadSafeFS` broadened to
match `.pch` extension instead of `preamble-` prefix
- **tests**: 11 new integration tests covering PCH/PCM persistence,
cross-session reuse, staleness detection, shared preamble dedup, and
restart survival; unit tests updated with `output_path`

### Naming scheme

- **PCH**: `.clice/cache/pch/<016x(xxh3(preamble))>.pch`
- **PCM**:
`.clice/cache/pcm/<module_name>-<016x(xxh3(source_path))>.pcm`

## Test plan

- [x] Unit tests — 448 passed
- [x] Integration tests — 92 passed (including 11 new persistent cache
tests)
- [x] Smoke tests — 1 passed

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 16:29:21 +08:00

361 lines
13 KiB
Python

"""Integration tests for persistent PCH/PCM cache.
Verifies that PCH/PCM artifacts are written to .clice/cache/pch/ and .clice/cache/pcm/
with content-addressed filenames, survive server restarts via cache.json,
and are properly reused across sessions.
"""
import asyncio
import json
from pathlib import Path
import pytest
from lsprotocol.types import (
DidCloseTextDocumentParams,
HoverParams,
Position,
TextDocumentIdentifier,
)
from tests.conftest import CliceClient
def _write_cdb(workspace, files, extra_args=None):
"""Write a compile_commands.json for the given source files."""
entries = []
for f in files:
args = ["clang++", "-std=c++17", "-fsyntax-only"]
if extra_args:
args.extend(extra_args)
args.append(str(workspace / f))
entries.append(
{
"directory": str(workspace),
"file": str(workspace / f),
"arguments": args,
}
)
(workspace / "compile_commands.json").write_text(json.dumps(entries, indent=2))
def _doc(uri: str) -> TextDocumentIdentifier:
    """Build the ``TextDocumentIdentifier`` that refers to *uri*."""
    identifier = TextDocumentIdentifier(uri=uri)
    return identifier
def _list_pch_files(workspace: Path) -> list[Path]:
"""Return all .pch files in the cache directory."""
pch_dir = workspace / ".clice" / "cache" / "pch"
if not pch_dir.exists():
return []
return sorted(pch_dir.glob("*.pch"))
def _list_pcm_files(workspace: Path) -> list[Path]:
"""Return all .pcm files in the cache directory."""
pcm_dir = workspace / ".clice" / "cache" / "pcm"
if not pcm_dir.exists():
return []
return sorted(pcm_dir.glob("*.pcm"))
def _cache_json(workspace: Path) -> dict | None:
"""Read and parse cache.json, or return None if absent."""
path = workspace / ".clice" / "cache" / "cache.json"
if not path.exists():
return None
return json.loads(path.read_text())
async def _make_client(executable: Path, workspace: Path) -> CliceClient:
    """Start a new clice server process and return an initialized client.

    The server at *executable* is launched with ``--mode pipe`` and the
    client is then initialized against *workspace*.
    """
    launch_args = (str(executable), "--mode", "pipe")
    fresh_client = CliceClient()
    await fresh_client.start_io(*launch_args)
    await fresh_client.initialize(workspace)
    return fresh_client
async def _shutdown_client(c: CliceClient) -> None:
    """Tear down *c* on a best-effort basis.

    Steps, in order: request shutdown (bounded to 5 s), send exit, give the
    server 0.3 s to quit, kill it if it has no return code yet, then stop
    the client's own event/tasks. Failures of the protocol and cleanup
    steps are deliberately ignored so teardown always runs to the end.
    """
    # Polite shutdown request; a hung server must not block the test.
    try:
        pending_shutdown = c.shutdown_async(None)
        await asyncio.wait_for(pending_shutdown, timeout=5.0)
    except Exception:
        pass

    # Exit notification.
    try:
        c.exit(None)
    except Exception:
        pass

    # Grace period before resorting to kill().
    await asyncio.sleep(0.3)
    server = getattr(c, "_server", None)  # presumably the server subprocess handle
    if server is not None and server.returncode is None:
        server.kill()

    # Stop the client's internal machinery (private attributes of the client).
    try:
        c._stop_event.set()
        for pending in c._async_tasks:
            pending.cancel()
        await asyncio.sleep(0.1)
    except Exception:
        pass
# =========================================================================
# PCH persistent cache tests
# =========================================================================
async def test_pch_written_to_cache_dir(client, tmp_path):
    """After opening a file with #include, a .pch file should appear
    in .clice/cache/pch/ with a 16-character hex-hash filename."""
    (tmp_path / "header.h").write_text("#pragma once\nstruct Foo { int x; };\n")
    (tmp_path / "main.cpp").write_text(
        '#include "header.h"\nint main() { Foo f; return f.x; }\n'
    )
    _write_cdb(tmp_path, ["main.cpp"])
    await client.initialize(tmp_path)
    uri, _ = await client.open_and_wait(tmp_path / "main.cpp")
    diags = client.diagnostics.get(uri, [])
    assert len(diags) == 0, f"Expected clean compile, got: {diags}"

    # Verify PCH file exists in the cache directory.
    pch_files = _list_pch_files(tmp_path)
    assert len(pch_files) >= 1, "Expected at least one .pch file in .clice/cache/pch/"

    # Filename should be a 16-char hex hash + .pch. Check the characters as
    # well as the length, otherwise any 16-character name would pass.
    hex_digits = "0123456789abcdefABCDEF"
    for pch in pch_files:
        stem = pch.stem
        assert len(stem) == 16 and all(ch in hex_digits for ch in stem), (
            f"Expected 16-char hex filename, got: {pch.name}"
        )
async def test_cache_json_persisted(client, tmp_path):
    """A PCH build must leave a cache.json containing a populated 'pch' section."""
    source = tmp_path / "main.cpp"
    (tmp_path / "header.h").write_text("#pragma once\nint global_val = 42;\n")
    source.write_text('#include "header.h"\nint main() { return global_val; }\n')
    _write_cdb(tmp_path, ["main.cpp"])

    await client.initialize(tmp_path)
    uri, _ = await client.open_and_wait(source)
    reported = client.diagnostics.get(uri, [])
    assert len(reported) == 0

    cache = _cache_json(tmp_path)
    assert cache is not None, "cache.json should exist after PCH build"
    assert "pch" in cache, "cache.json should have 'pch' section"
    pch_entries = cache["pch"]
    assert len(pch_entries) >= 1, "Expected at least one PCH entry in cache.json"

    # The first entry must carry every metadata field the test relies on.
    first_entry = pch_entries[0]
    for field in ("hash", "build_at", "deps", "source_file"):
        assert field in first_entry
async def test_pch_reused_on_close_reopen(client, tmp_path):
    """Within one session, close + reopen of a document must not change
    the set of .pch files in the cache directory."""
    source = tmp_path / "main.cpp"
    (tmp_path / "header.h").write_text("#pragma once\nstruct Bar { int y; };\n")
    source.write_text('#include "header.h"\nint main() { Bar b; return b.y; }\n')
    _write_cdb(tmp_path, ["main.cpp"])
    await client.initialize(tmp_path)

    # Initial open: this is where the PCH gets built.
    first_uri, _ = await client.open_and_wait(source)
    assert len(client.diagnostics.get(first_uri, [])) == 0
    snapshot_initial = _list_pch_files(tmp_path)
    assert len(snapshot_initial) >= 1

    # Close the document and give the server a moment to process it.
    close_params = DidCloseTextDocumentParams(text_document=_doc(first_uri))
    client.text_document_did_close(close_params)
    await asyncio.sleep(0.5)

    # Drop the old diagnostics so the next open yields fresh ones.
    client.diagnostics.pop(first_uri, None)

    # Second open: expected to hit the cached PCH.
    second_uri, _ = await client.open_and_wait(source)
    assert len(client.diagnostics.get(second_uri, [])) == 0
    snapshot_reopened = _list_pch_files(tmp_path)
    assert snapshot_initial == snapshot_reopened, (
        "PCH file set should be identical after close+reopen"
    )
async def test_pch_survives_server_restart(executable, tmp_path):
    """PCH cache should survive a full server restart — cache.json is
    loaded on startup and the existing .pch file is reused.

    Two independent server processes are run against the same workspace.
    The second session must leave the set of .pch files and their
    modification times exactly as the first session left them.
    """
    (tmp_path / "header.h").write_text("#pragma once\nstruct Baz { int z; };\n")
    (tmp_path / "main.cpp").write_text(
        '#include "header.h"\nint main() { Baz b; return b.z; }\n'
    )
    _write_cdb(tmp_path, ["main.cpp"])

    # Session 1: build PCH.
    c1 = await _make_client(executable, tmp_path)
    try:
        uri, _ = await c1.open_and_wait(tmp_path / "main.cpp")
        assert len(c1.diagnostics.get(uri, [])) == 0
        pch_files_s1 = _list_pch_files(tmp_path)
        assert len(pch_files_s1) >= 1, "PCH should be created in session 1"
        # Nanosecond mtimes, keyed by filename, for every PCH produced.
        mtimes_s1 = {p.name: p.stat().st_mtime_ns for p in pch_files_s1}
        cache_s1 = _cache_json(tmp_path)
        assert cache_s1 is not None, "cache.json should exist after session 1"
    finally:
        # Always stop the server, even when an assertion above fails,
        # so a failing test does not leak a child process.
        await _shutdown_client(c1)

    # Session 2: restart server, reopen file.
    c2 = await _make_client(executable, tmp_path)
    try:
        uri2, _ = await c2.open_and_wait(tmp_path / "main.cpp")
        assert len(c2.diagnostics.get(uri2, [])) == 0

        # The same PCH files should still exist, not overwritten.
        pch_files_s2 = _list_pch_files(tmp_path)
        assert {p.name for p in pch_files_s2} == set(mtimes_s1), (
            "No new PCH files should be created in session 2"
        )
        mtimes_s2 = {p.name: p.stat().st_mtime_ns for p in pch_files_s2}
        assert mtimes_s1 == mtimes_s2, (
            "PCH file should not be rebuilt (mtime should be unchanged)"
        )
    finally:
        await _shutdown_client(c2)
async def test_shared_preamble_shares_pch(client, tmp_path):
    """Sources whose preambles are textually identical must map to a single
    content-addressed PCH file."""
    (tmp_path / "header.h").write_text("#pragma once\nint shared_val = 1;\n")
    source_a = tmp_path / "a.cpp"
    source_b = tmp_path / "b.cpp"
    source_a.write_text('#include "header.h"\nint fa() { return shared_val; }\n')
    source_b.write_text('#include "header.h"\nint fb() { return shared_val + 1; }\n')
    _write_cdb(tmp_path, ["a.cpp", "b.cpp"])
    await client.initialize(tmp_path)

    # Open both files (a first, then b) before checking any diagnostics.
    uri_a, _ = await client.open_and_wait(source_a)
    uri_b, _ = await client.open_and_wait(source_b)
    for opened in (uri_a, uri_b):
        assert len(client.diagnostics.get(opened, [])) == 0

    # Both preambles are just `#include "header.h"`, so the preamble hash,
    # and therefore the cache filename, must coincide.
    pch_files = _list_pch_files(tmp_path)
    names = [f.name for f in pch_files]
    assert len(pch_files) == 1, (
        f"Expected exactly 1 PCH file for shared preamble, got {len(pch_files)}: "
        f"{names}"
    )
async def test_different_preamble_different_pch(client, tmp_path):
    """Sources with distinct preambles must each get their own PCH file."""
    fixtures = (
        ("a.h", "#pragma once\nint val_a = 1;\n"),
        ("b.h", "#pragma once\nint val_b = 2;\n"),
        ("a.cpp", '#include "a.h"\nint fa() { return val_a; }\n'),
        ("b.cpp", '#include "b.h"\nint fb() { return val_b; }\n'),
    )
    for filename, content in fixtures:
        (tmp_path / filename).write_text(content)
    _write_cdb(tmp_path, ["a.cpp", "b.cpp"])
    await client.initialize(tmp_path)

    uri_a, _ = await client.open_and_wait(tmp_path / "a.cpp")
    uri_b, _ = await client.open_and_wait(tmp_path / "b.cpp")
    for opened in (uri_a, uri_b):
        assert len(client.diagnostics.get(opened, [])) == 0

    # Two different preamble texts hash differently, so two cache files.
    pch_files = _list_pch_files(tmp_path)
    names = [f.name for f in pch_files]
    assert len(pch_files) == 2, (
        f"Expected 2 PCH files for different preambles, got {len(pch_files)}: "
        f"{names}"
    )
async def test_pch_rebuilt_on_header_change(client, tmp_path):
    """When a header included by the preamble changes, the PCH must be rebuilt.

    The cache filename is derived from the preamble text of the source file,
    not from the header contents. The ``#include`` line is unchanged here, so
    the rebuilt PCH is expected to land on the same filename (overwritten in
    place). To stay agnostic of the naming details, the test accepts either
    a brand-new .pch file or an existing one whose mtime changed.
    """
    (tmp_path / "header.h").write_text("#pragma once\nstruct V1 { int a; };\n")
    (tmp_path / "main.cpp").write_text(
        '#include "header.h"\nint main() { V1 v; return v.a; }\n'
    )
    _write_cdb(tmp_path, ["main.cpp"])
    await client.initialize(tmp_path)
    uri, _ = await client.open_and_wait(tmp_path / "main.cpp")
    assert len(client.diagnostics.get(uri, [])) == 0
    pch_before = _list_pch_files(tmp_path)
    assert len(pch_before) >= 1
    mtimes_before = {p.name: p.stat().st_mtime_ns for p in pch_before}

    # Wait past one-second timestamp granularity so the header edit (and a
    # rebuilt PCH) get an mtime distinguishable from the first build.
    await asyncio.sleep(1.1)
    (tmp_path / "header.h").write_text("#pragma once\nstruct V2 { int b; };\n")
    # Also update main.cpp to use V2 so it compiles cleanly.
    (tmp_path / "main.cpp").write_text(
        '#include "header.h"\nint main() { V2 v; return v.b; }\n'
    )

    # Close and reopen to get fresh preamble.
    client.text_document_did_close(DidCloseTextDocumentParams(text_document=_doc(uri)))
    await asyncio.sleep(0.5)
    client.diagnostics.pop(uri, None)
    uri2, _ = await client.open_and_wait(tmp_path / "main.cpp")
    # A clean compile against V2 is only possible with an up-to-date PCH.
    assert len(client.diagnostics.get(uri2, [])) == 0

    pch_after = _list_pch_files(tmp_path)
    assert len(pch_after) >= 1
    # At least one artifact must be new or rewritten since the first build.
    rebuilt = [
        p.name
        for p in pch_after
        if mtimes_before.get(p.name) != p.stat().st_mtime_ns
    ]
    assert rebuilt, (
        "Expected a new or rewritten .pch after the header changed, "
        f"got unchanged files: {[p.name for p in pch_after]}"
    )
async def test_no_tmp_files_after_build(client, tmp_path):
    """A successful PCH build must not leave any ``*.tmp`` file behind in
    the pch/ or pcm/ cache directories."""
    source = tmp_path / "main.cpp"
    (tmp_path / "header.h").write_text("#pragma once\nint val = 1;\n")
    source.write_text('#include "header.h"\nint main() { return val; }\n')
    _write_cdb(tmp_path, ["main.cpp"])
    await client.initialize(tmp_path)
    uri, _ = await client.open_and_wait(source)
    assert len(client.diagnostics.get(uri, [])) == 0

    # Inspect each artifact directory that exists; a missing one is fine.
    cache_root = tmp_path / ".clice" / "cache"
    for kind in ("pch", "pcm"):
        artifact_dir = cache_root / kind
        if not artifact_dir.exists():
            continue
        tmp_files = list(artifact_dir.glob("*.tmp"))
        assert len(tmp_files) == 0, f"Stale .tmp files found: {tmp_files}"
async def test_cache_dirs_created_on_startup(client, tmp_path):
    """Initializing a workspace must create both ``.clice/cache/pch/`` and
    ``.clice/cache/pcm/``."""
    source = tmp_path / "main.cpp"
    source.write_text("int main() { return 0; }\n")
    _write_cdb(tmp_path, ["main.cpp"])
    await client.initialize(tmp_path)

    # Workspace loading runs asynchronously after initialization; opening a
    # file and getting a clean compile ensures it has completed.
    uri, _ = await client.open_and_wait(source)
    assert len(client.diagnostics.get(uri, [])) == 0

    cache_root = tmp_path / ".clice" / "cache"
    expectations = (
        ("pch", ".clice/cache/pch/ should be created"),
        ("pcm", ".clice/cache/pcm/ should be created"),
    )
    for subdir, message in expectations:
        assert (cache_root / subdir).is_dir(), message