From d4a0e08ea3d527cc776482d20dc5cc9f8e9e7b56 Mon Sep 17 00:00:00 2001 From: Alessandro Rinaldi Date: Fri, 15 May 2026 13:20:27 +0200 Subject: [PATCH] fix(catalog): version the .cache wire format so parser changes auto-invalidate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catalog cache used to be stale-checked by source-XML mtime only. When a parser change reshapes the in-memory Template / Data / Class without touching the XML, on-disk caches stayed "fresh" and got loaded verbatim — silently feeding the new mythy binary the old parser's output. Anyone upgrading mythy needed to manually wipe Templates/*.cache to see the fix take effect, including during normal development. Wrap every cache file with a small versioned header (magic string + schema version int) ahead of the gob-encoded Template, both written by SaveCache and verified by LoadCache. Any mismatch — wrong magic, wrong version, or any decode failure — routes to ErrCacheStale and the caller falls back to ParseTemplate, which writes a fresh cache. parserSchemaVersion starts at 1 and gets bumped from now on whenever a PR changes how pkg/catalog/ encodes a Template. Pre-versioning caches (bare *Template gob) decode into the new wrapper as malformed data and are correctly rejected — confirmed by test. Closes #5 Co-Authored-By: Claude Opus 4.7 (1M context) --- pkg/catalog/cache.go | 82 +++++++++++++++++++++++++++++++++------ pkg/catalog/cache_test.go | 69 ++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 11 deletions(-) diff --git a/pkg/catalog/cache.go b/pkg/catalog/cache.go index 8a20aea..c1d405a 100644 --- a/pkg/catalog/cache.go +++ b/pkg/catalog/cache.go @@ -7,9 +7,53 @@ import ( "os" ) -// ErrCacheStale is returned by LoadCache when the source XML is newer than -// the cache file. Callers should fall back to ParseTemplate. -var ErrCacheStale = errors.New("cache is older than source") +// ErrCacheStale is returned by LoadCache when the on-disk cache cannot +// be safely loaded into the running build. Three independent triggers: +// +// - the source XML's mtime is newer than the cache file's mtime +// (the catalog changed since the cache was written); +// - the cache's magic header doesn't match cacheFileMagic (a +// pre-versioning legacy cache file or a corrupted one); +// - the cache's parser-schema version doesn't equal +// parserSchemaVersion (the in-memory schema changed since the +// cache was written, even if the source XML didn't). +// +// In every case the right answer is the same: drop the cache and let +// the caller fall back to ParseTemplate, which writes a fresh cache. +var ErrCacheStale = errors.New("cache is stale or incompatible with this build") + +// parserSchemaVersion bumps any time the in-memory shape that gets gob- +// encoded (Template / Group / Data / Class / CompoundFieldOverride / +// related catalog structs) gains, drops, or rewrites a field, OR any +// time a load-time pass — like resolveTypedefs — changes how the same +// XML decodes into those structs. Bumping invalidates every pre- +// existing .cache sibling on disk on the next load, forcing a clean +// reparse so users running a new mythy build don't transparently +// consume an old cache produced by the old parser. +// +// Bumping policy: when a PR touches any file under pkg/catalog/ that +// affects parse output, bump this. Reviewers should treat +// pkg/catalog/ diffs without a version bump as suspicious. Version 1 +// is the first build that emits the versioned format; pre-version +// caches are detected via the magic header and rejected as stale. +const parserSchemaVersion = 1 + +// cacheFileMagic is a sentinel written at the start of the gob stream. +// Pre-versioning caches encoded a bare *Template directly, so any +// decode attempt with the new wrapper fails fast on the magic field +// and routes to ErrCacheStale. Don't change this string — bump +// parserSchemaVersion instead. +const cacheFileMagic = "mythy-catalog-cache" + +// cacheFile is the on-disk wrapper around the parsed Template. The +// gob encoding is "the entire cacheFile struct", so adding fields +// here automatically reaches every existing cache via the version +// check. +type cacheFile struct { + Magic string + SchemaVersion int + Template *Template +} // SaveCache writes a parsed Template to disk as gob. The decoder needs a // concrete type, so gob registrations live here. @@ -20,14 +64,20 @@ func SaveCache(tpl *Template, path string) error { } defer f.Close() enc := gob.NewEncoder(f) - if err := enc.Encode(tpl); err != nil { + if err := enc.Encode(cacheFile{ + Magic: cacheFileMagic, + SchemaVersion: parserSchemaVersion, + Template: tpl, + }); err != nil { return fmt.Errorf("gob encode: %w", err) } return nil } -// LoadCache reads a cached Template from disk. Returns ErrCacheStale if the -// source XML's mtime is newer than the cache's mtime. +// LoadCache reads a cached Template from disk. Returns ErrCacheStale if +// the source XML is newer, if the cache lacks the versioned header +// (legacy format), or if the version doesn't match the current +// parserSchemaVersion. func LoadCache(cachePath, sourcePath string) (*Template, error) { cInfo, err := os.Stat(cachePath) if err != nil { @@ -45,9 +95,19 @@ func LoadCache(cachePath, sourcePath string) (*Template, error) { return nil, err } defer f.Close() - var tpl Template - if err := gob.NewDecoder(f).Decode(&tpl); err != nil { - return nil, fmt.Errorf("gob decode: %w", err) + var wrapped cacheFile + if err := gob.NewDecoder(f).Decode(&wrapped); err != nil { + // Most commonly hit when reading a pre-versioning cache file + // that gob-decodes into the wrapper as a malformed struct. + // Treat any decode failure as stale so the caller reparses. + return nil, ErrCacheStale + } + if wrapped.Magic != cacheFileMagic || wrapped.SchemaVersion != parserSchemaVersion { + return nil, ErrCacheStale + } + tpl := wrapped.Template + if tpl == nil { + return nil, ErrCacheStale } if tpl.Menu != nil { reparent(tpl.Menu, nil) @@ -55,13 +115,13 @@ func LoadCache(cachePath, sourcePath string) (*Template, error) { // Re-link DATA/COMMAND.Message — they're pointers into tpl.Messages, // gob serializes them by value (deep copies). Re-link by name. if tpl.Menu != nil { - linkMenuToMessages(&tpl) + linkMenuToMessages(tpl) } // Normalize TIPO aliases. Idempotent: caches written by newer // builds already carry resolved Tipo, and the typedef map lookup // is a no-op for primitives. tpl.resolveTypedefs() - return &tpl, nil + return tpl, nil } // reparent rebuilds the Group.Parent back-pointers after gob decode. diff --git a/pkg/catalog/cache_test.go b/pkg/catalog/cache_test.go index e5de557..097844d 100644 --- a/pkg/catalog/cache_test.go +++ b/pkg/catalog/cache_test.go @@ -1,6 +1,7 @@ package catalog import ( + "encoding/gob" "os" "path/filepath" "testing" @@ -105,6 +106,74 @@ func TestLoadCacheResolvesTypedefsForStaleCaches(t *testing.T) { } } +func TestLoadCacheRejectsLegacyUnwrappedFormat(t *testing.T) { + // Before issue #5 was fixed, SaveCache wrote a bare gob-encoded + // *Template (no magic, no version). After the fix, LoadCache must + // detect this and route to ErrCacheStale so the caller reparses + // the XML — otherwise a user upgrading mythy without wiping their + // Templates/ would silently keep running on the old parser's output. + tmp := t.TempDir() + src := filepath.Join(tmp, "TEST") + if err := os.WriteFile(src, []byte(""), 0o644); err != nil { + t.Fatal(err) + } + cachePath := filepath.Join(tmp, "TEST.cache") + f, err := os.Create(cachePath) + if err != nil { + t.Fatal(err) + } + // Write a legacy-format cache: bare *Template, no wrapper. + if err := gob.NewEncoder(f).Encode(&Template{Identification: 7}); err != nil { + t.Fatal(err) + } + f.Close() + + // Ensure the cache mtime is newer than the source, so the only + // remaining stale-trigger is the missing header. + future := time.Now().Add(1 * time.Second) + if err := os.Chtimes(cachePath, future, future); err != nil { + t.Fatal(err) + } + + if _, err := LoadCache(cachePath, src); err != ErrCacheStale { + t.Errorf("LoadCache on legacy format: err = %v, want ErrCacheStale", err) + } +} + +func TestLoadCacheRejectsWrongSchemaVersion(t *testing.T) { + // When the parser changes the on-disk shape, parserSchemaVersion + // is bumped. A cache written by an older build must be rejected + // so the caller reparses with the current parser. + tmp := t.TempDir() + src := filepath.Join(tmp, "TEST") + if err := os.WriteFile(src, []byte(""), 0o644); err != nil { + t.Fatal(err) + } + cachePath := filepath.Join(tmp, "TEST.cache") + f, err := os.Create(cachePath) + if err != nil { + t.Fatal(err) + } + // Write a cacheFile with a deliberately-wrong schema version. + if err := gob.NewEncoder(f).Encode(cacheFile{ + Magic: cacheFileMagic, + SchemaVersion: parserSchemaVersion + 1, // pretend we're an older build + Template: &Template{Identification: 7}, + }); err != nil { + t.Fatal(err) + } + f.Close() + + future := time.Now().Add(1 * time.Second) + if err := os.Chtimes(cachePath, future, future); err != nil { + t.Fatal(err) + } + + if _, err := LoadCache(cachePath, src); err != ErrCacheStale { + t.Errorf("LoadCache on wrong-version cache: err = %v, want ErrCacheStale", err) + } +} + func copyFile(t *testing.T, src, dst string) error { t.Helper() b, err := os.ReadFile(src)