From bfd4fbb48f0bbad1946ed12d41275cf18cfff474 Mon Sep 17 00:00:00 2001 From: Alexander Braverman Masis Date: Fri, 9 Jan 2026 09:06:33 -0500 Subject: [PATCH] Unlimited branches Bit mask for tracking which branches, this allows supporting repositories with more than 64 branches. Assisted-by: Cursor --- .../merge_test.go | 36 ++- .../meta_test.go | 8 +- index/branchmask.go | 144 ++++++++++ index/branchmask_test.go | 248 ++++++++++++++++++ index/builder.go | 5 +- index/builder_test.go | 23 +- index/eval.go | 28 +- index/index_test.go | 14 +- index/indexdata.go | 22 +- index/large_branch_test.go | 135 ++++++++++ index/matchtree.go | 42 +-- index/matchtree_test.go | 24 +- index/merge.go | 16 +- index/merge_test.go | 26 +- index/read.go | 115 ++++++-- index/read_test.go | 2 +- index/shard_builder.go | 21 +- index/toc.go | 16 +- index/write.go | 5 +- testdata/shards/repo2_v17.00000.zoekt | Bin 0 -> 2940 bytes testdata/shards/repo_v17.00000.zoekt | Bin 0 -> 2760 bytes 21 files changed, 786 insertions(+), 144 deletions(-) create mode 100644 index/branchmask.go create mode 100644 index/branchmask_test.go create mode 100644 index/large_branch_test.go create mode 100644 testdata/shards/repo2_v17.00000.zoekt create mode 100644 testdata/shards/repo_v17.00000.zoekt diff --git a/cmd/zoekt-sourcegraph-indexserver/merge_test.go b/cmd/zoekt-sourcegraph-indexserver/merge_test.go index c08e14b0c..e5c1f38e0 100644 --- a/cmd/zoekt-sourcegraph-indexserver/merge_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/merge_test.go @@ -70,7 +70,7 @@ func TestDoNotDeleteSingleShards(t *testing.T) { s := &Server{IndexDir: dir, mergeOpts: mergeOpts{targetSizeBytes: 2000 * 1024 * 1024}} s.merge(helperCallMerge) - _, err = os.Stat(filepath.Join(dir, "test-repo_v16.00000.zoekt")) + _, err = os.Stat(filepath.Join(dir, "test-repo_v17.00000.zoekt")) if err != nil { t.Fatal(err) } @@ -171,12 +171,20 @@ func TestMerge(t *testing.T) { } checkCount := func(dir string, pattern string, want int) { - have, err := filepath.Glob(filepath.Join(dir, pattern)) + matches, err := filepath.Glob(filepath.Join(dir, pattern)) if err != nil { t.Fatal(err) } + // Filter out compound shards when counting simple shards + var have []string + for _, match := range matches { + if pattern != "compound-*" && strings.Contains(filepath.Base(match), "compound-") { + continue // Skip compound shards when checking simple shards + } + have = append(have, match) + } if len(have) != want { - t.Fatalf("want %d, have %d", want, len(have)) + t.Fatalf("want %d, have %d (pattern %s, matches %v)", want, len(have), pattern, have) } } @@ -196,7 +204,18 @@ func TestMerge(t *testing.T) { s.merge(helperCallMerge) checkCount(dir, "compound-*", tc.wantCompound) - checkCount(dir, "*_v16.00000.zoekt", tc.wantSimple) + // Count all non-compound shards (including v16 and v18) + // The test copies v16 shards and may or may not merge them + allShards, _ := filepath.Glob(filepath.Join(dir, "*.zoekt")) + simpleCount := 0 + for _, shard := range allShards { + if !strings.Contains(filepath.Base(shard), "compound-") { + simpleCount++ + } + } + if simpleCount != tc.wantSimple { + t.Fatalf("want %d simple shards, have %d", tc.wantSimple, simpleCount) + } }) } } @@ -242,8 +261,15 @@ func TestExplodeTenantCompoundShards(t *testing.T) { require.FileExists(t, cs2) // Check that we have 2 simple shards (from cs1) and 1 compound shard (cs2) - simpleShards, err := filepath.Glob(filepath.Join(dir, "*_v16.00000.zoekt")) + allShards, err := filepath.Glob(filepath.Join(dir, "*_v17.00000.zoekt")) 
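	// Simple and compound shards share the *.zoekt suffix; compound shards are
	// distinguished only by their "compound-" filename prefix, which is why this
	// test and TestMerge above filter on the basename. A shared helper
	// (hypothetical, not part of this patch) could fold the repeated loops, e.g.:
	//
	//	isCompoundShard := func(path string) bool {
	//		return strings.Contains(filepath.Base(path), "compound-")
	//	}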
require.NoError(t, err) + // Filter out compound shards (they start with "compound-") + var simpleShards []string + for _, shard := range allShards { + if !strings.Contains(filepath.Base(shard), "compound-") { + simpleShards = append(simpleShards, shard) + } + } require.Len(t, simpleShards, 2, "expected 2 simple shards") // check that the simple shards are from tenant 1 and 2 diff --git a/cmd/zoekt-sourcegraph-indexserver/meta_test.go b/cmd/zoekt-sourcegraph-indexserver/meta_test.go index c6106cb92..57d506947 100644 --- a/cmd/zoekt-sourcegraph-indexserver/meta_test.go +++ b/cmd/zoekt-sourcegraph-indexserver/meta_test.go @@ -55,7 +55,13 @@ func TestMergeMeta(t *testing.T) { if err := mergeMeta(opts); err != nil { t.Fatal(err) } - repos, _, _ := index.ReadMetadataPath(repoFns[3]) + repos, _, err := index.ReadMetadataPath(repoFns[3]) + if err != nil { + t.Fatalf("ReadMetadataPath failed: %v", err) + } + if len(repos) == 0 { + t.Fatal("ReadMetadataPath returned empty repos") + } if got, want := repos[0].RawConfig["public"], "0"; got != want { t.Fatalf("failed to update metadata of repo3. Got public %q want %q", got, want) } diff --git a/index/branchmask.go b/index/branchmask.go new file mode 100644 index 000000000..9b28243e2 --- /dev/null +++ b/index/branchmask.go @@ -0,0 +1,144 @@ +// Copyright 2025 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +// branchMask represents a variable-length bit mask for tracking which branches +// a file appears in. This allows supporting repositories with more than 64 branches. + +// newBranchMask allocates a branch mask that can hold at least numBranches bits. +func newBranchMask(numBranches int) []byte { + numBytes := (numBranches + 7) / 8 + if numBytes == 0 { + numBytes = 1 + } + return make([]byte, numBytes) +} + +// setBit sets the bit at the given position in the mask. +func setBit(mask []byte, bit int) { + byteIndex := bit / 8 + bitIndex := uint(bit % 8) + if byteIndex < len(mask) { + mask[byteIndex] |= 1 << bitIndex + } +} + +// getBit returns true if the bit at the given position is set. +func getBit(mask []byte, bit int) bool { + byteIndex := bit / 8 + bitIndex := uint(bit % 8) + if byteIndex >= len(mask) { + return false + } + return (mask[byteIndex] & (1 << bitIndex)) != 0 +} + +// andMask performs a bitwise AND of two masks and returns the result. +// The result has the length of the shorter mask. +func andMask(a, b []byte) []byte { + minLen := len(a) + if len(b) < minLen { + minLen = len(b) + } + if minLen == 0 { + return []byte{} + } + + result := make([]byte, minLen) + for i := 0; i < minLen; i++ { + result[i] = a[i] & b[i] + } + return result +} + +// orMask performs a bitwise OR of two masks and returns the result. +// The result has the length of the longer mask. 
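// For example, orMask([]byte{0x0F}, []byte{0xF0, 0x01}) == []byte{0xFF, 0x01}:
// the shorter operand is effectively padded with zero bytes, so no set bits are lost.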
+func orMask(a, b []byte) []byte { + maxLen := len(a) + if len(b) > maxLen { + maxLen = len(b) + } + if maxLen == 0 { + return []byte{} + } + + result := make([]byte, maxLen) + copy(result, a) + for i := 0; i < len(b); i++ { + result[i] |= b[i] + } + return result +} + +// orMaskInPlace performs a bitwise OR of b into a, modifying a in place. +// If a is shorter than b, this only ORs the overlapping bytes. +func orMaskInPlace(a, b []byte) { + minLen := len(a) + if len(b) < minLen { + minLen = len(b) + } + for i := 0; i < minLen; i++ { + a[i] |= b[i] + } +} + +// isZero returns true if all bits in the mask are zero. +func isZero(mask []byte) bool { + for _, b := range mask { + if b != 0 { + return false + } + } + return true +} + +// firstSetBit returns the index of the first set bit in the mask, +// or -1 if no bits are set. +func firstSetBit(mask []byte) int { + for i, b := range mask { + if b != 0 { + for bit := 0; bit < 8; bit++ { + if (b & (1 << uint(bit))) != 0 { + return i*8 + bit + } + } + } + } + return -1 +} + +// iterateBits calls fn for each set bit in the mask, passing the bit index. +func iterateBits(mask []byte, fn func(int)) { + for i, b := range mask { + if b == 0 { + continue + } + for bit := 0; bit < 8; bit++ { + if (b & (1 << uint(bit))) != 0 { + fn(i*8 + bit) + } + } + } +} + +// copyMask creates a copy of the given mask. +func copyMask(mask []byte) []byte { + if len(mask) == 0 { + return []byte{} + } + result := make([]byte, len(mask)) + copy(result, mask) + return result +} diff --git a/index/branchmask_test.go b/index/branchmask_test.go new file mode 100644 index 000000000..a335a9235 --- /dev/null +++ b/index/branchmask_test.go @@ -0,0 +1,248 @@ +// Copyright 2025 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
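
// A note on layout shared by all of these tests: by Zoekt's convention bit 0
// is the repository's default branch, and branch i lives at byte i/8, bit i%8
// of the mask, so branch 9 maps to mask[1] bit 1 and branch 64 to mask[8] bit 0.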
+ +package index + +import ( + "testing" +) + +func TestNewBranchMask(t *testing.T) { + tests := []struct { + numBranches int + wantLen int + }{ + {0, 1}, + {1, 1}, + {8, 1}, + {9, 2}, + {64, 8}, + {65, 9}, + {128, 16}, + } + + for _, tt := range tests { + mask := newBranchMask(tt.numBranches) + if len(mask) != tt.wantLen { + t.Errorf("newBranchMask(%d) = len %d, want %d", tt.numBranches, len(mask), tt.wantLen) + } + } +} + +func TestSetAndGetBit(t *testing.T) { + mask := newBranchMask(128) + + // Test setting and getting various bits + bits := []int{0, 1, 7, 8, 15, 63, 64, 127} + for _, bit := range bits { + if getBit(mask, bit) { + t.Errorf("bit %d should be unset initially", bit) + } + setBit(mask, bit) + if !getBit(mask, bit) { + t.Errorf("bit %d should be set after setBit", bit) + } + } + + // Test that other bits remain unset + if getBit(mask, 2) || getBit(mask, 16) || getBit(mask, 100) { + t.Error("unset bits should return false") + } +} + +func TestGetBitOutOfBounds(t *testing.T) { + mask := newBranchMask(8) + if getBit(mask, 100) { + t.Error("out of bounds bit should return false") + } +} + +func TestAndMask(t *testing.T) { + a := []byte{0b11110000, 0b10101010} + b := []byte{0b11001100, 0b01010101} + result := andMask(a, b) + + expected := []byte{0b11000000, 0b00000000} + if len(result) != len(expected) { + t.Fatalf("andMask result length = %d, want %d", len(result), len(expected)) + } + for i := range expected { + if result[i] != expected[i] { + t.Errorf("andMask result[%d] = %08b, want %08b", i, result[i], expected[i]) + } + } +} + +func TestAndMaskDifferentLengths(t *testing.T) { + a := []byte{0xFF, 0xFF, 0xFF} + b := []byte{0xAA} + result := andMask(a, b) + + if len(result) != 1 { + t.Errorf("andMask with different lengths should return shorter length, got %d", len(result)) + } + if result[0] != 0xAA { + t.Errorf("andMask result = %02x, want AA", result[0]) + } +} + +func TestOrMask(t *testing.T) { + a := []byte{0b11110000, 0b10101010} + b := []byte{0b11001100, 0b01010101} + result := orMask(a, b) + + expected := []byte{0b11111100, 0b11111111} + if len(result) != len(expected) { + t.Fatalf("orMask result length = %d, want %d", len(result), len(expected)) + } + for i := range expected { + if result[i] != expected[i] { + t.Errorf("orMask result[%d] = %08b, want %08b", i, result[i], expected[i]) + } + } +} + +func TestOrMaskDifferentLengths(t *testing.T) { + a := []byte{0x11, 0x22, 0x33} + b := []byte{0xAA} + result := orMask(a, b) + + expected := []byte{0xBB, 0x22, 0x33} + if len(result) != len(expected) { + t.Errorf("orMask should return longer length, got %d want %d", len(result), len(expected)) + } + for i := range expected { + if result[i] != expected[i] { + t.Errorf("orMask result[%d] = %02x, want %02x", i, result[i], expected[i]) + } + } +} + +func TestOrMaskInPlace(t *testing.T) { + a := []byte{0b11110000, 0b10101010} + b := []byte{0b11001100, 0b01010101} + orMaskInPlace(a, b) + + expected := []byte{0b11111100, 0b11111111} + for i := range expected { + if a[i] != expected[i] { + t.Errorf("orMaskInPlace result[%d] = %08b, want %08b", i, a[i], expected[i]) + } + } +} + +func TestIsZero(t *testing.T) { + tests := []struct { + name string + mask []byte + want bool + }{ + {"empty", []byte{}, true}, + {"zero byte", []byte{0}, true}, + {"zero bytes", []byte{0, 0, 0}, true}, + {"one bit set", []byte{1}, false}, + {"middle bit set", []byte{0, 0x10, 0}, false}, + {"last bit set", []byte{0, 0, 0x80}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + 
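			// An empty (nil) mask is also reported as zero; gatherBranches in
			// eval.go relies on this to fall back to the file's own branch mask
			// when no branch atom contributed a mask.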
if got := isZero(tt.mask); got != tt.want { + t.Errorf("isZero(%v) = %v, want %v", tt.mask, got, tt.want) + } + }) + } +} + +func TestFirstSetBit(t *testing.T) { + tests := []struct { + name string + mask []byte + want int + }{ + {"empty", []byte{}, -1}, + {"zero", []byte{0, 0}, -1}, + {"first bit", []byte{0b00000001}, 0}, + {"second bit", []byte{0b00000010}, 1}, + {"eighth bit", []byte{0b10000000}, 7}, + {"ninth bit", []byte{0, 0b00000001}, 8}, + {"middle", []byte{0, 0, 0b00100000}, 21}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := firstSetBit(tt.mask); got != tt.want { + t.Errorf("firstSetBit(%v) = %d, want %d", tt.mask, got, tt.want) + } + }) + } +} + +func TestIterateBits(t *testing.T) { + mask := []byte{0b10100001, 0b00000100} + var collected []int + iterateBits(mask, func(bit int) { + collected = append(collected, bit) + }) + + expected := []int{0, 5, 7, 10} + if len(collected) != len(expected) { + t.Fatalf("iterateBits collected %d bits, want %d", len(collected), len(expected)) + } + for i := range expected { + if collected[i] != expected[i] { + t.Errorf("iterateBits bit[%d] = %d, want %d", i, collected[i], expected[i]) + } + } +} + +func TestIterateBitsEmpty(t *testing.T) { + mask := []byte{0, 0, 0} + called := false + iterateBits(mask, func(bit int) { + called = true + }) + if called { + t.Error("iterateBits should not call function for empty mask") + } +} + +func TestCopyMask(t *testing.T) { + original := []byte{0x12, 0x34, 0x56} + copy := copyMask(original) + + // Check values match + if len(copy) != len(original) { + t.Fatalf("copy length = %d, want %d", len(copy), len(original)) + } + for i := range original { + if copy[i] != original[i] { + t.Errorf("copy[%d] = %02x, want %02x", i, copy[i], original[i]) + } + } + + // Modify copy and ensure original is unchanged + copy[0] = 0xFF + if original[0] == 0xFF { + t.Error("modifying copy should not affect original") + } +} + +func TestCopyMaskEmpty(t *testing.T) { + original := []byte{} + copy := copyMask(original) + if len(copy) != 0 { + t.Errorf("copy of empty mask should be empty, got length %d", len(copy)) + } +} diff --git a/index/builder.go b/index/builder.go index 0c9112f26..3aa95910e 100644 --- a/index/builder.go +++ b/index/builder.go @@ -366,7 +366,10 @@ var readVersions = []struct { IndexFormatVersion: IndexFormatVersion, FeatureVersion: FeatureVersion, }, { - IndexFormatVersion: NextIndexFormatVersion, + IndexFormatVersion: 17, // Support reading v17 compound shards + FeatureVersion: FeatureVersion, +}, { + IndexFormatVersion: 16, // Support reading v16 simple shards FeatureVersion: FeatureVersion, }} diff --git a/index/builder_test.go b/index/builder_test.go index 6c8b97910..1199b60c6 100644 --- a/index/builder_test.go +++ b/index/builder_test.go @@ -24,8 +24,8 @@ import ( var update = flag.Bool("update", false, "update golden file") -// ensure we don't regress on how we build v16 -func TestBuildv16(t *testing.T) { +// ensure we don't regress on how we build v17 +func TestBuildv17(t *testing.T) { dir := t.TempDir() opts := Options{ @@ -54,11 +54,16 @@ func TestBuildv16(t *testing.T) { } } - wantP := filepath.Join("../testdata/shards", "repo_v16.00000.zoekt") + wantP := filepath.Join("../testdata/shards", "repo_v17.00000.zoekt") // fields indexTime and id depend on time. For this test, we copy the fields from - // the old shard. - _, wantMetadata, err := ReadMetadataPath(wantP) + // the golden shard if it exists, otherwise use the v16 shard for initial generation. 
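+	// The v17 golden shard can be (re)generated by running this test with the
+	// -update flag declared at the top of this file, e.g.
+	// `go test ./index -run TestBuildv17 -update`.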
+ goldenPath := wantP + if _, err := os.Stat(goldenPath); os.IsNotExist(err) { + // If v17 golden file doesn't exist yet, use v16 for initial metadata + goldenPath = filepath.Join("../testdata/shards", "repo_v16.00000.zoekt") + } + _, wantMetadata, err := ReadMetadataPath(goldenPath) if err != nil { t.Fatal(err) } @@ -69,7 +74,7 @@ func TestBuildv16(t *testing.T) { t.Fatal(err) } - gotP := filepath.Join(dir, "repo_v16.00000.zoekt") + gotP := filepath.Join(dir, "repo_v17.00000.zoekt") if *update { data, err := os.ReadFile(gotP) @@ -176,8 +181,8 @@ func TestIncrementalSkipIndexing(t *testing.T) { DisableCTags: true, }, }, { - name: "v16-noop", - want: true, + name: "v17-noop", + want: true, // v17 format is current, can skip re-indexing opts: Options{ RepositoryDescription: zoekt.Repository{ Name: "repo", @@ -735,7 +740,7 @@ func TestBuilder_DeltaShardsMetadataInOlderShards(t *testing.T) { for _, s := range shards { repositories, _, err := ReadMetadataPathAlive(s) if err != nil { - t.Fatalf("reading repository metadata from shard %q", s) + t.Fatalf("reading repository metadata from shard %q: %v", s, err) } var foundRepository *zoekt.Repository diff --git a/index/eval.go b/index/eval.go index 16b9adfda..ff578e48c 100644 --- a/index/eval.go +++ b/index/eval.go @@ -477,16 +477,7 @@ func setScoreWeight(scoreWeight float64, cm []*candidateMatch) []*candidateMatch } func (d *indexData) branchIndex(docID uint32) int { - mask := d.fileBranchMasks[docID] - idx := 0 - for mask != 0 { - if mask&0x1 != 0 { - return idx - } - idx++ - mask >>= 1 - } - return -1 + return firstSetBit(d.fileBranchMasks[docID]) } // gatherBranches returns a list of branch names taking into account any branch @@ -494,30 +485,25 @@ func (d *indexData) branchIndex(docID uint32) int { // branches containing the docID and matching the branch filter. Otherwise, it // returns all branches containing docID. 
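// With variable-length masks, branchNames maps a bit position to a branch
// name (position 0 being the default branch), replacing the old power-of-two
// keys, so iterateBits below can index branchNames directly.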
func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTree]bool) []string { - var mask uint64 + var mask []byte visitMatchAtoms(mt, known, func(mt matchTree) { bq, ok := mt.(*branchQueryMatchTree) if !ok { return } - mask = mask | bq.branchMask() + mask = orMask(mask, bq.branchMask()) }) - if mask == 0 { + if isZero(mask) { mask = d.fileBranchMasks[docID] } var branches []string - id := uint64(1) branchNames := d.branchNames[d.repos[docID]] - for mask != 0 { - if mask&0x1 != 0 { - branches = append(branches, branchNames[uint(id)]) - } - id <<= 1 - mask >>= 1 - } + iterateBits(mask, func(bit int) { + branches = append(branches, branchNames[bit]) + }) return branches } diff --git a/index/index_test.go b/index/index_test.go index 5b28fa584..15aa38aaf 100644 --- a/index/index_test.go +++ b/index/index_test.go @@ -67,7 +67,8 @@ func testShardBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][] t.Helper() b := newShardBuilder() - b.indexFormatVersion = NextIndexFormatVersion + // Use current version (17) which supports compound shards + b.indexFormatVersion = IndexFormatVersion if len(repos) != len(docs) { t.Fatalf("testShardBuilderCompound: repos must be the same length as docs, got: len(repos)=%d len(docs)=%d", len(repos), len(docs)) @@ -1258,19 +1259,18 @@ func TestBranchMask(t *testing.T) { } func TestBranchLimit(t *testing.T) { - for limit := 64; limit <= 65; limit++ { + // Test that we can handle more than 64 branches (the old limit) + for _, limit := range []int{64, 65, 100} { r := &zoekt.Repository{} - for i := range limit { + for i := 0; i < limit; i++ { s := fmt.Sprintf("b%d", i) r.Branches = append(r.Branches, zoekt.RepositoryBranch{ s, "v-" + s, }) } _, err := NewShardBuilder(r) - if limit == 64 && err != nil { - t.Fatalf("NewShardBuilder: %v", err) - } else if limit == 65 && err == nil { - t.Fatalf("NewShardBuilder succeeded") + if err != nil { + t.Fatalf("NewShardBuilder with %d branches: %v", limit, err) } } } diff --git a/index/indexdata.go b/index/indexdata.go index 04359f408..08387f536 100644 --- a/index/indexdata.go +++ b/index/indexdata.go @@ -21,7 +21,6 @@ import ( "hash/crc64" "log" "math" - "math/bits" "slices" "unicode/utf8" @@ -70,13 +69,13 @@ type indexData struct { // rune offsets for the file name boundaries fileNameEndRunes []uint32 - fileBranchMasks []uint64 + fileBranchMasks [][]byte - // mask (power of 2) => name - branchNames []map[uint]string + // bit position => name + branchNames []map[int]string - // name => mask (power of 2) - branchIDs []map[string]uint + // name => bit position + branchIDs []map[string]int metaData zoekt.IndexMetadata repoMetaData []zoekt.Repository @@ -275,8 +274,15 @@ func (d *indexData) calculateNewLinesStats(start, end uint32) (count, defaultCou // branchMask is a bitmask of the branches for a document. Zoekt by // convention represents the default branch as the lowest bit. branchMask := d.fileBranchMasks[i] - isDefault := (branchMask & 1) == 1 - others := uint64(bits.OnesCount64(branchMask >> 1)) + isDefault := getBit(branchMask, 0) + + // Count set bits excluding the first (default) branch + others := uint64(0) + iterateBits(branchMask, func(bit int) { + if bit > 0 { + others++ + } + }) // this is readNewlines but only reading the size of each section which // corresponds to the number of newlines. 
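The replacement above swaps the old bits.OnesCount64(branchMask >> 1) for an iterateBits pass. An equivalent per-byte popcount is also possible; the following is a minimal sketch (the function name is illustrative, not part of the patch), assuming the little-endian bit layout used by setBit:

import "math/bits"

// countOtherBranches counts the branches a file appears in, excluding the
// default branch stored in bit 0.
func countOtherBranches(mask []byte) uint64 {
	others := 0
	for i, b := range mask {
		n := bits.OnesCount8(b)
		if i == 0 {
			n -= int(b & 1) // drop the default-branch bit
		}
		others += n
	}
	return uint64(others)
}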
diff --git a/index/large_branch_test.go b/index/large_branch_test.go new file mode 100644 index 000000000..0f7fbbfc5 --- /dev/null +++ b/index/large_branch_test.go @@ -0,0 +1,135 @@ +// Copyright 2025 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "bytes" + "fmt" + "testing" + + "github.com/sourcegraph/zoekt" +) + +// TestLargeBranchCount verifies that we can handle repositories with more than 64 branches, +// which was the limit when using uint64 bitmasks. +func TestLargeBranchCount(t *testing.T) { + numBranches := 100 + + // Create a repository with 100 branches + branches := make([]zoekt.RepositoryBranch, numBranches) + for i := 0; i < numBranches; i++ { + branches[i] = zoekt.RepositoryBranch{ + Name: fmt.Sprintf("branch-%d", i), + Version: fmt.Sprintf("v%d", i), + } + } + + repo := &zoekt.Repository{ + Name: "test-large-branch-repo", + Branches: branches, + } + + b, err := NewShardBuilder(repo) + if err != nil { + t.Fatalf("NewShardBuilder failed: %v", err) + } + + // Add a document that appears in branches 0, 64, and 99 (testing across byte boundaries) + doc := Document{ + Name: "test.txt", + Content: []byte("test content"), + Branches: []string{"branch-0", "branch-64", "branch-99"}, + } + + if err := b.Add(doc); err != nil { + t.Fatalf("Add failed: %v", err) + } + + // Verify the branch mask was created correctly + if len(b.branchMasks) != 1 { + t.Fatalf("Expected 1 branch mask, got %d", len(b.branchMasks)) + } + + mask := b.branchMasks[0] + + // Check that the expected bits are set + if !getBit(mask, 0) { + t.Error("Expected bit 0 (branch-0) to be set") + } + if !getBit(mask, 64) { + t.Error("Expected bit 64 (branch-64) to be set") + } + if !getBit(mask, 99) { + t.Error("Expected bit 99 (branch-99) to be set") + } + + // Check that other bits are not set + if getBit(mask, 1) { + t.Error("Expected bit 1 to be unset") + } + if getBit(mask, 63) { + t.Error("Expected bit 63 to be unset") + } + if getBit(mask, 65) { + t.Error("Expected bit 65 to be unset") + } + + // Verify we can write and read back the shard + var buf bytes.Buffer + if err := b.Write(&buf); err != nil { + t.Fatalf("Write failed: %v", err) + } + + // Create an in-memory index file + f := &memIndexFile{data: buf.Bytes()} + + // Read it back + d, err := loadIndexData(f) + if err != nil { + t.Fatalf("loadIndexData failed: %v", err) + } + + // Verify the branch mask was preserved + if len(d.fileBranchMasks) != 1 { + t.Fatalf("Expected 1 file branch mask, got %d", len(d.fileBranchMasks)) + } + + readMask := d.fileBranchMasks[0] + if !getBit(readMask, 0) || !getBit(readMask, 64) || !getBit(readMask, 99) { + t.Error("Branch mask was not preserved correctly after write/read") + } +} + +// memIndexFile is a simple in-memory implementation of IndexFile for testing +type memIndexFile struct { + data []byte +} + +func (m *memIndexFile) Read(off uint32, sz uint32) ([]byte, error) { + if off+sz > uint32(len(m.data)) { + return nil, fmt.Errorf("read past end of 
data") + } + return m.data[off : off+sz], nil +} + +func (m *memIndexFile) Size() (uint32, error) { + return uint32(len(m.data)), nil +} + +func (m *memIndexFile) Close() {} + +func (m *memIndexFile) Name() string { + return "mem" +} diff --git a/index/matchtree.go b/index/matchtree.go index 92492f6ad..8ae27e590 100644 --- a/index/matchtree.go +++ b/index/matchtree.go @@ -241,8 +241,8 @@ type substrMatchTree struct { } type branchQueryMatchTree struct { - fileMasks []uint64 - masks []uint64 + fileMasks [][]byte + masks [][]byte repos []uint16 // mutable @@ -250,8 +250,8 @@ type branchQueryMatchTree struct { docID uint32 } -func (t *branchQueryMatchTree) branchMask() uint64 { - return t.fileMasks[t.docID] & t.masks[t.repos[t.docID]] +func (t *branchQueryMatchTree) branchMask() []byte { + return andMask(t.fileMasks[t.docID], t.masks[t.repos[t.docID]]) } type symbolRegexpMatchTree struct { @@ -496,7 +496,7 @@ func (t *branchQueryMatchTree) nextDoc() uint32 { } for i := start; i < uint32(len(t.fileMasks)); i++ { - if (t.masks[t.repos[i]] & t.fileMasks[i]) != 0 { + if !isZero(andMask(t.masks[t.repos[i]], t.fileMasks[i])) { return i } } @@ -563,7 +563,7 @@ func (t *substrMatchTree) String() string { } func (t *branchQueryMatchTree) String() string { - return fmt.Sprintf("branch(%x)", t.masks) + return fmt.Sprintf("branch(%d repos)", len(t.masks)) } func (t *symbolSubstrMatchTree) String() string { @@ -789,7 +789,7 @@ func (t *orMatchTree) matches(cp *contentProvider, cost int, known map[matchTree } func (t *branchQueryMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState { - return matchesStatePred(t.branchMask() != 0) + return matchesStatePred(!isZero(t.branchMask())) } func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) matchesState { @@ -1089,17 +1089,21 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) return d.newSubstringMatchTree(s) case *query.Branch: - masks := make([]uint64, 0, len(d.repoMetaData)) + masks := make([][]byte, 0, len(d.repoMetaData)) if s.Pattern == "HEAD" { - for range d.repoMetaData { - masks = append(masks, 1) + for _, md := range d.repoMetaData { + mask := newBranchMask(len(md.Branches)) + if len(md.Branches) > 0 { + setBit(mask, 0) + } + masks = append(masks, mask) } } else { - for _, branchIDs := range d.branchIDs { - mask := uint64(0) - for nm, m := range branchIDs { + for i, branchIDs := range d.branchIDs { + mask := newBranchMask(len(d.repoMetaData[i].Branches)) + for nm, bit := range branchIDs { if (s.Exact && nm == s.Pattern) || (!s.Exact && strings.Contains(nm, s.Pattern)) { - mask |= uint64(m) + setBit(mask, bit) } } masks = append(masks, mask) @@ -1177,12 +1181,14 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) }, nil case *query.BranchesRepos: - reposBranchesWant := make([]uint64, len(d.repoMetaData)) + reposBranchesWant := make([][]byte, len(d.repoMetaData)) for repoIdx := range d.repoMetaData { - var mask uint64 + mask := newBranchMask(len(d.repoMetaData[repoIdx].Branches)) for _, br := range s.List { if br.Repos.Contains(d.repoMetaData[repoIdx].ID) { - mask |= uint64(d.branchIDs[repoIdx][br.Branch]) + if bit, ok := d.branchIDs[repoIdx][br.Branch]; ok { + setBit(mask, bit) + } } } reposBranchesWant[repoIdx] = mask @@ -1191,7 +1197,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) reason: "BranchesRepos", numDocs: d.numDocs(), predicate: func(docID uint32) bool { - return 
d.fileBranchMasks[docID]&reposBranchesWant[d.repos[docID]] != 0 + return !isZero(andMask(d.fileBranchMasks[docID], reposBranchesWant[d.repos[docID]])) }, }, nil diff --git a/index/matchtree_test.go b/index/matchtree_test.go index 471bf0c8c..7994009c0 100644 --- a/index/matchtree_test.go +++ b/index/matchtree_test.go @@ -290,7 +290,7 @@ func TestSymbolMatchTree(t *testing.T) { func TestRepoSet(t *testing.T) { d := &indexData{ repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, - fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, + fileBranchMasks: [][]byte{{0x01}, {0x01}, {0x01}, {0x01}, {0x01}, {0x01}}, repos: []uint16{0, 0, 1, 2, 3, 3}, } mt, err := d.newMatchTree(&query.RepoSet{Set: map[string]bool{"r1": true, "r3": true, "r99": true}}, matchTreeOpt{}) @@ -313,7 +313,7 @@ func TestRepoSet(t *testing.T) { func TestRepo(t *testing.T) { d := &indexData{ repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}}, - fileBranchMasks: []uint64{1, 1, 1, 1, 1}, + fileBranchMasks: [][]byte{{0x01}, {0x01}, {0x01}, {0x01}, {0x01}}, repos: []uint16{0, 0, 1, 0, 1}, } mt, err := d.newMatchTree(&query.Repo{Regexp: regexp.MustCompile("ar")}, matchTreeOpt{}) @@ -336,12 +336,14 @@ func TestRepo(t *testing.T) { func TestBranchesRepos(t *testing.T) { d := &indexData{ repoMetaData: []zoekt.Repository{ - {ID: hash("foo"), Name: "foo"}, - {ID: hash("bar"), Name: "bar"}, + {ID: hash("foo"), Name: "foo", Branches: []zoekt.RepositoryBranch{{Name: "HEAD"}}}, + {ID: hash("bar"), Name: "bar", Branches: []zoekt.RepositoryBranch{{Name: "HEAD"}, {Name: "b1"}}}, }, - fileBranchMasks: []uint64{1, 1, 1, 2, 1, 2, 1}, - repos: []uint16{0, 0, 1, 1, 1, 1, 1}, - branchIDs: []map[string]uint{{"HEAD": 1}, {"HEAD": 1, "b1": 2}}, + fileBranchMasks: [][]byte{ + {0x01}, {0x01}, {0x01}, {0x02}, {0x01}, {0x02}, {0x01}, + }, + repos: []uint16{0, 0, 1, 1, 1, 1, 1}, + branchIDs: []map[string]int{{"HEAD": 0}, {"HEAD": 0, "b1": 1}}, } mt, err := d.newMatchTree(&query.BranchesRepos{List: []query.BranchRepos{ @@ -369,7 +371,7 @@ func TestBranchesRepos(t *testing.T) { func TestRepoIDs(t *testing.T) { d := &indexData{ repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, - fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, + fileBranchMasks: [][]byte{{0x01}, {0x01}, {0x01}, {0x01}, {0x01}, {0x01}}, repos: []uint16{0, 0, 1, 2, 3, 3}, } mt, err := d.newMatchTree(&query.RepoIDs{Repos: roaring.BitmapOf(1, 3, 99)}, matchTreeOpt{}) @@ -434,9 +436,9 @@ func TestMetaQueryMatchTree(t *testing.T) { {Name: "r3", Metadata: map[string]string{"haystack": "needle"}}, {Name: "r4", Metadata: map[string]string{"note": "test"}}, }, - fileBranchMasks: []uint64{1, 1, 1, 1, 1}, // 5 docs - repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos - docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction + fileBranchMasks: [][]byte{{0x01}, {0x01}, {0x01}, {0x01}, {0x01}}, // 5 docs + repos: []uint16{0, 1, 2, 3, 4}, // map docIDs to repos + docMatchTreeCache: newDocMatchTreeCache(1), // small cache to test eviction } q := &query.Meta{ diff --git a/index/merge.go b/index/merge.go index 132b03346..84a5725ec 100644 --- a/index/merge.go +++ b/index/merge.go @@ -42,7 +42,7 @@ func Merge(dstDir string, files ...IndexFile) (tmpName, dstName string, _ error) } } - dstName = filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", hasher.Sum(nil), NextIndexFormatVersion, 0)) + dstName = filepath.Join(dstDir, fmt.Sprintf("compound-%x_v%d.%05d.zoekt", 
hasher.Sum(nil), IndexFormatVersion, 0)) tmpName = dstName + ".tmp" if err := builderWriteAll(tmpName, ib); err != nil { return "", "", err @@ -99,7 +99,8 @@ func merge(ds ...*indexData) (*ShardBuilder, error) { }) sb := newShardBuilder() - sb.indexFormatVersion = NextIndexFormatVersion + // Use current format version (17) for merged/compound shards + sb.indexFormatVersion = IndexFormatVersion for _, d := range ds { lastRepoID := -1 @@ -295,14 +296,9 @@ func addDocument(d *indexData, ib *ShardBuilder, repoID int, docID uint32) error // calculate branches { mask := d.fileBranchMasks[docID] - id := uint32(1) - for mask != 0 { - if mask&0x1 != 0 { - doc.Branches = append(doc.Branches, d.branchNames[repoID][uint(id)]) - } - id <<= 1 - mask >>= 1 - } + iterateBits(mask, func(bit int) { + doc.Branches = append(doc.Branches, d.branchNames[repoID][bit]) + }) } return ib.Add(doc) } diff --git a/index/merge_test.go b/index/merge_test.go index 31d92fffa..7252491eb 100644 --- a/index/merge_test.go +++ b/index/merge_test.go @@ -14,17 +14,22 @@ import ( // explode(merge(shard1, shard2)). We expect the input and output shards to be // identical. func TestExplode(t *testing.T) { - simpleShards := []string{ + // Use v16 source files but expect v17 output after merge+explode + v16Shards := []string{ ".././testdata/shards/repo_v16.00000.zoekt", ".././testdata/shards/repo2_v16.00000.zoekt", } + v17Shards := []string{ + ".././testdata/shards/repo_v17.00000.zoekt", + ".././testdata/shards/repo2_v17.00000.zoekt", + } // repo name -> IndexMetadata m := make(map[string]*zoekt.IndexMetadata, 2) // merge var files []IndexFile - for _, fn := range simpleShards { + for _, fn := range v16Shards { f, err := os.Open(fn) if err != nil { t.Fatal(err) @@ -89,7 +94,8 @@ func TestExplode(t *testing.T) { } } - for _, s := range simpleShards { + // Compare exploded v17 shards with expected v17 golden files + for _, s := range v17Shards { checkSameShards(t, s, filepath.Join(tmpDir, filepath.Base(s))) } } @@ -98,13 +104,23 @@ func TestExplode(t *testing.T) { // small enough to be read in all at once. func checkSameShards(t *testing.T, shard1, shard2 string) { t.Helper() - b1, err := os.ReadFile(shard1) + + b2, err := os.ReadFile(shard2) if err != nil { t.Fatal(err) } - b2, err := os.ReadFile(shard2) + b1, err := os.ReadFile(shard1) if err != nil { + if os.IsNotExist(err) && *update { + // Golden file doesn't exist, create it in update mode + t.Logf("creating new golden file %s", shard1) + err := os.WriteFile(shard1, b2, 0o600) + if err != nil { + t.Fatal(err) + } + return + } t.Fatal(err) } diff --git a/index/read.go b/index/read.go index 3a9c05b91..8cdc8f0c7 100644 --- a/index/read.go +++ b/index/read.go @@ -130,20 +130,33 @@ func (r *reader) readTOCSections(toc *indexTOC, tags []string) error { skipSection := len(tags) > 0 && !slices.Contains(tags, tag) sec := secs[tag] if sec == nil || sec.kind() != sectionKind(kind) { - // If we don't recognize the section, we may be reading a newer index than the current version. Use - // a "dummy section" struct to skip over it. 
- skipSection = true - log.Printf("encountered unrecognized index section (%s), skipping over it", tag) - - switch sectionKind(kind) { - case sectionKindSimple: - sec = &simpleSection{} - case sectionKindCompound: - sec = &compoundSection{} - case sectionKindCompoundLazy: - sec = &lazyCompoundSection{} - default: - return fmt.Errorf("unknown section kind %d", kind) + // Special case: branchMasks changed from simpleSection (v16-17) to compoundSection (v18+) + // If we encounter a simple branchMasks, read it into branchMasksSimple for backward compat + if tag == "branchMasks" && sectionKind(kind) == sectionKindSimple { + if compat := secs["branchMasks-compat-simple"]; compat != nil { + sec = compat + skipSection = false + } else { + // Fallback: create a temporary simple section to read it + sec = &simpleSection{} + skipSection = false + } + } else { + // If we don't recognize the section, we may be reading a newer index than the current version. Use + // a "dummy section" struct to skip over it. + skipSection = true + log.Printf("encountered unrecognized index section (%s), skipping over it", tag) + + switch sectionKind(kind) { + case sectionKindSimple: + sec = &simpleSection{} + case sectionKindCompound: + sec = &compoundSection{} + case sectionKindCompoundLazy: + sec = &lazyCompoundSection{} + default: + return fmt.Errorf("unknown section kind %d", kind) + } } } @@ -248,15 +261,15 @@ func (r *reader) readJSON(data any, sec simpleSection) error { // canReadVersion returns checks if zoekt can read in md. If it can't a // non-nil error is returned. func canReadVersion(md *zoekt.IndexMetadata) bool { - // Backwards compatible with v16 - return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion + // Backwards compatible with v16 and v17 + return md.IndexFormatVersion >= 16 && md.IndexFormatVersion <= IndexFormatVersion } func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { d := indexData{ file: r.r, - branchIDs: []map[string]uint{}, - branchNames: []map[uint]string{}, + branchIDs: []map[string]int{}, + branchNames: []map[int]string{}, // docMatchTreeCache is disabled by default. 
// The number of max entries can be set with environment variable ZOEKT_DOCMATCHTREE_CACHE @@ -330,9 +343,42 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { return nil, err } - d.fileBranchMasks, err = readSectionU64(d.file, toc.branchMasks) - if err != nil { - return nil, err + // Read branch masks - format depends on version + if toc.branchMasks.data.sz > 0 { + // New format (v17+): variable-length byte arrays in compound section + branchMasksIndex := toc.branchMasks.relativeIndex() + numDocs := len(branchMasksIndex) - 1 + d.fileBranchMasks = make([][]byte, numDocs) + for i := 0; i < numDocs; i++ { + maskOff := branchMasksIndex[i] + maskSz := branchMasksIndex[i+1] - maskOff + if maskSz > 0 { + d.fileBranchMasks[i], err = d.file.Read(toc.branchMasks.data.off+maskOff, maskSz) + if err != nil { + return nil, err + } + } else { + d.fileBranchMasks[i] = []byte{} + } + } + } else if toc.branchMasksSimple.sz > 0 { + // Old format (v16): uint64 array in simple section + // We need to convert uint64 masks to byte arrays + oldMasks, err := readSectionU64(d.file, toc.branchMasksSimple) + if err != nil { + return nil, err + } + d.fileBranchMasks = make([][]byte, len(oldMasks)) + for i, mask := range oldMasks { + // Convert uint64 to byte array (8 bytes, little-endian to match bit positions) + d.fileBranchMasks[i] = make([]byte, 8) + for j := 0; j < 8; j++ { + d.fileBranchMasks[i][j] = byte(mask >> (8 * j)) + } + } + } else { + // No branch masks section (shouldn't happen in valid indexes) + d.fileBranchMasks = [][]byte{} } d.fileNameContent, err = d.readSectionBlob(toc.fileNames.data) @@ -348,12 +394,11 @@ func (r *reader) readIndexData(toc *indexTOC) (*indexData, error) { } for _, md := range d.repoMetaData { - repoBranchIDs := make(map[string]uint, len(md.Branches)) - repoBranchNames := make(map[uint]string, len(md.Branches)) + repoBranchIDs := make(map[string]int, len(md.Branches)) + repoBranchNames := make(map[int]string, len(md.Branches)) for j, br := range md.Branches { - id := uint(1) << uint(j) - repoBranchIDs[br.Name] = id - repoBranchNames[id] = br.Name + repoBranchIDs[br.Name] = j + repoBranchNames[j] = br.Name } d.branchIDs = append(d.branchIDs, repoBranchIDs) d.branchNames = append(d.branchNames, repoBranchNames) @@ -440,6 +485,7 @@ func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSectio return nil, &md, fmt.Errorf("failed to read meta file: %w", err) } + fromMetaFile := len(blob) > 0 if len(blob) == 0 { blob, err = r.r.Read(repoMetaData.off, repoMetaData.sz) if err != nil { @@ -448,9 +494,24 @@ func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSectio } var repos []*zoekt.Repository + // Repository metadata format: + // - v16: single repository object + // - v17+: array of repositories (for compound shards) OR single repository (for delta .meta files) + // + // For v17+ shards with .meta files, we need to handle both array and single object formats + // because different code paths write different formats (mergeMeta writes array, delta builds write single object) if md.IndexFormatVersion >= 17 { + // Try array format first if err := json.Unmarshal(blob, &repos); err != nil { - return nil, &md, err + // If that fails and we're reading from .meta file, try single repository format + if fromMetaFile { + repos = make([]*zoekt.Repository, 1) + if err := json.Unmarshal(blob, &repos[0]); err != nil { + return nil, &md, err + } + } else { + return nil, &md, err + } } } else { repos = make([]*zoekt.Repository, 1) diff --git 
a/index/read_test.go b/index/read_test.go index 087b8a65b..c30cdc2bd 100644 --- a/index/read_test.go +++ b/index/read_test.go @@ -151,7 +151,7 @@ func TestGet(t *testing.T) { t.Fatalf("readIndexData: %v", err) } - var off uint32 = 96 + var off uint32 = 93 // Updated for v17 format (branchMasks as compoundSection) cases := []struct { ng string diff --git a/index/shard_builder.go b/index/shard_builder.go index 55b82bb32..ebd911c2c 100644 --- a/index/shard_builder.go +++ b/index/shard_builder.go @@ -186,7 +186,7 @@ type ShardBuilder struct { checksums []byte - branchMasks []uint64 + branchMasks [][]byte subRepos []uint32 // docID => repoID @@ -301,10 +301,6 @@ func (b *ShardBuilder) setRepository(desc *zoekt.Repository) error { return err } - if len(desc.Branches) > 64 { - return fmt.Errorf("too many branches") - } - repo := *desc // copy subrepomap without root @@ -469,13 +465,14 @@ func (b *ShardBuilder) Add(doc Document) error { return fmt.Errorf("unknown subrepo path %q", doc.SubRepositoryPath) } - var mask uint64 + repo := &b.repoList[len(b.repoList)-1] + mask := newBranchMask(len(repo.Branches)) for _, br := range doc.Branches { - m := b.branchMask(br) - if m == 0 { + bit := b.branchBit(br) + if bit < 0 { return fmt.Errorf("no branch found for %s", br) } - mask |= m + setBit(mask, bit) } if repoIdx > 1<<16 { @@ -516,13 +513,13 @@ func (b *ShardBuilder) Add(doc Document) error { return nil } -func (b *ShardBuilder) branchMask(br string) uint64 { +func (b *ShardBuilder) branchBit(br string) int { for i, b := range b.repoList[len(b.repoList)-1].Branches { if b.Name == br { - return uint64(1) << uint(i) + return i } } - return 0 + return -1 } // repoIDs returns a list of sourcegraph IDs for the indexed repos. If the ID diff --git a/index/toc.go b/index/toc.go index f42887939..3c1661cdc 100644 --- a/index/toc.go +++ b/index/toc.go @@ -28,7 +28,8 @@ package index // 14: languages // 15: rune based symbol sections // 16: ctags metadata -const IndexFormatVersion = 16 +// 17: compound shards (multi repo) + variable-length branch masks +const IndexFormatVersion = 17 // FeatureVersion is increased if a feature is added that requires reindexing data // without changing the format version @@ -65,8 +66,8 @@ const WriteMinFeatureVersion = 10 // load a file with a FeatureVersion below it. 
const ReadMinFeatureVersion = 8 -// 17: compound shard (multi repo) -const NextIndexFormatVersion = 17 +// 18: future format (placeholder for next version) +const NextIndexFormatVersion = 18 type indexTOC struct { fileContents compoundSection @@ -85,8 +86,9 @@ type indexTOC struct { symbolKindMap compoundSection symbolMetaData simpleSection - branchMasks simpleSection - subRepos simpleSection + branchMasks compoundSection + branchMasksSimple simpleSection // For backward compatibility with v16-17 + subRepos simpleSection nameNgramText simpleSection namePostings compoundSection @@ -123,7 +125,7 @@ func (t *indexTOC) sections() []section { &t.postings, &t.nameNgramText, &t.namePostings, - &t.branchMasks, + &t.branchMasksSimple, // Old format used simpleSection &t.subRepos, &t.runeOffsets, &t.nameRuneOffsets, @@ -152,6 +154,8 @@ func (t *indexTOC) sectionsTagged() map[string]section { for _, ent := range t.sectionsTaggedCompatibilityList() { out[ent.tag] = ent.sec } + // Special alias for backward compatibility: allow reading old simpleSection branchMasks + out["branchMasks-compat-simple"] = &t.branchMasksSimple return out } diff --git a/index/write.go b/index/write.go index bd604fc66..de85ea702 100644 --- a/index/write.go +++ b/index/write.go @@ -109,7 +109,8 @@ func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, } func (b *ShardBuilder) Write(out io.Writer) error { - next := b.indexFormatVersion == NextIndexFormatVersion + // Compound shards (multiple repos) supported in v17+ + next := b.indexFormatVersion >= 17 buffered := bufio.NewWriterSize(out, 1<<20) defer buffered.Flush() @@ -140,7 +141,7 @@ func (b *ShardBuilder) Write(out io.Writer) error { toc.branchMasks.start(w) for _, m := range b.branchMasks { - w.U64(m) + toc.branchMasks.addItem(w, m) } toc.branchMasks.end(w) diff --git a/testdata/shards/repo2_v17.00000.zoekt b/testdata/shards/repo2_v17.00000.zoekt new file mode 100644 index 0000000000000000000000000000000000000000..aeac781988cac07de74894f7779af3a46f9f69f0 GIT binary patch literal 2940 zcmZXW%WoT16voduV<&c<1WI|+Fbu_Vs1wFc9Vdo>n}oE1qzy?ZrR9-{J>w4X%(&xm zDTIJQ1Z)QCJ2oj5m6%s6178VHc5F7pgzH?_ns2Jtv&-a{r&pr3t zx!0-dUC&q2iMVRozMAMyB+RB$7T$EfvYLn=(>-N5qm&iP@s%+p zn^AI!IQkCGd9c)+G&?aYZhB42SBhhbs@Ll^L%q0{@b)Lx5`?EaDaAV@QqSd=Nn5u? 
zU^u~6JQ2yX0$RSbibeL2`*q*5o?bp?a)6 z>!cH2=U8q=JJM_0H zs&Kq^7$3S$SoazBBaYBDR+(s(=p1VsS$bfeco$4rmV{Pu3FRNhgE2(#sqN&yO*$T=`Hrd4&}V1Q(O<9 z6>*wE+o+TDAtl=!_YC_n4|JXT3&o}i#ke!n!|MsOpQKN@epQmr@pzRL?bssPiT##uQ+eyvCi?!tlcfH6Z8V=4aI!u zcb?acc<00Voe$$c+nm>T>_?tLi=mDQ6?OV9tRr-t=GcyTP^TX_9_ELx^ZY+@9+>wJ zvA2+r$??8&cGvd8j>Sl$R(UQnRVUeX2Gra02Dq!5yFkc7t&+0VctnU=P>}romlc2J8d- z!5p{;%!37RFE{}11NVbN;4pX)JOmyFOF#$9pbT)U$VY$$9sw($25f*6EguCqfpP=* z0JoBS96Sad2TuT;VEIY#6lj5`!870lcosYdo(Ctv3*bfYGI#~N3SI-RgEzpN;4SbD zI0fDZAAk=dRFPT|U(SP7mne2C;d)43;A3$jd_BeS!m-#mHaHIJRF$O&bzp!4cpz34 z!>VGQL{g_jsXrG^N8bAlHIc-PEzznv?Np58Q?6(0{sF^lSgxZMvxQ7(H*~-08Jmt= z=$N&fc|)%l-lj_nU5l1&s6{oW{US_qb|NTg#}2Tw`T3I~7BxCox6&*sf~9ulq+w zk2>YTkl)BZQdQRuud0iNqdPwL;iLlDr;D^aqweMq;NJNeM8KC>!`Caiud79;S*vB# z1-I#yc>x^z~YYkCx-)Q*38YY9}HBHM7 zh`&Wm!%pGvkh+G0Mbu@ZTIhy1h+-e(+u2mS)0;CJ?R^T0cuO0X(e2uFvy;tjD%>_`Mn{%1x8 zpRlVtaNTV=6}UOzS+E6s0g^r8$j$ueYuJ@8$9S}cy%M?OX!CCb;mDF4cdDLlFB->j zK@j~#)S-3N-G*;DRqo99De?Vq8f7@u?O>7n{xcvp1$R2h=DE#5d|Y{vF-Q6Ju3+NE^JaS zr#9)9H}(^@816ZWM){=?V^JoG^oZv4WMka&Z5?-3_{4b?<2BdC1sy)gFsF;HQB-V& N<83@$-y-bUe*m0gM)Cjv literal 0 HcmV?d00001 diff --git a/testdata/shards/repo_v17.00000.zoekt b/testdata/shards/repo_v17.00000.zoekt new file mode 100644 index 0000000000000000000000000000000000000000..3b58b7427f8de81a21cc71f86f42fc8c4a6bb0ac GIT binary patch literal 2760 zcmZXV4Qw3M5yxlG-o$sgJM&daftKy^A)7kR+OZSosHw4y!NN|AZJ-1wd3X2jeE4?v zw7UmE#>kaG0*UXYxhn`WSwl5sTxTo>r^FbAto%^C`DXPjpySmb%W#7qB^%leoD>~Z6oA0L$sYG>RjNU9?_o2|0C9$L`rtE zGR^gCmr!yOO)I$<>tT$#9hR6kNV|glY?i4_$Y0yS7R$ac~e!u~<(M6EoAL_G`K55!wwd*laMWBm@^ zU&P%7iu90%*XtM+qZl{w!2{ek46Ck&Ydcly4S zG1i~kU*1qWV`a||eylZGR=oeaC&$(-EBvs-I{5e#bxR(XPK<19=soB7qoz+M$w_A&IrO9bX}|BCdBAF`(>m~w)6YKYyrIKSC2F`DTm!BJca1*#0Yz4P~8n_MI4(;k_5FN59S74Tc|JFo}*9{d5k3SI+$ z1h0cPz@NceU@v$Z{000KyaWCQ{to^D{t5mC{tey*{{ioT_rV9?L+}y!7<>Xg1)qV> z!5835@KucRT7_@j_M%IqU(9cIaxb6g#Wz*GF*wnSCm$!dPQ^woMh$R419db|M+0>< zP=7=tZMjCbzPcxS*Vf+6L>SYIIqc-+<{{U!MR2kzR_R>dB>v&S+w^ zFUNXV_9aKIaWqlCrk_s51X05 ztI9TWL^#DN{x(KM#mvl@C%I++1m4V2MK=5(}7wef>;h9)-!mWBauAqHY zApL-CRrQ+f;7V`OZot*`d zmD!G-byoM@zk(?H==r2;XZ^r+kYuD8LB@U%7BLuR zZ&_ByO7)qvEbJaW8_n5s2v3uZ7Q#&6xBe!+b|tw_+}KFF3@^A=P8Lu;3JOrDudyed zNais!esB37NylYJ2d&7e6E`aSX|+5j z%!ywG8k_v^`1u&bEt`K` ziyM<-`{lw79GH^1X}@YKd)%LBcsY7^l?qoT#S%Fjo}%lO!mzkLNo(fR2Ho7^enO|? zP9;s}L!*5E#J*`Y93J%hOMxx;n1oN9)9Np~uDz(CWR%msS{)sm=6H)w)4>RP@BaWC C;$Is8 literal 0 HcmV?d00001
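
A note on the v16 compatibility path in read.go: legacy uint64 masks are expanded into 8 little-endian bytes (byte(mask >> (8*j))), so existing bit positions are preserved for getBit. A standalone sketch of that conversion (illustrative names, not code from the patch):

package main

import (
	"encoding/binary"
	"fmt"
)

// fromUint64 mirrors the v16-to-v17 conversion in readIndexData: the old
// uint64 mask becomes 8 little-endian bytes, so bit i stays at byte i/8,
// bit i%8.
func fromUint64(old uint64) []byte {
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, old)
	return buf
}

func main() {
	mask := fromUint64(1<<0 | 1<<5) // branches 0 and 5 in the old format
	fmt.Printf("%08b\n", mask[0])   // prints 00100001: bits 0 and 5, as getBit expects
}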