Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,29 @@ func (ac *arrayContainer) rank(x uint16) int {
return -answer - 1
}

// getCardinalityInRange returns the number of values stored in the container
// that lie in the half-open range [start, end).
//
// The bounds are uint rather than uint16 so that end can be 1<<16, one past
// the largest value a uint16 can represent. An empty or inverted range
// (start >= end) and a start above MaxUint16 both yield 0.
func (ac *arrayContainer) getCardinalityInRange(start, end uint) int {
	// A start above MaxUint16 cannot match any uint16 element, and converting
	// it to uint16 below would silently wrap it around to a small value.
	if start >= end || start > MaxUint16 {
		return 0
	}
	// Index of the first element >= start. A negative binarySearch result is
	// decoded as the insertion point, -(result) - 1.
	loIdx := binarySearch(ac.content, uint16(start))
	if loIdx < 0 {
		loIdx = -loIdx - 1
	}
	// end does not fit in uint16 (it may be as large as 1<<16), so every
	// element from loIdx onward is included.
	if end > MaxUint16 {
		return len(ac.content) - loIdx
	}
	// Index of the first element >= end, i.e. one past the last included one.
	hiIdx := binarySearch(ac.content, uint16(end))
	if hiIdx < 0 {
		hiIdx = -hiIdx - 1
	}
	return hiIdx - loIdx
}

// selectInt returns the element stored at position x of ac.content,
// widened to int.
func (ac *arrayContainer) selectInt(x uint16) int {
	value := ac.content[x]
	return int(value)
}
Expand Down
52 changes: 52 additions & 0 deletions benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1304,3 +1304,55 @@ func BenchmarkAndNot(b *testing.B) {
})
}
}

// BenchmarkCardinalityInRange times CardinalityInRange against the same count
// obtained from two Rank calls, over several bitmap sizes and range widths.
// Sub-benchmarks are named containers=<n>/rangeSpan=<k>/<method>, where n is
// the number of containers in the bitmap and k the number of containers the
// queried range spans.
func BenchmarkCardinalityInRange(b *testing.B) {
	const valuesPerContainer = 100

	for _, numContainers := range []int{10, 100, 1000} {
		// One container per high-16-bit key; each holds the low values
		// 0, 10, 20, ..., 990.
		rb := NewBitmap()
		for key := 0; key < numContainers; key++ {
			for j := uint32(0); j < valuesPerContainer; j++ {
				rb.Add(uint32(key)<<16 + j*10)
			}
		}

		for _, span := range []int{1, 10, 100, 1000} {
			if span > numContainers {
				continue
			}

			// Centre the range on the middle key so that containers exist on
			// both sides of it whenever the span is narrower than the bitmap.
			mid := numContainers / 2
			firstKey := mid - span/2
			pastLastKey := mid + (span+1)/2
			rangeStart := uint64(uint32(firstKey) << 16)
			rangeEnd := uint64(uint32(pastLastKey) << 16)

			label := fmt.Sprintf("containers=%d/rangeSpan=%d", numContainers, span)

			b.Run(label+"/RankViaTwoRanks", func(b *testing.B) {
				b.ResetTimer()
				for n := 0; n < b.N; n++ {
					count := rb.Rank(uint32(rangeEnd - 1))
					if rangeStart > 0 {
						count -= rb.Rank(uint32(rangeStart - 1))
					}
					// Consume the result so the calls above are not optimized away.
					if count == 0 && numContainers == 0 {
						b.Fatal("unexpected")
					}
				}
			})

			b.Run(label+"/CardinalityInRange", func(b *testing.B) {
				b.ResetTimer()
				for n := 0; n < b.N; n++ {
					count := rb.CardinalityInRange(rangeStart, rangeEnd)
					// Consume the result so the call above is not optimized away.
					if count == 0 && numContainers == 0 {
						b.Fatal("unexpected")
					}
				}
			})
		}
	}
}
73 changes: 73 additions & 0 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,79 @@ func (rb *Bitmap) Rank(x uint32) uint64 {
return size
}

// CardinalityInRange returns the number of integers stored in the bitmap that
// lie in the half-open range [start, end).
//
// The bounds are uint64 so that end can be 1<<32, which makes the range cover
// the largest uint32 value. Larger values of end are clamped to 1<<32. An
// empty or inverted range, and any range whose start is at or above 1<<32,
// yields 0.
//
// For a non-empty range the result equals
// Rank(uint32(end-1)) - Rank(uint32(start-1)) when start > 0, and
// Rank(uint32(end-1)) when start == 0. Unlike that formulation, only the
// containers whose keys fall within the range are visited: two key lookups
// locate the boundary containers, which are counted partially, and every
// container strictly between them contributes its full cardinality.
func (rb *Bitmap) CardinalityInRange(start, end uint64) uint64 {
	// Clamp before validating. Otherwise a start >= 1<<32 would pass the
	// emptiness check and then be truncated by the uint32 conversions below,
	// turning an out-of-domain query into a count of real values.
	if end > MaxUint32+1 {
		end = MaxUint32 + 1
	}
	if start >= end {
		return 0
	}

	first := uint32(start)
	last := uint32(end - 1) // last value included in the range
	hbStart, hbEnd := highbits(first), highbits(last)
	lo := uint(lowbits(first))
	hi := uint(lowbits(last)) + 1 // exclusive bound within the last container

	// Index of the first container whose key is >= hbStart. A negative result
	// from getIndex is decoded as the insertion point, -(result) - 1.
	idx := rb.highlowcontainer.getIndex(hbStart)
	if idx < 0 {
		idx = -idx - 1
	}
	if idx >= rb.highlowcontainer.size() {
		// Every container key is below the range.
		return 0
	}
	startPresent := rb.highlowcontainer.getKeyAtIndex(idx) == hbStart

	// The whole range falls inside a single container key.
	if hbStart == hbEnd {
		if !startPresent {
			return 0
		}
		return uint64(rb.highlowcontainer.getContainerAtIndex(idx).getCardinalityInRange(lo, hi))
	}

	var result uint64

	// First container: only its values at or above lo are counted.
	if startPresent {
		result += uint64(rb.highlowcontainer.getContainerAtIndex(idx).getCardinalityInRange(lo, 1<<16))
		idx++
	}

	// Containers in [idx, fullEnd) lie entirely inside the range. When the
	// container for hbEnd exists it is excluded here and counted partially
	// below; otherwise fullEnd is the insertion point for hbEnd.
	endIdx := rb.highlowcontainer.getIndex(hbEnd)
	fullEnd := endIdx
	if endIdx < 0 {
		fullEnd = -endIdx - 1
	}
	for i := idx; i < fullEnd; i++ {
		result += uint64(rb.highlowcontainer.getContainerAtIndex(i).getCardinality())
	}

	// Last container: only its values below hi are counted.
	if endIdx >= 0 {
		result += uint64(rb.highlowcontainer.getContainerAtIndex(endIdx).getCardinalityInRange(0, hi))
	}

	return result
}

// Select returns the xth integer in the bitmap. If you pass 0, you get
// the smallest element. Note that this function differs in convention from
// the Rank function which returns 1 on the smallest value.
Expand Down
146 changes: 146 additions & 0 deletions roaring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -877,6 +877,152 @@ func TestBitmapRank2(t *testing.T) {
assert.EqualValues(t, 32, rank)
}

// TestCardinalityInRangeEquivalence checks CardinalityInRange(start, end)
// against the count derived from Rank: Rank(end-1) - Rank(start-1), or just
// Rank(end-1) when start is 0. Bitmaps are built by adding every gap-th value
// in [0, N] for power-of-two N and gap.
func TestCardinalityInRangeEquivalence(t *testing.T) {
	for N := uint32(1); N <= 1048576; N *= 2 {
		t.Run("N="+strconv.Itoa(int(N)), func(t *testing.T) {
			// Endpoints combined pairwise into [start, end) ranges; they
			// depend only on N, so they are built once per sub-test.
			points := []uint64{0, 1, 63, 64, 65, 100, 1000, uint64(N / 2), uint64(N), uint64(N + 1), 1 << 16, (1 << 16) + 1}

			for gap := uint32(1); gap <= 65536; gap *= 2 {
				rb := NewBitmap()
				for x := uint32(0); x <= N; x += gap {
					rb.Add(x)
				}

				for _, s := range points {
					for _, e := range points {
						if s >= e {
							continue
						}
						// Reference count from Rank; the second call is
						// skipped at s == 0, where s-1 would underflow.
						want := rb.Rank(uint32(e - 1))
						if s != 0 {
							want -= rb.Rank(uint32(s - 1))
						}
						if got := rb.CardinalityInRange(s, e); got != want {
							t.Errorf("CardinalityInRange(%d, %d) = %d, want %d (N=%d, gap=%d)",
								s, e, got, want, N, gap)
						}
					}
				}
			}
		})
	}
}

// TestCardinalityInRangeEmpty checks that CardinalityInRange reports 0 for
// empty and inverted ranges and for ranges that contain no stored value.
func TestCardinalityInRangeEmpty(t *testing.T) {
	rb := NewBitmap()
	rb.AddRange(100, 200)

	zeroRanges := []struct{ start, end uint64 }{
		{50, 50},   // empty range
		{100, 100}, // empty range
		{200, 100}, // inverted range
		{300, 400}, // no values in range
		{0, 100},   // no values before the range
	}
	for _, r := range zeroRanges {
		assert.EqualValues(t, 0, rb.CardinalityInRange(r.start, r.end))
	}
}

// TestCardinalityInRangeSingleContainer checks ranges whose bounds and stored
// values all share high bits 0.
func TestCardinalityInRangeSingleContainer(t *testing.T) {
	// Stored values: 0, 3, 6, 9, ..., 297.
	rb := NewBitmap()
	for v := uint32(0); v < 300; v += 3 {
		rb.Add(v)
	}

	cases := []struct {
		start, end uint64
		want       int
	}{
		{0, 300, 100}, // all values
		{0, 1, 1},     // just 0
		{0, 3, 1},     // just 0
		{0, 4, 2},     // 0 and 3
		{3, 4, 1},     // just 3
		{3, 6, 1},     // just 3
		{3, 7, 2},     // 3 and 6
	}
	for _, c := range cases {
		assert.EqualValues(t, c.want, rb.CardinalityInRange(c.start, c.end))
	}
}

// TestCardinalityInRangeMultiContainer checks ranges that span several
// high-16-bit keys, including a key with no stored values.
func TestCardinalityInRangeMultiContainer(t *testing.T) {
	// 100 consecutive values under each of keys 0, 1 and 3; key 2 stays empty:
	//   key 0: 0..99
	//   key 1: 65536..65635
	//   key 3: 196608..196707
	rb := NewBitmap()
	for _, base := range []uint32{0, 65536, 196608} {
		for i := uint32(0); i < 100; i++ {
			rb.Add(base + i)
		}
	}

	cases := []struct {
		start, end uint64
		want       int
	}{
		{0, 200000, 300},             // entire range
		{0, 65536, 100},              // just key 0
		{65536, 131072, 100},         // just key 1
		{0, 131072, 200},             // keys 0 and 1
		{50, 196658, 50 + 100 + 50},  // partial key 0 + full key 1 + partial key 3
		{131072, 196608, 0},          // range inside the empty key 2
		{65536, 196708, 100 + 100},   // range spanning the empty key 2
	}
	for _, c := range cases {
		assert.EqualValues(t, c.want, rb.CardinalityInRange(c.start, c.end))
	}
}

// TestCardinalityInRangeRunOptimized checks CardinalityInRange on a bitmap
// holding two consecutive runs after RunOptimize has been called on it.
func TestCardinalityInRangeRunOptimized(t *testing.T) {
	rb := NewBitmap()
	rb.AddRange(1000, 5000)   // 4000 values under key 0
	rb.AddRange(70000, 80000) // 10000 values under key 1
	rb.RunOptimize()

	cases := []struct {
		start, end uint64
		want       int
	}{
		{1000, 5000, 4000},
		{70000, 80000, 10000},
		{1000, 1100, 100},
		{0, 100000, 4000 + 10000},
		{5000, 70000, 0}, // gap between runs
		{1000, 80000, 4000 + 10000},
		{2000, 75000, 3000 + 5000}, // partial into both
	}
	for _, c := range cases {
		assert.EqualValues(t, c.want, rb.CardinalityInRange(c.start, c.end))
	}
}

// TestCardinalityInRangeBitmapContainer checks CardinalityInRange on 8192
// values sharing one high-16-bit key, which is more than the 4096 values
// needed for a bitmap container.
func TestCardinalityInRangeBitmapContainer(t *testing.T) {
	// Stored values: the even numbers 0, 2, 4, ..., 16382.
	rb := NewBitmap()
	for half := uint32(0); half < 8192; half++ {
		rb.Add(2 * half)
	}

	cases := []struct {
		start, end uint64
		want       int
	}{
		{0, 16384, 8192},
		{0, 1, 1},      // just 0
		{0, 2, 1},      // just 0
		{0, 3, 2},      // 0, 2
		{0, 100, 50},   // 0, 2, 4, ..., 98
		{100, 200, 50}, // 100, 102, ..., 198
		{1, 2, 0},      // no even values in [1, 2)
		{1, 3, 1},      // just 2
	}
	for _, c := range cases {
		assert.EqualValues(t, c.want, rb.CardinalityInRange(c.start, c.end))
	}
}

// TestCardinalityInRangeFullUint32Range checks ranges whose end is 1<<32, the
// bound needed for the range to include the largest uint32 value.
func TestCardinalityInRangeFullUint32Range(t *testing.T) {
	rb := NewBitmap()
	// Smallest value, largest value (MaxUint32) and the mid-point.
	for _, v := range []uint32{0, 0xFFFFFFFF, 0x80000000} {
		rb.Add(v)
	}

	cases := []struct {
		start, end uint64
		want       int
	}{
		{0, 1 << 32, 3},
		{0, 1, 1},
		{0xFFFFFFFF, 1 << 32, 1},
		{0x80000000, 1 << 32, 2},
	}
	for _, c := range cases {
		assert.EqualValues(t, c.want, rb.CardinalityInRange(c.start, c.end))
	}
}

func TestBitmapRank(t *testing.T) {
for N := uint32(1); N <= 1048576; N *= 2 {
t.Run("rank tests"+strconv.Itoa(int(N)), func(t *testing.T) {
Expand Down
5 changes: 5 additions & 0 deletions roaringarray.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ type container interface {
// smaller or equal to x. rank(infinity) would be getCardinality().
rank(uint16) int

// getCardinalityInRange returns the number of integers that are
// within the half-open range [start, end). For a non-empty range it
// is equivalent to rank(end-1) - rank(start-1) when start > 0, and to
// rank(end-1) when start == 0, but may be faster.
getCardinalityInRange(start, end uint) int

iadd(x uint16) bool // inplace, returns true if x was new.
iaddReturnMinimized(uint16) container // may change return type to minimize storage.

Expand Down
Loading
Loading