Skip to content

Commit e52a475

Browse files
author
Anton Kutuzov
committed
Improve overlap percent estimation for low-density ranges in StatisticRange
1 parent 81f8713 commit e52a475

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

core/trino-main/src/main/java/io/trino/cost/StatisticRange.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public class StatisticRange
3232
{
3333
private static final double INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.25;
3434
private static final double INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.5;
35+
private static final double DENSITY_HEURISTIC_THRESHOLD = 1e-3;
3536

3637
// TODO unify field and method names with SymbolStatsEstimate
3738
/**
@@ -122,7 +123,15 @@ public double overlapPercentWith(StatisticRange other)
122123
if (isInfinite(length()) && isFinite(lengthOfIntersect)) {
123124
return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR;
124125
}
126+
125127
if (lengthOfIntersect > 0) {
128+
double thisDensity = this.distinctValues / length();
129+
double otherDensity = other.distinctValues / other.length();
130+
double minDensity = minExcludeNaN(thisDensity, otherDensity);
131+
132+
if (!isNaN(minDensity) && minDensity < DENSITY_HEURISTIC_THRESHOLD && isFinite(length())) {
133+
return minExcludeNaN(this.distinctValues, other.distinctValues) / this.distinctValues;
134+
}
126135
return lengthOfIntersect / length();
127136
}
128137

core/trino-main/src/test/java/io/trino/cost/TestStatisticRange.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static java.lang.Double.NaN;
2121
import static java.lang.Double.POSITIVE_INFINITY;
2222
import static org.assertj.core.api.Assertions.assertThat;
23+
import static org.assertj.core.api.AssertionsForClassTypes.within;
2324

2425
public class TestStatisticRange
2526
{
@@ -59,6 +60,55 @@ public void testOverlapPercentWith()
5960
assertOverlap(unboundedRange(0.0), unboundedRange(0), 0);
6061
}
6162

63+
/**
 * Checks the overlap of a very wide range holding few distinct values against a narrow filter range.
 * The expected value is 4/14: the smaller of the two distinct-value counts divided by the
 * wide range's distinct-value count.
 * NOTE(review): assumes range(...) takes (low, high, distinctValues); the helper is not visible here, confirm.
 */
@Test
64+
public void testLowDensityOverlap()
65+
{
66+
StatisticRange sparseRange = range(1, 3662098119.0, 14);
67+
StatisticRange filterRange = range(1, 4, 4);
68+
69+
// A plain length ratio (3 / ~3.66e9) would be close to zero; the test expects the distinct-value ratio instead.
double expectedOverlap = 4.0 / 14.0;
70+
assertOverlap(sparseRange, filterRange, expectedOverlap);
71+
}
72+
73+
/**
 * Exercises a range whose distinct-value count divided by its length is 10 / 10000,
 * the same value as DENSITY_HEURISTIC_THRESHOLD (1e-3) in StatisticRange.
 * The assertion only bounds the result to [0.01, 0.5]; it does not pin an exact value.
 * NOTE(review): assumes range(...) takes (low, high, distinctValues); confirm against the helper.
 */
@Test
74+
public void testDensityThresholdBoundary()
75+
{
76+
StatisticRange boundaryRange = range(0, 10000, 10);
77+
StatisticRange smallFilter = range(0, 100, 5);
78+
79+
double overlap = boundaryRange.overlapPercentWith(smallFilter);
80+
// Both candidate answers fall inside this interval: the length ratio 100 / 10000 = 0.01
// and the distinct-value ratio 5 / 10 = 0.5, so the check does not tell the two branches apart.
assertThat(overlap).isBetween(0.01, 0.5);
81+
}
82+
83+
/**
 * Checks the overlap of a range with many distinct values relative to its length against a
 * filter range that lies inside it. The expected value 0.1 equals the intersection length (10)
 * divided by the dense range's length (100).
 * NOTE(review): assumes range(...) takes (low, high, distinctValues); confirm against the helper.
 */
@Test
84+
public void testHighDensityOverlap()
85+
{
86+
StatisticRange denseRange = range(0, 100, 50);
87+
StatisticRange filterRange = range(20, 30, 5);
88+
89+
assertOverlap(denseRange, filterRange, 0.1);
90+
}
91+
92+
/**
 * Checks the overlap of a range spanning 0..1e9 with 10 distinct values against a filter range
 * with 5 distinct values. The result is expected to be close to 5/10, within an absolute
 * tolerance of 0.1.
 * NOTE(review): assumes range(...) takes (low, high, distinctValues); confirm against the helper.
 */
@Test
93+
public void testVeryLowDensity()
94+
{
95+
StatisticRange verySparse = range(0, 1e9, 10);
96+
StatisticRange filterRange = range(100, 200, 5);
97+
98+
double expected = 5.0 / 10.0;
99+
double actual = verySparse.overlapPercentWith(filterRange);
100+
// Tolerance of 0.1 accepts anything in [0.4, 0.6].
assertThat(actual).isCloseTo(expected, within(0.1));
101+
}
102+
103+
/**
 * Checks that a range built with a distinct-value count of zero yields an overlap of 0.
 * NOTE(review): the low-density branch shown in the overlapPercentWith diff divides by
 * this.distinctValues; with zero that would be 0/0 = NaN. This expectation presumably relies on
 * an earlier guard in overlapPercentWith that is outside the visible hunk; confirm.
 * NOTE(review): assumes range(...) takes (low, high, distinctValues); confirm against the helper.
 */
@Test
104+
public void testDensityWithZeroDistinctValues()
105+
{
106+
StatisticRange zeroDistinct = range(0, 1000, 0);
107+
StatisticRange filterRange = range(100, 200, 5);
108+
109+
assertOverlap(zeroDistinct, filterRange, 0);
110+
}
111+
62112
@Test
63113
public void testIntersect()
64114
{

0 commit comments

Comments
 (0)