Skip to content

Commit b283201

Browse files
authored
Unrolled build for #149946
Rollup merge of #149946 - Zalathar:buckets, r=Nadrieril mir_build: Move and rename code for partitioning match candidates I've always found it confusing that this code uses the word “sort” to describe partitioning candidates into buckets, since it isn't *sorting* them in the usual sense. This PR therefore renames the relevant methods: - `sort_candidates` → `partition_candidates_into_buckets` - `sort_candidate` → `choose_bucket_for_candidate` In addition, this PR moves those methods into their own `buckets` module, updates some comments to reflect the renaming, and also makes `partition_candidates_into_buckets` return a named struct instead of a tuple. There should be no change to compiler behaviour.
2 parents 693f365 + bc11e7e commit b283201

File tree

3 files changed

+362
-344
lines changed

3 files changed

+362
-344
lines changed
Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
use std::cmp::Ordering;
2+
3+
use rustc_data_structures::fx::FxIndexMap;
4+
use rustc_middle::mir::{BinOp, Place};
5+
use rustc_middle::span_bug;
6+
use tracing::debug;
7+
8+
use crate::builder::Builder;
9+
use crate::builder::matches::test::is_switch_ty;
10+
use crate::builder::matches::{Candidate, Test, TestBranch, TestCase, TestKind};
11+
12+
/// Output of [`Builder::partition_candidates_into_buckets`].
///
/// Holds the result of partitioning a slice of candidates against a single test:
/// one bucket of candidates per test outcome, plus the part of the input slice
/// that was not placed into any bucket.
13+
pub(crate) struct PartitionedCandidates<'tcx, 'b, 'c> {
14+
/// For each possible outcome of the test, the candidates that are matched in that outcome.
///
/// Within each bucket, candidates appear in the order they were taken from the
/// front of the input slice.
15+
pub(crate) target_candidates: FxIndexMap<TestBranch<'tcx>, Vec<&'b mut Candidate<'tcx>>>,
16+
/// The remaining candidates that weren't associated with any test outcome.
///
/// This is the tail of the input slice, starting at the first candidate for
/// which no bucket could be chosen.
17+
pub(crate) remaining_candidates: &'b mut [&'c mut Candidate<'tcx>],
18+
}
19+
20+
impl<'a, 'tcx> Builder<'a, 'tcx> {
21+
/// Given a test, we partition the input candidates into several buckets.
22+
/// If a candidate matches in exactly one of the branches of `test`
23+
/// (and no other branches), we put it into the corresponding bucket.
24+
/// If it could match in more than one of the branches of `test`, the test
25+
/// doesn't usefully apply to it, and we stop partitioning candidates.
26+
///
27+
/// Importantly, we also **mutate** the branched candidates to remove match pairs
28+
/// that are entailed by the outcome of the test, and add any sub-pairs of the
29+
/// removed pairs.
30+
///
31+
/// For example:
32+
/// ```
33+
/// # let (x, y, z) = (true, true, true);
34+
/// match (x, y, z) {
35+
/// (true , _ , true ) => true, // (0)
36+
/// (false, false, _ ) => false, // (1)
37+
/// (_ , true , _ ) => true, // (2)
38+
/// (true , _ , false) => false, // (3)
39+
/// }
40+
/// # ;
41+
/// ```
42+
///
43+
/// Assume we are testing on `x`. Conceptually, there are 2 overlapping candidate sets:
44+
/// - If the outcome is that `x` is true, candidates {0, 2, 3} are possible
45+
/// - If the outcome is that `x` is false, candidates {1, 2} are possible
46+
///
47+
/// Following our algorithm:
48+
/// - Candidate 0 is bucketed into outcome `x == true`
49+
/// - Candidate 1 is bucketed into outcome `x == false`
50+
/// - Candidate 2 remains unbucketed, because testing `x` has no effect on it
51+
/// - Candidate 3 remains unbucketed, because a previous candidate (2) was unbucketed
52+
/// - This helps preserve the illusion that candidates are tested "in order"
53+
///
54+
/// The bucketed candidates are mutated to remove entailed match pairs:
55+
/// - candidate 0 becomes `[z @ true]` since we know that `x` was `true`;
56+
/// - candidate 1 becomes `[y @ false]` since we know that `x` was `false`.
57+
pub(super) fn partition_candidates_into_buckets<'b, 'c>(
58+
&mut self,
59+
match_place: Place<'tcx>,
60+
test: &Test<'tcx>,
61+
mut candidates: &'b mut [&'c mut Candidate<'tcx>],
62+
) -> PartitionedCandidates<'tcx, 'b, 'c> {
63+
// For each of the possible outcomes, collect a vector of candidates that apply if the test
64+
// has that particular outcome.
65+
let mut target_candidates: FxIndexMap<_, Vec<&mut Candidate<'_>>> = Default::default();
66+
67+
let total_candidate_count = candidates.len();
68+
69+
// Partition the candidates into the appropriate vector in `target_candidates`.
70+
// Note that at some point we may encounter a candidate where the test is not relevant;
71+
// at that point, we stop partitioning.
72+
while let Some(candidate) = candidates.first_mut() {
73+
let Some(branch) =
74+
self.choose_bucket_for_candidate(match_place, test, candidate, &target_candidates)
75+
else {
76+
break;
77+
};
78+
let (candidate, rest) = candidates.split_first_mut().unwrap();
79+
target_candidates.entry(branch).or_insert_with(Vec::new).push(candidate);
80+
candidates = rest;
81+
}
82+
83+
// At least the first candidate ought to be tested
84+
assert!(
85+
total_candidate_count > candidates.len(),
86+
"{total_candidate_count}, {candidates:#?}"
87+
);
88+
debug!("tested_candidates: {}", total_candidate_count - candidates.len());
89+
debug!("untested_candidates: {}", candidates.len());
90+
91+
PartitionedCandidates { target_candidates, remaining_candidates: candidates }
92+
}
93+
94+
/// Given that we are performing `test` against `test_place`, this function
95+
/// determines which test branch (if any) `candidate` belongs to. See
96+
/// `test_candidates` for the usage of this function. The candidate may
97+
/// be modified to update its `match_pairs`.
98+
///
99+
/// So, for example, if this candidate is `x @ Some(P0)` and the `Test` is
100+
/// a variant test, then we would modify the candidate to be `(x as
101+
/// Option).0 @ P0` and return the test branch corresponding to the variant
102+
/// `Some`.
103+
///
104+
/// However, in some cases, the test may just not be relevant to the candidate.
105+
/// For example, suppose we are testing whether `foo.x == 22`, but in one
106+
/// match arm we have `Foo { x: _, ... }`... in that case, the test for
107+
/// the value of `x` has no particular relevance to this candidate. In
108+
/// such cases, this function just returns `None` without doing anything.
109+
/// This is used by the overall `match_candidates` algorithm to structure
110+
/// the match as a whole. See `match_candidates` for more details.
111+
///
112+
/// FIXME(#29623). In some cases, we have some tricky choices to make. For
113+
/// example, if we are testing that `x == 22`, but the candidate is `x @
114+
/// 13..55`, what should we do? In the event that the test is true, we know
115+
/// that the candidate applies, but in the event of false, we don't know
116+
/// that it *doesn't* apply. For now, we return `None`, indicating that the
117+
/// test does not apply to this candidate, but it might be we can get
118+
/// tighter match code if we do something a bit different.
119+
fn choose_bucket_for_candidate(
120+
&mut self,
121+
test_place: Place<'tcx>,
122+
test: &Test<'tcx>,
123+
candidate: &mut Candidate<'tcx>,
124+
// Other candidates that have already been partitioned into a bucket for this test, if any
125+
prior_candidates: &FxIndexMap<TestBranch<'tcx>, Vec<&mut Candidate<'tcx>>>,
126+
) -> Option<TestBranch<'tcx>> {
127+
// Find the match_pair for this place (if any). At present,
128+
// afaik, there can be at most one. (In the future, if we
129+
// adopted a more general `@` operator, there might be more
130+
// than one, but it'd be very unusual to have two sides that
131+
// both require tests; you'd expect one side to be simplified
132+
// away.)
//
// If no match pair mentions `test_place`, the `?` below returns `None`
// early: the test is not relevant to this candidate.
133+
let (match_pair_index, match_pair) = candidate
134+
.match_pairs
135+
.iter()
136+
.enumerate()
137+
.find(|&(_, mp)| mp.place == Some(test_place))?;
138+
139+
// If true, the match pair is completely entailed by its corresponding test
140+
// branch, so it can be removed. If false, the match pair is _compatible_
141+
// with its test branch, but still needs a more specific test.
//
// Every arm of the match below assigns this exactly once.
142+
let fully_matched;
143+
let ret = match (&test.kind, &match_pair.test_case) {
144+
// If we are performing a variant switch, then this
145+
// informs variant patterns, but nothing else.
146+
(
147+
&TestKind::Switch { adt_def: tested_adt_def },
148+
&TestCase::Variant { adt_def, variant_index },
149+
) => {
150+
assert_eq!(adt_def, tested_adt_def);
151+
fully_matched = true;
152+
Some(TestBranch::Variant(variant_index))
153+
}
154+
155+
// If we are performing a switch over integers, then this informs integer
156+
// equality, but nothing else.
157+
//
158+
// FIXME(#29623) we could use PatKind::Range to rule
159+
// things out here, in some cases.
160+
//
161+
// FIXME(Zalathar): Is the `is_switch_ty` test unnecessary?
162+
(TestKind::SwitchInt, &TestCase::Constant { value })
163+
if is_switch_ty(match_pair.pattern_ty) =>
164+
{
165+
// An important invariant of candidate bucketing is that a candidate
166+
// must not match in multiple branches. For `SwitchInt` tests, adding
167+
// a new value might invalidate that property for range patterns that
168+
// have already been partitioned into the failure arm, so we must take care
169+
// not to add such values here.
//
// A range is treated as covering `value` unless `contains` definitively
// answers `Some(false)`; a `None` answer is handled conservatively.
170+
let is_covering_range = |test_case: &TestCase<'tcx>| {
171+
test_case.as_range().is_some_and(|range| {
172+
matches!(range.contains(value, self.tcx), None | Some(true))
173+
})
174+
};
175+
let is_conflicting_candidate = |candidate: &&mut Candidate<'tcx>| {
176+
candidate
177+
.match_pairs
178+
.iter()
179+
.any(|mp| mp.place == Some(test_place) && is_covering_range(&mp.test_case))
180+
};
181+
if prior_candidates
182+
.get(&TestBranch::Failure)
183+
.is_some_and(|candidates| candidates.iter().any(is_conflicting_candidate))
184+
{
185+
fully_matched = false;
186+
None
187+
} else {
188+
fully_matched = true;
189+
Some(TestBranch::Constant(value))
190+
}
191+
}
192+
(TestKind::SwitchInt, TestCase::Range(range)) => {
193+
// When performing a `SwitchInt` test, a range pattern can be
194+
// sorted into the failure arm if it doesn't contain _any_ of
195+
// the values being tested. (This restricts what values can be
196+
// added to the test by subsequent candidates.)
197+
fully_matched = false;
198+
let not_contained = prior_candidates
199+
.keys()
200+
.filter_map(|br| br.as_constant())
201+
.all(|val| matches!(range.contains(val, self.tcx), Some(false)));
202+
203+
not_contained.then(|| {
204+
// No switch values are contained in the pattern range,
205+
// so the pattern can be matched only if this test fails.
206+
TestBranch::Failure
207+
})
208+
}
209+
210+
(TestKind::If, TestCase::Constant { value }) => {
211+
fully_matched = true;
212+
let value = value.try_to_bool().unwrap_or_else(|| {
213+
span_bug!(test.span, "expected boolean value but got {value:?}")
214+
});
215+
Some(if value { TestBranch::Success } else { TestBranch::Failure })
216+
}
217+
218+
(
219+
&TestKind::Len { len: test_len, op: BinOp::Eq },
220+
&TestCase::Slice { len, variable_length },
221+
) => {
// the test is `$actual_len == test_len`
222+
match (test_len.cmp(&(len as u64)), variable_length) {
223+
(Ordering::Equal, false) => {
224+
// on true, min_len = len = $actual_length,
225+
// on false, len != $actual_length
226+
fully_matched = true;
227+
Some(TestBranch::Success)
228+
}
229+
(Ordering::Less, _) => {
230+
// test_len < pat_len. If $actual_len = test_len,
231+
// then $actual_len < pat_len and we don't have
232+
// enough elements.
233+
fully_matched = false;
234+
Some(TestBranch::Failure)
235+
}
236+
(Ordering::Equal | Ordering::Greater, true) => {
237+
// This can match both if $actual_len = test_len >= pat_len,
238+
// and if $actual_len > test_len. We can't advance.
239+
fully_matched = false;
240+
None
241+
}
242+
(Ordering::Greater, false) => {
243+
// test_len != pat_len, so if $actual_len = test_len, then
244+
// $actual_len != pat_len.
245+
fully_matched = false;
246+
Some(TestBranch::Failure)
247+
}
248+
}
249+
}
250+
(
251+
&TestKind::Len { len: test_len, op: BinOp::Ge },
252+
&TestCase::Slice { len, variable_length },
253+
) => {
254+
// the test is `$actual_len >= test_len`
255+
match (test_len.cmp(&(len as u64)), variable_length) {
256+
(Ordering::Equal, true) => {
257+
// $actual_len >= test_len = pat_len,
258+
// so we can match.
259+
fully_matched = true;
260+
Some(TestBranch::Success)
261+
}
262+
(Ordering::Less, _) | (Ordering::Equal, false) => {
263+
// test_len <= pat_len. If $actual_len < test_len,
264+
// then it is also < pat_len, so the test passing is
265+
// necessary (but insufficient).
266+
fully_matched = false;
267+
Some(TestBranch::Success)
268+
}
269+
(Ordering::Greater, false) => {
270+
// test_len > pat_len. If $actual_len >= test_len > pat_len,
271+
// then we know we won't have a match.
272+
fully_matched = false;
273+
Some(TestBranch::Failure)
274+
}
275+
(Ordering::Greater, true) => {
276+
// test_len > pat_len, so the test is stricter than the
277+
// pattern's minimum length. This can still go both ways.
278+
fully_matched = false;
279+
None
280+
}
281+
}
282+
}
283+
284+
(TestKind::Range(test), TestCase::Range(pat)) => {
285+
if test == pat {
286+
fully_matched = true;
287+
Some(TestBranch::Success)
288+
} else {
289+
fully_matched = false;
290+
// If the testing range does not overlap with pattern range,
291+
// the pattern can be matched only if this test fails.
// (If `overlaps` yields `None`, the `?` returns `None` from this function.)
292+
if !test.overlaps(pat, self.tcx)? { Some(TestBranch::Failure) } else { None }
293+
}
294+
}
295+
(TestKind::Range(range), &TestCase::Constant { value }) => {
296+
fully_matched = false;
// (If `contains` yields `None`, the `?` returns `None` from this function.)
297+
if !range.contains(value, self.tcx)? {
298+
// `value` is not contained in the testing range,
299+
// so `value` can be matched only if this test fails.
300+
Some(TestBranch::Failure)
301+
} else {
302+
None
303+
}
304+
}
305+
306+
(TestKind::Eq { value: test_val, .. }, TestCase::Constant { value: case_val }) => {
307+
if test_val == case_val {
308+
fully_matched = true;
309+
Some(TestBranch::Success)
310+
} else {
311+
fully_matched = false;
312+
Some(TestBranch::Failure)
313+
}
314+
}
315+
316+
(TestKind::Deref { temp: test_temp, .. }, TestCase::Deref { temp, .. })
317+
if test_temp == temp =>
318+
{
319+
fully_matched = true;
320+
Some(TestBranch::Success)
321+
}
322+
323+
(TestKind::Never, _) => {
324+
fully_matched = true;
325+
Some(TestBranch::Success)
326+
}
327+
328+
// Any other combination of test kind and test case: the test is not
// relevant to this candidate. The test kinds are listed explicitly
// rather than using a single wildcard.
(
329+
TestKind::Switch { .. }
330+
| TestKind::SwitchInt { .. }
331+
| TestKind::If
332+
| TestKind::Len { .. }
333+
| TestKind::Range { .. }
334+
| TestKind::Eq { .. }
335+
| TestKind::Deref { .. },
336+
_,
337+
) => {
338+
fully_matched = false;
339+
None
340+
}
341+
};
342+
343+
if fully_matched {
344+
// Replace the match pair by its sub-pairs.
345+
let match_pair = candidate.match_pairs.remove(match_pair_index);
346+
candidate.match_pairs.extend(match_pair.subpairs);
347+
// Move or-patterns to the end.
348+
candidate.sort_match_pairs();
349+
}
350+
351+
ret
352+
}
353+
}

0 commit comments

Comments
 (0)