rapx/analysis/core/alias_analysis/mfp/interproc.rs
1/// Interprocedural analysis utilities
2use rustc_hir::def_id::DefId;
3use rustc_middle::mir::{Body, TerminatorKind};
4use rustc_mir_dataflow::ResultsCursor;
5use std::collections::HashSet;
6
7use super::super::{AliasPair, FnAliasPairs};
8use super::intraproc::{FnAliasAnalyzer, PlaceId};
9
10/// Extract root local and field path from a PlaceId
11/// Returns (root_local, field_path)
12fn extract_fields(place: &PlaceId) -> (usize, Vec<usize>) {
13 let mut fields = Vec::new();
14 let mut current = place;
15
16 // Traverse from leaf to root, collecting field indices
17 loop {
18 match current {
19 PlaceId::Local(idx) => return (*idx, fields),
20 PlaceId::Field { base, field_idx } => {
21 fields.push(*field_idx);
22 current = base;
23 }
24 }
25 }
26}
27
28/// Extract only the field path from a PlaceId (excluding the root local)
29/// Returns field indices in order from root to leaf
30/// Example: _15.0.1 returns [0, 1]
31fn extract_field_path(place: &PlaceId) -> Vec<usize> {
32 let mut fields = Vec::new();
33 let mut current = place;
34
35 // Traverse from leaf to root
36 loop {
37 match current {
38 PlaceId::Local(_) => {
39 fields.reverse(); // Reverse to get root-to-leaf order
40 return fields;
41 }
42 PlaceId::Field { base, field_idx } => {
43 fields.push(*field_idx);
44 current = base;
45 }
46 }
47 }
48}
49
50/// Check if one field path is a proper prefix of another
51/// Returns true if `prefix` is a prefix of `full`
52///
53/// Examples:
54/// - is_field_prefix([], [0]) = true (parent-child)
55/// - is_field_prefix([0], [0, 1]) = true (parent-child)
56/// - is_field_prefix([0], [1]) = false (siblings, not prefix)
57/// - is_field_prefix([0, 1], [0, 2]) = false (different fields at same level)
58fn is_field_prefix(prefix: &[usize], full: &[usize]) -> bool {
59 if prefix.len() > full.len() {
60 return false;
61 }
62 prefix == &full[..prefix.len()]
63}
64
65/// Extract function summary from analysis results
66///
67/// This function uses transitive closure to identify all aliases related to
68/// function parameters and return values, including those connected through
69/// temporary variables.
70pub fn extract_summary<'tcx>(
71 results: &mut ResultsCursor<'_, 'tcx, FnAliasAnalyzer<'tcx>>,
72 body: &Body<'tcx>,
73 _def_id: DefId,
74) -> FnAliasPairs {
75 let arg_count = body.arg_count;
76 let mut summary = FnAliasPairs::new(arg_count);
77
78 // Find all Return terminators and extract aliases at those points
79 for (block_id, block_data) in body.basic_blocks.iter_enumerated() {
80 if let Some(terminator) = &block_data.terminator {
81 if matches!(terminator.kind, TerminatorKind::Return) {
82 // Seek to the end of this block (before the terminator)
83 results.seek_to_block_end(block_id);
84
85 let state = results.get();
86 let analyzer = results.analysis();
87 let place_info = analyzer.place_info();
88
89 // Step 1: Collect all alias pairs at this return point
90 // We need to examine all aliases, not just those directly involving args/return
91 let mut all_pairs = Vec::new();
92 for (idx_i, idx_j) in state.get_all_alias_pairs() {
93 if let (Some(place_i), Some(place_j)) =
94 (place_info.get_place(idx_i), place_info.get_place(idx_j))
95 {
96 all_pairs.push((idx_i, idx_j, place_i, place_j));
97 }
98 }
99
100 // Step 2: Initialize relevant_places with all places whose root is a parameter or return value
101 // Index 0 is return value, indices 1..=arg_count are arguments
102 let mut relevant_places = HashSet::new();
103 for idx in 0..place_info.num_places() {
104 if let Some(place) = place_info.get_place(idx) {
105 if place.root_local() <= arg_count {
106 relevant_places.insert(idx);
107 }
108 }
109 }
110
111 // Step 3: Expand relevant_places using transitive closure
112 // If a place aliases to a relevant place, it becomes relevant too
113 // This captures aliases that flow through temporary variables
114 // Example: _0 aliases _2, and _2 aliases _1.0, then _2 is relevant
115 const MAX_ITERATIONS: usize = 10;
116 for _iteration in 0..MAX_ITERATIONS {
117 let mut changed = false;
118 for &(idx_i, idx_j, _, _) in &all_pairs {
119 // If one place is relevant and the other isn't, make the other relevant
120 if relevant_places.contains(&idx_i) && !relevant_places.contains(&idx_j) {
121 relevant_places.insert(idx_j);
122 changed = true;
123 }
124 if relevant_places.contains(&idx_j) && !relevant_places.contains(&idx_i) {
125 relevant_places.insert(idx_i);
126 changed = true;
127 }
128 }
129 // Converged when no more places become relevant
130 if !changed {
131 break;
132 }
133 }
134
135 // Step 4: Collect candidate aliases from two sources
136 //
137 // We collect all potential aliases between parameters and return values into
138 // a candidate set, which will be filtered and compressed in Step 5.
139 //
140 // Two sources of candidates:
141 // 4.1: Aliases derived through bridge variables (indirect connections)
142 // 4.2: Aliases directly present in all_pairs (direct connections)
143 //
144 let mut candidate_aliases = std::collections::HashSet::new();
145
146 // Step 4.1: Derive aliases through bridge variables
147 //
148 // Problem: When analyzing complex nested structures (e.g., Vec::from_raw_parts_in),
149 // we may have aliases like:
150 // - _0.0.0.0 ≈ _15 (deeply nested field aliases with temporary variable)
151 // - _1 ≈ _15.0 (parameter aliases with field of temporary variable)
152 //
153 // Both sides are connected through the bridge variable _15, but:
154 // 1. _15 is a temporary (root > arg_count), so it won't appear in final summary
155 // 2. There's no direct alias between _0.x and _1 in all_pairs
156 // 3. Union-Find guarantees transitivity within all_pairs, but the connection
157 // happens at different structural levels (parent vs child fields)
158 //
159 // Solution: For alias pairs connected through the same bridge variable where one
160 // side references the bridge's parent and the other references a child field,
161 // we derive a direct alias between the parameter/return places, compressing
162 // deep field paths to maintain precision while avoiding temporary variables.
163 //
164 // Example derivation:
165 // Input: _0.0.0.0 ≈ _15 (place_i ≈ place_j)
166 // _1 ≈ _15.0 (place_k ≈ place_m)
167 // Check: place_j.root (15) == place_m.root (15) ✓ (same bridge)
168 // _15 is prefix of _15.0 ✓ (parent-child relation)
169 // Output: _0.0 ≈ _1 (compress _0's deep fields to first level)
170 //
171 for &(idx_i, idx_j, place_i, place_j) in &all_pairs {
172 if !relevant_places.contains(&idx_i) || !relevant_places.contains(&idx_j) {
173 continue;
174 }
175
176 for &(idx_k, idx_m, place_k, place_m) in &all_pairs {
177 if !relevant_places.contains(&idx_k) || !relevant_places.contains(&idx_m) {
178 continue;
179 }
180
181 // Check if both alias pairs share the same bridge variable (by root)
182 if place_j.root_local() != place_m.root_local() {
183 continue;
184 }
185
186 // Extract field paths for prefix checking
187 let j_fields = extract_field_path(place_j);
188 let m_fields = extract_field_path(place_m);
189
190 // Verify parent-child relationship (proper prefix, not sibling fields)
191 // E.g., [] is prefix of [0], but [0] is NOT prefix of [1]
192 if !is_field_prefix(&j_fields, &m_fields)
193 && !is_field_prefix(&m_fields, &j_fields)
194 {
195 continue;
196 }
197
198 // Extract roots and fields from both sides
199 let (root_i, mut fields_i) = extract_fields(place_i);
200 let (root_k, fields_k) = extract_fields(place_k);
201
202 // Only derive aliases between parameters and return value
203 if root_i > arg_count || root_k > arg_count {
204 continue;
205 }
206
207 // Compress deep field paths to first level only
208 // This balances precision (distinguishing struct fields) with
209 // generality (avoiding over-specification)
210 fields_i.reverse(); // extract_fields returns reversed order
211 if fields_i.len() > 1 {
212 fields_i = vec![fields_i[0]];
213 }
214
215 let mut fields_k_reversed = fields_k.clone();
216 fields_k_reversed.reverse();
217
218 // Create the derived alias and add to candidates
219 let mut alias = AliasPair::new(root_i, root_k);
220 alias.lhs_fields = fields_i;
221 alias.rhs_fields = fields_k_reversed;
222
223 candidate_aliases.insert(alias);
224 }
225 }
226
227 // Step 4.2: Direct aliases from all_pairs
228 //
229 // Collect aliases that are directly present between parameters and return values
230 // in the Union-Find analysis results. These may overlap with derived aliases from
231 // Step 4.1, but duplicates are automatically handled by the HashSet.
232 //
233 for (idx_i, idx_j, place_i, place_j) in all_pairs {
234 if relevant_places.contains(&idx_i) && relevant_places.contains(&idx_j) {
235 let (root_i, mut fields_i) = extract_fields(place_i);
236 let (root_j, mut fields_j) = extract_fields(place_j);
237
238 // Only include if both roots are parameters or return value
239 if root_i <= arg_count && root_j <= arg_count {
240 // Fields were collected from leaf to root, reverse them
241 fields_i.reverse();
242 fields_j.reverse();
243
244 // Create field-sensitive AliasPair and add to candidates
245 let mut alias = AliasPair::new(root_i, root_j);
246 alias.lhs_fields = fields_i;
247 alias.rhs_fields = fields_j;
248 candidate_aliases.insert(alias);
249 }
250 }
251 }
252
253 // Step 5: Normalize and filter redundant aliases
254 //
255 // Step 5.1: Normalize alias order for consistent comparison
256 //
257 // Alias relationships are symmetric: (A, B) ≡ (B, A)
258 // However, without normalization, (3, 0.0.0) and (0.0, 3) would be
259 // treated as having different local pairs and cannot be compared for
260 // subsumption relationships.
261 //
262 // We normalize by ensuring left_local <= right_local, which allows
263 // the filter to recognize that (0.0, 3) subsumes (0.0.0, 3) even if
264 // the latter was originally collected as (3, 0.0.0).
265 //
266 let normalized_aliases: std::collections::HashSet<_> = candidate_aliases
267 .iter()
268 .map(|alias| {
269 let mut normalized = alias.clone();
270 if normalized.left_local > normalized.right_local {
271 normalized.swap(); // Swap both locals and fields
272 }
273 normalized
274 })
275 .collect();
276
277 // Step 5.2: Filter redundant aliases
278 //
279 // The normalized candidate set may contain redundant aliases such as:
280 // - Self-aliases: (0, 0), (1, 1)
281 // - Prefix-subsumed aliases: (0.0, 1) subsumes (0.0.0, 1), (0.0.0.0, 1)
282 // - Reversed but equivalent: (3, 0.0.0) and (0.0, 3) after normalization
283 //
284 // We filter these to produce a minimal, canonical summary that retains
285 // precision while avoiding over-specification.
286 //
287 let filtered_aliases = filter_redundant_aliases(normalized_aliases);
288 for alias in filtered_aliases.clone() {
289 summary.add_alias(alias);
290 }
291 }
292 }
293 }
294
295 summary
296}
297
298/// Join two function summaries
299pub fn join_fn_summaries(summary1: &FnAliasPairs, summary2: &FnAliasPairs) -> FnAliasPairs {
300 // TODO: Implement summary join operation
301 let mut result = FnAliasPairs::new(summary1.arg_size());
302
303 // Add all aliases from both summaries
304 for alias in summary1.aliases() {
305 result.add_alias(alias.clone());
306 }
307
308 for alias in summary2.aliases() {
309 result.add_alias(alias.clone());
310 }
311
312 result
313}
314
315/// Filter redundant aliases from a candidate set
316///
317/// This function removes several types of redundant aliases to produce a minimal,
318/// canonical summary:
319///
320/// 1. **Self-aliases**: Aliases where both sides refer to the same place
321/// Example: (0, 0), (1, 1)
322///
323/// 2. **Prefix-subsumed aliases**: When one alias is strictly more general than another
324///
325/// Examples of redundancy patterns eliminated:
326///
327/// a) Single-side prefix (left more specific):
328/// - (1.0, 2) subsumes (1.0.0.0, 2) → keep (1.0, 2), remove (1.0.0.0, 2)
329/// - Rationale: If field 1.0 aliases with 2, then 1.0.0.0 (a sub-field) also aliases
330///
331/// b) Single-side prefix (right more specific):
332/// - (1.0, 2) subsumes (1.0, 2.0) → keep (1.0, 2), remove (1.0, 2.0)
333/// - Rationale: If 1.0 aliases with field 2, then 1.0 aliases with 2.0 (a sub-field)
334///
335/// c) Double-side prefix (synchronized fields):
336/// - (0, 1) subsumes (0.1, 1.1) → keep (0, 1), remove (0.1, 1.1)
337/// - Rationale: If 0 and 1 alias, their corresponding fields also alias
338/// - This handles cases like struct field synchronization
339///
340/// The filtering strategy:
341/// - For each pair of aliases with the same (left_local, right_local):
342/// - Check bidirectional subsumption (a subsumes b, or b subsumes a)
343/// - Keep the more general alias (shorter total field depth)
344/// - This ensures order-independence and catches all redundancy patterns
345///
346/// Returns: A filtered HashSet containing only non-redundant aliases
347fn filter_redundant_aliases(
348 aliases: std::collections::HashSet<AliasPair>,
349) -> std::collections::HashSet<AliasPair> {
350 use std::collections::HashSet;
351
352 let aliases_vec: Vec<_> = aliases.iter().cloned().collect();
353 let mut to_remove = HashSet::new();
354
355 for i in 0..aliases_vec.len() {
356 let alias_a = &aliases_vec[i];
357
358 // Skip if already marked for removal
359 if to_remove.contains(alias_a) {
360 continue;
361 }
362
363 // Rule 1: Remove self-aliases (same local)
364 // Example: (0, 0), (1, 1)
365 if alias_a.left_local == alias_a.right_local {
366 to_remove.insert(alias_a.clone());
367 continue;
368 }
369
370 // Rule 2: Check for prefix subsumption with other aliases
371 for j in 0..aliases_vec.len() {
372 if i == j {
373 continue;
374 }
375 let alias_b = &aliases_vec[j];
376
377 // Skip if already marked for removal
378 if to_remove.contains(alias_b) {
379 continue;
380 }
381
382 // Only compare aliases with the same locals
383 // Note: After normalization, this ensures left_local <= right_local for both,
384 // so (3, 0.0.0) normalized to (0.0.0, 3) can be compared with (0.0, 3)
385 if alias_a.left_local != alias_b.left_local
386 || alias_a.right_local != alias_b.right_local
387 {
388 continue;
389 }
390
391 // Check bidirectional subsumption relationships
392 //
393 // Subsumption means one alias is more general (has shorter field paths)
394 // than another. We check both directions:
395 // - Does a subsume b? (a is more general)
396 // - Does b subsume a? (b is more general)
397
398 // Check if a's fields subsume b's fields
399 let lhs_a_subsumes_b = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields)
400 || alias_a.lhs_fields == alias_b.lhs_fields;
401 let rhs_a_subsumes_b = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields)
402 || alias_a.rhs_fields == alias_b.rhs_fields;
403
404 // Check if b's fields subsume a's fields
405 let lhs_b_subsumes_a = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields)
406 || alias_b.lhs_fields == alias_a.lhs_fields;
407 let rhs_b_subsumes_a = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields)
408 || alias_b.rhs_fields == alias_a.rhs_fields;
409
410 // Determine if there's a subsumption relationship
411 let lhs_a_strict = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields);
412 let rhs_a_strict = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields);
413 let lhs_b_strict = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields);
414 let rhs_b_strict = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields);
415
416 // a subsumes b if both sides subsume and at least one is strict
417 let a_subsumes_b =
418 lhs_a_subsumes_b && rhs_a_subsumes_b && (lhs_a_strict || rhs_a_strict);
419 // b subsumes a if both sides subsume and at least one is strict
420 let b_subsumes_a =
421 lhs_b_subsumes_a && rhs_b_subsumes_a && (lhs_b_strict || rhs_b_strict);
422
423 // If there's a subsumption relationship, mark the more specific one for removal
424 if a_subsumes_b || b_subsumes_a {
425 // Compare specificity: more general alias has lower total field depth
426 let spec_a = alias_specificity(alias_a);
427 let spec_b = alias_specificity(alias_b);
428
429 if spec_a < spec_b {
430 // a is more general, mark b for removal
431 to_remove.insert(alias_b.clone());
432 } else if spec_b < spec_a {
433 // b is more general, mark a for removal
434 to_remove.insert(alias_a.clone());
435 break; // Stop comparing alias_a since it's marked for removal
436 }
437 // If equal specificity, keep both (shouldn't happen with strict prefix)
438 }
439 }
440 }
441
442 // Return the result with marked aliases removed
443 aliases.difference(&to_remove).cloned().collect()
444}
445
446/// Calculate the specificity of an alias based on total field depth
447///
448/// Specificity is measured as the sum of field depths on both sides.
449/// Lower values indicate more general (less specific) aliases.
450///
451/// Examples:
452/// - (0, 1): specificity = 0 + 0 = 0 (most general)
453/// - (0.1, 1): specificity = 1 + 0 = 1
454/// - (0.1, 1.1): specificity = 1 + 1 = 2
455/// - (1.0.0.0, 2): specificity = 3 + 0 = 3
456///
457/// When two aliases have a subsumption relationship, we keep the one
458/// with lower specificity (more general).
459fn alias_specificity(alias: &AliasPair) -> usize {
460 alias.lhs_fields.len() + alias.rhs_fields.len()
461}
462
463/// Check if `prefix` is a strict prefix of `full`
464///
465/// A strict prefix means:
466/// - `prefix` is shorter than `full`
467/// - All elements of `prefix` match the corresponding elements in `full`
468///
469/// Examples:
470/// - is_strict_prefix([0], [0, 1]) = true
471/// - is_strict_prefix([], [0]) = true
472/// - is_strict_prefix([0], [0]) = false (equal, not strict)
473/// - is_strict_prefix([0], [1]) = false (not a prefix)
474fn is_strict_prefix(prefix: &[usize], full: &[usize]) -> bool {
475 prefix.len() < full.len() && prefix == &full[..prefix.len()]
476}