rapx/analysis/core/alias_analysis/mfp/
interproc.rs

1/// Interprocedural analysis utilities
2use rustc_hir::def_id::DefId;
3use rustc_middle::mir::{Body, TerminatorKind};
4use rustc_mir_dataflow::ResultsCursor;
5use std::collections::HashSet;
6
7use super::super::{AliasPair, FnAliasPairs};
8use super::intraproc::{FnAliasAnalyzer, PlaceId};
9
10/// Extract root local and field path from a PlaceId
11/// Returns (root_local, field_path)
12fn extract_fields(place: &PlaceId) -> (usize, Vec<usize>) {
13    let mut fields = Vec::new();
14    let mut current = place;
15
16    // Traverse from leaf to root, collecting field indices
17    loop {
18        match current {
19            PlaceId::Local(idx) => return (*idx, fields),
20            PlaceId::Field { base, field_idx } => {
21                fields.push(*field_idx);
22                current = base;
23            }
24        }
25    }
26}
27
28/// Extract only the field path from a PlaceId (excluding the root local)
29/// Returns field indices in order from root to leaf
30/// Example: _15.0.1 returns [0, 1]
31fn extract_field_path(place: &PlaceId) -> Vec<usize> {
32    let mut fields = Vec::new();
33    let mut current = place;
34
35    // Traverse from leaf to root
36    loop {
37        match current {
38            PlaceId::Local(_) => {
39                fields.reverse(); // Reverse to get root-to-leaf order
40                return fields;
41            }
42            PlaceId::Field { base, field_idx } => {
43                fields.push(*field_idx);
44                current = base;
45            }
46        }
47    }
48}
49
50/// Check if one field path is a proper prefix of another
51/// Returns true if `prefix` is a prefix of `full`
52///
53/// Examples:
54///   - is_field_prefix([], [0]) = true  (parent-child)
55///   - is_field_prefix([0], [0, 1]) = true  (parent-child)
56///   - is_field_prefix([0], [1]) = false  (siblings, not prefix)
57///   - is_field_prefix([0, 1], [0, 2]) = false  (different fields at same level)
58fn is_field_prefix(prefix: &[usize], full: &[usize]) -> bool {
59    if prefix.len() > full.len() {
60        return false;
61    }
62    prefix == &full[..prefix.len()]
63}
64
65/// Extract function summary from analysis results
66///
67/// This function uses transitive closure to identify all aliases related to
68/// function parameters and return values, including those connected through
69/// temporary variables.
70pub fn extract_summary<'tcx>(
71    results: &mut ResultsCursor<'_, 'tcx, FnAliasAnalyzer<'tcx>>,
72    body: &Body<'tcx>,
73    _def_id: DefId,
74) -> FnAliasPairs {
75    let arg_count = body.arg_count;
76    let mut summary = FnAliasPairs::new(arg_count);
77
78    // Find all Return terminators and extract aliases at those points
79    for (block_id, block_data) in body.basic_blocks.iter_enumerated() {
80        if let Some(terminator) = &block_data.terminator {
81            if matches!(terminator.kind, TerminatorKind::Return) {
82                // Seek to the end of this block (before the terminator)
83                results.seek_to_block_end(block_id);
84
85                let state = results.get();
86                let analyzer = results.analysis();
87                let place_info = analyzer.place_info();
88
89                // Step 1: Collect all alias pairs at this return point
90                // We need to examine all aliases, not just those directly involving args/return
91                let mut all_pairs = Vec::new();
92                for (idx_i, idx_j) in state.get_all_alias_pairs() {
93                    if let (Some(place_i), Some(place_j)) =
94                        (place_info.get_place(idx_i), place_info.get_place(idx_j))
95                    {
96                        all_pairs.push((idx_i, idx_j, place_i, place_j));
97                    }
98                }
99
100                // Step 2: Initialize relevant_places with all places whose root is a parameter or return value
101                // Index 0 is return value, indices 1..=arg_count are arguments
102                let mut relevant_places = HashSet::new();
103                for idx in 0..place_info.num_places() {
104                    if let Some(place) = place_info.get_place(idx) {
105                        if place.root_local() <= arg_count {
106                            relevant_places.insert(idx);
107                        }
108                    }
109                }
110
111                // Step 3: Expand relevant_places using transitive closure
112                // If a place aliases to a relevant place, it becomes relevant too
113                // This captures aliases that flow through temporary variables
114                // Example: _0 aliases _2, and _2 aliases _1.0, then _2 is relevant
115                const MAX_ITERATIONS: usize = 10;
116                for _iteration in 0..MAX_ITERATIONS {
117                    let mut changed = false;
118                    for &(idx_i, idx_j, _, _) in &all_pairs {
119                        // If one place is relevant and the other isn't, make the other relevant
120                        if relevant_places.contains(&idx_i) && !relevant_places.contains(&idx_j) {
121                            relevant_places.insert(idx_j);
122                            changed = true;
123                        }
124                        if relevant_places.contains(&idx_j) && !relevant_places.contains(&idx_i) {
125                            relevant_places.insert(idx_i);
126                            changed = true;
127                        }
128                    }
129                    // Converged when no more places become relevant
130                    if !changed {
131                        break;
132                    }
133                }
134
135                // Step 4: Collect candidate aliases from two sources
136                //
137                // We collect all potential aliases between parameters and return values into
138                // a candidate set, which will be filtered and compressed in Step 5.
139                //
140                // Two sources of candidates:
141                //   4.1: Aliases derived through bridge variables (indirect connections)
142                //   4.2: Aliases directly present in all_pairs (direct connections)
143                //
144                let mut candidate_aliases = std::collections::HashSet::new();
145
146                // Step 4.1: Derive aliases through bridge variables
147                //
148                // Problem: When analyzing complex nested structures (e.g., Vec::from_raw_parts_in),
149                // we may have aliases like:
150                //   - _0.0.0.0 ≈ _15 (deeply nested field aliases with temporary variable)
151                //   - _1 ≈ _15.0 (parameter aliases with field of temporary variable)
152                //
153                // Both sides are connected through the bridge variable _15, but:
154                //   1. _15 is a temporary (root > arg_count), so it won't appear in final summary
155                //   2. There's no direct alias between _0.x and _1 in all_pairs
156                //   3. Union-Find guarantees transitivity within all_pairs, but the connection
157                //      happens at different structural levels (parent vs child fields)
158                //
159                // Solution: For alias pairs connected through the same bridge variable where one
160                // side references the bridge's parent and the other references a child field,
161                // we derive a direct alias between the parameter/return places, compressing
162                // deep field paths to maintain precision while avoiding temporary variables.
163                //
164                // Example derivation:
165                //   Input:  _0.0.0.0 ≈ _15 (place_i ≈ place_j)
166                //           _1 ≈ _15.0 (place_k ≈ place_m)
167                //   Check:  place_j.root (15) == place_m.root (15) ✓ (same bridge)
168                //           _15 is prefix of _15.0 ✓ (parent-child relation)
169                //   Output: _0.0 ≈ _1 (compress _0's deep fields to first level)
170                //
171                for &(idx_i, idx_j, place_i, place_j) in &all_pairs {
172                    if !relevant_places.contains(&idx_i) || !relevant_places.contains(&idx_j) {
173                        continue;
174                    }
175
176                    for &(idx_k, idx_m, place_k, place_m) in &all_pairs {
177                        if !relevant_places.contains(&idx_k) || !relevant_places.contains(&idx_m) {
178                            continue;
179                        }
180
181                        // Check if both alias pairs share the same bridge variable (by root)
182                        if place_j.root_local() != place_m.root_local() {
183                            continue;
184                        }
185
186                        // Extract field paths for prefix checking
187                        let j_fields = extract_field_path(place_j);
188                        let m_fields = extract_field_path(place_m);
189
190                        // Verify parent-child relationship (proper prefix, not sibling fields)
191                        // E.g., [] is prefix of [0], but [0] is NOT prefix of [1]
192                        if !is_field_prefix(&j_fields, &m_fields)
193                            && !is_field_prefix(&m_fields, &j_fields)
194                        {
195                            continue;
196                        }
197
198                        // Extract roots and fields from both sides
199                        let (root_i, mut fields_i) = extract_fields(place_i);
200                        let (root_k, fields_k) = extract_fields(place_k);
201
202                        // Only derive aliases between parameters and return value
203                        if root_i > arg_count || root_k > arg_count {
204                            continue;
205                        }
206
207                        // Compress deep field paths to first level only
208                        // This balances precision (distinguishing struct fields) with
209                        // generality (avoiding over-specification)
210                        fields_i.reverse(); // extract_fields returns reversed order
211                        if fields_i.len() > 1 {
212                            fields_i = vec![fields_i[0]];
213                        }
214
215                        let mut fields_k_reversed = fields_k.clone();
216                        fields_k_reversed.reverse();
217
218                        // Create the derived alias and add to candidates
219                        let mut alias = AliasPair::new(root_i, root_k);
220                        alias.lhs_fields = fields_i;
221                        alias.rhs_fields = fields_k_reversed;
222
223                        candidate_aliases.insert(alias);
224                    }
225                }
226
227                // Step 4.2: Direct aliases from all_pairs
228                //
229                // Collect aliases that are directly present between parameters and return values
230                // in the Union-Find analysis results. These may overlap with derived aliases from
231                // Step 4.1, but duplicates are automatically handled by the HashSet.
232                //
233                for (idx_i, idx_j, place_i, place_j) in all_pairs {
234                    if relevant_places.contains(&idx_i) && relevant_places.contains(&idx_j) {
235                        let (root_i, mut fields_i) = extract_fields(place_i);
236                        let (root_j, mut fields_j) = extract_fields(place_j);
237
238                        // Only include if both roots are parameters or return value
239                        if root_i <= arg_count && root_j <= arg_count {
240                            // Fields were collected from leaf to root, reverse them
241                            fields_i.reverse();
242                            fields_j.reverse();
243
244                            // Create field-sensitive AliasPair and add to candidates
245                            let mut alias = AliasPair::new(root_i, root_j);
246                            alias.lhs_fields = fields_i;
247                            alias.rhs_fields = fields_j;
248                            candidate_aliases.insert(alias);
249                        }
250                    }
251                }
252
253                // Step 5: Normalize and filter redundant aliases
254                //
255                // Step 5.1: Normalize alias order for consistent comparison
256                //
257                // Alias relationships are symmetric: (A, B) ≡ (B, A)
258                // However, without normalization, (3, 0.0.0) and (0.0, 3) would be
259                // treated as having different local pairs and cannot be compared for
260                // subsumption relationships.
261                //
262                // We normalize by ensuring left_local <= right_local, which allows
263                // the filter to recognize that (0.0, 3) subsumes (0.0.0, 3) even if
264                // the latter was originally collected as (3, 0.0.0).
265                //
266                let normalized_aliases: std::collections::HashSet<_> = candidate_aliases
267                    .iter()
268                    .map(|alias| {
269                        let mut normalized = alias.clone();
270                        if normalized.left_local > normalized.right_local {
271                            normalized.swap(); // Swap both locals and fields
272                        }
273                        normalized
274                    })
275                    .collect();
276
277                // Step 5.2: Filter redundant aliases
278                //
279                // The normalized candidate set may contain redundant aliases such as:
280                //   - Self-aliases: (0, 0), (1, 1)
281                //   - Prefix-subsumed aliases: (0.0, 1) subsumes (0.0.0, 1), (0.0.0.0, 1)
282                //   - Reversed but equivalent: (3, 0.0.0) and (0.0, 3) after normalization
283                //
284                // We filter these to produce a minimal, canonical summary that retains
285                // precision while avoiding over-specification.
286                //
287                let filtered_aliases = filter_redundant_aliases(normalized_aliases);
288                for alias in filtered_aliases.clone() {
289                    summary.add_alias(alias);
290                }
291            }
292        }
293    }
294
295    summary
296}
297
298/// Join two function summaries
299pub fn join_fn_summaries(summary1: &FnAliasPairs, summary2: &FnAliasPairs) -> FnAliasPairs {
300    // TODO: Implement summary join operation
301    let mut result = FnAliasPairs::new(summary1.arg_size());
302
303    // Add all aliases from both summaries
304    for alias in summary1.aliases() {
305        result.add_alias(alias.clone());
306    }
307
308    for alias in summary2.aliases() {
309        result.add_alias(alias.clone());
310    }
311
312    result
313}
314
315/// Filter redundant aliases from a candidate set
316///
317/// This function removes several types of redundant aliases to produce a minimal,
318/// canonical summary:
319///
320/// 1. **Self-aliases**: Aliases where both sides refer to the same place
321///    Example: (0, 0), (1, 1)
322///
323/// 2. **Prefix-subsumed aliases**: When one alias is strictly more general than another
324///
325///    Examples of redundancy patterns eliminated:
326///
327///    a) Single-side prefix (left more specific):
328///       - (1.0, 2) subsumes (1.0.0.0, 2) → keep (1.0, 2), remove (1.0.0.0, 2)
329///       - Rationale: If field 1.0 aliases with 2, then 1.0.0.0 (a sub-field) also aliases
330///
331///    b) Single-side prefix (right more specific):
332///       - (1.0, 2) subsumes (1.0, 2.0) → keep (1.0, 2), remove (1.0, 2.0)
333///       - Rationale: If 1.0 aliases with field 2, then 1.0 aliases with 2.0 (a sub-field)
334///
335///    c) Double-side prefix (synchronized fields):
336///       - (0, 1) subsumes (0.1, 1.1) → keep (0, 1), remove (0.1, 1.1)
337///       - Rationale: If 0 and 1 alias, their corresponding fields also alias
338///       - This handles cases like struct field synchronization
339///
340/// The filtering strategy:
341///   - For each pair of aliases with the same (left_local, right_local):
342///     - Check bidirectional subsumption (a subsumes b, or b subsumes a)
343///     - Keep the more general alias (shorter total field depth)
344///     - This ensures order-independence and catches all redundancy patterns
345///
346/// Returns: A filtered HashSet containing only non-redundant aliases
347fn filter_redundant_aliases(
348    aliases: std::collections::HashSet<AliasPair>,
349) -> std::collections::HashSet<AliasPair> {
350    use std::collections::HashSet;
351
352    let aliases_vec: Vec<_> = aliases.iter().cloned().collect();
353    let mut to_remove = HashSet::new();
354
355    for i in 0..aliases_vec.len() {
356        let alias_a = &aliases_vec[i];
357
358        // Skip if already marked for removal
359        if to_remove.contains(alias_a) {
360            continue;
361        }
362
363        // Rule 1: Remove self-aliases (same local)
364        // Example: (0, 0), (1, 1)
365        if alias_a.left_local == alias_a.right_local {
366            to_remove.insert(alias_a.clone());
367            continue;
368        }
369
370        // Rule 2: Check for prefix subsumption with other aliases
371        for j in 0..aliases_vec.len() {
372            if i == j {
373                continue;
374            }
375            let alias_b = &aliases_vec[j];
376
377            // Skip if already marked for removal
378            if to_remove.contains(alias_b) {
379                continue;
380            }
381
382            // Only compare aliases with the same locals
383            // Note: After normalization, this ensures left_local <= right_local for both,
384            // so (3, 0.0.0) normalized to (0.0.0, 3) can be compared with (0.0, 3)
385            if alias_a.left_local != alias_b.left_local
386                || alias_a.right_local != alias_b.right_local
387            {
388                continue;
389            }
390
391            // Check bidirectional subsumption relationships
392            //
393            // Subsumption means one alias is more general (has shorter field paths)
394            // than another. We check both directions:
395            //   - Does a subsume b? (a is more general)
396            //   - Does b subsume a? (b is more general)
397
398            // Check if a's fields subsume b's fields
399            let lhs_a_subsumes_b = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields)
400                || alias_a.lhs_fields == alias_b.lhs_fields;
401            let rhs_a_subsumes_b = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields)
402                || alias_a.rhs_fields == alias_b.rhs_fields;
403
404            // Check if b's fields subsume a's fields
405            let lhs_b_subsumes_a = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields)
406                || alias_b.lhs_fields == alias_a.lhs_fields;
407            let rhs_b_subsumes_a = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields)
408                || alias_b.rhs_fields == alias_a.rhs_fields;
409
410            // Determine if there's a subsumption relationship
411            let lhs_a_strict = is_strict_prefix(&alias_a.lhs_fields, &alias_b.lhs_fields);
412            let rhs_a_strict = is_strict_prefix(&alias_a.rhs_fields, &alias_b.rhs_fields);
413            let lhs_b_strict = is_strict_prefix(&alias_b.lhs_fields, &alias_a.lhs_fields);
414            let rhs_b_strict = is_strict_prefix(&alias_b.rhs_fields, &alias_a.rhs_fields);
415
416            // a subsumes b if both sides subsume and at least one is strict
417            let a_subsumes_b =
418                lhs_a_subsumes_b && rhs_a_subsumes_b && (lhs_a_strict || rhs_a_strict);
419            // b subsumes a if both sides subsume and at least one is strict
420            let b_subsumes_a =
421                lhs_b_subsumes_a && rhs_b_subsumes_a && (lhs_b_strict || rhs_b_strict);
422
423            // If there's a subsumption relationship, mark the more specific one for removal
424            if a_subsumes_b || b_subsumes_a {
425                // Compare specificity: more general alias has lower total field depth
426                let spec_a = alias_specificity(alias_a);
427                let spec_b = alias_specificity(alias_b);
428
429                if spec_a < spec_b {
430                    // a is more general, mark b for removal
431                    to_remove.insert(alias_b.clone());
432                } else if spec_b < spec_a {
433                    // b is more general, mark a for removal
434                    to_remove.insert(alias_a.clone());
435                    break; // Stop comparing alias_a since it's marked for removal
436                }
437                // If equal specificity, keep both (shouldn't happen with strict prefix)
438            }
439        }
440    }
441
442    // Return the result with marked aliases removed
443    aliases.difference(&to_remove).cloned().collect()
444}
445
446/// Calculate the specificity of an alias based on total field depth
447///
448/// Specificity is measured as the sum of field depths on both sides.
449/// Lower values indicate more general (less specific) aliases.
450///
451/// Examples:
452///   - (0, 1): specificity = 0 + 0 = 0 (most general)
453///   - (0.1, 1): specificity = 1 + 0 = 1
454///   - (0.1, 1.1): specificity = 1 + 1 = 2
455///   - (1.0.0.0, 2): specificity = 3 + 0 = 3
456///
457/// When two aliases have a subsumption relationship, we keep the one
458/// with lower specificity (more general).
459fn alias_specificity(alias: &AliasPair) -> usize {
460    alias.lhs_fields.len() + alias.rhs_fields.len()
461}
462
463/// Check if `prefix` is a strict prefix of `full`
464///
465/// A strict prefix means:
466///   - `prefix` is shorter than `full`
467///   - All elements of `prefix` match the corresponding elements in `full`
468///
469/// Examples:
470///   - is_strict_prefix([0], [0, 1]) = true
471///   - is_strict_prefix([], [0]) = true
472///   - is_strict_prefix([0], [0]) = false (equal, not strict)
473///   - is_strict_prefix([0], [1]) = false (not a prefix)
474fn is_strict_prefix(prefix: &[usize], full: &[usize]) -> bool {
475    prefix.len() < full.len() && prefix == &full[..prefix.len()]
476}