rapx/verify/
path.rs

1//! Path extraction for verification targets.
2//!
3//! This module builds finite paths from a function CFG to each unsafe callsite.
4//! Loops are kept finite by treating a loop as a single step through one of its
5//! exits when the target callsite is outside the loop.  If the target callsite is
6//! inside a loop, the path starts at the loop header and reaches the callsite
7//! within one loop iteration.
8
9use std::fmt;
10
11use rustc_data_structures::fx::{FxHashMap, FxHashSet};
12use rustc_hir::def_id::DefId;
13use rustc_middle::{mir::BasicBlock, ty::TyCtxt};
14
15use crate::utils::scc::Scc;
16
17use super::helpers::{CFG, Callsite, CallsiteLocation};
18
19/// Extracts loop-aware paths for one function body.
20pub struct PathExtractor<'tcx> {
21    cfg: CFG,
22    callsites: Vec<Callsite<'tcx>>,
23    loops: Vec<LoopInfo>,
24    block_to_loop: FxHashMap<BasicBlock, LoopId>,
25    paths: FxHashMap<CallsiteLocation, Vec<Path>>,
26}
27
28impl<'tcx> PathExtractor<'tcx> {
29    /// Create a path extractor for `def_id` and the callsites found in that body.
30    pub fn new(tcx: TyCtxt<'tcx>, def_id: DefId, callsites: Vec<Callsite<'tcx>>) -> Self {
31        Self {
32            cfg: CFG::new(tcx, def_id),
33            callsites,
34            loops: Vec::new(),
35            block_to_loop: FxHashMap::default(),
36            paths: FxHashMap::default(),
37        }
38    }
39
40    /// Run loop detection and path extraction, then return the collected result.
41    pub fn run(mut self) -> PathResult<'tcx> {
42        self.find_loops();
43        self.find_paths();
44        PathResult {
45            callsites: self.callsites,
46            loops: self.loops,
47            paths: self.paths,
48        }
49    }
50
51    /// Detect loops in the function CFG and store their block-to-loop map.
52    fn find_loops(&mut self) {
53        let (loops, block_to_loop) = find_loops(&self.cfg);
54        self.loops = loops;
55        self.block_to_loop = block_to_loop;
56    }
57
58    /// Extract paths for every callsite owned by this extractor.
59    fn find_paths(&mut self) {
60        let by_block = self.callsites_by_block();
61        for index in 0..self.callsites.len() {
62            let callsite = self.callsites[index].clone();
63            let paths = self.find_paths_for_callsite(&callsite, &by_block);
64            self.paths.insert(callsite.location(), paths);
65        }
66    }
67
68    /// Build a map from callsite basic blocks to their stable locations.
69    fn callsites_by_block(&self) -> FxHashMap<BasicBlock, CallsiteLocation> {
70        self.callsites
71            .iter()
72            .map(|callsite| (callsite.block, callsite.location()))
73            .collect()
74    }
75
76    /// Extract paths for one callsite according to whether it is inside a loop.
77    fn find_paths_for_callsite(
78        &self,
79        callsite: &Callsite<'tcx>,
80        by_block: &FxHashMap<BasicBlock, CallsiteLocation>,
81    ) -> Vec<Path> {
82        let target = callsite.location();
83        if let Some(loop_id) = self.block_to_loop.get(&callsite.block).copied() {
84            self.find_loop_paths(loop_id, target, callsite.block)
85        } else {
86            self.find_entry_paths(target, callsite.block, by_block)
87        }
88    }
89
90    /// Enumerate finite paths from function entry to a callsite outside loops.
91    ///
92    /// The search does not expand loop bodies.  When a successor enters a loop
93    /// that does not contain the target callsite, the path records one loop-exit
94    /// step and continues from the exit destination.
95    fn find_entry_paths(
96        &self,
97        target: CallsiteLocation,
98        target_block: BasicBlock,
99        by_block: &FxHashMap<BasicBlock, CallsiteLocation>,
100    ) -> Vec<Path> {
101        const PATH_LIMIT: usize = 1024;
102        let mut results = Vec::new();
103        let mut stack = vec![PathStep::Block(self.cfg.entry)];
104        let mut visited = FxHashSet::default();
105        visited.insert(self.cfg.entry);
106        self.dfs_entry_paths(
107            self.cfg.entry,
108            target,
109            target_block,
110            by_block,
111            &mut visited,
112            &mut stack,
113            &mut results,
114            PATH_LIMIT,
115        );
116        results
117    }
118
119    /// Search from the current block to an outside-loop target callsite.
120    ///
121    /// Normal blocks are recorded directly.  Entering a loop records a
122    /// `PathStep::LoopExit` for each loop exit rather than visiting the loop
123    /// body, which keeps the produced paths finite.
124    fn dfs_entry_paths(
125        &self,
126        current: BasicBlock,
127        target: CallsiteLocation,
128        target_block: BasicBlock,
129        by_block: &FxHashMap<BasicBlock, CallsiteLocation>,
130        visited: &mut FxHashSet<BasicBlock>,
131        stack: &mut Vec<PathStep>,
132        results: &mut Vec<Path>,
133        limit: usize,
134    ) {
135        if results.len() >= limit {
136            return;
137        }
138
139        if current == target_block {
140            stack.push(PathStep::Callsite(target));
141            results.push(Path {
142                target,
143                start: PathStart::FunctionEntry,
144                steps: stack.clone(),
145            });
146            stack.pop();
147            return;
148        }
149
150        for &next in self.cfg.successors(current) {
151            if results.len() >= limit {
152                break;
153            }
154
155            if let Some(loop_id) = self.block_to_loop.get(&next).copied() {
156                if self.block_to_loop.get(&target_block).copied() == Some(loop_id) {
157                    continue;
158                }
159                self.follow_loop_exits(
160                    loop_id,
161                    target,
162                    target_block,
163                    by_block,
164                    visited,
165                    stack,
166                    results,
167                    limit,
168                );
169                continue;
170            }
171
172            if visited.contains(&next) || has_other_callsite(next, target, by_block) {
173                continue;
174            }
175
176            stack.push(PathStep::Block(next));
177            visited.insert(next);
178            self.dfs_entry_paths(
179                next,
180                target,
181                target_block,
182                by_block,
183                visited,
184                stack,
185                results,
186                limit,
187            );
188            visited.remove(&next);
189            stack.pop();
190        }
191    }
192
193    /// Continue an entry path through every exit of a loop.
194    ///
195    /// This routine is used only when the target callsite is outside the loop.
196    /// It records the selected exit and resumes the DFS at the exit destination.
197    fn follow_loop_exits(
198        &self,
199        loop_id: LoopId,
200        target: CallsiteLocation,
201        target_block: BasicBlock,
202        by_block: &FxHashMap<BasicBlock, CallsiteLocation>,
203        visited: &mut FxHashSet<BasicBlock>,
204        stack: &mut Vec<PathStep>,
205        results: &mut Vec<Path>,
206        limit: usize,
207    ) {
208        let loop_info = &self.loops[loop_id.index()];
209        for exit in &loop_info.exits {
210            if results.len() >= limit {
211                break;
212            }
213            if visited.contains(&exit.to) {
214                continue;
215            }
216
217            stack.push(PathStep::LoopExit {
218                loop_id,
219                from: exit.from,
220                to: exit.to,
221            });
222            stack.push(PathStep::Block(exit.to));
223            visited.insert(exit.to);
224            self.dfs_entry_paths(
225                exit.to,
226                target,
227                target_block,
228                by_block,
229                visited,
230                stack,
231                results,
232                limit,
233            );
234            visited.remove(&exit.to);
235            stack.pop();
236            stack.pop();
237        }
238    }
239
240    /// Enumerate paths from a loop header to a callsite inside that loop.
241    ///
242    /// These paths represent one possible iteration reaching the callsite.  The
243    /// number of earlier iterations is intentionally not encoded in the path;
244    /// later verification stages should use loop facts to describe the header
245    /// state.
246    fn find_loop_paths(
247        &self,
248        loop_id: LoopId,
249        target: CallsiteLocation,
250        target_block: BasicBlock,
251    ) -> Vec<Path> {
252        const PATH_LIMIT: usize = 1024;
253        let loop_info = &self.loops[loop_id.index()];
254        let loop_blocks: FxHashSet<BasicBlock> = loop_info.blocks.iter().copied().collect();
255        let mut results = Vec::new();
256        let mut stack = vec![PathStep::Block(loop_info.header)];
257        let mut visited = FxHashSet::default();
258        visited.insert(loop_info.header);
259        self.dfs_loop_paths(
260            loop_id,
261            loop_info.header,
262            target,
263            target_block,
264            &loop_blocks,
265            &mut visited,
266            &mut stack,
267            &mut results,
268            PATH_LIMIT,
269        );
270        results
271    }
272
273    /// Search inside one loop from its header to an internal callsite.
274    ///
275    /// Successors that leave the loop are ignored because outside-loop paths are
276    /// represented by loop exits.  This search only describes how an iteration
277    /// reaches a callsite located in the loop body.
278    fn dfs_loop_paths(
279        &self,
280        loop_id: LoopId,
281        current: BasicBlock,
282        target: CallsiteLocation,
283        target_block: BasicBlock,
284        loop_blocks: &FxHashSet<BasicBlock>,
285        visited: &mut FxHashSet<BasicBlock>,
286        stack: &mut Vec<PathStep>,
287        results: &mut Vec<Path>,
288        limit: usize,
289    ) {
290        if results.len() >= limit {
291            return;
292        }
293
294        if current == target_block {
295            stack.push(PathStep::Callsite(target));
296            results.push(Path {
297                target,
298                start: PathStart::LoopHeader { loop_id },
299                steps: stack.clone(),
300            });
301            stack.pop();
302            return;
303        }
304
305        for &next in self.cfg.successors(current) {
306            if !loop_blocks.contains(&next) || visited.contains(&next) {
307                continue;
308            }
309            stack.push(PathStep::Block(next));
310            visited.insert(next);
311            self.dfs_loop_paths(
312                loop_id,
313                next,
314                target,
315                target_block,
316                loop_blocks,
317                visited,
318                stack,
319                results,
320                limit,
321            );
322            visited.remove(&next);
323            stack.pop();
324        }
325    }
326}
327
328/// Result produced by a completed path extraction run.
329pub struct PathResult<'tcx> {
330    callsites: Vec<Callsite<'tcx>>,
331    loops: Vec<LoopInfo>,
332    paths: FxHashMap<CallsiteLocation, Vec<Path>>,
333}
334
335impl<'tcx> PathResult<'tcx> {
336    /// Return all callsites used during path extraction.
337    pub fn callsites(&self) -> &[Callsite<'tcx>] {
338        &self.callsites
339    }
340
341    /// Return all loops detected in the function CFG.
342    pub fn loops(&self) -> &[LoopInfo] {
343        &self.loops
344    }
345
346    /// Return the paths extracted for one callsite location.
347    pub fn paths_for(&self, location: CallsiteLocation) -> &[Path] {
348        self.paths.get(&location).map(Vec::as_slice).unwrap_or(&[])
349    }
350}
351
352/// A finite verification path reaching one callsite.
353#[derive(Clone, Debug)]
354pub struct Path {
355    /// Callsite reached by this path.
356    pub target: CallsiteLocation,
357    /// Where the path starts.
358    pub start: PathStart,
359    /// Ordered steps from the start point to the target callsite.
360    pub steps: Vec<PathStep>,
361}
362
363impl Path {
364    /// Render this path as a compact string for diagnostics.
365    pub fn describe(&self) -> String {
366        self.steps
367            .iter()
368            .map(|step| match step {
369                PathStep::Block(bb) => format!("bb{}", bb.as_usize()),
370                PathStep::LoopExit { loop_id, from, to } => {
371                    format!(
372                        "Loop#{}.exit(bb{} -> bb{})",
373                        loop_id.index(),
374                        from.as_usize(),
375                        to.as_usize()
376                    )
377                }
378                PathStep::Callsite(location) => {
379                    format!("callsite(bb{})", location.block.as_usize())
380                }
381            })
382            .collect::<Vec<_>>()
383            .join(" -> ")
384    }
385}
386
387/// Start point for a finite verification path.
388#[derive(Clone, Copy, Debug, Eq, PartialEq)]
389pub enum PathStart {
390    /// The path starts at the function entry block.
391    FunctionEntry,
392    /// The path starts at the header of a loop containing the target callsite.
393    LoopHeader { loop_id: LoopId },
394}
395
396/// One step in a finite verification path.
397#[derive(Clone, Debug)]
398pub enum PathStep {
399    /// A normal MIR basic block.
400    Block(BasicBlock),
401    /// An abstract step that enters a loop and leaves through one exit edge.
402    LoopExit {
403        loop_id: LoopId,
404        from: BasicBlock,
405        to: BasicBlock,
406    },
407    /// The target callsite that terminates the path.
408    Callsite(CallsiteLocation),
409}
410
411/// Identifier for a detected loop in one function body.
412#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
413pub struct LoopId(usize);
414
415impl LoopId {
416    /// Create a loop id from its index in the local loop list.
417    fn new(index: usize) -> Self {
418        Self(index)
419    }
420
421    /// Return the index of this loop id in the local loop list.
422    pub fn index(self) -> usize {
423        self.0
424    }
425}
426
427impl fmt::Display for LoopId {
428    /// Format a loop id as its numeric index.
429    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
430        write!(f, "{}", self.0)
431    }
432}
433
434/// Information recorded for one detected loop.
435#[derive(Clone, Debug)]
436pub struct LoopInfo {
437    /// Local id of this loop.
438    pub id: LoopId,
439    /// Header block chosen for this loop.
440    pub header: BasicBlock,
441    /// Blocks that belong to the loop.
442    pub blocks: Vec<BasicBlock>,
443    /// Edges that leave the loop.
444    pub exits: Vec<LoopExit>,
445    /// Edges inside the loop that go back to an earlier block or the header.
446    pub backedges: Vec<(BasicBlock, BasicBlock)>,
447    /// Current summary status for the loop.
448    pub transfer: LoopTransfer,
449}
450
451/// An edge that leaves a detected loop.
452#[derive(Clone, Debug)]
453pub struct LoopExit {
454    /// Source block inside the loop.
455    pub from: BasicBlock,
456    /// Destination block outside the loop.
457    pub to: BasicBlock,
458}
459
460/// Conservative summary status for a loop.
461#[derive(Clone, Debug)]
462pub enum LoopTransfer {
463    /// The loop has not yet been classified by later verification stages.
464    Unknown,
465    /// The loop can be ignored for the current obligation.
466    Skip,
467    /// The loop may modify relevant state without a precise summary.
468    Havoc,
469}
470
471/// Return true when `block` contains a non-target callsite.
472fn has_other_callsite(
473    block: BasicBlock,
474    target: CallsiteLocation,
475    by_block: &FxHashMap<BasicBlock, CallsiteLocation>,
476) -> bool {
477    by_block
478        .get(&block)
479        .map(|location| *location != target)
480        .unwrap_or(false)
481}
482
483/// Detect loops in a CFG using SCCs.
484fn find_loops(cfg: &CFG) -> (Vec<LoopInfo>, FxHashMap<BasicBlock, LoopId>) {
485    let mut detector = SccDetector::new(cfg.successors.clone());
486    detector.find_scc();
487
488    let mut loops = Vec::new();
489    let mut block_to_loop = FxHashMap::default();
490    for mut component in detector.components {
491        component.sort_unstable();
492        let is_self_loop = component.len() == 1
493            && cfg.successors[component[0]]
494                .iter()
495                .any(|succ| succ.as_usize() == component[0]);
496        if component.len() <= 1 && !is_self_loop {
497            continue;
498        }
499
500        let id = LoopId::new(loops.len());
501        let header = BasicBlock::from_usize(component[0]);
502        let block_set: FxHashSet<usize> = component.iter().copied().collect();
503        let mut exits = Vec::new();
504        let mut backedges = Vec::new();
505
506        for &block_idx in &component {
507            let block = BasicBlock::from_usize(block_idx);
508            for &succ in cfg.successors(block) {
509                let succ_idx = succ.as_usize();
510                if block_set.contains(&succ_idx) {
511                    if succ_idx <= block_idx || succ == header {
512                        backedges.push((block, succ));
513                    }
514                } else {
515                    exits.push(LoopExit {
516                        from: block,
517                        to: succ,
518                    });
519                }
520            }
521        }
522
523        for &block_idx in &component {
524            block_to_loop.insert(BasicBlock::from_usize(block_idx), id);
525        }
526
527        loops.push(LoopInfo {
528            id,
529            header,
530            blocks: component.into_iter().map(BasicBlock::from_usize).collect(),
531            exits,
532            backedges,
533            transfer: LoopTransfer::Unknown,
534        });
535    }
536
537    (loops, block_to_loop)
538}
539
540/// Adapter that lets the shared SCC utility run over a MIR CFG.
541struct SccDetector {
542    successors: Vec<Vec<BasicBlock>>,
543    components: Vec<Vec<usize>>,
544}
545
546impl SccDetector {
547    /// Create an SCC detector over a concrete successor list.
548    fn new(successors: Vec<Vec<BasicBlock>>) -> Self {
549        Self {
550            successors,
551            components: Vec::new(),
552        }
553    }
554}
555
556impl Scc for SccDetector {
557    /// Store every SCC discovered by the shared Tarjan utility.
558    fn on_scc_found(&mut self, _root: usize, scc_components: &[usize]) {
559        self.components.push(scc_components.to_vec());
560    }
561
562    /// Return outgoing successor indices for the SCC traversal.
563    fn get_next(&mut self, root: usize) -> FxHashSet<usize> {
564        self.successors
565            .get(root)
566            .into_iter()
567            .flat_map(|successors| successors.iter().map(|bb| bb.as_usize()))
568            .collect()
569    }
570
571    /// Return the number of CFG nodes in the detector graph.
572    fn get_size(&mut self) -> usize {
573        self.successors.len()
574    }
575}