From 7a21c40febba0182a08eba8646780ad11f4afd0b Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 16 Jul 2025 16:19:51 +0200 Subject: [PATCH 01/35] [LifetimeSafety] Make the dataflow analysis generic (#148222) Refactored the lifetime safety analysis to use a generic dataflow framework with a policy-based design. - Introduced a generic `DataflowAnalysis` template class that can be specialized for different analyses - Renamed `LifetimeLattice` to `LoanPropagationLattice` to better reflect its purpose - Created a `LoanPropagationAnalysis` class that inherits from the generic framework - Moved transfer functions from the standalone `Transferer` class into the analysis class - Restructured the code to separate the dataflow engine from the specific analysis logic - Updated debug output and test expectations to use the new class names In order to add more analyses, e.g. [loan expiry](https://github.com/llvm/llvm-project/pull/148712) and origin liveness, the previous implementation would have separate, nearly identical dataflow runners for each analysis. This change creates a single, reusable component, which will make it much simpler to add subsequent analyses without repeating boilerplate code. This is quite close to the existing dataflow framework! --- clang/lib/Analysis/LifetimeSafety.cpp | 395 ++++++++++-------- .../Sema/warn-lifetime-safety-dataflow.cpp | 28 +- 2 files changed, 232 insertions(+), 191 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index bf67bea6c9933..9c623e3a5693b 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" @@ -496,7 +497,168 @@ class FactGenerator : public ConstStmtVisitor { }; // ========================================================================= // -// The Dataflow Lattice +// Generic Dataflow Analysis +// ========================================================================= // +/// A generic, policy-based driver for forward dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state at the function entry. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template class DataflowAnalysis { +public: + using Lattice = LatticeType; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + llvm::DenseMap BlockEntryStates; + llvm::DenseMap BlockExitStates; + +protected: + FactManager &AllFacts; + + explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F) + : Cfg(C), AC(AC), AllFacts(F) {} + +public: + void run() { + Derived &D = static_cast(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + ForwardDataflowWorklist Worklist(Cfg, AC); + const CFGBlock *Entry = &Cfg.getEntry(); + BlockEntryStates[Entry] = D.getInitialState(); + Worklist.enqueueBlock(Entry); + llvm::SmallBitVector Visited; + Visited.resize(Cfg.getNumBlockIDs() + 1); + + while (const CFGBlock *B = Worklist.dequeue()) { + Lattice EntryState = getEntryState(B); + Lattice ExitState = transferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + Visited.set(B->getBlockID()); + + for (const CFGBlock *Successor : B->succs()) { + Lattice OldSuccEntryState = getEntryState(Successor); + Lattice NewSuccEntryState = D.join(OldSuccEntryState, ExitState); + + // Enqueue the successor if its entry state has changed or if we have + // never visited it. + if (!Visited.test(Successor->getBlockID()) || + NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); + } + } + } + } + + Lattice getEntryState(const CFGBlock *B) const { + return BlockEntryStates.lookup(B); + } + + Lattice getExitState(const CFGBlock *B) const { + return BlockExitStates.lookup(B); + } + + void dump() const { + const Derived *D = static_cast(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = Cfg.getExit(); + getExitState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. + /// TODO: We might need to store intermediate states per-fact in the block for + /// later analysis. + Lattice transferBlock(const CFGBlock *Block, Lattice EntryState) { + Lattice BlockState = EntryState; + for (const Fact *F : AllFacts.getFacts(Block)) { + BlockState = transferFact(BlockState, F); + } + return BlockState; + } + + Lattice transferFact(Lattice In, const Fact *F) { + Derived *d = static_cast(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return d->transfer(In, *F->getAs()); + case Fact::Kind::Expire: + return d->transfer(In, *F->getAs()); + case Fact::Kind::AssignOrigin: + return d->transfer(In, *F->getAs()); + case Fact::Kind::ReturnOfOrigin: + return d->transfer(In, *F->getAs()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const AssignOriginFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } +}; + +namespace utils { + +/// Computes the union of two ImmutableSets. +template +llvm::ImmutableSet join(llvm::ImmutableSet A, llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template +llvm::ImmutableMap +join(llvm::ImmutableMap A, llvm::ImmutableMap B, + typename llvm::ImmutableMap::Factory &F, Joiner joinValues) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + if (const V *ValA = A.lookup(Key)) + A = F.add(A, Key, joinValues(*ValA, ValB)); + else + A = F.add(A, Key, ValB); + } + return A; +} +} // namespace utils + +// ========================================================================= // +// Loan Propagation Analysis // ========================================================================= // // Using LLVM's immutable collections is efficient for dataflow analysis @@ -509,82 +671,37 @@ using OriginLoanMap = llvm::ImmutableMap; /// that all created states share the same underlying memory management. struct LifetimeFactory { OriginLoanMap::Factory OriginMapFactory; - LoanSet::Factory LoanSetFact; + LoanSet::Factory LoanSetFactory; /// Creates a singleton set containing only the given loan ID. LoanSet createLoanSet(LoanID LID) { - return LoanSetFact.add(LoanSetFact.getEmptySet(), LID); + return LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID); } }; -/// LifetimeLattice represents the state of our analysis at a given program -/// point. It is an immutable object, and all operations produce a new -/// instance rather than modifying the existing one. -struct LifetimeLattice { +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct LoanPropagationLattice { /// The map from an origin to the set of loans it contains. - /// The lattice has a finite height: An origin's loan set is bounded by the - /// total number of loans in the function. - /// TODO(opt): To reduce the lattice size, propagate origins of declarations, - /// not expressions, because expressions are not visible across blocks. OriginLoanMap Origins = OriginLoanMap(nullptr); - explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} - LifetimeLattice() = default; + explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} + LoanPropagationLattice() = default; - bool operator==(const LifetimeLattice &Other) const { + bool operator==(const LoanPropagationLattice &Other) const { return Origins == Other.Origins; } - bool operator!=(const LifetimeLattice &Other) const { + bool operator!=(const LoanPropagationLattice &Other) const { return !(*this == Other); } - LoanSet getLoans(OriginID OID) const { - if (auto *Loans = Origins.lookup(OID)) - return *Loans; - return LoanSet(nullptr); - } - - /// Computes the union of two lattices by performing a key-wise join of - /// their OriginLoanMaps. - // TODO(opt): This key-wise join is a performance bottleneck. A more - // efficient merge could be implemented using a Patricia Trie or HAMT - // instead of the current AVL-tree-based ImmutableMap. - // TODO(opt): Keep the state small by removing origins which become dead. - LifetimeLattice join(const LifetimeLattice &Other, - LifetimeFactory &Factory) const { - /// Merge the smaller map into the larger one ensuring we iterate over the - /// smaller map. - if (Origins.getHeight() < Other.Origins.getHeight()) - return Other.join(*this, Factory); - - OriginLoanMap JoinedState = Origins; - // For each origin in the other map, union its loan set with ours. - for (const auto &Entry : Other.Origins) { - OriginID OID = Entry.first; - LoanSet OtherLoanSet = Entry.second; - JoinedState = Factory.OriginMapFactory.add( - JoinedState, OID, join(getLoans(OID), OtherLoanSet, Factory)); - } - return LifetimeLattice(JoinedState); - } - - LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { - /// Merge the smaller set into the larger one ensuring we iterate over the - /// smaller set. - if (a.getHeight() < b.getHeight()) - std::swap(a, b); - LoanSet Result = a; - for (LoanID LID : b) { - /// TODO(opt): Profiling shows that this loop is a major performance - /// bottleneck. Investigate using a BitVector to represent the set of - /// loans for improved join performance. - Result = Factory.LoanSetFact.add(Result, LID); - } - return Result; - } - void dump(llvm::raw_ostream &OS) const { - OS << "LifetimeLattice State:\n"; + OS << "LoanPropagationLattice State:\n"; if (Origins.isEmpty()) OS << " \n"; for (const auto &Entry : Origins) { @@ -596,143 +713,66 @@ struct LifetimeLattice { } }; -// ========================================================================= // -// The Transfer Function -// ========================================================================= // -class Transferer { - FactManager &AllFacts; +/// The analysis that tracks which loans belong to which origins. +class LoanPropagationAnalysis + : public DataflowAnalysis { + LifetimeFactory &Factory; public: - explicit Transferer(FactManager &F, LifetimeFactory &Factory) - : AllFacts(F), Factory(Factory) {} + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LifetimeFactory &Factory) + : DataflowAnalysis(C, AC, F), Factory(Factory) {} - /// Computes the exit state of a block by applying all its facts sequentially - /// to a given entry state. - /// TODO: We might need to store intermediate states per-fact in the block for - /// later analysis. - LifetimeLattice transferBlock(const CFGBlock *Block, - LifetimeLattice EntryState) { - LifetimeLattice BlockState = EntryState; - llvm::ArrayRef Facts = AllFacts.getFacts(Block); + using DataflowAnalysis::transfer; - for (const Fact *F : Facts) { - BlockState = transferFact(BlockState, F); - } - return BlockState; - } + StringRef getAnalysisName() const { return "LoanPropagation"; } -private: - LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) { - switch (F->getKind()) { - case Fact::Kind::Issue: - return transfer(In, *F->getAs()); - case Fact::Kind::AssignOrigin: - return transfer(In, *F->getAs()); - // Expire and ReturnOfOrigin facts don't modify the Origins and the State. - case Fact::Kind::Expire: - case Fact::Kind::ReturnOfOrigin: - return In; - } - llvm_unreachable("Unknown fact kind"); + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = + utils::join(A.Origins, B.Origins, Factory.OriginMapFactory, + [this](LoanSet S1, LoanSet S2) { + return utils::join(S1, S2, Factory.LoanSetFactory); + }); + return Lattice(JoinedOrigins); } /// A new loan is issued to the origin. Old loans are erased. - LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) { + Lattice transfer(Lattice In, const IssueFact &F) { OriginID OID = F.getOriginID(); LoanID LID = F.getLoanID(); - return LifetimeLattice(Factory.OriginMapFactory.add( + return LoanPropagationLattice(Factory.OriginMapFactory.add( In.Origins, OID, Factory.createLoanSet(LID))); } /// The destination origin's loan set is replaced by the source's. /// This implicitly "resets" the old loans of the destination. - LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) { + Lattice transfer(Lattice In, const AssignOriginFact &F) { OriginID DestOID = F.getDestOriginID(); OriginID SrcOID = F.getSrcOriginID(); - LoanSet SrcLoans = InState.getLoans(SrcOID); - return LifetimeLattice( - Factory.OriginMapFactory.add(InState.Origins, DestOID, SrcLoans)); + LoanSet SrcLoans = getLoans(In, SrcOID); + return LoanPropagationLattice( + Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans)); } -}; -// ========================================================================= // -// Dataflow analysis -// ========================================================================= // - -/// Drives the intra-procedural dataflow analysis. -/// -/// Orchestrates the analysis by iterating over the CFG using a worklist -/// algorithm. It computes a fixed point by propagating the LifetimeLattice -/// state through each block until the state no longer changes. -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -class LifetimeDataflow { - const CFG &Cfg; - AnalysisDeclContext &AC; - LifetimeFactory LifetimeFact; - - Transferer Xfer; - - /// Stores the merged analysis state at the entry of each CFG block. - llvm::DenseMap BlockEntryStates; - /// Stores the analysis state at the exit of each CFG block, after the - /// transfer function has been applied. - llvm::DenseMap BlockExitStates; - -public: - LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) - : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {} - - void run() { - llvm::TimeTraceScope TimeProfile("Lifetime Dataflow"); - ForwardDataflowWorklist Worklist(Cfg, AC); - const CFGBlock *Entry = &Cfg.getEntry(); - BlockEntryStates[Entry] = LifetimeLattice{}; - Worklist.enqueueBlock(Entry); - while (const CFGBlock *B = Worklist.dequeue()) { - LifetimeLattice EntryState = getEntryState(B); - LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState); - BlockExitStates[B] = ExitState; - - for (const CFGBlock *Successor : B->succs()) { - auto SuccIt = BlockEntryStates.find(Successor); - LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) - ? SuccIt->second - : LifetimeLattice{}; - LifetimeLattice NewSuccEntryState = - OldSuccEntryState.join(ExitState, LifetimeFact); - // Enqueue the successor if its entry state has changed. - // TODO(opt): Consider changing 'join' to report a change if != - // comparison is found expensive. - if (SuccIt == BlockEntryStates.end() || - NewSuccEntryState != OldSuccEntryState) { - BlockEntryStates[Successor] = NewSuccEntryState; - Worklist.enqueueBlock(Successor); - } - } - } - } - - void dump() const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Dataflow results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = Cfg.getExit(); - getExitState(&B).dump(llvm::dbgs()); - } - - LifetimeLattice getEntryState(const CFGBlock *B) const { - return BlockEntryStates.lookup(B); - } - - LifetimeLattice getExitState(const CFGBlock *B) const { - return BlockExitStates.lookup(B); +private: + LoanSet getLoans(Lattice L, OriginID OID) { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return Factory.LoanSetFactory.getEmptySet(); } }; // ========================================================================= // -// TODO: Analysing dataflow results and error reporting. +// TODO: +// - Modifying loan propagation to answer `LoanSet getLoans(Origin O, Point P)` +// - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)` +// - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)` +// - Using the above three to perform the final error reporting. // ========================================================================= // } // anonymous namespace @@ -755,8 +795,9 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, /// blocks; only Decls are visible. Therefore, loans in a block that /// never reach an Origin associated with a Decl can be safely dropped by /// the analysis. - LifetimeDataflow Dataflow(Cfg, FactMgr, AC); - Dataflow.run(); - DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); + LifetimeFactory Factory; + LoanPropagationAnalysis LoanPropagation(Cfg, AC, FactMgr, Factory); + LoanPropagation.run(); + DEBUG_WITH_TYPE("LifetimeLoanPropagation", LoanPropagation.dump()); } } // namespace clang diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index a956386ae9332..c5748e32f5b8c 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -19,7 +19,7 @@ MyObj* return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) // CHECK: Expire (LoanID: [[L_X]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] // CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] @@ -47,7 +47,7 @@ MyObj* assign_and_return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) // CHECK: Expire (LoanID: [[L_Y]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] // CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] // CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] @@ -65,7 +65,7 @@ int return_int_val() { return x; } // CHECK-NEXT: End of Block -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK: @@ -79,7 +79,7 @@ void loan_expires_cpp() { // CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) // CHECK: Expire (LoanID: [[L_OBJ]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] // CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] @@ -96,7 +96,7 @@ void loan_expires_trivial() { // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] // CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] @@ -119,7 +119,7 @@ void conditional(bool condition) { // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] // CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] @@ -163,7 +163,7 @@ void pointers_in_a_cycle(bool condition) { } // At the end of the analysis, the origins for the pointers involved in the cycle // (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] @@ -195,7 +195,7 @@ void overwrite_origin() { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] @@ -213,7 +213,7 @@ void reassign_to_null() { } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK: Origin [[O_P]] contains no loans @@ -235,7 +235,7 @@ void reassign_in_if(bool condition) { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] @@ -276,7 +276,7 @@ void assign_in_switch(int mode) { // CHECK-DAG: Expire (LoanID: [[L_S2]]) // CHECK-DAG: Expire (LoanID: [[L_S1]]) } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]] @@ -299,7 +299,7 @@ void loan_in_loop(bool condition) { // CHECK: Expire (LoanID: [[L_INNER]]) } } -// CHECK: Dataflow results: +// CHECK: LoanPropagation results: // CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] // CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] @@ -326,7 +326,7 @@ void loop_with_break(int count) { // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK-LABEL: Dataflow results: +// CHECK-LABEL: LoanPropagation results: // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] @@ -355,7 +355,7 @@ void nested_scopes() { // CHECK: Expire (LoanID: [[L_OUTER]]) } -// CHECK-LABEL: Dataflow results: +// CHECK-LABEL: LoanPropagation results: // CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] // CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] // CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]] From 093fe2063996993696efb285ef4799caa0b7f116 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 16 Jul 2025 16:25:48 +0200 Subject: [PATCH 02/35] [LifetimeSafety] Support bidirectional dataflow analysis (#148967) Generalize the dataflow analysis to support both forward and backward analyses. Some program analyses would be expressed as backward dataflow problems (like liveness analysis). This change enables the framework to support both forward analyses (like the loan propagation analysis) and backward analyses with the same infrastructure. --- clang/lib/Analysis/LifetimeSafety.cpp | 108 ++++++++++++++------------ 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 9c623e3a5693b..e3a03cf93880e 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -499,13 +499,16 @@ class FactGenerator : public ConstStmtVisitor { // ========================================================================= // // Generic Dataflow Analysis // ========================================================================= // -/// A generic, policy-based driver for forward dataflow analyses. It combines + +enum class Direction { Forward, Backward }; + +/// A generic, policy-based driver for dataflow analyses. It combines /// the dataflow runner and the transferer logic into a single class hierarchy. /// /// The derived class is expected to provide: /// - A `Lattice` type. /// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state at the function entry. +/// - `Lattice getInitialState();` The initial state of the analysis. /// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. /// - `Lattice transfer(Lattice, const FactType&);` Defines how a single /// lifetime-relevant `Fact` transforms the lattice state. Only overloads @@ -514,18 +517,23 @@ class FactGenerator : public ConstStmtVisitor { /// \tparam Derived The CRTP derived class that implements the specific /// analysis. /// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). /// TODO: Maybe use the dataflow framework! The framework might need changes /// to support the current comparison done at block-entry. -template class DataflowAnalysis { +template +class DataflowAnalysis { public: using Lattice = LatticeType; + using Base = DataflowAnalysis; private: const CFG &Cfg; AnalysisDeclContext &AC; - llvm::DenseMap BlockEntryStates; - llvm::DenseMap BlockExitStates; + llvm::DenseMap InStates; + llvm::DenseMap OutStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } protected: FactManager &AllFacts; @@ -539,75 +547,76 @@ template class DataflowAnalysis { Derived &D = static_cast(*this); llvm::TimeTraceScope Time(D.getAnalysisName()); - ForwardDataflowWorklist Worklist(Cfg, AC); - const CFGBlock *Entry = &Cfg.getEntry(); - BlockEntryStates[Entry] = D.getInitialState(); - Worklist.enqueueBlock(Entry); - llvm::SmallBitVector Visited; - Visited.resize(Cfg.getNumBlockIDs() + 1); - - while (const CFGBlock *B = Worklist.dequeue()) { - Lattice EntryState = getEntryState(B); - Lattice ExitState = transferBlock(B, EntryState); - BlockExitStates[B] = ExitState; - Visited.set(B->getBlockID()); + using Worklist = + std::conditional_t; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); - for (const CFGBlock *Successor : B->succs()) { - Lattice OldSuccEntryState = getEntryState(Successor); - Lattice NewSuccEntryState = D.join(OldSuccEntryState, ExitState); + llvm::SmallBitVector Visited(Cfg.getNumBlockIDs() + 1); - // Enqueue the successor if its entry state has changed or if we have + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + Visited.set(B->getBlockID()); + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + Lattice OldInState = getInState(AdjacentB); + Lattice NewInState = D.join(OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have // never visited it. - if (!Visited.test(Successor->getBlockID()) || - NewSuccEntryState != OldSuccEntryState) { - BlockEntryStates[Successor] = NewSuccEntryState; - Worklist.enqueueBlock(Successor); + if (!Visited.test(AdjacentB->getBlockID()) || + NewInState != OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); } } } } - Lattice getEntryState(const CFGBlock *B) const { - return BlockEntryStates.lookup(B); - } + Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); } - Lattice getExitState(const CFGBlock *B) const { - return BlockExitStates.lookup(B); - } + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } void dump() const { const Derived *D = static_cast(this); llvm::dbgs() << "==========================================\n"; llvm::dbgs() << D->getAnalysisName() << " results:\n"; llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = Cfg.getExit(); - getExitState(&B).dump(llvm::dbgs()); + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); } -private: - /// Computes the exit state of a block by applying all its facts sequentially - /// to a given entry state. + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. /// TODO: We might need to store intermediate states per-fact in the block for /// later analysis. - Lattice transferBlock(const CFGBlock *Block, Lattice EntryState) { - Lattice BlockState = EntryState; - for (const Fact *F : AllFacts.getFacts(Block)) { - BlockState = transferFact(BlockState, F); - } - return BlockState; + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = AllFacts.getFacts(Block); + if constexpr (isForward()) + for (const Fact *F : Facts) + State = transferFact(State, F); + else + for (const Fact *F : llvm::reverse(Facts)) + State = transferFact(State, F); + return State; } Lattice transferFact(Lattice In, const Fact *F) { - Derived *d = static_cast(this); + assert(F); + Derived *D = static_cast(this); switch (F->getKind()) { case Fact::Kind::Issue: - return d->transfer(In, *F->getAs()); + return D->transfer(In, *F->getAs()); case Fact::Kind::Expire: - return d->transfer(In, *F->getAs()); + return D->transfer(In, *F->getAs()); case Fact::Kind::AssignOrigin: - return d->transfer(In, *F->getAs()); + return D->transfer(In, *F->getAs()); case Fact::Kind::ReturnOfOrigin: - return d->transfer(In, *F->getAs()); + return D->transfer(In, *F->getAs()); } llvm_unreachable("Unknown fact kind"); } @@ -715,7 +724,8 @@ struct LoanPropagationLattice { /// The analysis that tracks which loans belong to which origins. class LoanPropagationAnalysis - : public DataflowAnalysis { + : public DataflowAnalysis { LifetimeFactory &Factory; @@ -724,7 +734,7 @@ class LoanPropagationAnalysis LifetimeFactory &Factory) : DataflowAnalysis(C, AC, F), Factory(Factory) {} - using DataflowAnalysis::transfer; + using Base::transfer; StringRef getAnalysisName() const { return "LoanPropagation"; } From 7067c956ecb470a38d7bfaab05fe992446389c4f Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 22 Jul 2025 00:02:12 +0200 Subject: [PATCH 03/35] [LifetimeSafety] Add per-program-point lattice tracking (#149199) Add per-program-point state tracking to the dataflow analysis framework. - Added a `ProgramPoint` type representing a pair of a CFGBlock and a Fact within that block - Added a `PerPointStates` map to store lattice states at each program point - Modified the `transferBlock` method to store intermediate states after each fact is processed - Added a `getLoans` method to the `LoanPropagationAnalysis` class that uses program points This change enables more precise analysis by tracking program state at each individual program point rather than just at block boundaries. This is necessary for answering queries about the state of loans, origins, and other properties at specific points in the program, which is required for error reporting in the lifetime safety analysis. --- clang/lib/Analysis/LifetimeSafety.cpp | 39 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index e3a03cf93880e..a95db6d8013bd 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -502,6 +502,13 @@ class FactGenerator : public ConstStmtVisitor { enum class Direction { Forward, Backward }; +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + /// A generic, policy-based driver for dataflow analyses. It combines /// the dataflow runner and the transferer logic into a single class hierarchy. /// @@ -524,14 +531,20 @@ template class DataflowAnalysis { public: using Lattice = LatticeType; - using Base = DataflowAnalysis; + using Base = DataflowAnalysis; private: const CFG &Cfg; AnalysisDeclContext &AC; + /// The dataflow state before a basic block is processed. llvm::DenseMap InStates; + /// The dataflow state after a basic block is processed. llvm::DenseMap OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap PerPointStates; static constexpr bool isForward() { return Dir == Direction::Forward; } @@ -577,6 +590,8 @@ class DataflowAnalysis { } } + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); } Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } @@ -590,18 +605,23 @@ class DataflowAnalysis { getOutState(&B).dump(llvm::dbgs()); } +private: /// Computes the state at one end of a block by applying all its facts /// sequentially to a given state from the other end. - /// TODO: We might need to store intermediate states per-fact in the block for - /// later analysis. Lattice transferBlock(const CFGBlock *Block, Lattice State) { auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) - for (const Fact *F : Facts) + if constexpr (isForward()) { + for (const Fact *F : Facts) { State = transferFact(State, F); - else - for (const Fact *F : llvm::reverse(Facts)) + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; State = transferFact(State, F); + } + } return State; } @@ -769,6 +789,10 @@ class LoanPropagationAnalysis Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans)); } + LoanSet getLoans(OriginID OID, ProgramPoint P) { + return getLoans(getState(P), OID); + } + private: LoanSet getLoans(Lattice L, OriginID OID) { if (auto *Loans = L.Origins.lookup(OID)) @@ -779,7 +803,6 @@ class LoanPropagationAnalysis // ========================================================================= // // TODO: -// - Modifying loan propagation to answer `LoanSet getLoans(Origin O, Point P)` // - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)` // - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)` // - Using the above three to perform the final error reporting. From e65ffb6870fdbbd7039b49ff399cf0862ad3101a Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 22 Jul 2025 12:12:22 +0000 Subject: [PATCH 04/35] Reapply "[LifetimeSafety] Revamp test suite using unittests (#149158)" This reverts commit 54b50681ca0fd1c0c6ddb059c88981a45e2f1b19. --- .../clang/Analysis/Analyses/LifetimeSafety.h | 92 +++- clang/lib/Analysis/LifetimeSafety.cpp | 181 ++++++-- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +- .../Sema/warn-lifetime-safety-dataflow.cpp | 87 +--- clang/unittests/Analysis/CMakeLists.txt | 1 + .../unittests/Analysis/LifetimeSafetyTest.cpp | 439 ++++++++++++++++++ 6 files changed, 665 insertions(+), 139 deletions(-) create mode 100644 clang/unittests/Analysis/LifetimeSafetyTest.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 9998702a41cab..beeb0aaba5d0d 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -17,14 +17,96 @@ //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H #define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#include "clang/AST/DeclBase.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" -namespace clang { +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/StringMap.h" +#include -void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC); +namespace clang::lifetimes { -} // namespace clang +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC); + +namespace internal { +// Forward declarations of internal types. +class Fact; +class FactManager; +class LoanPropagationAnalysis; +struct LifetimeFactory; + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +using LoanID = ID; +using OriginID = ID; + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet; +using OriginSet = llvm::ImmutableSet; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// Running the lifetime safety analysis and querying its results. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC); + ~LifetimeSafetyAnalysis(); + + void run(); + + /// Returns the set of loans an origin holds at a specific program point. + LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; + + /// Finds the OriginID for a given declaration. + /// Returns a null optional if not found. + std::optional getOriginIDForDecl(const ValueDecl *D) const; + + /// Finds the LoanID's for the loan created with the specific variable as + /// their Path. + std::vector getLoanIDForVar(const VarDecl *VD) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_")` as test points. This method returns + /// a map from the annotation string () to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. + llvm::StringMap getTestPoints() const; + +private: + AnalysisDeclContext &AC; + std::unique_ptr Factory; + std::unique_ptr FactMgr; + std::unique_ptr LoanPropagation; +}; +} // namespace internal +} // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index a95db6d8013bd..ae6ec9f76cbf6 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -24,8 +24,14 @@ #include "llvm/Support/TimeProfiler.h" #include -namespace clang { +namespace clang::lifetimes { +namespace internal { namespace { +template +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { + return OS << ID.Value; +} +} // namespace /// Represents the storage location being borrowed, e.g., a specific stack /// variable. @@ -36,32 +42,6 @@ struct AccessPath { AccessPath(const clang::ValueDecl *D) : D(D) {} }; -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template struct ID { - uint32_t Value = 0; - - bool operator==(const ID &Other) const { return Value == Other.Value; } - bool operator!=(const ID &Other) const { return !(*this == Other); } - bool operator<(const ID &Other) const { return Value < Other.Value; } - ID operator++(int) { - ID Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -template -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { - return OS << ID.Value; -} - -using LoanID = ID; -using OriginID = ID; - /// Information about a single borrow, or "Loan". A loan is created when a /// reference or pointer is created. struct Loan { @@ -223,7 +203,9 @@ class Fact { /// An origin is propagated from a source to a destination (e.g., p = q). AssignOrigin, /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin + ReturnOfOrigin, + /// A marker for a specific point in the code, for testing. + TestPoint, }; private: @@ -310,6 +292,24 @@ class ReturnOfOriginFact : public Fact { } }; +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS) const override { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; + } +}; + class FactManager { public: llvm::ArrayRef getFacts(const CFGBlock *B) const { @@ -363,6 +363,7 @@ class FactManager { }; class FactGenerator : public ConstStmtVisitor { + using Base = ConstStmtVisitor; public: FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) @@ -458,6 +459,15 @@ class FactGenerator : public ConstStmtVisitor { } } + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (VisitTestPoint(FCE)) + return; + // Visit as normal otherwise. + Base::VisitCXXFunctionalCastExpr(FCE); + } + private: // Check if a type has an origin. bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } @@ -491,6 +501,27 @@ class FactGenerator : public ConstStmtVisitor { } } + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact(Annotation)); + return true; + } + } + return false; + } + FactManager &FactMgr; AnalysisDeclContext &AC; llvm::SmallVector CurrentBlockFacts; @@ -637,6 +668,8 @@ class DataflowAnalysis { return D->transfer(In, *F->getAs()); case Fact::Kind::ReturnOfOrigin: return D->transfer(In, *F->getAs()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs()); } llvm_unreachable("Unknown fact kind"); } @@ -646,14 +679,16 @@ class DataflowAnalysis { Lattice transfer(Lattice In, const ExpireFact &) { return In; } Lattice transfer(Lattice In, const AssignOriginFact &) { return In; } Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } }; namespace utils { /// Computes the union of two ImmutableSets. template -llvm::ImmutableSet join(llvm::ImmutableSet A, llvm::ImmutableSet B, - typename llvm::ImmutableSet::Factory &F) { +static llvm::ImmutableSet join(llvm::ImmutableSet A, + llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { if (A.getHeight() < B.getHeight()) std::swap(A, B); for (const T &E : B) @@ -666,7 +701,7 @@ llvm::ImmutableSet join(llvm::ImmutableSet A, llvm::ImmutableSet B, // efficient merge could be implemented using a Patricia Trie or HAMT // instead of the current AVL-tree-based ImmutableMap. template -llvm::ImmutableMap +static llvm::ImmutableMap join(llvm::ImmutableMap A, llvm::ImmutableMap B, typename llvm::ImmutableMap::Factory &F, Joiner joinValues) { if (A.getHeight() < B.getHeight()) @@ -690,10 +725,6 @@ join(llvm::ImmutableMap A, llvm::ImmutableMap B, // Loan Propagation Analysis // ========================================================================= // -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet; using OriginLoanMap = llvm::ImmutableMap; /// An object to hold the factories for immutable collections, ensuring @@ -807,17 +838,28 @@ class LoanPropagationAnalysis // - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)` // - Using the above three to perform the final error reporting. // ========================================================================= // -} // anonymous namespace -void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC) { +// ========================================================================= // +// LifetimeSafetyAnalysis Class Implementation +// ========================================================================= // + +// We need this here for unique_ptr with forward declared class. +LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC) + : AC(AC), Factory(std::make_unique()), + FactMgr(std::make_unique()) {} + +void LifetimeSafetyAnalysis::run() { llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), /*ShowColors=*/true)); - FactManager FactMgr; - FactGenerator FactGen(FactMgr, AC); + + FactGenerator FactGen(*FactMgr, AC); FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); /// TODO(opt): Consider optimizing individual blocks before running the /// dataflow analysis. @@ -828,9 +870,56 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, /// blocks; only Decls are visible. Therefore, loans in a block that /// never reach an Origin associated with a Decl can be safely dropped by /// the analysis. - LifetimeFactory Factory; - LoanPropagationAnalysis LoanPropagation(Cfg, AC, FactMgr, Factory); - LoanPropagation.run(); - DEBUG_WITH_TYPE("LifetimeLoanPropagation", LoanPropagation.dump()); + LoanPropagation = + std::make_unique(Cfg, AC, *FactMgr, *Factory); + LoanPropagation->run(); +} + +LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, + ProgramPoint PP) const { + assert(LoanPropagation && "Analysis has not been run."); + return LoanPropagation->getLoans(OID, PP); +} + +std::optional +LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { + assert(FactMgr && "FactManager not initialized"); + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` logic + // in a const-query context if that becomes an issue. + return FactMgr->getOriginMgr().get(*D); +} + +std::vector +LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { + assert(FactMgr && "FactManager not initialized"); + std::vector Result; + for (const Loan &L : FactMgr->getLoanMgr().getLoans()) + if (L.Path.D == VD) + Result.push_back(L.ID); + return Result; +} + +llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { + assert(FactMgr && "FactManager not initialized"); + llvm::StringMap AnnotationToPointMap; + for (const CFGBlock *Block : *AC.getCFG()) { + for (const Fact *F : FactMgr->getFacts(Block)) { + if (const auto *TPF = F->getAs()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC) { + internal::LifetimeSafetyAnalysis Analysis(AC); + Analysis.run(); } -} // namespace clang +} // namespace clang::lifetimes diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 942b5b1e08929..2cf9382a90048 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3749,8 +3749,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // TODO: Enable lifetime safety analysis for other languages once it is // stable. if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { - if (CFG *cfg = AC.getCFG()) - runLifetimeSafetyAnalysis(*cast(D), *cfg, AC); + if (AC.getCFG()) + lifetimes::runLifetimeSafetyAnalysis(AC); } // Check for violations of "called once" parameter properties. if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index c5748e32f5b8c..0eb3bda918f82 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fexperimental-lifetime-safety -mllvm -debug-only=LifetimeFacts,LifetimeDataflow -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -mllvm -debug-only=LifetimeFacts -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s // REQUIRES: asserts struct MyObj { @@ -19,10 +19,6 @@ MyObj* return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) // CHECK: Expire (LoanID: [[L_X]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] -// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] // Pointer Assignment and Return @@ -47,15 +43,6 @@ MyObj* assign_and_return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) // CHECK: Expire (LoanID: [[L_Y]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]] - // Return of Non-Pointer Type // CHECK-LABEL: Function: return_int_val @@ -65,8 +52,6 @@ int return_int_val() { return x; } // CHECK-NEXT: End of Block -// CHECK: LoanPropagation results: -// CHECK: // Loan Expiration (Automatic Variable, C++) @@ -79,9 +64,6 @@ void loan_expires_cpp() { // CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) // CHECK: Expire (LoanID: [[L_OBJ]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] -// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] // FIXME: No expire for Trivial Destructors @@ -96,10 +78,6 @@ void loan_expires_trivial() { // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] -// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] - // CHECK-LABEL: Function: conditional void conditional(bool condition) { @@ -119,13 +97,6 @@ void conditional(bool condition) { // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]] -// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]] // CHECK-LABEL: Function: pointers_in_a_cycle @@ -161,25 +132,6 @@ void pointers_in_a_cycle(bool condition) { // CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]]) } } -// At the end of the analysis, the origins for the pointers involved in the cycle -// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]] - // CHECK-LABEL: Function: overwrite_origin void overwrite_origin() { @@ -195,10 +147,6 @@ void overwrite_origin() { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK: LoanPropagation results: -// CHECK: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] - // CHECK-LABEL: Function: reassign_to_null void reassign_to_null() { @@ -213,8 +161,6 @@ void reassign_to_null() { } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. -// CHECK: LoanPropagation results: -// CHECK: Origin [[O_P]] contains no loans // CHECK-LABEL: Function: reassign_in_if @@ -235,11 +181,6 @@ void reassign_in_if(bool condition) { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] // CHECK-LABEL: Function: assign_in_switch @@ -276,14 +217,6 @@ void assign_in_switch(int mode) { // CHECK-DAG: Expire (LoanID: [[L_S2]]) // CHECK-DAG: Expire (LoanID: [[L_S1]]) } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]] - // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { @@ -299,10 +232,6 @@ void loan_in_loop(bool condition) { // CHECK: Expire (LoanID: [[L_INNER]]) } } -// CHECK: LoanPropagation results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] - // CHECK-LABEL: Function: loop_with_break void loop_with_break(int count) { @@ -326,13 +255,6 @@ void loop_with_break(int count) { // CHECK: Expire (LoanID: [[L_S1]]) } -// CHECK-LABEL: LoanPropagation results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] - - // CHECK-LABEL: Function: nested_scopes void nested_scopes() { MyObj* p = nullptr; @@ -355,13 +277,6 @@ void nested_scopes() { // CHECK: Expire (LoanID: [[L_OUTER]]) } -// CHECK-LABEL: LoanPropagation results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]] -// CHECK-NOT: Origin [[O_P]] contains Loan [[L_OUTER]] - - // CHECK-LABEL: Function: pointer_indirection void pointer_indirection() { int a; diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 059a74843155c..52e7d2854633d 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -4,6 +4,7 @@ add_clang_unittest(ClangAnalysisTests CloneDetectionTest.cpp ExprMutationAnalyzerTest.cpp IntervalPartitionTest.cpp + LifetimeSafetyTest.cpp MacroExpansionContextTest.cpp UnsafeBufferUsageTest.cpp CLANG_LIBS diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp new file mode 100644 index 0000000000000..b08159714e78a --- /dev/null +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -0,0 +1,439 @@ +//===- LifetimeSafetyTest.cpp - Lifetime Safety Tests -*---------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Testing/TestAST.h" +#include "llvm/ADT/StringMap.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include + +namespace clang::lifetimes::internal { +namespace { + +using namespace ast_matchers; +using ::testing::UnorderedElementsAreArray; + +// A helper class to run the full lifetime analysis on a piece of code +// and provide an interface for querying the results. +class LifetimeTestRunner { +public: + LifetimeTestRunner(llvm::StringRef Code) { + std::string FullCode = R"( + #define POINT(name) void("__lifetime_test_point_" #name) + struct MyObj { ~MyObj() {} int i; }; + )"; + FullCode += Code.str(); + + AST = std::make_unique(FullCode); + ASTCtx = &AST->context(); + + // Find the target function using AST matchers. + auto MatchResult = + match(functionDecl(hasName("target")).bind("target"), *ASTCtx); + auto *FD = selectFirst("target", MatchResult); + if (!FD) { + ADD_FAILURE() << "Test case must have a function named 'target'"; + return; + } + AnalysisCtx = std::make_unique(nullptr, FD); + AnalysisCtx->getCFGBuildOptions().setAllAlwaysAdd(); + + // Run the main analysis. + Analysis = std::make_unique(*AnalysisCtx); + Analysis->run(); + + AnnotationToPointMap = Analysis->getTestPoints(); + } + + LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } + ASTContext &getASTContext() { return *ASTCtx; } + + ProgramPoint getProgramPoint(llvm::StringRef Annotation) { + auto It = AnnotationToPointMap.find(Annotation); + if (It == AnnotationToPointMap.end()) { + ADD_FAILURE() << "Annotation '" << Annotation << "' not found."; + return nullptr; + } + return It->second; + } + +private: + std::unique_ptr AST; + ASTContext *ASTCtx = nullptr; + std::unique_ptr AnalysisCtx; + std::unique_ptr Analysis; + llvm::StringMap AnnotationToPointMap; +}; + +// A convenience wrapper that uses the LifetimeSafetyAnalysis public API. +class LifetimeTestHelper { +public: + LifetimeTestHelper(LifetimeTestRunner &Runner) + : Runner(Runner), Analysis(Runner.getAnalysis()) {} + + std::optional getOriginForDecl(llvm::StringRef VarName) { + auto *VD = findDecl(VarName); + if (!VD) + return std::nullopt; + auto OID = Analysis.getOriginIDForDecl(VD); + if (!OID) + ADD_FAILURE() << "Origin for '" << VarName << "' not found."; + return OID; + } + + std::optional getLoanForVar(llvm::StringRef VarName) { + auto *VD = findDecl(VarName); + if (!VD) + return std::nullopt; + std::vector LID = Analysis.getLoanIDForVar(VD); + if (LID.empty()) { + ADD_FAILURE() << "Loan for '" << VarName << "' not found."; + return std::nullopt; + } + // TODO: Support retrieving more than one loans to a var. + if (LID.size() > 1) { + ADD_FAILURE() << "More than 1 loans found for '" << VarName; + return std::nullopt; + } + return LID[0]; + } + + std::optional getLoansAtPoint(OriginID OID, + llvm::StringRef Annotation) { + ProgramPoint PP = Runner.getProgramPoint(Annotation); + if (!PP) + return std::nullopt; + return Analysis.getLoansAtPoint(OID, PP); + } + +private: + template DeclT *findDecl(llvm::StringRef Name) { + auto &Ctx = Runner.getASTContext(); + auto Results = match(valueDecl(hasName(Name)).bind("v"), Ctx); + if (Results.empty()) { + ADD_FAILURE() << "Declaration '" << Name << "' not found in AST."; + return nullptr; + } + return const_cast(selectFirst("v", Results)); + } + + LifetimeTestRunner &Runner; + LifetimeSafetyAnalysis &Analysis; +}; + +// ========================================================================= // +// GTest Matchers & Fixture +// ========================================================================= // + +// It holds the name of the origin variable and a reference to the helper. +class OriginInfo { +public: + OriginInfo(llvm::StringRef OriginVar, LifetimeTestHelper &Helper) + : OriginVar(OriginVar), Helper(Helper) {} + llvm::StringRef OriginVar; + LifetimeTestHelper &Helper; +}; + +/// Matcher to verify the set of loans held by an origin at a specific +/// program point. +/// +/// This matcher is intended to be used with an \c OriginInfo object. +/// +/// \param LoanVars A vector of strings, where each string is the name of a +/// variable expected to be the source of a loan. +/// \param Annotation A string identifying the program point (created with +/// POINT()) where the check should be performed. +MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { + const OriginInfo &Info = arg; + std::optional OIDOpt = Info.Helper.getOriginForDecl(Info.OriginVar); + if (!OIDOpt) { + *result_listener << "could not find origin for '" << Info.OriginVar.str() + << "'"; + return false; + } + + std::optional ActualLoansSetOpt = + Info.Helper.getLoansAtPoint(*OIDOpt, Annotation); + if (!ActualLoansSetOpt) { + *result_listener << "could not get a valid loan set at point '" + << Annotation << "'"; + return false; + } + std::vector ActualLoans(ActualLoansSetOpt->begin(), + ActualLoansSetOpt->end()); + + std::vector ExpectedLoans; + for (const auto &LoanVar : LoanVars) { + std::optional ExpectedLIDOpt = Info.Helper.getLoanForVar(LoanVar); + if (!ExpectedLIDOpt) { + *result_listener << "could not find loan for var '" << LoanVar << "'"; + return false; + } + ExpectedLoans.push_back(*ExpectedLIDOpt); + } + + return ExplainMatchResult(UnorderedElementsAreArray(ExpectedLoans), + ActualLoans, result_listener); +} + +// Base test fixture to manage the runner and helper. +class LifetimeAnalysisTest : public ::testing::Test { +protected: + void SetupTest(llvm::StringRef Code) { + Runner = std::make_unique(Code); + Helper = std::make_unique(*Runner); + } + + OriginInfo Origin(llvm::StringRef OriginVar) { + return OriginInfo(OriginVar, *Helper); + } + + // Factory function that hides the std::vector creation. + auto HasLoansTo(std::initializer_list LoanVars, + const char *Annotation) { + return HasLoansToImpl(std::vector(LoanVars), Annotation); + } + + std::unique_ptr Runner; + std::unique_ptr Helper; +}; + +// ========================================================================= // +// TESTS +// ========================================================================= // + +TEST_F(LifetimeAnalysisTest, SimpleLoanAndOrigin) { + SetupTest(R"( + void target() { + int x; + int* p = &x; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"x"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, OverwriteOrigin) { + SetupTest(R"( + void target() { + MyObj s1, s2; + + MyObj* p = &s1; + POINT(after_s1); + + p = &s2; + POINT(after_s2); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_s1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_s2")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalLoan) { + SetupTest(R"( + void target(bool cond) { + int a, b; + int *p = nullptr; + if (cond) { + p = &a; + POINT(after_then); + } else { + p = &b; + POINT(after_else); + } + POINT(after_if); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "after_then")); + EXPECT_THAT(Origin("p"), HasLoansTo({"b"}, "after_else")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a", "b"}, "after_if")); +} + +TEST_F(LifetimeAnalysisTest, PointerChain) { + SetupTest(R"( + void target() { + MyObj y; + MyObj* ptr1 = &y; + POINT(p1); + + MyObj* ptr2 = ptr1; + POINT(p2); + + ptr2 = ptr1; + POINT(p3); + + ptr2 = ptr2; // Self assignment + POINT(p4); + } + )"); + EXPECT_THAT(Origin("ptr1"), HasLoansTo({"y"}, "p1")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p2")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p3")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p4")); +} + +TEST_F(LifetimeAnalysisTest, ReassignToNull) { + SetupTest(R"( + void target() { + MyObj s1; + MyObj* p = &s1; + POINT(before_null); + p = nullptr; + POINT(after_null); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_null")); + // After assigning to null, the origin for `p` should have no loans. + EXPECT_THAT(Origin("p"), HasLoansTo({}, "after_null")); +} + +TEST_F(LifetimeAnalysisTest, ReassignInIf) { + SetupTest(R"( + void target(bool condition) { + MyObj s1, s2; + MyObj* p = &s1; + POINT(before_if); + if (condition) { + p = &s2; + POINT(after_reassign); + } + POINT(after_if); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_if")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_reassign")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_if")); +} + +TEST_F(LifetimeAnalysisTest, AssignInSwitch) { + SetupTest(R"( + void target(int mode) { + MyObj s1, s2, s3; + MyObj* p = nullptr; + switch (mode) { + case 1: + p = &s1; + POINT(case1); + break; + case 2: + p = &s2; + POINT(case2); + break; + default: + p = &s3; + POINT(case3); + break; + } + POINT(after_switch); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "case1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "case2")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s3"}, "case3")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2", "s3"}, "after_switch")); +} + +TEST_F(LifetimeAnalysisTest, LoanInLoop) { + SetupTest(R"( + void target(bool condition) { + MyObj* p = nullptr; + while (condition) { + MyObj inner; + p = &inner; + POINT(in_loop); + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "in_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, LoopWithBreak) { + SetupTest(R"( + void target(int count) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; + POINT(before_loop); + for (int i = 0; i < count; ++i) { + if (i == 5) { + p = &s2; + POINT(inside_if); + break; + } + POINT(after_if); + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "inside_if")); + // At the join point after if, s2 cannot make it to p without the if. + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_if")); + // At the join point after the loop, p could hold a loan to s1 (if the loop + // completed normally) or to s2 (if the loop was broken). + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, PointersInACycle) { + SetupTest(R"( + void target(bool condition) { + MyObj v1, v2, v3; + MyObj *p1 = &v1, *p2 = &v2, *p3 = &v3; + + POINT(before_while); + while (condition) { + MyObj* temp = p1; + p1 = p2; + p2 = p3; + p3 = temp; + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while")); + EXPECT_THAT(Origin("p3"), HasLoansTo({"v3"}, "before_while")); + + // At the fixed point after the loop, all pointers could point to any of + // the three variables. + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + EXPECT_THAT(Origin("p3"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, NestedScopes) { + SetupTest(R"( + void target() { + MyObj* p = nullptr; + { + MyObj outer; + p = &outer; + POINT(before_inner_scope); + { + MyObj inner; + p = &inner; + POINT(inside_inner_scope); + } // inner expires + POINT(after_inner_scope); + } // outer expires + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"outer"}, "before_inner_scope")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "inside_inner_scope")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_inner_scope")); +} + +} // anonymous namespace +} // namespace clang::lifetimes::internal From 01e7a2db3541f913f8d7ce3ce9e4e8d1eb7200b6 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 23 Jul 2025 12:14:29 +0200 Subject: [PATCH 05/35] [LifetimeSafety] Add loan expiry analysis (#148712) This PR adds the `ExpiredLoansAnalysis` class to track which loans have expired. The analysis uses a dataflow lattice (`ExpiredLattice`) to maintain the set of expired loans at each program point. This is a very light weight dataflow analysis and is expected to reach fixed point in ~2 iterations. In principle, this does not need a dataflow analysis but is used for convenience in favour of lean code. --- .../clang/Analysis/Analyses/LifetimeSafety.h | 9 + clang/lib/Analysis/LifetimeSafety.cpp | 101 ++++++- .../unittests/Analysis/LifetimeSafetyTest.cpp | 279 +++++++++++++++++- 3 files changed, 379 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index beeb0aaba5d0d..1c00558d32f63 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -33,6 +33,7 @@ namespace internal { class Fact; class FactManager; class LoanPropagationAnalysis; +class ExpiredLoansAnalysis; struct LifetimeFactory; /// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. @@ -52,6 +53,10 @@ template struct ID { IDBuilder.AddInteger(Value); } }; +template +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { + return OS << ID.Value; +} using LoanID = ID; using OriginID = ID; @@ -81,6 +86,9 @@ class LifetimeSafetyAnalysis { /// Returns the set of loans an origin holds at a specific program point. LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; + /// Returns the set of loans that have expired at a specific program point. + LoanSet getExpiredLoansAtPoint(ProgramPoint PP) const; + /// Finds the OriginID for a given declaration. /// Returns a null optional if not found. std::optional getOriginIDForDecl(const ValueDecl *D) const; @@ -105,6 +113,7 @@ class LifetimeSafetyAnalysis { std::unique_ptr Factory; std::unique_ptr FactMgr; std::unique_ptr LoanPropagation; + std::unique_ptr ExpiredLoans; }; } // namespace internal } // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index ae6ec9f76cbf6..94b8197bbf6f3 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -23,15 +23,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" #include +#include namespace clang::lifetimes { namespace internal { -namespace { -template -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { - return OS << ID.Value; -} -} // namespace /// Represents the storage location being borrowed, e.g., a specific stack /// variable. @@ -832,6 +827,91 @@ class LoanPropagationAnalysis } }; +// ========================================================================= // +// Expired Loans Analysis +// ========================================================================= // + +/// The dataflow lattice for tracking the set of expired loans. +struct ExpiredLattice { + LoanSet Expired; + + ExpiredLattice() : Expired(nullptr) {}; + explicit ExpiredLattice(LoanSet S) : Expired(S) {} + + bool operator==(const ExpiredLattice &Other) const { + return Expired == Other.Expired; + } + bool operator!=(const ExpiredLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "ExpiredLattice State:\n"; + if (Expired.isEmpty()) + OS << " \n"; + for (const LoanID &LID : Expired) + OS << " Loan " << LID << " is expired\n"; + } +}; + +/// The analysis that tracks which loans have expired. +class ExpiredLoansAnalysis + : public DataflowAnalysis { + + LoanSet::Factory &Factory; + +public: + ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LifetimeFactory &Factory) + : DataflowAnalysis(C, AC, F), Factory(Factory.LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "ExpiredLoans"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptySet()); } + + /// Merges two lattices by taking the union of the expired loan sets. + Lattice join(Lattice L1, Lattice L2) const { + return Lattice(utils::join(L1.Expired, L2.Expired, Factory)); + } + + Lattice transfer(Lattice In, const ExpireFact &F) { + return Lattice(Factory.add(In.Expired, F.getLoanID())); + } + + // Removes the loan from the set of expired loans. + // + // When a loan is re-issued (e.g., in a loop), it is no longer considered + // expired. A loan can be in the expired set at the point of issue due to + // the dataflow state from a previous loop iteration being propagated along + // a backedge in the CFG. + // + // Note: This has a subtle false-negative though where a loan from previous + // iteration is not overwritten by a reissue. This needs careful tracking + // of loans "across iterations" which can be considered for future + // enhancements. + // + // void foo(int safe) { + // int* p = &safe; + // int* q = &safe; + // while (condition()) { + // int x = 1; + // p = &x; // A loan to 'x' is issued to 'p' in every iteration. + // if (condition()) { + // q = p; + // } + // (void)*p; // OK — 'p' points to 'x' from new iteration. + // (void)*q; // UaF - 'q' still points to 'x' from previous iteration + // // which is now destroyed. + // } + // } + Lattice transfer(Lattice In, const IssueFact &F) { + return Lattice(Factory.remove(In.Expired, F.getLoanID())); + } +}; + // ========================================================================= // // TODO: // - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)` @@ -873,6 +953,10 @@ void LifetimeSafetyAnalysis::run() { LoanPropagation = std::make_unique(Cfg, AC, *FactMgr, *Factory); LoanPropagation->run(); + + ExpiredLoans = + std::make_unique(Cfg, AC, *FactMgr, *Factory); + ExpiredLoans->run(); } LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, @@ -881,6 +965,11 @@ LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, return LoanPropagation->getLoans(OID, PP); } +LoanSet LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const { + assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run."); + return ExpiredLoans->getState(PP).Expired; +} + std::optional LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { assert(FactMgr && "FactManager not initialized"); diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index b08159714e78a..a48dc45e4f806 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -45,7 +45,10 @@ class LifetimeTestRunner { return; } AnalysisCtx = std::make_unique(nullptr, FD); - AnalysisCtx->getCFGBuildOptions().setAllAlwaysAdd(); + CFG::BuildOptions &BuildOptions = AnalysisCtx->getCFGBuildOptions(); + BuildOptions.setAllAlwaysAdd(); + BuildOptions.AddImplicitDtors = true; + BuildOptions.AddTemporaryDtors = true; // Run the main analysis. Analysis = std::make_unique(*AnalysisCtx); @@ -115,6 +118,13 @@ class LifetimeTestHelper { return Analysis.getLoansAtPoint(OID, PP); } + std::optional getExpiredLoansAtPoint(llvm::StringRef Annotation) { + ProgramPoint PP = Runner.getProgramPoint(Annotation); + if (!PP) + return std::nullopt; + return Analysis.getExpiredLoansAtPoint(PP); + } + private: template DeclT *findDecl(llvm::StringRef Name) { auto &Ctx = Runner.getASTContext(); @@ -134,6 +144,15 @@ class LifetimeTestHelper { // GTest Matchers & Fixture // ========================================================================= // +// A helper class to represent a set of loans, identified by variable names. +class LoanSetInfo { +public: + LoanSetInfo(const std::vector &Vars, LifetimeTestHelper &H) + : LoanVars(Vars), Helper(H) {} + std::vector LoanVars; + LifetimeTestHelper &Helper; +}; + // It holds the name of the origin variable and a reference to the helper. class OriginInfo { public: @@ -185,6 +204,34 @@ MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { ActualLoans, result_listener); } +/// Matcher to verify that the complete set of expired loans at a program point +/// matches the expected loan set. +MATCHER_P(AreExpiredAt, Annotation, "") { + const LoanSetInfo &Info = arg; + auto &Helper = Info.Helper; + + auto ActualExpiredSetOpt = Helper.getExpiredLoansAtPoint(Annotation); + if (!ActualExpiredSetOpt) { + *result_listener << "could not get a valid expired loan set at point '" + << Annotation << "'"; + return false; + } + std::vector ActualExpiredLoans(ActualExpiredSetOpt->begin(), + ActualExpiredSetOpt->end()); + std::vector ExpectedExpiredLoans; + for (const auto &VarName : Info.LoanVars) { + auto LoanIDOpt = Helper.getLoanForVar(VarName); + if (!LoanIDOpt) { + *result_listener << "could not find a loan for variable '" << VarName + << "'"; + return false; + } + ExpectedExpiredLoans.push_back(*LoanIDOpt); + } + return ExplainMatchResult(UnorderedElementsAreArray(ExpectedExpiredLoans), + ActualExpiredLoans, result_listener); +} + // Base test fixture to manage the runner and helper. class LifetimeAnalysisTest : public ::testing::Test { protected: @@ -197,6 +244,14 @@ class LifetimeAnalysisTest : public ::testing::Test { return OriginInfo(OriginVar, *Helper); } + /// Factory function that hides the std::vector creation. + LoanSetInfo LoansTo(std::initializer_list LoanVars) { + return LoanSetInfo({LoanVars}, *Helper); + } + + /// A convenience helper for asserting that no loans are expired. + LoanSetInfo NoLoans() { return LoansTo({}); } + // Factory function that hides the std::vector creation. auto HasLoansTo(std::initializer_list LoanVars, const char *Annotation) { @@ -292,7 +347,6 @@ TEST_F(LifetimeAnalysisTest, ReassignToNull) { } )"); EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_null")); - // After assigning to null, the origin for `p` should have no loans. EXPECT_THAT(Origin("p"), HasLoansTo({}, "after_null")); } @@ -347,15 +401,22 @@ TEST_F(LifetimeAnalysisTest, LoanInLoop) { void target(bool condition) { MyObj* p = nullptr; while (condition) { + POINT(start_loop); MyObj inner; p = &inner; - POINT(in_loop); + POINT(end_loop); } POINT(after_loop); } )"); - EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "in_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "start_loop")); + EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("start_loop")); + + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "end_loop")); + EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_loop")); + EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("after_loop")); } TEST_F(LifetimeAnalysisTest, LoopWithBreak) { @@ -413,6 +474,44 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) { EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); } +TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) { + SetupTest(R"( + void target(bool condition) { + MyObj v1, v2; + MyObj *p1 = &v1, *p2 = &v2; + + POINT(before_while); + while (condition) { + POINT(in_loop_before_temp); + MyObj temp; + p1 = &temp; + POINT(in_loop_after_temp); + + MyObj* q = p1; + p1 = p2; + p2 = q; + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while")); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_while")); + + EXPECT_THAT(Origin("p1"), + HasLoansTo({"v1", "v2", "temp"}, "in_loop_before_temp")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_before_temp")); + EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("in_loop_before_temp")); + + EXPECT_THAT(Origin("p1"), HasLoansTo({"temp"}, "in_loop_after_temp")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_after_temp")); + EXPECT_THAT(NoLoans(), AreExpiredAt("in_loop_after_temp")); + + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "temp"}, "after_loop")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "after_loop")); + EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("after_loop")); +} + TEST_F(LifetimeAnalysisTest, NestedScopes) { SetupTest(R"( void target() { @@ -435,5 +534,177 @@ TEST_F(LifetimeAnalysisTest, NestedScopes) { EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_inner_scope")); } +TEST_F(LifetimeAnalysisTest, SimpleExpiry) { + SetupTest(R"( + void target() { + MyObj* p = nullptr; + { + MyObj s; + p = &s; + POINT(before_expiry); + } // s goes out of scope here + POINT(after_expiry); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry")); + EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_expiry")); +} + +TEST_F(LifetimeAnalysisTest, NestedExpiry) { + SetupTest(R"( + void target() { + MyObj s1; + MyObj* p = &s1; + POINT(before_inner); + { + MyObj s2; + p = &s2; + POINT(in_inner); + } // s2 expires + POINT(after_inner); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_inner")); + EXPECT_THAT(NoLoans(), AreExpiredAt("in_inner")); + EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_inner")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalExpiry) { + SetupTest(R"( + void target(bool cond) { + MyObj s1; + MyObj* p = &s1; + POINT(before_if); + if (cond) { + MyObj s2; + p = &s2; + POINT(then_block); + } // s2 expires here + POINT(after_if); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_if")); + EXPECT_THAT(NoLoans(), AreExpiredAt("then_block")); + EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_if")); +} + +TEST_F(LifetimeAnalysisTest, LoopExpiry) { + SetupTest(R"( + void target() { + MyObj *p = nullptr; + for (int i = 0; i < 2; ++i) { + POINT(start_loop); + MyObj s; + p = &s; + POINT(end_loop); + } // s expires here on each iteration + POINT(after_loop); + } + )"); + EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("start_loop")); + EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop")); + EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_loop")); +} + +TEST_F(LifetimeAnalysisTest, MultipleExpiredLoans) { + SetupTest(R"( + void target() { + MyObj *p1, *p2, *p3; + { + MyObj s1; + p1 = &s1; + POINT(p1); + } // s1 expires + POINT(p2); + { + MyObj s2; + p2 = &s2; + MyObj s3; + p3 = &s3; + POINT(p3); + } // s2, s3 expire + POINT(p4); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("p1")); + EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p2")); + EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p3")); + EXPECT_THAT(LoansTo({"s1", "s2", "s3"}), AreExpiredAt("p4")); +} + +TEST_F(LifetimeAnalysisTest, GotoJumpsOutOfScope) { + SetupTest(R"( + void target(bool cond) { + MyObj *p = nullptr; + { + MyObj s; + p = &s; + POINT(before_goto); + if (cond) { + goto end; + } + } // `s` expires here on the path that doesn't jump + POINT(after_scope); + end: + POINT(after_goto); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_goto")); + EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_scope")); + EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_goto")); +} + +TEST_F(LifetimeAnalysisTest, ContinueInLoop) { + SetupTest(R"( + void target(int count) { + MyObj *p = nullptr; + MyObj outer; + p = &outer; + POINT(before_loop); + + for (int i = 0; i < count; ++i) { + if (i % 2 == 0) { + MyObj s_even; + p = &s_even; + POINT(in_even_iter); + continue; + } + MyObj s_odd; + p = &s_odd; + POINT(in_odd_iter); + } + POINT(after_loop); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("before_loop")); + EXPECT_THAT(LoansTo({"s_odd"}), AreExpiredAt("in_even_iter")); + EXPECT_THAT(LoansTo({"s_even"}), AreExpiredAt("in_odd_iter")); + EXPECT_THAT(LoansTo({"s_even", "s_odd"}), AreExpiredAt("after_loop")); +} + +TEST_F(LifetimeAnalysisTest, ReassignedPointerThenOriginalExpires) { + SetupTest(R"( + void target() { + MyObj* p = nullptr; + { + MyObj s1; + p = &s1; + POINT(p_has_s1); + { + MyObj s2; + p = &s2; + POINT(p_has_s2); + } + POINT(p_after_s2_expires); + } // s1 expires here. + POINT(p_after_s1_expires); + } + )"); + EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s1")); + EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s2")); + EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("p_after_s2_expires")); + EXPECT_THAT(LoansTo({"s1", "s2"}), AreExpiredAt("p_after_s1_expires")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From 99e0f66f512c477444bb2fb1719a118e7a19f37a Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Sun, 3 Aug 2025 21:45:36 +0200 Subject: [PATCH 06/35] [LifetimeSafety] Handle pruned-edges (null blocks) in dataflow (#150670) Fix a crash in the lifetime safety dataflow analysis when handling null CFG blocks. Added a null check for adjacent blocks in the dataflow analysis algorithm to prevent dereferencing null pointers. This occurs when processing CFG blocks with unreachable successors or predecessors. Original crash: https://compiler-explorer.com/z/qfzfqG5vM Fixes https://github.com/llvm/llvm-project/issues/150095 --- clang/lib/Analysis/LifetimeSafety.cpp | 2 ++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 94b8197bbf6f3..f39998cca56fe 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -603,6 +603,8 @@ class DataflowAnalysis { OutStates[B] = StateOut; Visited.set(B->getBlockID()); for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; Lattice OldInState = getInState(AdjacentB); Lattice NewInState = D.join(OldInState, StateOut); // Enqueue the adjacent block if its in-state has changed or if we have diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index a48dc45e4f806..7cd679e184f6c 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -512,6 +512,25 @@ TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) { EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("after_loop")); } +TEST_F(LifetimeAnalysisTest, InfiniteLoopPrunesEdges) { + SetupTest(R"( + void target(MyObj out) { + MyObj *p = &out; + POINT(before_loop); + + for (;;) { + POINT(begin); + MyObj in; + p = ∈ + POINT(end); + } + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"out"}, "before_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"in", "out"}, "begin")); + EXPECT_THAT(Origin("p"), HasLoansTo({"in"}, "end")); +} + TEST_F(LifetimeAnalysisTest, NestedScopes) { SetupTest(R"( void target() { From ea493b97abd59f784aaf4a4e27563fa01707a590 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Mon, 18 Aug 2025 13:46:43 +0200 Subject: [PATCH 07/35] [LifetimeSafety] Implement a basic use-after-free diagnostic (#149731) Implement use-after-free detection in the lifetime safety analysis with two warning levels. - Added a `LifetimeSafetyReporter` interface for reporting lifetime safety issues - Created two warning levels: - Definite errors (reported with `-Wexperimental-lifetime-safety-permissive`) - Potential errors (reported with `-Wexperimental-lifetime-safety-strict`) - Implemented a `LifetimeChecker` class that analyzes loan propagation and expired loans to detect use-after-free issues. - Added tracking of use sites through a new `UseFact` class. - Enhanced the `ExpireFact` to track the expressions where objects are destroyed. - Added test cases for both definite and potential use-after-free scenarios. The implementation now tracks pointer uses and can determine when a pointer is dereferenced after its loan has been expired, with appropriate diagnostics. The two warning levels provide flexibility - definite errors for high-confidence issues and potential errors for cases that depend on control flow. --- .../clang/Analysis/Analyses/LifetimeSafety.h | 51 +++- clang/include/clang/Basic/DiagnosticGroups.td | 9 +- .../clang/Basic/DiagnosticSemaKinds.td | 12 +- clang/lib/Analysis/LifetimeSafety.cpp | 264 ++++++++++++++--- clang/lib/Sema/AnalysisBasedWarnings.cpp | 31 +- clang/test/Sema/warn-lifetime-safety.cpp | 273 ++++++++++++++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 13 +- 7 files changed, 592 insertions(+), 61 deletions(-) create mode 100644 clang/test/Sema/warn-lifetime-safety.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 1c00558d32f63..7e1bfc903083e 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -19,14 +19,35 @@ #define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/StringMap.h" #include namespace clang::lifetimes { +/// Enum to track the confidence level of a potential error. +enum class Confidence { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} +}; + /// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC); +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); namespace internal { // Forward declarations of internal types. @@ -53,6 +74,7 @@ template struct ID { IDBuilder.AddInteger(Value); } }; + template inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { return OS << ID.Value; @@ -78,7 +100,8 @@ using ProgramPoint = const Fact *; /// encapsulates the various dataflow analyses. class LifetimeSafetyAnalysis { public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC); + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); ~LifetimeSafetyAnalysis(); void run(); @@ -87,7 +110,7 @@ class LifetimeSafetyAnalysis { LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; /// Returns the set of loans that have expired at a specific program point. - LoanSet getExpiredLoansAtPoint(ProgramPoint PP) const; + std::vector getExpiredLoansAtPoint(ProgramPoint PP) const; /// Finds the OriginID for a given declaration. /// Returns a null optional if not found. @@ -110,6 +133,7 @@ class LifetimeSafetyAnalysis { private: AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; std::unique_ptr Factory; std::unique_ptr FactMgr; std::unique_ptr LoanPropagation; @@ -118,4 +142,25 @@ class LifetimeSafetyAnalysis { } // namespace internal } // namespace clang::lifetimes +namespace llvm { +template +struct DenseMapInfo> { + using ID = clang::lifetimes::internal::ID; + + static inline ID getEmptyKey() { + return {DenseMapInfo::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b986f93200452..1e3c0e852e8e2 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -535,7 +535,14 @@ def Dangling : DiagGroup<"dangling", [DanglingAssignment, DanglingGsl, ReturnStackAddress]>; -def LifetimeSafety : DiagGroup<"experimental-lifetime-safety">; +def LifetimeSafetyPermissive : DiagGroup<"experimental-lifetime-safety-permissive">; +def LifetimeSafetyStrict : DiagGroup<"experimental-lifetime-safety-strict">; +def LifetimeSafety : DiagGroup<"experimental-lifetime-safety", + [LifetimeSafetyPermissive, LifetimeSafetyStrict]> { + code Documentation = [{ + Experimental warnings to detect use-after-free and related temporal safety bugs based on lifetime safety analysis. + }]; +} def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6749e2fd3502f..218465f66ca43 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10727,9 +10727,15 @@ def warn_dangling_reference_captured_by_unknown : Warning< "object whose reference is captured will be destroyed at the end of " "the full-expression">, InGroup; -def warn_experimental_lifetime_safety_dummy_warning : Warning< - "todo: remove this warning after we have atleast one warning based on the lifetime analysis">, - InGroup, DefaultIgnore; +// Diagnostics based on the Lifetime safety analysis. +def warn_lifetime_safety_loan_expires_permissive : Warning< + "object whose reference is captured does not live long enough">, + InGroup, DefaultIgnore; +def warn_lifetime_safety_loan_expires_strict : Warning< + "object whose reference is captured may not live long enough">, + InGroup, DefaultIgnore; +def note_lifetime_safety_used_here : Note<"later used here">; +def note_lifetime_safety_destroyed_here : Note<"destroyed here">; // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index f39998cca56fe..ba9f7d0f6ee36 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -45,10 +45,11 @@ struct Loan { /// is represented as empty LoanSet LoanID ID; AccessPath Path; - SourceLocation IssueLoc; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; - Loan(LoanID id, AccessPath path, SourceLocation loc) - : ID(id), Path(path), IssueLoc(loc) {} + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} }; /// An Origin is a symbolic identifier that represents the set of possible @@ -82,8 +83,8 @@ class LoanManager { public: LoanManager() = default; - Loan &addLoan(AccessPath Path, SourceLocation Loc) { - AllLoans.emplace_back(getNextLoanID(), Path, Loc); + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); return AllLoans.back(); } @@ -199,6 +200,8 @@ class Fact { AssignOrigin, /// An origin escapes the function by flowing into the return value. ReturnOfOrigin, + /// An origin is used (eg. dereferencing a pointer). + Use, /// A marker for a specific point in the code, for testing. TestPoint, }; @@ -242,12 +245,17 @@ class IssueFact : public Fact { class ExpireFact : public Fact { LoanID LID; + SourceLocation ExpiryLoc; public: static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + void dump(llvm::raw_ostream &OS) const override { OS << "Expire (LoanID: " << getLoanID() << ")\n"; } @@ -287,6 +295,24 @@ class ReturnOfOriginFact : public Fact { } }; +class UseFact : public Fact { + OriginID UsedOrigin; + const Expr *UseExpr; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(OriginID UsedOrigin, const Expr *UseExpr) + : Fact(Kind::Use), UsedOrigin(UsedOrigin), UseExpr(UseExpr) {} + + OriginID getUsedOrigin() const { return UsedOrigin; } + const Expr *getUseExpr() const { return UseExpr; } + + void dump(llvm::raw_ostream &OS) const override { + OS << "Use (OriginID: " << UsedOrigin << ")\n"; + } +}; + /// A dummy-fact used to mark a specific point in the code for testing. /// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. class TestPointFact : public Fact { @@ -417,13 +443,17 @@ class FactGenerator : public ConstStmtVisitor { if (VD->hasLocalStorage()) { OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); AccessPath AddrOfLocalVarPath(VD); - const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, - UO->getOperatorLoc()); + const Loan &L = + FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, UO); CurrentBlockFacts.push_back( FactMgr.createFact(L.ID, OID)); } } } + } else if (UO->getOpcode() == UO_Deref) { + // This is a pointer use, like '*p'. + OriginID OID = FactMgr.getOriginMgr().get(*UO->getSubExpr()); + CurrentBlockFacts.push_back(FactMgr.createFact(OID, UO)); } } @@ -492,7 +522,8 @@ class FactGenerator : public ConstStmtVisitor { // Check if the loan is for a stack variable and if that variable // is the one being destructed. if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact(L.ID)); + CurrentBlockFacts.push_back(FactMgr.createFact( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); } } @@ -618,6 +649,7 @@ class DataflowAnalysis { } } +protected: Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); } @@ -665,6 +697,8 @@ class DataflowAnalysis { return D->transfer(In, *F->getAs()); case Fact::Kind::ReturnOfOrigin: return D->transfer(In, *F->getAs()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs()); case Fact::Kind::TestPoint: return D->transfer(In, *F->getAs()); } @@ -676,6 +710,7 @@ class DataflowAnalysis { Lattice transfer(Lattice In, const ExpireFact &) { return In; } Lattice transfer(Lattice In, const AssignOriginFact &) { return In; } Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } Lattice transfer(Lattice In, const TestPointFact &) { return In; } }; @@ -693,6 +728,20 @@ static llvm::ImmutableSet join(llvm::ImmutableSet A, return A; } +/// Checks if set A is a subset of set B. +template +static bool isSubsetOf(const llvm::ImmutableSet &A, + const llvm::ImmutableSet &B) { + // Empty set is a subset of all sets. + if (A.isEmpty()) + return true; + + for (const T &Elem : A) + if (!B.contains(Elem)) + return false; + return true; +} + /// Computes the key-wise union of two ImmutableMaps. // TODO(opt): This key-wise join is a performance bottleneck. A more // efficient merge could be implemented using a Patricia Trie or HAMT @@ -700,7 +749,7 @@ static llvm::ImmutableSet join(llvm::ImmutableSet A, template static llvm::ImmutableMap join(llvm::ImmutableMap A, llvm::ImmutableMap B, - typename llvm::ImmutableMap::Factory &F, Joiner joinValues) { + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues) { if (A.getHeight() < B.getHeight()) std::swap(A, B); @@ -710,7 +759,7 @@ join(llvm::ImmutableMap A, llvm::ImmutableMap B, const K &Key = Entry.first; const V &ValB = Entry.second; if (const V *ValA = A.lookup(Key)) - A = F.add(A, Key, joinValues(*ValA, ValB)); + A = F.add(A, Key, JoinValues(*ValA, ValB)); else A = F.add(A, Key, ValB); } @@ -723,17 +772,14 @@ join(llvm::ImmutableMap A, llvm::ImmutableMap B, // ========================================================================= // using OriginLoanMap = llvm::ImmutableMap; +using ExpiredLoanMap = llvm::ImmutableMap; /// An object to hold the factories for immutable collections, ensuring /// that all created states share the same underlying memory management. struct LifetimeFactory { OriginLoanMap::Factory OriginMapFactory; LoanSet::Factory LoanSetFactory; - - /// Creates a singleton set containing only the given loan ID. - LoanSet createLoanSet(LoanID LID) { - return LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID); - } + ExpiredLoanMap::Factory ExpiredLoanMapFactory; }; /// Represents the dataflow lattice for loan propagation. @@ -774,13 +820,15 @@ struct LoanPropagationLattice { class LoanPropagationAnalysis : public DataflowAnalysis { - - LifetimeFactory &Factory; + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; public: LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LifetimeFactory &Factory) - : DataflowAnalysis(C, AC, F), Factory(Factory) {} + LifetimeFactory &LFactory) + : DataflowAnalysis(C, AC, F), + OriginLoanMapFactory(LFactory.OriginMapFactory), + LoanSetFactory(LFactory.LoanSetFactory) {} using Base::transfer; @@ -792,9 +840,9 @@ class LoanPropagationAnalysis // TODO(opt): Keep the state small by removing origins which become dead. Lattice join(Lattice A, Lattice B) { OriginLoanMap JoinedOrigins = - utils::join(A.Origins, B.Origins, Factory.OriginMapFactory, - [this](LoanSet S1, LoanSet S2) { - return utils::join(S1, S2, Factory.LoanSetFactory); + utils::join(A.Origins, B.Origins, OriginLoanMapFactory, + [&](LoanSet S1, LoanSet S2) { + return utils::join(S1, S2, LoanSetFactory); }); return Lattice(JoinedOrigins); } @@ -803,8 +851,9 @@ class LoanPropagationAnalysis Lattice transfer(Lattice In, const IssueFact &F) { OriginID OID = F.getOriginID(); LoanID LID = F.getLoanID(); - return LoanPropagationLattice(Factory.OriginMapFactory.add( - In.Origins, OID, Factory.createLoanSet(LID))); + return LoanPropagationLattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); } /// The destination origin's loan set is replaced by the source's. @@ -814,7 +863,7 @@ class LoanPropagationAnalysis OriginID SrcOID = F.getSrcOriginID(); LoanSet SrcLoans = getLoans(In, SrcOID); return LoanPropagationLattice( - Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans)); + OriginLoanMapFactory.add(In.Origins, DestOID, SrcLoans)); } LoanSet getLoans(OriginID OID, ProgramPoint P) { @@ -825,7 +874,7 @@ class LoanPropagationAnalysis LoanSet getLoans(Lattice L, OriginID OID) { if (auto *Loans = L.Origins.lookup(OID)) return *Loans; - return Factory.LoanSetFactory.getEmptySet(); + return LoanSetFactory.getEmptySet(); } }; @@ -835,10 +884,11 @@ class LoanPropagationAnalysis /// The dataflow lattice for tracking the set of expired loans. struct ExpiredLattice { - LoanSet Expired; + /// Map from an expired `LoanID` to the `ExpireFact` that made it expire. + ExpiredLoanMap Expired; ExpiredLattice() : Expired(nullptr) {}; - explicit ExpiredLattice(LoanSet S) : Expired(S) {} + explicit ExpiredLattice(ExpiredLoanMap M) : Expired(M) {} bool operator==(const ExpiredLattice &Other) const { return Expired == Other.Expired; @@ -851,8 +901,8 @@ struct ExpiredLattice { OS << "ExpiredLattice State:\n"; if (Expired.isEmpty()) OS << " \n"; - for (const LoanID &LID : Expired) - OS << " Loan " << LID << " is expired\n"; + for (const auto &[ID, _] : Expired) + OS << " Loan " << ID << " is expired\n"; } }; @@ -861,26 +911,31 @@ class ExpiredLoansAnalysis : public DataflowAnalysis { - LoanSet::Factory &Factory; + ExpiredLoanMap::Factory &Factory; public: ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, LifetimeFactory &Factory) - : DataflowAnalysis(C, AC, F), Factory(Factory.LoanSetFactory) {} + : DataflowAnalysis(C, AC, F), Factory(Factory.ExpiredLoanMapFactory) {} using Base::transfer; StringRef getAnalysisName() const { return "ExpiredLoans"; } - Lattice getInitialState() { return Lattice(Factory.getEmptySet()); } + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - /// Merges two lattices by taking the union of the expired loan sets. - Lattice join(Lattice L1, Lattice L2) const { - return Lattice(utils::join(L1.Expired, L2.Expired, Factory)); + /// Merges two lattices by taking the union of the two expired loans. + Lattice join(Lattice L1, Lattice L2) { + return Lattice( + utils::join(L1.Expired, L2.Expired, Factory, + // Take the last expiry fact to make this hermetic. + [](const ExpireFact *F1, const ExpireFact *F2) { + return F1->getExpiryLoc() > F2->getExpiryLoc() ? F1 : F2; + })); } Lattice transfer(Lattice In, const ExpireFact &F) { - return Lattice(Factory.add(In.Expired, F.getLoanID())); + return Lattice(Factory.add(In.Expired, F.getLoanID(), &F)); } // Removes the loan from the set of expired loans. @@ -912,15 +967,116 @@ class ExpiredLoansAnalysis Lattice transfer(Lattice In, const IssueFact &F) { return Lattice(Factory.remove(In.Expired, F.getLoanID())); } + + ExpiredLoanMap getExpiredLoans(ProgramPoint P) { return getState(P).Expired; } }; // ========================================================================= // -// TODO: -// - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)` -// - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)` -// - Using the above three to perform the final error reporting. +// Lifetime checker and Error reporter // ========================================================================= // +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap FinalWarningsMap; + LoanPropagationAnalysis &LoanPropagation; + ExpiredLoansAnalysis &ExpiredLoans; + FactManager &FactMgr; + AnalysisDeclContext &ADC; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(LoanPropagationAnalysis &LPA, ExpiredLoansAnalysis &ELA, + FactManager &FM, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) + : LoanPropagation(LPA), ExpiredLoans(ELA), FactMgr(FM), ADC(ADC), + Reporter(Reporter) {} + + void run() { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + for (const CFGBlock *B : *ADC.getAnalysis()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *UF = F->getAs()) + checkUse(UF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors for a given use of an Origin. + /// + /// This method is called for each 'UseFact' identified in the control flow + /// graph. It determines if the loans held by the used origin have expired + /// at the point of use. + void checkUse(const UseFact *UF) { + + OriginID O = UF->getUsedOrigin(); + + // Get the set of loans that the origin might hold at this program point. + LoanSet HeldLoans = LoanPropagation.getLoans(O, UF); + + // Get the set of all loans that have expired at this program point. + ExpiredLoanMap AllExpiredLoans = ExpiredLoans.getExpiredLoans(UF); + + // If the pointer holds no loans or no loans have expired, there's nothing + // to check. + if (HeldLoans.isEmpty() || AllExpiredLoans.isEmpty()) + return; + + // Identify loans that which have expired but are held by the pointer. Using + // them is a use-after-free. + llvm::SmallVector DefaultedLoans; + // A definite UaF error occurs if all loans the origin might hold have + // expired. + bool IsDefiniteError = true; + for (LoanID L : HeldLoans) { + if (AllExpiredLoans.contains(L)) + DefaultedLoans.push_back(L); + else + // If at least one loan is not expired, this use is not a definite UaF. + IsDefiniteError = false; + } + // If there are no defaulted loans, the use is safe. + if (DefaultedLoans.empty()) + return; + + // Determine the confidence level of the error (definite or maybe). + Confidence CurrentConfidence = + IsDefiniteError ? Confidence::Definite : Confidence::Maybe; + + // For each expired loan, create a pending warning. + for (LoanID DefaultedLoan : DefaultedLoans) { + // If we already have a warning for this loan with a higher or equal + // confidence, skip this one. + if (FinalWarningsMap.count(DefaultedLoan) && + CurrentConfidence <= FinalWarningsMap[DefaultedLoan].ConfidenceLevel) + continue; + + auto *EF = AllExpiredLoans.lookup(DefaultedLoan); + assert(EF && "Could not find ExpireFact for an expired loan."); + + FinalWarningsMap[DefaultedLoan] = {/*ExpiryLoc=*/(*EF)->getExpiryLoc(), + /*UseExpr=*/UF->getUseExpr(), + /*ConfidenceLevel=*/CurrentConfidence}; + } + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; + // ========================================================================= // // LifetimeSafetyAnalysis Class Implementation // ========================================================================= // @@ -928,8 +1084,9 @@ class ExpiredLoansAnalysis // We need this here for unique_ptr with forward declared class. LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC) - : AC(AC), Factory(std::make_unique()), +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter), Factory(std::make_unique()), FactMgr(std::make_unique()) {} void LifetimeSafetyAnalysis::run() { @@ -952,6 +1109,8 @@ void LifetimeSafetyAnalysis::run() { /// blocks; only Decls are visible. Therefore, loans in a block that /// never reach an Origin associated with a Decl can be safely dropped by /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. LoanPropagation = std::make_unique(Cfg, AC, *FactMgr, *Factory); LoanPropagation->run(); @@ -959,6 +1118,10 @@ void LifetimeSafetyAnalysis::run() { ExpiredLoans = std::make_unique(Cfg, AC, *FactMgr, *Factory); ExpiredLoans->run(); + + LifetimeChecker Checker(*LoanPropagation, *ExpiredLoans, *FactMgr, AC, + Reporter); + Checker.run(); } LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, @@ -967,9 +1130,13 @@ LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, return LoanPropagation->getLoans(OID, PP); } -LoanSet LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const { +std::vector +LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const { assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run."); - return ExpiredLoans->getState(PP).Expired; + std::vector Result; + for (const auto &pair : ExpiredLoans->getExpiredLoans(PP)) + Result.push_back(pair.first); + return Result; } std::optional @@ -1009,8 +1176,9 @@ llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { } } // namespace internal -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC) { - internal::LifetimeSafetyAnalysis Analysis(AC); +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); Analysis.run(); } } // namespace clang::lifetimes diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 2cf9382a90048..7c52f784e1389 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3498,6 +3498,31 @@ class CallableVisitor : public DynamicRecursiveASTVisitor { } }; +namespace clang::lifetimes { +namespace { +class LifetimeSafetyReporterImpl : public LifetimeSafetyReporter { + +public: + LifetimeSafetyReporterImpl(Sema &S) : S(S) {} + + void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, Confidence C) override { + S.Diag(IssueExpr->getExprLoc(), + C == Confidence::Definite + ? diag::warn_lifetime_safety_loan_expires_permissive + : diag::warn_lifetime_safety_loan_expires_strict) + << IssueExpr->getEndLoc(); + S.Diag(FreeLoc, diag::note_lifetime_safety_destroyed_here); + S.Diag(UseExpr->getExprLoc(), diag::note_lifetime_safety_used_here) + << UseExpr->getEndLoc(); + } + +private: + Sema &S; +}; +} // namespace +} // namespace clang::lifetimes + void clang::sema::AnalysisBasedWarnings::IssueWarnings( TranslationUnitDecl *TU) { if (!TU) @@ -3749,8 +3774,10 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // TODO: Enable lifetime safety analysis for other languages once it is // stable. if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { - if (AC.getCFG()) - lifetimes::runLifetimeSafetyAnalysis(AC); + if (AC.getCFG()) { + lifetimes::LifetimeSafetyReporterImpl LifetimeSafetyReporter(S); + lifetimes::runLifetimeSafetyAnalysis(AC, &LifetimeSafetyReporter); + } } // Check for violations of "called once" parameter properties. if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp new file mode 100644 index 0000000000000..660b9c9d5e243 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -0,0 +1,273 @@ +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -verify %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor + MyObj operator+(MyObj); +}; + +//===----------------------------------------------------------------------===// +// Basic Definite Use-After-Free (-W...permissive) +// These are cases where the pointer is guaranteed to be dangling at the use site. +//===----------------------------------------------------------------------===// + +void definite_simple_case() { + MyObj* p; + { + MyObj s; + p = &s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void no_use_no_error() { + MyObj* p; + { + MyObj s; + p = &s; + } +} + +void definite_pointer_chain() { + MyObj* p; + MyObj* q; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + q = p; + } // expected-note {{destroyed here}} + (void)*q; // expected-note {{later used here}} +} + +void definite_multiple_uses_one_warning() { + MyObj* p; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + // No second warning for the same loan. + p->id = 1; + MyObj* q = p; + (void)*q; +} + +void definite_multiple_pointers() { + MyObj *p, *q, *r; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + q = &s; // expected-warning {{does not live long enough}} + r = &s; // expected-warning {{does not live long enough}} + } // expected-note 3 {{destroyed here}} + (void)*p; // expected-note {{later used here}} + (void)*q; // expected-note {{later used here}} + (void)*r; // expected-note {{later used here}} +} + +void definite_single_pointer_multiple_loans(bool cond) { + MyObj *p; + if (cond){ + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj t; + p = &t; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + + +//===----------------------------------------------------------------------===// +// Potential (Maybe) Use-After-Free (-W...strict) +// These are cases where the pointer *may* become dangling, depending on the path taken. +//===----------------------------------------------------------------------===// + +void potential_if_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +// If all paths lead to a dangle, it becomes a definite error. +void potential_becomes_definite(bool cond) { + MyObj* p; + if (cond) { + MyObj temp1; + p = &temp1; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj temp2; + p = &temp2; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void definite_potential_together(bool cond) { + MyObj safe; + MyObj* p_maybe = &safe; + MyObj* p_definite = nullptr; + + { + MyObj s; + p_definite = &s; // expected-warning {{does not live long enough}} + if (cond) { + p_maybe = &s; // expected-warning {{may not live long enough}} + } + } // expected-note 2 {{destroyed here}} + (void)*p_definite; // expected-note {{later used here}} + (void)*p_maybe; // expected-note {{later used here}} +} + +void definite_overrides_potential(bool cond) { + MyObj safe; + MyObj* p; + MyObj* q; + { + MyObj s; + q = &s; // expected-warning {{does not live long enough}} + p = q; + } // expected-note {{destroyed here}} + + if (cond) { + // 'q' is conditionally "rescued". 'p' is not. + q = &safe; + } + + // The use of 'p' is a definite error because it was never rescued. + (void)*q; + (void)*p; // expected-note {{later used here}} + (void)*q; +} + + +//===----------------------------------------------------------------------===// +// Control Flow Tests +//===----------------------------------------------------------------------===// + +void potential_for_loop_use_after_loop_body(MyObj safe) { + MyObj* p = &safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + p = &s; // expected-warning {{may not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void potential_for_loop_use_before_loop_body(MyObj safe) { + MyObj* p = &safe; + for (int i = 0; i < 1; ++i) { + (void)*p; // expected-note {{later used here}} + MyObj s; + p = &s; // expected-warning {{may not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; +} + +void potential_loop_with_break(bool cond) { + MyObj safe; + MyObj* p = &safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{may not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note {{later used here}} +} + +void potential_multiple_expiry_of_same_loan(bool cond) { + // Choose the last expiry location for the loan. + MyObj safe; + MyObj* p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) { + p = &unsafe; // expected-warning {{may not live long enough}} + break; + } + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) { + p = &unsafe; // expected-warning {{may not live long enough}} + if (cond) + break; + } + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj unsafe2; + p = &unsafe2; // expected-warning {{may not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note {{later used here}} +} + +void potential_switch(int mode) { + MyObj safe; + MyObj* p = &safe; + switch (mode) { + case 1: { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured may not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + p = &safe; // This path is okay. + break; + } + } + (void)*p; // expected-note {{later used here}} +} + +void definite_switch(int mode) { + MyObj safe; + MyObj* p = &safe; + // All cases are UaF --> Definite error. + switch (mode) { + case 1: { + MyObj temp1; + p = &temp1; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + MyObj temp2; + p = &temp2; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + default: { + MyObj temp2; + p = &temp2; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note 3 {{later used here}} +} + +//===----------------------------------------------------------------------===// +// No-Error Cases +//===----------------------------------------------------------------------===// +void no_error_if_dangle_then_rescue() { + MyObj safe; + MyObj* p; + { + MyObj temp; + p = &temp; // p is temporarily dangling. + } + p = &safe; // p is "rescued" before use. + (void)*p; // This is safe. +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 7cd679e184f6c..c8d88b4ea2277 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -33,7 +33,9 @@ class LifetimeTestRunner { )"; FullCode += Code.str(); - AST = std::make_unique(FullCode); + Inputs = TestInputs(FullCode); + Inputs.Language = TestLanguage::Lang_CXX20; + AST = std::make_unique(Inputs); ASTCtx = &AST->context(); // Find the target function using AST matchers. @@ -51,7 +53,7 @@ class LifetimeTestRunner { BuildOptions.AddTemporaryDtors = true; // Run the main analysis. - Analysis = std::make_unique(*AnalysisCtx); + Analysis = std::make_unique(*AnalysisCtx, nullptr); Analysis->run(); AnnotationToPointMap = Analysis->getTestPoints(); @@ -70,6 +72,7 @@ class LifetimeTestRunner { } private: + TestInputs Inputs; std::unique_ptr AST; ASTContext *ASTCtx = nullptr; std::unique_ptr AnalysisCtx; @@ -118,11 +121,13 @@ class LifetimeTestHelper { return Analysis.getLoansAtPoint(OID, PP); } - std::optional getExpiredLoansAtPoint(llvm::StringRef Annotation) { + std::optional> + getExpiredLoansAtPoint(llvm::StringRef Annotation) { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getExpiredLoansAtPoint(PP); + auto Expired = Analysis.getExpiredLoansAtPoint(PP); + return llvm::DenseSet{Expired.begin(), Expired.end()}; } private: From 5200cdbedb2e8230b9413b1e2b260b0bb272866e Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Mon, 18 Aug 2025 21:07:41 +0200 Subject: [PATCH 08/35] [LifetimeSafety] Enhance benchmark script for new sub analyses (#149577) Enhanced the lifetime safety analysis benchmark script with more detailed performance metrics and a new nested loop test case. This is a worst case for loan expiry analysis. ### What changed? - Added a new test case `nested_loops` that generates code with N levels of nested loops to test how analysis performance scales with loop nesting depth - Improved the trace file analysis to extract durations for sub-phases of the lifetime analysis (FactGenerator, LoanPropagation, ExpiredLoans) - Enhanced the markdown report generation to include: - Relative timing results as percentages of total Clang time - More detailed complexity analysis for each analysis phase Report # Lifetime Analysis Performance Report > Generated on: 2025-08-18 13:29:57 --- ## Test Case: Pointer Cycle in Loop **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 10 | 10.75 ms | 24.61% | 0.00% | 24.38% | 0.00% | | 25 | 64.98 ms | 86.08% | 0.00% | 86.02% | 0.00% | | 50 | 709.37 ms | 98.53% | 0.00% | 98.51% | 0.00% | | 75 | 3.13 s | 99.63% | 0.00% | 99.63% | 0.00% | | 100 | 9.44 s | 99.85% | 0.00% | 99.84% | 0.00% | | 150 | 45.31 s | 99.96% | 0.00% | 99.96% | 0.00% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.87 ± 0.01) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.87 ± 0.01) | | ExpiredLoans | (Negligible) | --- ## Test Case: CFG Merges **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 10 | 8.54 ms | 0.00% | 0.00% | 0.00% | 0.00% | | 50 | 40.85 ms | 65.09% | 0.00% | 64.61% | 0.00% | | 100 | 207.70 ms | 93.58% | 0.00% | 93.46% | 0.00% | | 200 | 1.54 s | 98.82% | 0.00% | 98.78% | 0.00% | | 400 | 12.04 s | 99.72% | 0.00% | 99.71% | 0.01% | | 800 | 96.73 s | 99.94% | 0.00% | 99.94% | 0.00% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.01 ± 0.00) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.01 ± 0.00) | | ExpiredLoans | (Negligible) | --- ## Test Case: Deeply Nested Loops **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 10 | 8.25 ms | 0.00% | 0.00% | 0.00% | 0.00% | | 50 | 27.25 ms | 51.87% | 0.00% | 45.71% | 5.93% | | 100 | 113.42 ms | 82.48% | 0.00% | 72.74% | 9.62% | | 200 | 730.05 ms | 95.24% | 0.00% | 83.95% | 11.25% | | 400 | 5.40 s | 98.74% | 0.01% | 87.05% | 11.68% | | 800 | 41.86 s | 99.62% | 0.00% | 87.77% | 11.84% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n2.97 ± 0.00) | | FactGenerator | (Negligible) | | LoanPropagation | O(n2.96 ± 0.00) | | ExpiredLoans | O(n2.97 ± 0.00) | --- --- .../test/Analysis/LifetimeSafety/benchmark.py | 227 +++++++++++++----- 1 file changed, 161 insertions(+), 66 deletions(-) diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py index 9d5f36c51b9ee..4421fe9a81e21 100644 --- a/clang/test/Analysis/LifetimeSafety/benchmark.py +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -99,28 +99,84 @@ def generate_cpp_merge_test(n: int) -> str: return cpp_code -def analyze_trace_file(trace_path: str) -> tuple[float, float]: +def generate_cpp_nested_loop_test(n: int) -> str: """ - Parses the -ftime-trace JSON output to find durations. + Generates C++ code with N levels of nested loops. + This pattern tests how analysis performance scales with loop nesting depth, + which is a key factor in the complexity of dataflow analyses on structured + control flow. - Returns: - A tuple of (lifetime_analysis_duration_us, total_clang_duration_us). + Example (n=3): + struct MyObj { int id; ~MyObj() {} }; + void nested_loops_3() { + MyObj* p = nullptr; + for(int i0=0; i0<2; ++i0) { + MyObj s0; + p = &s0; + for(int i1=0; i1<2; ++i1) { + MyObj s1; + p = &s1; + for(int i2=0; i2<2; ++i2) { + MyObj s2; + p = &s2; + } + } + } + } + """ + if n <= 0: + return "// Nesting depth must be positive." + + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void nested_loops_{n}() {{\n" + cpp_code += " MyObj* p = nullptr;\n" + + for i in range(n): + indent = " " * (i + 1) + cpp_code += f"{indent}for(int i{i}=0; i{i}<2; ++i{i}) {{\n" + cpp_code += f"{indent} MyObj s{i}; p = &s{i};\n" + + for i in range(n - 1, -1, -1): + indent = " " * (i + 1) + cpp_code += f"{indent}}}\n" + + cpp_code += "}\n" + cpp_code += f"\nint main() {{ nested_loops_{n}(); return 0; }}\n" + return cpp_code + + +def analyze_trace_file(trace_path: str) -> dict: """ - lifetime_duration = 0.0 - total_duration = 0.0 + Parses the -ftime-trace JSON output to find durations for the lifetime + analysis and its sub-phases. + Returns a dictionary of durations in microseconds. + """ + durations = { + "lifetime_us": 0.0, + "total_us": 0.0, + "fact_gen_us": 0.0, + "loan_prop_us": 0.0, + "expired_loans_us": 0.0, + } + event_name_map = { + "LifetimeSafetyAnalysis": "lifetime_us", + "ExecuteCompiler": "total_us", + "FactGenerator": "fact_gen_us", + "LoanPropagation": "loan_prop_us", + "ExpiredLoans": "expired_loans_us", + } try: with open(trace_path, "r") as f: trace_data = json.load(f) for event in trace_data.get("traceEvents", []): - if event.get("name") == "LifetimeSafetyAnalysis": - lifetime_duration += float(event.get("dur", 0)) - if event.get("name") == "ExecuteCompiler": - total_duration += float(event.get("dur", 0)) - + event_name = event.get("name") + if event_name in event_name_map: + key = event_name_map[event_name] + durations[key] += float(event.get("dur", 0)) except (IOError, json.JSONDecodeError) as e: print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr) - return 0.0, 0.0 - return lifetime_duration, total_duration + return {key: 0.0 for key in durations} + return durations def power_law(n, c, k): @@ -135,8 +191,29 @@ def human_readable_time(ms: float) -> str: return f"{ms:.2f} ms" +def calculate_complexity(n_data, y_data) -> tuple[float | None, float | None]: + """ + Calculates the exponent 'k' for the power law fit y = c * n^k. + Returns a tuple of (k, k_standard_error). + """ + try: + if len(n_data) < 3 or np.all(y_data < 1e-6) or np.var(y_data) < 1e-6: + return None, None + + non_zero_indices = y_data > 0 + if np.sum(non_zero_indices) < 3: + return None, None + + n_fit, y_fit = n_data[non_zero_indices], y_data[non_zero_indices] + popt, pcov = curve_fit(power_law, n_fit, y_fit, p0=[0, 1], maxfev=5000) + k_stderr = np.sqrt(np.diag(pcov))[1] + return popt[1], k_stderr + except (RuntimeError, ValueError): + return None, None + + def generate_markdown_report(results: dict) -> str: - """Generates a Markdown-formatted report from the benchmark results.""" + """Generates a concise, Markdown-formatted report from the benchmark results.""" report = [] timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z") report.append(f"# Lifetime Analysis Performance Report") @@ -146,54 +223,52 @@ def generate_markdown_report(results: dict) -> str: for test_name, data in results.items(): title = data["title"] report.append(f"## Test Case: {title}") - report.append("") + report.append("\n**Timing Results:**\n") # Table header - report.append("| N | Analysis Time | Total Clang Time |") - report.append("|:----|--------------:|-----------------:|") + report.append( + "| N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) |" + ) + report.append( + "|:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:|" + ) # Table rows n_data = np.array(data["n"]) - analysis_data = np.array(data["lifetime_ms"]) - total_data = np.array(data["total_ms"]) + total_ms_data = np.array(data["total_ms"]) for i in range(len(n_data)): - analysis_str = human_readable_time(analysis_data[i]) - total_str = human_readable_time(total_data[i]) - report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |") - - report.append("") - - # Complexity analysis - report.append(f"**Complexity Analysis:**") - try: - # Curve fitting requires at least 3 points - if len(n_data) < 3: - raise ValueError("Not enough data points to perform curve fitting.") - - popt, pcov = curve_fit( - power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000 - ) - _, k = popt - - # Confidence Interval for k - alpha = 0.05 # 95% confidence - dof = max(0, len(n_data) - len(popt)) # degrees of freedom - t_val = t.ppf(1.0 - alpha / 2.0, dof) - # Standard error of the parameters - perr = np.sqrt(np.diag(pcov)) - k_stderr = perr[1] - k_ci_lower = k - t_val * k_stderr - k_ci_upper = k + t_val * k_stderr - - report.append( - f"- The performance for this case scales approx. as **O(n{k:.2f})**." - ) - report.append( - f"- **95% Confidence interval for exponent:** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`." - ) + total_t = total_ms_data[i] + if total_t < 1e-6: + total_t = 1.0 # Avoid division by zero + + row = [ + f"| {n_data[i]:<14} |", + f"{human_readable_time(total_t):>10} |", + f"{data['lifetime_ms'][i] / total_t * 100:>17.2f}% |", + f"{data['fact_gen_ms'][i] / total_t * 100:>18.2f}% |", + f"{data['loan_prop_ms'][i] / total_t * 100:>20.2f}% |", + f"{data['expired_loans_ms'][i] / total_t * 100:>17.2f}% |", + ] + report.append(" ".join(row)) + + report.append("\n**Complexity Analysis:**\n") + report.append("| Analysis Phase | Complexity O(nk) |") + report.append("|:------------------|:--------------------------|") + + analysis_phases = { + "Total Analysis": data["lifetime_ms"], + "FactGenerator": data["fact_gen_ms"], + "LoanPropagation": data["loan_prop_ms"], + "ExpiredLoans": data["expired_loans_ms"], + } - except (RuntimeError, ValueError) as e: - report.append(f"- Could not determine a best-fit curve for the data: {e}") + for phase_name, y_data in analysis_phases.items(): + k, delta = calculate_complexity(n_data, np.array(y_data)) + if k is not None and delta is not None: + complexity_str = f"O(n{k:.2f} ± {delta:.2f})" + else: + complexity_str = "(Negligible)" + report.append(f"| {phase_name:<17} | {complexity_str:<25} |") report.append("\n---\n") @@ -202,7 +277,7 @@ def generate_markdown_report(results: dict) -> str: def run_single_test( clang_binary: str, output_dir: str, test_name: str, generator_func, n: int -) -> tuple[float, float]: +) -> dict: """Generates, compiles, and benchmarks a single test case.""" print(f"--- Running Test: {test_name.capitalize()} with N={n} ---") @@ -221,7 +296,8 @@ def run_single_test( "-o", "/dev/null", "-ftime-trace=" + trace_file, - "-Wexperimental-lifetime-safety", + "-Xclang", + "-fexperimental-lifetime-safety", "-std=c++17", source_file, ] @@ -231,11 +307,12 @@ def run_single_test( if result.returncode != 0: print(f"Compilation failed for N={n}!", file=sys.stderr) print(result.stderr, file=sys.stderr) - return 0.0, 0.0 + return {} - lifetime_us, total_us = analyze_trace_file(trace_file) - - return lifetime_us / 1000.0, total_us / 1000.0 + durations_us = analyze_trace_file(trace_file) + return { + key.replace("_us", "_ms"): value / 1000.0 for key, value in durations_us.items() + } if __name__ == "__main__": @@ -270,6 +347,12 @@ def run_single_test( "generator_func": generate_cpp_merge_test, "n_values": [10, 50, 100, 200, 400, 800], }, + { + "name": "nested_loops", + "title": "Deeply Nested Loops", + "generator_func": generate_cpp_nested_loop_test, + "n_values": [10, 50, 100, 200, 400, 800], + }, ] results = {} @@ -282,21 +365,28 @@ def run_single_test( "n": [], "lifetime_ms": [], "total_ms": [], + "fact_gen_ms": [], + "loan_prop_ms": [], + "expired_loans_ms": [], } for n in config["n_values"]: - lifetime_ms, total_ms = run_single_test( + durations_ms = run_single_test( args.clang_binary, args.output_dir, test_name, config["generator_func"], n, ) - if total_ms > 0: + if durations_ms: results[test_name]["n"].append(n) - results[test_name]["lifetime_ms"].append(lifetime_ms) - results[test_name]["total_ms"].append(total_ms) + for key, value in durations_ms.items(): + results[test_name][key].append(value) + print( - f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}" + f" Total Analysis: {human_readable_time(durations_ms['lifetime_ms'])} | " + f"FactGen: {human_readable_time(durations_ms['fact_gen_ms'])} | " + f"LoanProp: {human_readable_time(durations_ms['loan_prop_ms'])} | " + f"ExpiredLoans: {human_readable_time(durations_ms['expired_loans_ms'])}" ) print("\n\n" + "=" * 80) @@ -305,3 +395,8 @@ def run_single_test( markdown_report = generate_markdown_report(results) print(markdown_report) + + report_filename = os.path.join(args.output_dir, "performance_report.md") + with open(report_filename, "w") as f: + f.write(markdown_report) + print(f"Report saved to: {report_filename}") From c163bef871dcf3775e168dffe04a89aa1bb67f41 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 19 Aug 2025 14:06:10 +0200 Subject: [PATCH 09/35] [LifetimeSafety] Improve Origin information in debug output (#153951) The previous debug output only showed numeric IDs for origins, making it difficult to understand what each origin represented. This change makes the debug output more informative by showing what kind of entity each origin refers to (declaration or expression) and additional details like declaration names or expression class names. This improved output makes it easier to debug and understand the lifetime safety analysis. --- clang/lib/Analysis/LifetimeSafety.cpp | 48 ++++-- .../Sema/warn-lifetime-safety-dataflow.cpp | 153 +++++++++--------- 2 files changed, 111 insertions(+), 90 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index ba9f7d0f6ee36..c2e6dd74d0758 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -175,6 +175,18 @@ class OriginManager { return NewID; } + void dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; + } + private: OriginID getNextOriginID() { return NextOriginID++; } @@ -222,7 +234,7 @@ class Fact { return nullptr; } - virtual void dump(llvm::raw_ostream &OS) const { + virtual void dump(llvm::raw_ostream &OS, const OriginManager &) const { OS << "Fact (Kind: " << static_cast(K) << ")\n"; } }; @@ -237,9 +249,10 @@ class IssueFact : public Fact { IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} LoanID getLoanID() const { return LID; } OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS) const override { - OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() - << ")\n"; + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + OS << "Issue (LoanID: " << getLoanID() << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; } }; @@ -256,7 +269,7 @@ class ExpireFact : public Fact { LoanID getLoanID() const { return LID; } SourceLocation getExpiryLoc() const { return ExpiryLoc; } - void dump(llvm::raw_ostream &OS) const override { + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { OS << "Expire (LoanID: " << getLoanID() << ")\n"; } }; @@ -274,9 +287,12 @@ class AssignOriginFact : public Fact { : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} OriginID getDestOriginID() const { return OIDDest; } OriginID getSrcOriginID() const { return OIDSrc; } - void dump(llvm::raw_ostream &OS) const override { - OS << "AssignOrigin (DestID: " << getDestOriginID() - << ", SrcID: " << getSrcOriginID() << ")\n"; + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + OS << "AssignOrigin (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << ")\n"; } }; @@ -290,8 +306,10 @@ class ReturnOfOriginFact : public Fact { ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS) const override { - OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; } }; @@ -308,8 +326,10 @@ class UseFact : public Fact { OriginID getUsedOrigin() const { return UsedOrigin; } const Expr *getUseExpr() const { return UseExpr; } - void dump(llvm::raw_ostream &OS) const override { - OS << "Use (OriginID: " << UsedOrigin << ")\n"; + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + OS << "Use ("; + OM.dump(getUsedOrigin(), OS); + OS << ")\n"; } }; @@ -326,7 +346,7 @@ class TestPointFact : public Fact { StringRef getAnnotation() const { return Annotation; } - void dump(llvm::raw_ostream &OS) const override { + void dump(llvm::raw_ostream &OS, const OriginManager &) const override { OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; } }; @@ -365,7 +385,7 @@ class FactManager { if (It != BlockToFactsMap.end()) { for (const Fact *F : It->second) { llvm::dbgs() << " "; - F->dump(llvm::dbgs()); + F->dump(llvm::dbgs(), OriginMgr); } } llvm::dbgs() << " End of Block\n"; diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 0eb3bda918f82..e180c7ddc8280 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -12,11 +12,11 @@ MyObj* return_local_addr() { MyObj x {10}; MyObj* p = &x; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]]) +// CHECK: Issue (LoanID: [[L_X:[0-9]+]], ToOrigin: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) return p; -// CHECK: AssignOrigin (DestID: [[O_RET_VAL:[0-9]+]], SrcID: [[O_P]]) -// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) +// CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) // CHECK: Expire (LoanID: [[L_X]]) } @@ -27,20 +27,20 @@ MyObj* return_local_addr() { MyObj* assign_and_return_local_addr() { MyObj y{20}; MyObj* ptr1 = &y; -// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]]) +// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], ToOrigin: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) MyObj* ptr2 = ptr1; -// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL:[0-9]+]], SrcID: [[O_PTR1]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL]]) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) ptr2 = ptr1; -// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]]) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) ptr2 = ptr2; // Self assignment. -// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL:[0-9]+]], SrcID: [[O_PTR2]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL]]) +// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) return ptr2; -// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]]) -// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) +// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: ReturnOfOrigin ([[O_PTR2_RVAL_2]] (Expr: ImplicitCastExpr)) // CHECK: Expire (LoanID: [[L_Y]]) } @@ -60,8 +60,8 @@ int return_int_val() { void loan_expires_cpp() { MyObj obj{1}; MyObj* pObj = &obj; -// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) +// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_POBJ:[0-9]+]] (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) // CHECK: Expire (LoanID: [[L_OBJ]]) } @@ -72,8 +72,8 @@ void loan_expires_cpp() { void loan_expires_trivial() { int trivial_obj = 1; int* pTrivialObj = &trivial_obj; -// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]]) +// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_PTOBJ:[0-9]+]] (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) // CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. @@ -87,15 +87,15 @@ void conditional(bool condition) { if (condition) p = &a; - // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) +// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) else p = &b; - // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) +// CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) int *q = p; - // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) - // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) +// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) } @@ -109,12 +109,12 @@ void pointers_in_a_cycle(bool condition) { MyObj* p2 = &v2; MyObj* p3 = &v3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]]) -// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]]) -// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]]) +// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], ToOrigin: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) +// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], ToOrigin: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) +// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], ToOrigin: [[O_ADDR_V3:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) while (condition) { MyObj* temp = p1; @@ -122,14 +122,14 @@ void pointers_in_a_cycle(bool condition) { p2 = p3; p3 = temp; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]]) -// CHECK: AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]]) -// CHECK: AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]]) -// CHECK: AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]]) -// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]]) +// CHECK: AssignOrigin (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) +// CHECK: AssignOrigin (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: AssignOrigin (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) +// CHECK: AssignOrigin (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: AssignOrigin (Dest: [[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) +// CHECK: AssignOrigin (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: AssignOrigin (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) +// CHECK: AssignOrigin (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) } } @@ -139,11 +139,11 @@ void overwrite_origin() { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = &s2; -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } @@ -153,10 +153,11 @@ void reassign_to_null() { MyObj s1; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = nullptr; -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) // CHECK: Expire (LoanID: [[L_S1]]) } // FIXME: Have a better representation for nullptr than just an empty origin. @@ -169,13 +170,13 @@ void reassign_in_if(bool condition) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) if (condition) { p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) } // CHECK: Block B{{[0-9]+}}: // CHECK: Expire (LoanID: [[L_S2]]) @@ -190,26 +191,26 @@ void assign_in_switch(int mode) { MyObj s3; MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) +// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) switch (mode) { case 1: p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) break; case 2: p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; default: p = &s3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]]) +// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], ToOrigin: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) break; } // CHECK: Block B{{[0-9]+}}: @@ -221,14 +222,14 @@ void assign_in_switch(int mode) { // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { MyObj* p = nullptr; - // CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) + // CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) + // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) while (condition) { MyObj inner; p = &inner; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) // CHECK: Expire (LoanID: [[L_INNER]]) } } @@ -239,14 +240,14 @@ void loop_with_break(int count) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) for (int i = 0; i < count; ++i) { if (i == 5) { p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; } } @@ -259,18 +260,18 @@ void loop_with_break(int count) { void nested_scopes() { MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) +// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) { MyObj outer; p = &outer; -// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], OriginID: [[O_ADDR_OUTER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_OUTER]]) +// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], ToOrigin: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) { MyObj inner; p = &inner; -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) } // CHECK: Expire (LoanID: [[L_INNER]]) } @@ -282,13 +283,13 @@ void pointer_indirection() { int a; int *p = &a; // CHECK: Block B1: -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) +// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) int **pp = &p; -// CHECK: Issue (LoanID: [[L_P:[0-9]+]], OriginID: [[O_ADDR_P:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PP:[0-9]+]], SrcID: [[O_ADDR_P]]) +// CHECK: Issue (LoanID: [[L_P:[0-g]+]], ToOrigin: [[O_ADDR_P:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_PP:[0-9]+]] (Decl: pp), Src: [[O_ADDR_P]] (Expr: UnaryOperator)) // FIXME: The Origin for the RHS is broken int *q = *pp; -// CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: {{[0-9]+}}) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) } From 2f37c67bc733fbe3f67cf43e3e1b385b23579e8b Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 3 Sep 2025 16:30:58 +0200 Subject: [PATCH 10/35] [LifetimeSafety] Fix duplicate loan generation for ImplicitCastExpr (#153661) This PR fixes a bug in the lifetime safety analysis where `ImplicitCastExpr` nodes were causing duplicate loan generation. The changes: 1. Remove the recursive `Visit(ICE->getSubExpr())` call in `VisitImplicitCastExpr` to prevent duplicate processing of the same expression 2. Ensure the CFG build options are properly configured for lifetime safety analysis by moving the flag check earlier 3. Enhance the unit test infrastructure to properly handle multiple loans per variable 4. Add a test case that verifies implicit casts to const don't create duplicate loans 5. Add a test case for ternary operators with a FIXME note about origin propagation These changes prevent the analysis from generating duplicate loans when expressions are wrapped in implicit casts, which improves the accuracy of the lifetime safety analysis. --- clang/lib/Analysis/LifetimeSafety.cpp | 1 - clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 +-- .../Sema/warn-lifetime-safety-dataflow.cpp | 19 +++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 53 +++++++++++-------- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index c2e6dd74d0758..3c9ac9c48b2e5 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -445,7 +445,6 @@ class FactGenerator : public ConstStmtVisitor { void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { if (!hasOrigin(ICE->getType())) return; - Visit(ICE->getSubExpr()); // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). // TODO: Consider if this is actually useful in practice. Alternatively, we diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 7c52f784e1389..7fd88a9ca73d6 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3625,6 +3625,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( AC.getCFGBuildOptions().AddCXXNewAllocator = false; AC.getCFGBuildOptions().AddCXXDefaultInitExprInCtors = true; + bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety; + // Force that certain expressions appear as CFGElements in the CFG. This // is used to speed up various analyses. // FIXME: This isn't the right factoring. This is here for initial @@ -3632,11 +3634,10 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // expect to always be CFGElements and then fill in the BuildOptions // appropriately. This is essentially a layering violation. if (P.enableCheckUnreachable || P.enableThreadSafetyAnalysis || - P.enableConsumedAnalysis) { + P.enableConsumedAnalysis || EnableLifetimeSafetyAnalysis) { // Unreachable code analysis and thread safety require a linearized CFG. AC.getCFGBuildOptions().setAllAlwaysAdd(); - } - else { + } else { AC.getCFGBuildOptions() .setAlwaysAdd(Stmt::BinaryOperatorClass) .setAlwaysAdd(Stmt::CompoundAssignOperatorClass) @@ -3647,7 +3648,6 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( .setAlwaysAdd(Stmt::UnaryOperatorClass); } - bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety; // Install the logical handler. std::optional LEH; if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index e180c7ddc8280..034c79fe7c01c 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -293,3 +293,22 @@ void pointer_indirection() { int *q = *pp; // CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) } + +// CHECK-LABEL: Function: ternary_operator +// FIXME: Propagate origins across ConditionalOperator. +void ternary_operator() { + int a, b; + int *p; + p = (a > b) ? &a : &b; + // CHECK: Block B2: + // CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) + // CHECK: End of Block + + // CHECK: Block B3: + // CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) + // CHECK: End of Block + + // CHECK: Block B1: + // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) + // CHECK: End of Block +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index c8d88b4ea2277..13e5832d70050 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -11,6 +11,7 @@ #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -20,6 +21,7 @@ namespace clang::lifetimes::internal { namespace { using namespace ast_matchers; +using ::testing::SizeIs; using ::testing::UnorderedElementsAreArray; // A helper class to run the full lifetime analysis on a piece of code @@ -96,21 +98,18 @@ class LifetimeTestHelper { return OID; } - std::optional getLoanForVar(llvm::StringRef VarName) { + std::vector getLoansForVar(llvm::StringRef VarName) { auto *VD = findDecl(VarName); - if (!VD) - return std::nullopt; + if (!VD) { + ADD_FAILURE() << "No VarDecl found for '" << VarName << "'"; + return {}; + } std::vector LID = Analysis.getLoanIDForVar(VD); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; - return std::nullopt; - } - // TODO: Support retrieving more than one loans to a var. - if (LID.size() > 1) { - ADD_FAILURE() << "More than 1 loans found for '" << VarName; - return std::nullopt; + return {}; } - return LID[0]; + return LID; } std::optional getLoansAtPoint(OriginID OID, @@ -121,13 +120,12 @@ class LifetimeTestHelper { return Analysis.getLoansAtPoint(OID, PP); } - std::optional> + std::optional> getExpiredLoansAtPoint(llvm::StringRef Annotation) { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - auto Expired = Analysis.getExpiredLoansAtPoint(PP); - return llvm::DenseSet{Expired.begin(), Expired.end()}; + return Analysis.getExpiredLoansAtPoint(PP); } private: @@ -197,12 +195,13 @@ MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { std::vector ExpectedLoans; for (const auto &LoanVar : LoanVars) { - std::optional ExpectedLIDOpt = Info.Helper.getLoanForVar(LoanVar); - if (!ExpectedLIDOpt) { + std::vector ExpectedLIDs = Info.Helper.getLoansForVar(LoanVar); + if (ExpectedLIDs.empty()) { *result_listener << "could not find loan for var '" << LoanVar << "'"; return false; } - ExpectedLoans.push_back(*ExpectedLIDOpt); + ExpectedLoans.insert(ExpectedLoans.end(), ExpectedLIDs.begin(), + ExpectedLIDs.end()); } return ExplainMatchResult(UnorderedElementsAreArray(ExpectedLoans), @@ -221,17 +220,17 @@ MATCHER_P(AreExpiredAt, Annotation, "") { << Annotation << "'"; return false; } - std::vector ActualExpiredLoans(ActualExpiredSetOpt->begin(), - ActualExpiredSetOpt->end()); + std::vector ActualExpiredLoans = *ActualExpiredSetOpt; std::vector ExpectedExpiredLoans; for (const auto &VarName : Info.LoanVars) { - auto LoanIDOpt = Helper.getLoanForVar(VarName); - if (!LoanIDOpt) { + auto LoanIDs = Helper.getLoansForVar(VarName); + if (LoanIDs.empty()) { *result_listener << "could not find a loan for variable '" << VarName << "'"; return false; } - ExpectedExpiredLoans.push_back(*LoanIDOpt); + ExpectedExpiredLoans.insert(ExpectedExpiredLoans.end(), LoanIDs.begin(), + LoanIDs.end()); } return ExplainMatchResult(UnorderedElementsAreArray(ExpectedExpiredLoans), ActualExpiredLoans, result_listener); @@ -730,5 +729,17 @@ TEST_F(LifetimeAnalysisTest, ReassignedPointerThenOriginalExpires) { EXPECT_THAT(LoansTo({"s1", "s2"}), AreExpiredAt("p_after_s1_expires")); } +TEST_F(LifetimeAnalysisTest, NoDuplicateLoansForImplicitCastToConst) { + SetupTest(R"( + void target() { + MyObj a; + const MyObj* p = &a; + const MyObj* q = &a; + POINT(at_end); + } + )"); + EXPECT_THAT(Helper->getLoansForVar("a"), SizeIs(2)); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From f6ffb7d684fecbfb54ea31f70cf013b68fed67e1 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Fri, 5 Sep 2025 22:17:00 +0200 Subject: [PATCH 11/35] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (#154316) Instead of identifying various forms of pointer usage (like dereferencing, member access, or function calls) individually, this new approach simplifies the logic by treating all `DeclRefExpr`s as uses of their underlying origin When a `DeclRefExpr` appears on the left-hand side of an assignment, the corresponding `UseFact` is marked as a "write" operation. These write operations are then exempted from use-after-free checks. --- clang/lib/Analysis/LifetimeSafety.cpp | 89 +++++++++++++------ .../Sema/warn-lifetime-safety-dataflow.cpp | 26 ++++++ 2 files changed, 89 insertions(+), 26 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 3c9ac9c48b2e5..dbbf7f3cc14b1 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -118,6 +118,7 @@ class OriginManager { return AllOrigins.back(); } + // TODO: Mark this method as const once we remove the call to getOrCreate. OriginID get(const Expr &E) { // Origin of DeclRefExpr is that of the declaration it refers to. if (const auto *DRE = dyn_cast(&E)) @@ -314,22 +315,28 @@ class ReturnOfOriginFact : public Fact { }; class UseFact : public Fact { - OriginID UsedOrigin; const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; public: static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - UseFact(OriginID UsedOrigin, const Expr *UseExpr) - : Fact(Kind::Use), UsedOrigin(UsedOrigin), UseExpr(UseExpr) {} + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - OriginID getUsedOrigin() const { return UsedOrigin; } + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. + return const_cast(OM).get(*UseExpr); + } const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { OS << "Use ("; - OM.dump(getUsedOrigin(), OS); - OS << ")\n"; + OM.dump(getUsedOrigin(OM), OS); + OS << " " << (isWritten() ? "Write" : "Read") << ")\n"; } }; @@ -436,6 +443,8 @@ class FactGenerator : public ConstStmtVisitor { addAssignOriginFact(*VD, *InitExpr); } + void VisitDeclRefExpr(const DeclRefExpr *DRE) { handleUse(DRE); } + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized /// pointers can use the same type of loan. @@ -469,10 +478,6 @@ class FactGenerator : public ConstStmtVisitor { } } } - } else if (UO->getOpcode() == UO_Deref) { - // This is a pointer use, like '*p'. - OriginID OID = FactMgr.getOriginMgr().get(*UO->getSubExpr()); - CurrentBlockFacts.push_back(FactMgr.createFact(OID, UO)); } } @@ -487,20 +492,13 @@ class FactGenerator : public ConstStmtVisitor { } void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) { - const Expr *LHSExpr = BO->getLHS(); - const Expr *RHSExpr = BO->getRHS(); - - // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` - // LHS must be a pointer/reference type that can be an origin. - // RHS must also represent an origin (either another pointer/ref or an - // address-of). - if (const auto *DRE_LHS = dyn_cast(LHSExpr)) - if (const auto *VD_LHS = - dyn_cast(DRE_LHS->getDecl()->getCanonicalDecl()); - VD_LHS && hasOrigin(VD_LHS->getType())) - addAssignOriginFact(*VD_LHS, *RHSExpr); - } + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) + handleAssignment(OCE->getArg(0), OCE->getArg(1)); } void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { @@ -567,9 +565,47 @@ class FactGenerator : public ConstStmtVisitor { return false; } + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) + if (hasOrigin(LHSExpr->getType())) + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` + // LHS must be a pointer/reference type that can be an origin. + // RHS must also represent an origin (either another pointer/ref or an + // address-of). + addAssignOriginFact(*VD_LHS, *RHSExpr); + } + } + + // A DeclRefExpr is a use of the referenced decl. It is checked for + // use-after-free unless it is being written to (e.g. on the left-hand side + // of an assignment). + void handleUse(const DeclRefExpr *DRE) { + if (hasOrigin(DRE->getType())) { + UseFact *UF = FactMgr.createFact(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } + } + + void markUseAsWrite(const DeclRefExpr *DRE) { + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); + } + FactManager &FactMgr; AnalysisDeclContext &AC; llvm::SmallVector CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap UseFacts; }; // ========================================================================= // @@ -1032,8 +1068,9 @@ class LifetimeChecker { /// graph. It determines if the loans held by the used origin have expired /// at the point of use. void checkUse(const UseFact *UF) { - - OriginID O = UF->getUsedOrigin(); + if (UF->isWritten()) + return; + OriginID O = UF->getUsedOrigin(FactMgr.getOriginMgr()); // Get the set of loans that the origin might hold at this program point. LoanSet HeldLoans = LoanPropagation.getLoans(O, UF); diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 034c79fe7c01c..42042b44549a6 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -312,3 +312,29 @@ void ternary_operator() { // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) // CHECK: End of Block } + +// CHECK-LABEL: Function: test_use_facts +void usePointer(MyObj*); +void test_use_facts() { + // CHECK: Block B{{[0-9]+}}: + MyObj x; + MyObj *p; + p = &x; + // CHECK: Use ([[O_P:[0-9]+]] (Decl: p) Write) + (void)*p; + // CHECK: Use ([[O_P]] (Decl: p) Read) + usePointer(p); + // CHECK: Use ([[O_P]] (Decl: p) Read) + p->id = 1; + // CHECK: Use ([[O_P]] (Decl: p) Read) + + + MyObj* q; + q = p; + // CHECK: Use ([[O_P]] (Decl: p) Read) + // CHECK: Use ([[O_Q:[0-9]+]] (Decl: q) Write) + usePointer(q); + // CHECK: Use ([[O_Q]] (Decl: q) Read) + q->id = 2; + // CHECK: Use ([[O_Q]] (Decl: q) Read) +} From d3b3a7c5e6e1d1640c1e613cc3530534b386c462 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 10 Sep 2025 12:00:04 +0200 Subject: [PATCH 12/35] [LifetimeSafety] Add PR labeler automation (#157820) This will add label `clang:temporal-safety` to PRs touching the mentioned files. --- .github/new-prs-labeler.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index b05e9c6c56ed0..f38d27a85d39a 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1082,6 +1082,14 @@ clang:openmp: - llvm/unittests/Frontend/OpenMP* - llvm/test/Transforms/OpenMP/** +clang:temporal-safety: + - clang/include/clang/Analysis/Analyses/LifetimeSafety* + - clang/lib/Analysis/LifetimeSafety* + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** + clang:as-a-library: - clang/tools/libclang/** - clang/bindings/** From 815ff9b2b40279c580bd6ad3ed75b18e2f3cdd9f Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 11 Sep 2025 15:52:30 +0200 Subject: [PATCH 13/35] [LifetimeSafety] Associate origins to all l-valued expressions (#156896) This patch refactors the C++ lifetime safety analysis to implement a more consistent model for tracking borrows. The central idea is to make loan creation a consequence of referencing a variable, while making loan propagation dependent on the type's semantics. This change introduces a more uniform model for tracking borrows from non-pointer types: * Centralised Loan Creation: A Loan is now created for every `DeclRefExpr` that refers to a **non-pointer type** (e.g., `std::string`, `int`). This correctly models that any use of an **gl-value** is a borrow of its storage, replacing the previous heuristic-based loan creation. * The address-of operator (&) no longer creates loans. Instead, it propagates the origin (and thus the loans) of its sub-expression. This is guarded to exclude expressions that are already pointer types, deferring the complexity of pointers-to-pointers. **Future Work: Multi-Origin Model** This patch deliberately defers support for creating loans on references to pointer-type expressions (e.g., `&my_pointer`). The current single-origin model is unable to distinguish between a loan to the pointer variable itself (its storage) and a loan to the object it points to. The future plan is to move to a multi-origin model where a type has a "list of origins" governed by its level of indirection, which will allow the analysis to track these distinct lifetimes separately. Once this more advanced model is in place, the restriction can be lifted, and all `DeclRefExpr` nodes, regardless of type, can uniformly create a loan, making the analysis consistent. --- clang/lib/Analysis/LifetimeSafety.cpp | 178 +++++++----- .../Sema/warn-lifetime-safety-dataflow.cpp | 260 +++++++++++------- 2 files changed, 269 insertions(+), 169 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index dbbf7f3cc14b1..e687e5419c50a 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -50,6 +50,11 @@ struct Loan { Loan(LoanID id, AccessPath path, const Expr *IssueExpr) : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; + } }; /// An Origin is a symbolic identifier that represents the set of possible @@ -120,17 +125,19 @@ class OriginManager { // TODO: Mark this method as const once we remove the call to getOrCreate. OriginID get(const Expr &E) { - // Origin of DeclRefExpr is that of the declaration it refers to. + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. if (const auto *DRE = dyn_cast(&E)) return get(*DRE->getDecl()); - auto It = ExprToOriginID.find(&E); // TODO: This should be an assert(It != ExprToOriginID.end()). The current // implementation falls back to getOrCreate to avoid crashing on // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == ExprToOriginID.end()) - return getOrCreate(E); - - return It->second; + return getOrCreate(E); } OriginID get(const ValueDecl &D) { @@ -149,10 +156,6 @@ class OriginManager { if (It != ExprToOriginID.end()) return It->second; - if (const auto *DRE = dyn_cast(&E)) { - // Origin of DeclRefExpr is that of the declaration it refers to. - return getOrCreate(*DRE->getDecl()); - } OriginID NewID = getNextOriginID(); addOrigin(NewID, E); ExprToOriginID[&E] = NewID; @@ -235,7 +238,8 @@ class Fact { return nullptr; } - virtual void dump(llvm::raw_ostream &OS, const OriginManager &) const { + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { OS << "Fact (Kind: " << static_cast(K) << ")\n"; } }; @@ -250,8 +254,11 @@ class IssueFact : public Fact { IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} LoanID getLoanID() const { return LID; } OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { - OS << "Issue (LoanID: " << getLoanID() << ", ToOrigin: "; + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; OM.dump(getOriginID(), OS); OS << ")\n"; } @@ -270,8 +277,11 @@ class ExpireFact : public Fact { LoanID getLoanID() const { return LID; } SourceLocation getExpiryLoc() const { return ExpiryLoc; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { - OS << "Expire (LoanID: " << getLoanID() << ")\n"; + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; } }; @@ -288,7 +298,8 @@ class AssignOriginFact : public Fact { : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} OriginID getDestOriginID() const { return OIDDest; } OriginID getSrcOriginID() const { return OIDSrc; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "AssignOrigin (Dest: "; OM.dump(getDestOriginID(), OS); OS << ", Src: "; @@ -307,7 +318,8 @@ class ReturnOfOriginFact : public Fact { ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "ReturnOfOrigin ("; OM.dump(getReturnedOriginID(), OS); OS << ")\n"; @@ -333,10 +345,11 @@ class UseFact : public Fact { void markAsWritten() { IsWritten = true; } bool isWritten() const { return IsWritten; } - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { OS << "Use ("; OM.dump(getUsedOrigin(OM), OS); - OS << " " << (isWritten() ? "Write" : "Read") << ")\n"; + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; } }; @@ -353,7 +366,8 @@ class TestPointFact : public Fact { StringRef getAnnotation() const { return Annotation; } - void dump(llvm::raw_ostream &OS, const OriginManager &) const override { + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override { OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; } }; @@ -392,7 +406,7 @@ class FactManager { if (It != BlockToFactsMap.end()) { for (const Fact *F : It->second) { llvm::dbgs() << " "; - F->dump(llvm::dbgs(), OriginMgr); + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); } } llvm::dbgs() << " End of Block\n"; @@ -438,12 +452,31 @@ class FactGenerator : public ConstStmtVisitor { void VisitDeclStmt(const DeclStmt *DS) { for (const Decl *D : DS->decls()) if (const auto *VD = dyn_cast(D)) - if (hasOrigin(VD->getType())) + if (hasOrigin(VD)) if (const Expr *InitExpr = VD->getInit()) addAssignOriginFact(*VD, *InitExpr); } - void VisitDeclRefExpr(const DeclRefExpr *DRE) { handleUse(DRE); } + void VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact(L->ID, ExprOID)); + } + } + } void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized @@ -452,38 +485,31 @@ class FactGenerator : public ConstStmtVisitor { } void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE->getType())) + if (!hasOrigin(ICE)) return; // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). - // TODO: Consider if this is actually useful in practice. Alternatively, we - // could directly use the sub-expression's OriginID instead of creating a - // new one. addAssignOriginFact(*ICE, *ICE->getSubExpr()); } void VisitUnaryOperator(const UnaryOperator *UO) { if (UO->getOpcode() == UO_AddrOf) { const Expr *SubExpr = UO->getSubExpr(); - if (const auto *DRE = dyn_cast(SubExpr)) { - if (const auto *VD = dyn_cast(DRE->getDecl())) { - // Check if it's a local variable. - if (VD->hasLocalStorage()) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); - AccessPath AddrOfLocalVarPath(VD); - const Loan &L = - FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, UO); - CurrentBlockFacts.push_back( - FactMgr.createFact(L.ID, OID)); - } - } - } + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + addAssignOriginFact(*UO, *SubExpr); } } void VisitReturnStmt(const ReturnStmt *RS) { if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr->getType())) { + if (hasOrigin(RetExpr)) { OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); CurrentBlockFacts.push_back( FactMgr.createFact(OID)); @@ -506,20 +532,6 @@ class FactGenerator : public ConstStmtVisitor { // expression. if (VisitTestPoint(FCE)) return; - // Visit as normal otherwise. - Base::VisitCXXFunctionalCastExpr(FCE); - } - -private: - // Check if a type has an origin. - bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } - - template - void addAssignOriginFact(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID)); } void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { @@ -544,6 +556,41 @@ class FactGenerator : public ConstStmtVisitor { } } +private: + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType(); + } + + // Check if a type has an origin. + static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); + } + + static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); + } + + /// Creates a loan for the storage path of a given declaration reference. + /// This function should be called whenever a DeclRefExpr represents a borrow. + /// \param DRE The declaration reference expression that initiates the borrow. + /// \return The new Loan on success, nullptr otherwise. + const Loan *createLoan(const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; + } + + template + void addAssignOriginFact(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID)); + } + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. /// If so, creates a `TestPointFact` and returns true. bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { @@ -566,25 +613,26 @@ class FactGenerator : public ConstStmtVisitor { } void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; // Find the underlying variable declaration for the left-hand side. if (const auto *DRE_LHS = dyn_cast(LHSExpr->IgnoreParenImpCasts())) { markUseAsWrite(DRE_LHS); if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) - if (hasOrigin(LHSExpr->getType())) - // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` - // LHS must be a pointer/reference type that can be an origin. - // RHS must also represent an origin (either another pointer/ref or an - // address-of). - addAssignOriginFact(*VD_LHS, *RHSExpr); + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`. + // LHS must be a pointer/reference type that can be an origin. RHS must + // also represent an origin (either another pointer/ref or an + // address-of). + addAssignOriginFact(*VD_LHS, *RHSExpr); } } - // A DeclRefExpr is a use of the referenced decl. It is checked for - // use-after-free unless it is being written to (e.g. on the left-hand side - // of an assignment). + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). void handleUse(const DeclRefExpr *DRE) { - if (hasOrigin(DRE->getType())) { + if (isPointerType(DRE->getType())) { UseFact *UF = FactMgr.createFact(DRE); CurrentBlockFacts.push_back(UF); assert(!UseFacts.contains(DRE)); diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 42042b44549a6..805dd305046e4 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -10,45 +10,55 @@ struct MyObj { // CHECK-LABEL: Function: return_local_addr MyObj* return_local_addr() { MyObj x {10}; - MyObj* p = &x; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_X:[0-9]+]], ToOrigin: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) + MyObj* p = &x; // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) return p; +// CHECK: Use ([[O_P]] (Decl: p), Read) // CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) // CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_X]]) +// CHECK: Expire ([[L_X]] (Path: x)) } // Pointer Assignment and Return // CHECK-LABEL: Function: assign_and_return_local_addr -// CHECK-NEXT: Block B{{[0-9]+}}: MyObj* assign_and_return_local_addr() { MyObj y{20}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) MyObj* ptr1 = &y; -// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], ToOrigin: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) +// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) MyObj* ptr2 = ptr1; -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) -// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) ptr2 = ptr1; -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: Use ({{[0-9]+}} (Decl: ptr2), Write) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) ptr2 = ptr2; // Self assignment. -// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Write) +// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) return ptr2; -// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) -// CHECK: ReturnOfOrigin ([[O_PTR2_RVAL_2]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_Y]]) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) +// CHECK: Expire ([[L_Y]] (Path: y)) } // Return of Non-Pointer Type // CHECK-LABEL: Function: return_int_val -// CHECK-NEXT: Block B{{[0-9]+}}: int return_int_val() { int x = 10; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: {{[0-9]+}} (Expr: DeclRefExpr)) return x; } // CHECK-NEXT: End of Block @@ -56,25 +66,27 @@ int return_int_val() { // Loan Expiration (Automatic Variable, C++) // CHECK-LABEL: Function: loan_expires_cpp -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_cpp() { MyObj obj{1}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_OBJ:[0-9]+]] (Path: obj), ToOrigin: [[O_DRE_OBJ:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OBJ]] (Expr: DeclRefExpr)) MyObj* pObj = &obj; -// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_POBJ:[0-9]+]] (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_OBJ]]) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_OBJ]] (Path: obj)) } // FIXME: No expire for Trivial Destructors // CHECK-LABEL: Function: loan_expires_trivial -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_trivial() { int trivial_obj = 1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_TRIVIAL_OBJ:[0-9]+]] (Path: trivial_obj), ToOrigin: [[O_DRE_TRIVIAL:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) int* pTrivialObj = &trivial_obj; -// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], ToOrigin: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PTOBJ:[0-9]+]] (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) -// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) +// CHECK-NOT: Expire // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. } @@ -86,16 +98,22 @@ void conditional(bool condition) { int* p = nullptr; if (condition) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) p = &a; -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) else +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_B]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) p = &b; -// CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) +// CHECK: Block B{{[0-9]+}}: int *q = p; -// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) -// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) } @@ -109,26 +127,36 @@ void pointers_in_a_cycle(bool condition) { MyObj* p2 = &v2; MyObj* p3 = &v3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], ToOrigin: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V1:[0-9]+]] (Path: v1), ToOrigin: [[O_DRE_V1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) -// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], ToOrigin: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V2:[0-9]+]] (Path: v2), ToOrigin: [[O_DRE_V2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V2]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) -// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], ToOrigin: [[O_ADDR_V3:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V3:[0-9]+]] (Path: v3), ToOrigin: [[O_DRE_V3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_V3:[0-g]+]] (Expr: UnaryOperator), Src: [[O_DRE_V3]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) while (condition) { - MyObj* temp = p1; - p1 = p2; - p2 = p3; - p3 = temp; // CHECK: Block B{{[0-9]+}}: + MyObj* temp = p1; +// CHECK: Use ([[O_P1]] (Decl: p1), Read) // CHECK: AssignOrigin (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) // CHECK: AssignOrigin (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) + p1 = p2; +// CHECK: Use ([[O_P2:[0-9]+]] (Decl: p2), Read) // CHECK: AssignOrigin (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) +// CHECK: Use ({{[0-9]+}} (Decl: p1), Write) // CHECK: AssignOrigin (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) + p2 = p3; +// CHECK: Use ([[O_P3:[0-9]+]] (Decl: p3), Read) // CHECK: AssignOrigin (Dest: [[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) +// CHECK: Use ({{[0-9]+}} (Decl: p2), Write) // CHECK: AssignOrigin (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) + p3 = temp; +// CHECK: Use ([[O_TEMP]] (Decl: temp), Read) // CHECK: AssignOrigin (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) +// CHECK: Use ({{[0-9]+}} (Decl: p3), Write) // CHECK: AssignOrigin (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) } } @@ -137,28 +165,33 @@ void pointers_in_a_cycle(bool condition) { void overwrite_origin() { MyObj s1; MyObj s2; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = &s2; -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: reassign_to_null void reassign_to_null() { MyObj s1; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = nullptr; // CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. @@ -170,17 +203,20 @@ void reassign_in_if(bool condition) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) if (condition) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } @@ -195,42 +231,51 @@ void assign_in_switch(int mode) { // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) switch (mode) { case 1: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s1; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK-DAG: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) break; case 2: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s2; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK-DAG: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; default: - p = &s3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], ToOrigin: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator)) + p = &s3; +// CHECK: Issue ([[L_S3:[0-9]+]] (Path: s3), ToOrigin: [[O_DRE_S3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S3]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) break; } // CHECK: Block B{{[0-9]+}}: -// CHECK-DAG: Expire (LoanID: [[L_S3]]) -// CHECK-DAG: Expire (LoanID: [[L_S2]]) -// CHECK-DAG: Expire (LoanID: [[L_S1]]) +// CHECK-DAG: Expire ([[L_S3]] (Path: s3)) +// CHECK-DAG: Expire ([[L_S2]] (Path: s2)) +// CHECK-DAG: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { MyObj* p = nullptr; - // CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) - // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) while (condition) { MyObj inner; - p = &inner; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) + p = &inner; +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) -// CHECK: Expire (LoanID: [[L_INNER]]) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } } @@ -240,20 +285,23 @@ void loop_with_break(int count) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], ToOrigin: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) for (int i = 0; i < count; ++i) { if (i == 5) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], ToOrigin: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator)) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; } } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // CHECK-LABEL: Function: nested_scopes @@ -265,32 +313,36 @@ void nested_scopes() { { MyObj outer; p = &outer; -// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], ToOrigin: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_OUTER:[0-9]+]] (Path: outer), ToOrigin: [[O_DRE_OUTER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OUTER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) { MyObj inner; p = &inner; -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], ToOrigin: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) // CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) } -// CHECK: Expire (LoanID: [[L_INNER]]) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } -// CHECK: Expire (LoanID: [[L_OUTER]]) +// CHECK: Expire ([[L_OUTER]] (Path: outer)) } // CHECK-LABEL: Function: pointer_indirection void pointer_indirection() { int a; int *p = &a; -// CHECK: Block B1: -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) int **pp = &p; -// CHECK: Issue (LoanID: [[L_P:[0-g]+]], ToOrigin: [[O_ADDR_P:[0-9]+]] (Expr: UnaryOperator)) -// CHECK: AssignOrigin (Dest: [[O_PP:[0-9]+]] (Decl: pp), Src: [[O_ADDR_P]] (Expr: UnaryOperator)) - -// FIXME: The Origin for the RHS is broken +// Note: No facts are generated for &p because the subexpression is a pointer type, +// which is not yet supported by the origin model. This is expected. int *q = *pp; +// CHECK: Use ([[O_PP:[0-9]+]] (Decl: pp), Read) // CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) } @@ -300,41 +352,41 @@ void ternary_operator() { int a, b; int *p; p = (a > b) ? &a : &b; - // CHECK: Block B2: - // CHECK: Issue (LoanID: [[L_A:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) - // CHECK: End of Block - - // CHECK: Block B3: - // CHECK: Issue (LoanID: [[L_B:[0-9]+]], ToOrigin: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator)) - // CHECK: End of Block - - // CHECK: Block B1: - // CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) - // CHECK: End of Block +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) } // CHECK-LABEL: Function: test_use_facts void usePointer(MyObj*); void test_use_facts() { - // CHECK: Block B{{[0-9]+}}: MyObj x; MyObj *p; +// CHECK: Block B{{[0-9]+}}: p = &x; - // CHECK: Use ([[O_P:[0-9]+]] (Decl: p) Write) +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Use ([[O_P:[0-9]+]] (Decl: p), Write) +// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) (void)*p; - // CHECK: Use ([[O_P]] (Decl: p) Read) +// CHECK: Use ([[O_P]] (Decl: p), Read) usePointer(p); - // CHECK: Use ([[O_P]] (Decl: p) Read) +// CHECK: Use ([[O_P]] (Decl: p), Read) p->id = 1; - // CHECK: Use ([[O_P]] (Decl: p) Read) - - +// CHECK: Use ([[O_P]] (Decl: p), Read) MyObj* q; q = p; - // CHECK: Use ([[O_P]] (Decl: p) Read) - // CHECK: Use ([[O_Q:[0-9]+]] (Decl: q) Write) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: Use ([[O_Q:[0-9]+]] (Decl: q), Write) usePointer(q); - // CHECK: Use ([[O_Q]] (Decl: q) Read) +// CHECK: Use ([[O_Q]] (Decl: q), Read) q->id = 2; - // CHECK: Use ([[O_Q]] (Decl: q) Read) -} +// CHECK: Use ([[O_Q]] (Decl: q), Read) +// CHECK: Expire ([[L_X]] (Path: x)) +} \ No newline at end of file From 339d0dbdfed700a467944a70047a2a877e609471 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Sun, 14 Sep 2025 14:19:19 +0200 Subject: [PATCH 14/35] [LifetimeSafety] Add support for GSL Pointer types (#154009) This extends the lifetime safety analysis to support C++ types annotated with `gsl::Pointer`, which represent non-owning "view" types like `std::string_view`. These types have the same lifetime safety concerns as raw pointers and references. - Added support for detecting and analyzing `gsl::Pointer` annotated types in lifetime safety analysis - Implemented handling for various expressions involving `gsl::Pointer` types: - Constructor expressions - Member call expressions (especially conversion operators) - Functional cast expressions - Initialization list expressions - Materialized temporary expressions - Updated the pointer type detection to recognize `gsl::Pointer` types - Added handling for function calls that create borrows through reference parameters Fixes: https://github.com/llvm/llvm-project/issues/152513 --- clang/lib/Analysis/LifetimeSafety.cpp | 94 ++++++++- clang/test/Sema/warn-lifetime-safety.cpp | 120 +++++++++++- .../unittests/Analysis/LifetimeSafetyTest.cpp | 182 +++++++++++++++++- 3 files changed, 381 insertions(+), 15 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index e687e5419c50a..0dd5716d93fb6 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -478,6 +478,25 @@ class FactGenerator : public ConstStmtVisitor { } } + void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } + } + + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}); + } + // FIXME: A more general VisitCallExpr could also be used here. + } + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized /// pointers can use the same type of loan. @@ -530,8 +549,27 @@ class FactGenerator : public ConstStmtVisitor { void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { // Check if this is a test point marker. If so, we are done with this // expression. - if (VisitTestPoint(FCE)) + if (handleTestPoint(FCE)) return; + if (isGslPointerType(FCE->getType())) + addAssignOriginFact(*FCE, *FCE->getSubExpr()); + } + + void VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + addAssignOriginFact(*ILE, *ILE->getInit(0)); + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + addAssignOriginFact(*MTE, *MTE->getSubExpr()); } void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { @@ -557,10 +595,21 @@ class FactGenerator : public ConstStmtVisitor { } private: - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType(); + static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + return false; } + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); + } // Check if a type has an origin. static bool hasOrigin(const Expr *E) { return E->isGLValue() || isPointerType(E->getType()); @@ -570,6 +619,41 @@ class FactGenerator : public ConstStmtVisitor { return isPointerType(VD->getType()); } + void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + addAssignOriginFact(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}); + } + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args) { + if (!FD) + return; + // TODO: Handle more than one arguments. + for (unsigned I = 0; I <= 0 /*Args.size()*/; ++I) { + const Expr *ArgExpr = Args[I]; + + // Propagate origins for CXX this. + if (FD->isCXXClassMember() && I == 0) { + addAssignOriginFact(*Call, *ArgExpr); + continue; + } + // The parameter is a pointer, reference, or gsl::Pointer. + // This is a borrow. We propagate the origin from the argument expression + // at the call site to the parameter declaration in the callee. + if (hasOrigin(ArgExpr)) + addAssignOriginFact(*Call, *ArgExpr); + } + } + /// Creates a loan for the storage path of a given declaration reference. /// This function should be called whenever a DeclRefExpr represents a borrow. /// \param DRE The declaration reference expression that initiates the borrow. @@ -593,7 +677,7 @@ class FactGenerator : public ConstStmtVisitor { /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. /// If so, creates a `TestPointFact` and returns true. - bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) { + bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { if (!FCE->getType()->isVoidType()) return false; @@ -641,6 +725,8 @@ class FactGenerator : public ConstStmtVisitor { } void markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; assert(UseFacts.contains(DRE)); UseFacts[DRE]->markAsWritten(); } diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 660b9c9d5e243..bc8a5f3f7150f 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -6,6 +6,12 @@ struct MyObj { MyObj operator+(MyObj); }; +struct [[gsl::Pointer()]] View { + View(const MyObj&); // Borrows from MyObj + View(); + void use() const; +}; + //===----------------------------------------------------------------------===// // Basic Definite Use-After-Free (-W...permissive) // These are cases where the pointer is guaranteed to be dangling at the use site. @@ -20,12 +26,31 @@ void definite_simple_case() { (void)*p; // expected-note {{later used here}} } +void definite_simple_case_gsl() { + View v; + { + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + void no_use_no_error() { MyObj* p; { MyObj s; p = &s; } + // 'p' is dangling here, but since it is never used, no warning is issued. +} + +void no_use_no_error_gsl() { + View v; + { + MyObj s; + v = s; + } + // 'v' is dangling here, but since it is never used, no warning is issued. } void definite_pointer_chain() { @@ -39,6 +64,16 @@ void definite_pointer_chain() { (void)*q; // expected-note {{later used here}} } +void definite_propagation_gsl() { + View v1, v2; + { + MyObj s; + v1 = s; // expected-warning {{object whose reference is captured does not live long enough}} + v2 = v1; + } // expected-note {{destroyed here}} + v2.use(); // expected-note {{later used here}} +} + void definite_multiple_uses_one_warning() { MyObj* p; { @@ -78,6 +113,19 @@ void definite_single_pointer_multiple_loans(bool cond) { (void)*p; // expected-note 2 {{later used here}} } +void definite_single_pointer_multiple_loans_gsl(bool cond) { + View v; + if (cond){ + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj t; + v = t; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + //===----------------------------------------------------------------------===// // Potential (Maybe) Use-After-Free (-W...strict) @@ -94,18 +142,14 @@ void potential_if_branch(bool cond) { (void)*p; // expected-note {{later used here}} } -// If all paths lead to a dangle, it becomes a definite error. -void potential_becomes_definite(bool cond) { - MyObj* p; +void potential_if_branch_gsl(bool cond) { + MyObj safe; + View v = safe; if (cond) { - MyObj temp1; - p = &temp1; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} - else { - MyObj temp2; - p = &temp2; // expected-warning {{does not live long enough}} + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured may not live long enough}} } // expected-note {{destroyed here}} - (void)*p; // expected-note 2 {{later used here}} + v.use(); // expected-note {{later used here}} } void definite_potential_together(bool cond) { @@ -159,6 +203,16 @@ void potential_for_loop_use_after_loop_body(MyObj safe) { (void)*p; // expected-note {{later used here}} } +void potential_for_loop_gsl() { + MyObj safe; + View v = safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + v = s; // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + void potential_for_loop_use_before_loop_body(MyObj safe) { MyObj* p = &safe; for (int i = 0; i < 1; ++i) { @@ -182,6 +236,19 @@ void potential_loop_with_break(bool cond) { (void)*p; // expected-note {{later used here}} } +void potential_loop_with_break_gsl(bool cond) { + MyObj safe; + View v = safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured may not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note {{later used here}} +} + void potential_multiple_expiry_of_same_loan(bool cond) { // Choose the last expiry location for the loan. MyObj safe; @@ -258,6 +325,28 @@ void definite_switch(int mode) { (void)*p; // expected-note 3 {{later used here}} } +void definite_switch_gsl(int mode) { + View v; + switch (mode) { + case 1: { + MyObj temp1; + v = temp1; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + MyObj temp2; + v = temp2; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + default: { + MyObj temp3; + v = temp3; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note 3 {{later used here}} +} + //===----------------------------------------------------------------------===// // No-Error Cases //===----------------------------------------------------------------------===// @@ -271,3 +360,14 @@ void no_error_if_dangle_then_rescue() { p = &safe; // p is "rescued" before use. (void)*p; // This is safe. } + +void no_error_if_dangle_then_rescue_gsl() { + MyObj safe; + View v; + { + MyObj temp; + v = temp; // 'v' is temporarily dangling. + } + v = safe; // 'v' is "rescued" before use by reassigning to a valid object. + v.use(); // This is safe. +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 13e5832d70050..bff5378c0a8a9 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -11,7 +11,6 @@ #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -31,7 +30,13 @@ class LifetimeTestRunner { LifetimeTestRunner(llvm::StringRef Code) { std::string FullCode = R"( #define POINT(name) void("__lifetime_test_point_" #name) + struct MyObj { ~MyObj() {} int i; }; + + struct [[gsl::Pointer()]] View { + View(const MyObj&); + View(); + }; )"; FullCode += Code.str(); @@ -741,5 +746,180 @@ TEST_F(LifetimeAnalysisTest, NoDuplicateLoansForImplicitCastToConst) { EXPECT_THAT(Helper->getLoansForVar("a"), SizeIs(2)); } +TEST_F(LifetimeAnalysisTest, GslPointerSimpleLoan) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromOwner) { + SetupTest(R"( + void target() { + MyObj al, bl, cl, dl, el, fl; + View a = View(al); + View b = View{bl}; + View c = View(View(View(cl))); + View d = View{View(View(dl))}; + View e = View{View{View{el}}}; + View f = {fl}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("a"), HasLoansTo({"al"}, "p1")); + EXPECT_THAT(Origin("b"), HasLoansTo({"bl"}, "p1")); + EXPECT_THAT(Origin("c"), HasLoansTo({"cl"}, "p1")); + EXPECT_THAT(Origin("d"), HasLoansTo({"dl"}, "p1")); + EXPECT_THAT(Origin("e"), HasLoansTo({"el"}, "p1")); + EXPECT_THAT(Origin("f"), HasLoansTo({"fl"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromView) { + SetupTest(R"( + void target() { + MyObj a; + View x = View(a); + View y = View{x}; + View z = View(View(View(y))); + View p = View{View(View(x))}; + View q = {x}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("q"), HasLoansTo({"a"}, "p1")); +} + +// FIXME: Handle loans in ternary operator! +TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { + SetupTest(R"( + void target(bool cond) { + MyObj a, b; + View v = cond ? a : b; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); +} + +// FIXME: Handle temporaries. +TEST_F(LifetimeAnalysisTest, ViewFromTemporary) { + SetupTest(R"( + MyObj temporary(); + void target() { + View v = temporary(); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerWithConstAndAuto) { + SetupTest(R"( + void target() { + MyObj a; + const View v1 = a; + auto v2 = v1; + const auto& v3 = v2; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerPropagation) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + + View y = x; // Propagation via copy-construction + POINT(p2); + + View z; + z = x; // Propagation via copy-assignment + POINT(p3); + } + )"); + + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerLoanExpiration) { + SetupTest(R"( + void target() { + View x; + { + MyObj a; + x = a; + POINT(before_expiry); + } // `a` is destroyed here. + POINT(after_expiry); + } + )"); + + EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry")); + EXPECT_THAT(LoansTo({"a"}), AreExpiredAt("after_expiry")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerReassignment) { + SetupTest(R"( + void target() { + MyObj safe; + View v; + v = safe; + POINT(p1); + { + MyObj unsafe; + v = unsafe; + POINT(p2); + } // `unsafe` expires here. + POINT(p3); + } + )"); + + EXPECT_THAT(Origin("v"), HasLoansTo({"safe"}, "p1")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p2")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p3")); + EXPECT_THAT(LoansTo({"unsafe"}), AreExpiredAt("p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConversionOperator) { + SetupTest(R"( + struct String; + + struct [[gsl::Pointer()]] StringView { + StringView() = default; + }; + + struct String { + ~String() {} + operator StringView() const; + }; + + void target() { + String xl, yl; + StringView x = xl; + StringView y; + y = yl; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"xl"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"yl"}, "p1")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From 7f403fbb5a3ee89c40fdc377b580ed8f6e493c05 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Fri, 19 Sep 2025 18:40:05 +0200 Subject: [PATCH 15/35] [LifetimeSafety] Avoid adding already present items in sets/maps (#159582) Optimize lifetime safety analysis performance - Added early return optimization in `join` function for ImmutableSet when sets are identical - Improved ImmutableMap join logic to avoid unnecessary operations when values are equal I was under the impression that ImmutableSets/Maps would not modify the underlying if already existing elements are added to the container (and was hoping for structural equality in this aspect). It looks like the current implementation of `ImmutableSet` would perform addition nevertheless thereby creating (presumably `O(log(N))` tree nodes. This change considerably brings down compile times for some edge cases which happened to be present in the LLVM codebase. Now it is actually possible to compile LLVM in under 20 min with the lifetime analysis. The compile time hit is still significant but not as bad as before this change where it was not possible to compile LLVM without severely limiting analysis' scope (giving up on CFG with > 3000 blocks). Fixes https://github.com/llvm/llvm-project/issues/157420
Report (Before)
Report (After) # Lifetime Analysis Performance Report > Generated on: 2025-09-18 14:28:00 --- ## Test Case: Pointer Cycle in Loop **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 25 | 53.76 ms | 85.58% | 0.00% | 85.46% | 0.00% | | 50 | 605.35 ms | 98.39% | 0.00% | 98.37% | 0.00% | | 75 | 2.89 s | 99.62% | 0.00% | 99.61% | 0.00% | | 100 | 8.62 s | 99.80% | 0.00% | 99.80% | 0.00% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.82 ± 0.01) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.82 ± 0.01) | | ExpiredLoans | (Negligible) | --- ## Test Case: CFG Merges **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 400 | 66.02 ms | 58.61% | 1.04% | 56.53% | 1.02% | | 1000 | 319.24 ms | 81.31% | 0.63% | 80.04% | 0.64% | | 2000 | 1.43 s | 92.00% | 0.40% | 91.32% | 0.28% | | 5000 | 9.35 s | 97.01% | 0.25% | 96.63% | 0.12% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n2.12 ± 0.02) | | FactGenerator | O(n1.54 ± 0.02) | | LoanPropagation | O(n2.12 ± 0.03) | | ExpiredLoans | O(n1.13 ± 0.03) | --- ## Test Case: Deeply Nested Loops **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 50 | 137.30 ms | 90.72% | 0.00% | 90.42% | 0.00% | | 100 | 1.09 s | 98.13% | 0.00% | 98.02% | 0.09% | | 150 | 4.06 s | 99.24% | 0.00% | 99.18% | 0.05% | | 200 | 10.44 s | 99.66% | 0.00% | 99.63% | 0.03% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.29 ± 0.01) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.29 ± 0.01) | | ExpiredLoans | O(n1.42 ± 0.19) | ---
--- clang/lib/Analysis/LifetimeSafety.cpp | 12 ++++++++---- clang/test/Analysis/LifetimeSafety/benchmark.py | 11 ++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 0dd5716d93fb6..d016c6f12e82e 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -910,10 +910,13 @@ template static llvm::ImmutableSet join(llvm::ImmutableSet A, llvm::ImmutableSet B, typename llvm::ImmutableSet::Factory &F) { + if (A == B) + return A; if (A.getHeight() < B.getHeight()) std::swap(A, B); for (const T &E : B) - A = F.add(A, E); + if (!A.contains(E)) + A = F.add(A, E); return A; } @@ -947,10 +950,11 @@ join(llvm::ImmutableMap A, llvm::ImmutableMap B, for (const auto &Entry : B) { const K &Key = Entry.first; const V &ValB = Entry.second; - if (const V *ValA = A.lookup(Key)) - A = F.add(A, Key, JoinValues(*ValA, ValB)); - else + const V *ValA = A.lookup(Key); + if (!ValA) A = F.add(A, Key, ValB); + else if (*ValA != ValB) + A = F.add(A, Key, JoinValues(*ValA, ValB)); } return A; } diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py index 4421fe9a81e21..2373f9984eecd 100644 --- a/clang/test/Analysis/LifetimeSafety/benchmark.py +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -252,7 +252,7 @@ def generate_markdown_report(results: dict) -> str: report.append(" ".join(row)) report.append("\n**Complexity Analysis:**\n") - report.append("| Analysis Phase | Complexity O(nk) |") + report.append("| Analysis Phase | Complexity O(nk) | ") report.append("|:------------------|:--------------------------|") analysis_phases = { @@ -302,7 +302,7 @@ def run_single_test( source_file, ] - result = subprocess.run(clang_command, capture_output=True, text=True) + result = subprocess.run(clang_command, capture_output=True, text=True, timeout=60) if result.returncode != 0: print(f"Compilation failed for N={n}!", file=sys.stderr) @@ -334,24 +334,25 @@ def run_single_test( os.makedirs(args.output_dir, exist_ok=True) print(f"Benchmark files will be saved in: {os.path.abspath(args.output_dir)}\n") + # Maximize 'n' values while keeping execution time under 10s. test_configurations = [ { "name": "cycle", "title": "Pointer Cycle in Loop", "generator_func": generate_cpp_cycle_test, - "n_values": [10, 25, 50, 75, 100, 150], + "n_values": [25, 50, 75, 100], }, { "name": "merge", "title": "CFG Merges", "generator_func": generate_cpp_merge_test, - "n_values": [10, 50, 100, 200, 400, 800], + "n_values": [400, 1000, 2000, 5000], }, { "name": "nested_loops", "title": "Deeply Nested Loops", "generator_func": generate_cpp_nested_loop_test, - "n_values": [10, 50, 100, 200, 400, 800], + "n_values": [50, 100, 150, 200], }, ] From 01fe3223ee2f81db5a10d43ca7fff4d4d8a62371 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 25 Sep 2025 18:11:55 +0200 Subject: [PATCH 16/35] [LifetimeSafety] Disable canonicalization in immutable collections (#159850) Disable canonicalization in immutable collections for lifetime analysis. Modified the `LifetimeFactory` struct in `LifetimeSafety.cpp` to explicitly initialize the immutable collection factories with `canonicalize=false`. This prevents the factories from canonicalizing their data structures, which can improve performance by avoiding unnecessary comparisons and digest computations. --- clang/lib/Analysis/LifetimeSafety.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index d016c6f12e82e..57210f4ee09d1 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -970,9 +970,11 @@ using ExpiredLoanMap = llvm::ImmutableMap; /// An object to hold the factories for immutable collections, ensuring /// that all created states share the same underlying memory management. struct LifetimeFactory { - OriginLoanMap::Factory OriginMapFactory; - LoanSet::Factory LoanSetFactory; - ExpiredLoanMap::Factory ExpiredLoanMapFactory; + llvm::BumpPtrAllocator Allocator; + OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; + ExpiredLoanMap::Factory ExpiredLoanMapFactory{Allocator, + /*canonicalize=*/false}; }; /// Represents the dataflow lattice for loan propagation. From 176a9b0312b399f60eddf5b244e064ed740797fe Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Fri, 26 Sep 2025 00:44:24 +0200 Subject: [PATCH 17/35] [LifetimeSafety] Implement support for lifetimebound attribute (#158489) Add support for `lifetimebound` attributes in the lifetime safety analysis to track loans from function parameters to return values. Implemented support for `lifetimebound` attributes on function parameters This change replaces the single `AssignOriginFact` with two separate operations: `OriginFlowFact` and `KillOriginFact`. The key difference is in semantics: * Old `AssignOriginFact`: Replaced the destination origin's loans entirely with the source origin's loans. * New `OriginFlowFact`: Can now optionally merge the source origin's loans to the destination's existing loans. * New `KillOriginFact`: Clears all loans from an origin. For function calls with `lifetimebound` parameters, we kill the the return value' origin first then use `OriginFlowFact` to accumulate loans from multiple parameters into the return value's origin - enabling tracking multiple lifetimebound arguments. - Added a new `LifetimeAnnotations.h/cpp` to provide helper functions for inspecting and inferring lifetime annotations - Moved several functions from `CheckExprLifetime.cpp` to the new file to make them reusable The `lifetimebound` attribute is a key mechanism for expressing lifetime dependencies between function parameters and return values. This change enables the lifetime safety analysis to properly track these dependencies, allowing it to detect more potential dangling reference issues. --- .../Analysis/Analyses/LifetimeAnnotations.h | 44 +++ .../clang/Analysis/Analyses/LifetimeSafety.h | 11 +- clang/lib/Analysis/CMakeLists.txt | 1 + clang/lib/Analysis/LifetimeAnnotations.cpp | 75 +++++ clang/lib/Analysis/LifetimeSafety.cpp | 161 +++++++---- clang/lib/Sema/CheckExprLifetime.cpp | 80 +----- clang/lib/Sema/CheckExprLifetime.h | 2 - clang/lib/Sema/SemaAPINotes.cpp | 4 +- .../test/Analysis/LifetimeSafety/benchmark.py | 2 +- .../Sema/warn-lifetime-safety-dataflow.cpp | 178 ++++++------ clang/test/Sema/warn-lifetime-safety.cpp | 149 ++++++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 257 +++++++++++++++++- 12 files changed, 758 insertions(+), 206 deletions(-) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h create mode 100644 clang/lib/Analysis/LifetimeAnnotations.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h new file mode 100644 index 0000000000000..229d16c20b0f8 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h @@ -0,0 +1,44 @@ +//===- LifetimeAnnotations.h - -*--------------- C++--------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Helper functions to inspect and infer lifetime annotations. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H + +#include "clang/AST/DeclCXX.h" + +namespace clang { +namespace lifetimes { + +/// Returns the most recent declaration of the method to ensure all +/// lifetime-bound attributes from redeclarations are considered. +const FunctionDecl *getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD); + +/// Returns the most recent declaration of the method to ensure all +/// lifetime-bound attributes from redeclarations are considered. +const CXXMethodDecl * +getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD); + +// Return true if this is an "normal" assignment operator. +// We assume that a normal assignment operator always returns *this, that is, +// an lvalue reference that is the same type as the implicit object parameter +// (or the LHS for a non-member operator==). +bool isNormalAssignmentOperator(const FunctionDecl *FD); + +/// Returns true if this is an assignment operator where the parameter +/// has the lifetimebound attribute. +bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); + +/// Returns true if the implicit object parameter (this) should be considered +/// lifetimebound, either due to an explicit lifetimebound attribute on the +/// method or because it's a normal assignment operator. +bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); +} // namespace lifetimes +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 7e1bfc903083e..512cb76cd6349 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -75,13 +75,14 @@ template struct ID { } }; -template -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { - return OS << ID.Value; -} - using LoanID = ID; using OriginID = ID; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} // Using LLVM's immutable collections is efficient for dataflow analysis // as it avoids deep copies during state transitions. diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 0523d92480cb3..5a26f3eeea418 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeAnnotations.cpp LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp diff --git a/clang/lib/Analysis/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeAnnotations.cpp new file mode 100644 index 0000000000000..e79122475625e --- /dev/null +++ b/clang/lib/Analysis/LifetimeAnnotations.cpp @@ -0,0 +1,75 @@ +//===- LifetimeAnnotations.cpp - -*--------------- C++------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeLoc.h" + +namespace clang { +namespace lifetimes { + +const FunctionDecl * +getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { + return FD != nullptr ? FD->getMostRecentDecl() : nullptr; +} + +const CXXMethodDecl * +getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD) { + const FunctionDecl *FD = CMD; + return cast_if_present( + getDeclWithMergedLifetimeBoundAttrs(FD)); +} + +bool isNormalAssignmentOperator(const FunctionDecl *FD) { + OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator(); + bool IsAssignment = OO == OO_Equal || isCompoundAssignmentOperator(OO); + if (!IsAssignment) + return false; + QualType RetT = FD->getReturnType(); + if (!RetT->isLValueReferenceType()) + return false; + ASTContext &Ctx = FD->getASTContext(); + QualType LHST; + auto *MD = dyn_cast(FD); + if (MD && MD->isCXXInstanceMember()) + LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType()); + else + LHST = FD->getParamDecl(0)->getType(); + return Ctx.hasSameType(RetT, LHST); +} + +bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD) { + CMD = getDeclWithMergedLifetimeBoundAttrs(CMD); + return CMD && isNormalAssignmentOperator(CMD) && CMD->param_size() == 1 && + CMD->getParamDecl(0)->hasAttr(); +} + +bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { + FD = getDeclWithMergedLifetimeBoundAttrs(FD); + const TypeSourceInfo *TSI = FD->getTypeSourceInfo(); + if (!TSI) + return false; + // Don't declare this variable in the second operand of the for-statement; + // GCC miscompiles that by ending its lifetime before evaluating the + // third operand. See gcc.gnu.org/PR86769. + AttributedTypeLoc ATL; + for (TypeLoc TL = TSI->getTypeLoc(); + (ATL = TL.getAsAdjusted()); + TL = ATL.getModifiedLoc()) { + if (ATL.getAttrAs()) + return true; + } + + return isNormalAssignmentOperator(FD); +} + +} // namespace lifetimes +} // namespace clang diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 57210f4ee09d1..02c7373c7444b 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -10,6 +10,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" @@ -213,10 +214,13 @@ class Fact { /// out of scope). Expire, /// An origin is propagated from a source to a destination (e.g., p = q). - AssignOrigin, + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, /// An origin escapes the function by flowing into the return value. ReturnOfOrigin, - /// An origin is used (eg. dereferencing a pointer). + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). Use, /// A marker for a specific point in the code, for testing. TestPoint, @@ -285,25 +289,33 @@ class ExpireFact : public Fact { } }; -class AssignOriginFact : public Fact { +class OriginFlowFact : public Fact { OriginID OIDDest; OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. + bool KillDest; public: static bool classof(const Fact *F) { - return F->getKind() == Kind::AssignOrigin; + return F->getKind() == Kind::OriginFlow; } - AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) - : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + OriginID getDestOriginID() const { return OIDDest; } OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + void dump(llvm::raw_ostream &OS, const LoanManager &, const OriginManager &OM) const override { - OS << "AssignOrigin (Dest: "; + OS << "OriginFlow (Dest: "; OM.dump(getDestOriginID(), OS); OS << ", Src: "; OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); OS << ")\n"; } }; @@ -454,7 +466,7 @@ class FactGenerator : public ConstStmtVisitor { if (const auto *VD = dyn_cast(D)) if (hasOrigin(VD)) if (const Expr *InitExpr = VD->getInit()) - addAssignOriginFact(*VD, *InitExpr); + killAndFlowOrigin(*VD, *InitExpr); } void VisitDeclRefExpr(const DeclRefExpr *DRE) { @@ -492,9 +504,23 @@ class FactGenerator : public ConstStmtVisitor { isa(MCE->getCalleeDecl())) { // The argument is the implicit object itself. handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}); + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); } - // FIXME: A more general VisitCallExpr could also be used here. + } + + void VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); } void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { @@ -508,7 +534,7 @@ class FactGenerator : public ConstStmtVisitor { return; // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). - addAssignOriginFact(*ICE, *ICE->getSubExpr()); + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); } void VisitUnaryOperator(const UnaryOperator *UO) { @@ -522,7 +548,7 @@ class FactGenerator : public ConstStmtVisitor { // its sub-expression (x). This fact will cause the dataflow analysis // to propagate any loans held by the sub-expression's origin to the // origin of this UnaryOperator expression. - addAssignOriginFact(*UO, *SubExpr); + killAndFlowOrigin(*UO, *SubExpr); } } @@ -542,8 +568,15 @@ class FactGenerator : public ConstStmtVisitor { } void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); } void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { @@ -552,7 +585,7 @@ class FactGenerator : public ConstStmtVisitor { if (handleTestPoint(FCE)) return; if (isGslPointerType(FCE->getType())) - addAssignOriginFact(*FCE, *FCE->getSubExpr()); + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); } void VisitInitListExpr(const InitListExpr *ILE) { @@ -561,7 +594,7 @@ class FactGenerator : public ConstStmtVisitor { // For list initialization with a single element, like `View{...}`, the // origin of the list itself is the origin of its single element. if (ILE->getNumInits() == 1) - addAssignOriginFact(*ILE, *ILE->getInit(0)); + killAndFlowOrigin(*ILE, *ILE->getInit(0)); } void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { @@ -569,7 +602,7 @@ class FactGenerator : public ConstStmtVisitor { return; // A temporary object's origin is the same as the origin of the // expression that initializes it. - addAssignOriginFact(*MTE, *MTE->getSubExpr()); + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); } void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { @@ -624,34 +657,51 @@ class FactGenerator : public ConstStmtVisitor { if (CCE->getNumArgs() != 1) return; if (hasOrigin(CCE->getArg(0))) - addAssignOriginFact(*CCE, *CCE->getArg(0)); + killAndFlowOrigin(*CCE, *CCE->getArg(0)); else // This could be a new borrow. handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}); + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); } /// Checks if a call-like expression creates a borrow by passing a value to a /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef Args) { - if (!FD) + ArrayRef Args, + bool IsGslConstruction = false) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) return; - // TODO: Handle more than one arguments. - for (unsigned I = 0; I <= 0 /*Args.size()*/; ++I) { - const Expr *ArgExpr = Args[I]; - - // Propagate origins for CXX this. - if (FD->isCXXClassMember() && I == 0) { - addAssignOriginFact(*Call, *ArgExpr); - continue; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); } - // The parameter is a pointer, reference, or gsl::Pointer. - // This is a borrow. We propagate the origin from the argument expression - // at the call site to the parameter declaration in the callee. - if (hasOrigin(ArgExpr)) - addAssignOriginFact(*Call, *ArgExpr); - } } /// Creates a loan for the storage path of a given declaration reference. @@ -668,11 +718,19 @@ class FactGenerator : public ConstStmtVisitor { } template - void addAssignOriginFact(const Destination &D, const Source &S) { + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template + void killAndFlowOrigin(const Destination &D, const Source &S) { OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); OriginID SrcOID = FactMgr.getOriginMgr().get(S); CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID)); + FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); } /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. @@ -703,12 +761,11 @@ class FactGenerator : public ConstStmtVisitor { if (const auto *DRE_LHS = dyn_cast(LHSExpr->IgnoreParenImpCasts())) { markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) - // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`. - // LHS must be a pointer/reference type that can be an origin. RHS must - // also represent an origin (either another pointer/ref or an - // address-of). - addAssignOriginFact(*VD_LHS, *RHSExpr); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } } } @@ -882,8 +939,8 @@ class DataflowAnalysis { return D->transfer(In, *F->getAs()); case Fact::Kind::Expire: return D->transfer(In, *F->getAs()); - case Fact::Kind::AssignOrigin: - return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs()); case Fact::Kind::ReturnOfOrigin: return D->transfer(In, *F->getAs()); case Fact::Kind::Use: @@ -897,7 +954,7 @@ class DataflowAnalysis { public: Lattice transfer(Lattice In, const IssueFact &) { return In; } Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const AssignOriginFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } Lattice transfer(Lattice In, const UseFact &) { return In; } Lattice transfer(Lattice In, const TestPointFact &) { return In; } @@ -1051,14 +1108,20 @@ class LoanPropagationAnalysis LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); } - /// The destination origin's loan set is replaced by the source's. - /// This implicitly "resets" the old loans of the destination. - Lattice transfer(Lattice In, const AssignOriginFact &F) { + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { OriginID DestOID = F.getDestOriginID(); OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + return LoanPropagationLattice( - OriginLoanMapFactory.add(In.Origins, DestOID, SrcLoans)); + OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); } LoanSet getLoans(OriginID OID, ProgramPoint P) { diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e02e00231e58e..e8a7ad3bd355a 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,6 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" @@ -503,60 +504,6 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { return true; } -// Return true if this is an "normal" assignment operator. -// We assume that a normal assignment operator always returns *this, that is, -// an lvalue reference that is the same type as the implicit object parameter -// (or the LHS for a non-member operator$=). -static bool isNormalAssignmentOperator(const FunctionDecl *FD) { - OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator(); - if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) { - QualType RetT = FD->getReturnType(); - if (RetT->isLValueReferenceType()) { - ASTContext &Ctx = FD->getASTContext(); - QualType LHST; - auto *MD = dyn_cast(FD); - if (MD && MD->isCXXInstanceMember()) - LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType()); - else - LHST = FD->getParamDecl(0)->getType(); - if (Ctx.hasSameType(RetT, LHST)) - return true; - } - } - return false; -} - -static const FunctionDecl * -getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { - return FD != nullptr ? FD->getMostRecentDecl() : nullptr; -} - -static const CXXMethodDecl * -getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD) { - const FunctionDecl *FD = CMD; - return cast_if_present( - getDeclWithMergedLifetimeBoundAttrs(FD)); -} - -bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { - FD = getDeclWithMergedLifetimeBoundAttrs(FD); - const TypeSourceInfo *TSI = FD->getTypeSourceInfo(); - if (!TSI) - return false; - // Don't declare this variable in the second operand of the for-statement; - // GCC miscompiles that by ending its lifetime before evaluating the - // third operand. See gcc.gnu.org/PR86769. - AttributedTypeLoc ATL; - for (TypeLoc TL = TSI->getTypeLoc(); - (ATL = TL.getAsAdjusted()); - TL = ATL.getModifiedLoc()) { - if (ATL.getAttrAs()) - return true; - } - - return isNormalAssignmentOperator(FD); -} - // Visit lifetimebound or gsl-pointer arguments. static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { @@ -639,7 +586,8 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, // lifetimebound. if (Sema::CanBeGetReturnObject(Callee)) CheckCoroObjArg = false; - if (implicitObjectParamIsLifetimeBound(Callee) || CheckCoroObjArg) + if (lifetimes::implicitObjectParamIsLifetimeBound(Callee) || + CheckCoroObjArg) VisitLifetimeBoundArg(Callee, ObjectArg); else if (EnableGSLAnalysis) { if (auto *CME = dyn_cast(Callee); @@ -648,7 +596,8 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, } } - const FunctionDecl *CanonCallee = getDeclWithMergedLifetimeBoundAttrs(Callee); + const FunctionDecl *CanonCallee = + lifetimes::getDeclWithMergedLifetimeBoundAttrs(Callee); unsigned NP = std::min(Callee->getNumParams(), CanonCallee->getNumParams()); for (unsigned I = 0, N = std::min(NP, Args.size()); I != N; ++I) { Expr *Arg = Args[I]; @@ -1276,19 +1225,14 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, return Report; } -static bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD) { - CMD = getDeclWithMergedLifetimeBoundAttrs(CMD); - return CMD && isNormalAssignmentOperator(CMD) && CMD->param_size() == 1 && - CMD->getParamDecl(0)->hasAttr(); -} - static bool shouldRunGSLAssignmentAnalysis(const Sema &SemaRef, const AssignedEntity &Entity) { bool EnableGSLAssignmentWarnings = !SemaRef.getDiagnostics().isIgnored( diag::warn_dangling_lifetime_pointer_assignment, SourceLocation()); return (EnableGSLAssignmentWarnings && (isRecordWithAttr(Entity.LHS->getType()) || - isAssignmentOperatorLifetimeBound(Entity.AssignmentOperator))); + lifetimes::isAssignmentOperatorLifetimeBound( + Entity.AssignmentOperator))); } static void @@ -1610,11 +1554,11 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity, switch (LK) { case LK_Assignment: { if (shouldRunGSLAssignmentAnalysis(SemaRef, *AEntity)) - Path.push_back( - {isAssignmentOperatorLifetimeBound(AEntity->AssignmentOperator) - ? IndirectLocalPathEntry::LifetimeBoundCall - : IndirectLocalPathEntry::GslPointerAssignment, - Init}); + Path.push_back({lifetimes::isAssignmentOperatorLifetimeBound( + AEntity->AssignmentOperator) + ? IndirectLocalPathEntry::LifetimeBoundCall + : IndirectLocalPathEntry::GslPointerAssignment, + Init}); break; } case LK_LifetimeCapture: { diff --git a/clang/lib/Sema/CheckExprLifetime.h b/clang/lib/Sema/CheckExprLifetime.h index 6351e52a362f1..16595d0ca1b36 100644 --- a/clang/lib/Sema/CheckExprLifetime.h +++ b/clang/lib/Sema/CheckExprLifetime.h @@ -60,8 +60,6 @@ void checkCaptureByLifetime(Sema &SemaRef, const CapturingEntity &Entity, void checkExprLifetimeMustTailArg(Sema &SemaRef, const InitializedEntity &Entity, Expr *Init); -bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); - } // namespace clang::sema #endif // LLVM_CLANG_SEMA_CHECK_EXPR_LIFETIME_H diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 04aa8d224c7c0..d0acb27f6a360 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "CheckExprLifetime.h" #include "TypeLocBuilder.h" #include "clang/APINotes/APINotesReader.h" #include "clang/APINotes/Types.h" @@ -18,6 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" @@ -733,7 +733,7 @@ static void ProcessAPINotes(Sema &S, CXXMethodDecl *Method, const api_notes::CXXMethodInfo &Info, VersionedInfoMetadata Metadata) { if (Info.This && Info.This->isLifetimebound() && - !sema::implicitObjectParamIsLifetimeBound(Method)) { + !lifetimes::implicitObjectParamIsLifetimeBound(Method)) { auto MethodType = Method->getType(); auto *attr = ::new (S.Context) LifetimeBoundAttr(S.Context, getPlaceholderAttrInfo()); diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py index 2373f9984eecd..d2e5f0b2122a3 100644 --- a/clang/test/Analysis/LifetimeSafety/benchmark.py +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -340,7 +340,7 @@ def run_single_test( "name": "cycle", "title": "Pointer Cycle in Loop", "generator_func": generate_cpp_cycle_test, - "n_values": [25, 50, 75, 100], + "n_values": [50, 75, 100, 200, 300], }, { "name": "merge", diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 805dd305046e4..f34c1e95454d7 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -12,12 +12,12 @@ MyObj* return_local_addr() { MyObj x {10}; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) MyObj* p = &x; -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) return p; // CHECK: Use ([[O_P]] (Decl: p), Read) -// CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) // CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) // CHECK: Expire ([[L_X]] (Path: x)) } @@ -29,26 +29,26 @@ MyObj* assign_and_return_local_addr() { MyObj y{20}; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) MyObj* ptr1 = &y; -// CHECK: AssignOrigin (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) MyObj* ptr2 = ptr1; // CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) -// CHECK: AssignOrigin (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: OriginFlow (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) ptr2 = ptr1; // CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) -// CHECK: AssignOrigin (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: OriginFlow (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) // CHECK: Use ({{[0-9]+}} (Decl: ptr2), Write) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) ptr2 = ptr2; // Self assignment. // CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) -// CHECK: AssignOrigin (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: OriginFlow (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) // CHECK: Use ([[O_PTR2]] (Decl: ptr2), Write) -// CHECK: AssignOrigin (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) return ptr2; // CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) -// CHECK: AssignOrigin (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) // CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) // CHECK: Expire ([[L_Y]] (Path: y)) } @@ -70,9 +70,9 @@ void loan_expires_cpp() { MyObj obj{1}; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_OBJ:[0-9]+]] (Path: obj), ToOrigin: [[O_DRE_OBJ:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OBJ]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OBJ]] (Expr: DeclRefExpr)) MyObj* pObj = &obj; -// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) // CHECK: Expire ([[L_OBJ]] (Path: obj)) } @@ -83,9 +83,9 @@ void loan_expires_trivial() { int trivial_obj = 1; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_TRIVIAL_OBJ:[0-9]+]] (Path: trivial_obj), ToOrigin: [[O_DRE_TRIVIAL:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) int* pTrivialObj = &trivial_obj; -// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) // CHECK-NOT: Expire // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. @@ -100,20 +100,20 @@ void conditional(bool condition) { if (condition) // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) p = &a; else // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_B]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_B]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) p = &b; // CHECK: Block B{{[0-9]+}}: int *q = p; // CHECK: Use ([[O_P]] (Decl: p), Read) -// CHECK: AssignOrigin (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) -// CHECK: AssignOrigin (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: OriginFlow (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) } @@ -128,36 +128,36 @@ void pointers_in_a_cycle(bool condition) { MyObj* p3 = &v3; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_V1:[0-9]+]] (Path: v1), ToOrigin: [[O_DRE_V1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V1]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) // CHECK: Issue ([[L_V2:[0-9]+]] (Path: v2), ToOrigin: [[O_DRE_V2:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V2]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V2]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) // CHECK: Issue ([[L_V3:[0-9]+]] (Path: v3), ToOrigin: [[O_DRE_V3:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_V3:[0-g]+]] (Expr: UnaryOperator), Src: [[O_DRE_V3]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V3:[0-g]+]] (Expr: UnaryOperator), Src: [[O_DRE_V3]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) while (condition) { // CHECK: Block B{{[0-9]+}}: MyObj* temp = p1; // CHECK: Use ([[O_P1]] (Decl: p1), Read) -// CHECK: AssignOrigin (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) -// CHECK: AssignOrigin (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) +// CHECK: OriginFlow (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) p1 = p2; // CHECK: Use ([[O_P2:[0-9]+]] (Decl: p2), Read) -// CHECK: AssignOrigin (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) +// CHECK: OriginFlow (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) // CHECK: Use ({{[0-9]+}} (Decl: p1), Write) -// CHECK: AssignOrigin (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) p2 = p3; // CHECK: Use ([[O_P3:[0-9]+]] (Decl: p3), Read) -// CHECK: AssignOrigin (Dest: [[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) +// CHECK: OriginFlow (Dest: [[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) // CHECK: Use ({{[0-9]+}} (Decl: p2), Write) -// CHECK: AssignOrigin (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) p3 = temp; // CHECK: Use ([[O_TEMP]] (Decl: temp), Read) -// CHECK: AssignOrigin (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) +// CHECK: OriginFlow (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) // CHECK: Use ({{[0-9]+}} (Decl: p3), Write) -// CHECK: AssignOrigin (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) } } @@ -168,13 +168,13 @@ void overwrite_origin() { // CHECK: Block B{{[0-9]+}}: MyObj* p = &s1; // CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = &s2; // CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) // CHECK: Expire ([[L_S2]] (Path: s2)) // CHECK: Expire ([[L_S1]] (Path: s1)) } @@ -185,12 +185,12 @@ void reassign_to_null() { // CHECK: Block B{{[0-9]+}}: MyObj* p = &s1; // CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = nullptr; -// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) // CHECK: Expire ([[L_S1]] (Path: s1)) } // FIXME: Have a better representation for nullptr than just an empty origin. @@ -204,15 +204,15 @@ void reassign_in_if(bool condition) { MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) if (condition) { // CHECK: Block B{{[0-9]+}}: p = &s2; // CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) } // CHECK: Block B{{[0-9]+}}: // CHECK: Expire ([[L_S2]] (Path: s2)) @@ -227,32 +227,32 @@ void assign_in_switch(int mode) { MyObj s3; MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) switch (mode) { case 1: // CHECK-DAG: Block B{{[0-9]+}}: p = &s1; // CHECK-DAG: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK-DAG: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) // CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK-DAG: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) break; case 2: // CHECK-DAG: Block B{{[0-9]+}}: p = &s2; // CHECK-DAG: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK-DAG: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK-DAG: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) // CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK-DAG: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK-DAG: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; default: // CHECK: Block B{{[0-9]+}}: p = &s3; // CHECK: Issue ([[L_S3:[0-9]+]] (Path: s3), ToOrigin: [[O_DRE_S3:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S3]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S3]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) break; } // CHECK: Block B{{[0-9]+}}: @@ -265,16 +265,16 @@ void assign_in_switch(int mode) { void loan_in_loop(bool condition) { MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) while (condition) { MyObj inner; // CHECK: Block B{{[0-9]+}}: p = &inner; // CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) // CHECK: Expire ([[L_INNER]] (Path: inner)) } } @@ -286,16 +286,16 @@ void loop_with_break(int count) { MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) for (int i = 0; i < count; ++i) { if (i == 5) { // CHECK: Block B{{[0-9]+}}: p = &s2; // CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; } } @@ -308,22 +308,22 @@ void loop_with_break(int count) { void nested_scopes() { MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) { MyObj outer; p = &outer; // CHECK: Issue ([[L_OUTER:[0-9]+]] (Path: outer), ToOrigin: [[O_DRE_OUTER:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OUTER]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OUTER]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) { MyObj inner; p = &inner; // CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) } // CHECK: Expire ([[L_INNER]] (Path: inner)) } @@ -336,14 +336,14 @@ void pointer_indirection() { int *p = &a; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) int **pp = &p; // Note: No facts are generated for &p because the subexpression is a pointer type, // which is not yet supported by the origin model. This is expected. int *q = *pp; // CHECK: Use ([[O_PP:[0-9]+]] (Decl: pp), Read) -// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) } // CHECK-LABEL: Function: ternary_operator @@ -360,7 +360,7 @@ void ternary_operator() { // CHECK: Block B{{[0-9]+}}: // CHECK: Use ({{[0-9]+}} (Decl: p), Write) -// CHECK: AssignOrigin (Dest: {{[0-9]+}} (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) } // CHECK-LABEL: Function: test_use_facts @@ -371,9 +371,9 @@ void test_use_facts() { // CHECK: Block B{{[0-9]+}}: p = &x; // CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) -// CHECK: AssignOrigin (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) // CHECK: Use ([[O_P:[0-9]+]] (Decl: p), Write) -// CHECK: AssignOrigin (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) (void)*p; // CHECK: Use ([[O_P]] (Decl: p), Read) usePointer(p); @@ -389,4 +389,28 @@ void test_use_facts() { q->id = 2; // CHECK: Use ([[O_Q]] (Decl: q), Read) // CHECK: Expire ([[L_X]] (Path: x)) -} \ No newline at end of file +} + +// CHECK-LABEL: Function: test_use_lifetimebound_call +MyObj* LifetimeBoundCall(MyObj* x [[clang::lifetimebound]], MyObj* y [[clang::lifetimebound]]); +void test_use_lifetimebound_call() { + MyObj x, y; + MyObj *p = &x; +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) + MyObj *q = &y; +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) + MyObj* r = LifetimeBoundCall(p, q); +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: OriginFlow (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: Use ([[O_Q]] (Decl: q), Read) +// CHECK: OriginFlow (Dest: [[O_Q_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_Q]] (Decl: q)) +// CHECK: OriginFlow (Dest: [[O_CALL_EXPR:[0-9]+]] (Expr: CallExpr), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_CALL_EXPR]] (Expr: CallExpr), Src: [[O_Q_RVAL]] (Expr: ImplicitCastExpr), Merge) +// CHECK: OriginFlow (Dest: [[O_R:[0-9]+]] (Decl: r), Src: [[O_CALL_EXPR]] (Expr: CallExpr)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: Expire ([[L_X]] (Path: x)) +} diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index bc8a5f3f7150f..f6bec6e66c365 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -371,3 +371,152 @@ void no_error_if_dangle_then_rescue_gsl() { v = safe; // 'v' is "rescued" before use by reassigning to a valid object. v.use(); // This is safe. } + + +//===----------------------------------------------------------------------===// +// Lifetimebound Attribute Tests +//===----------------------------------------------------------------------===// + +View Identity(View v [[clang::lifetimebound]]); +View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); +MyObj* GetPointer(const MyObj& obj [[clang::lifetimebound]]); + +struct [[gsl::Pointer()]] LifetimeBoundView { + LifetimeBoundView(); + LifetimeBoundView(const MyObj& obj [[clang::lifetimebound]]); + LifetimeBoundView pass() [[clang::lifetimebound]] { return *this; } + operator View() const [[clang::lifetimebound]]; +}; + +void lifetimebound_simple_function() { + View v; + { + MyObj obj; + v = Identity(obj); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_multiple_args_definite() { + View v; + { + MyObj obj1, obj2; + v = Choose(true, + obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + +void lifetimebound_multiple_args_potential(bool cond) { + MyObj safe; + View v = safe; + { + MyObj obj1; + if (cond) { + MyObj obj2; + v = Choose(true, + obj1, // expected-warning {{object whose reference is captured may not live long enough}} + obj2); // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + } // expected-note {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + +View SelectFirst(View a [[clang::lifetimebound]], View b); +void lifetimebound_mixed_args() { + View v; + { + MyObj obj1, obj2; + v = SelectFirst(obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_member_function() { + LifetimeBoundView lbv, lbv2; + { + MyObj obj; + lbv = obj; // expected-warning {{object whose reference is captured does not live long enough}} + lbv2 = lbv.pass(); + } // expected-note {{destroyed here}} + View v = lbv2; // expected-note {{later used here}} + v.use(); +} + +void lifetimebound_conversion_operator() { + View v; + { + MyObj obj; + LifetimeBoundView lbv = obj; // expected-warning {{object whose reference is captured does not live long enough}} + v = lbv; // Conversion operator is lifetimebound + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_chained_calls() { + View v; + { + MyObj obj; + v = Identity(Identity(Identity(obj))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_with_pointers() { + MyObj* ptr; + { + MyObj obj; + ptr = GetPointer(obj); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*ptr; // expected-note {{later used here}} +} + +void lifetimebound_no_error_safe_usage() { + MyObj obj; + View v1 = Identity(obj); // No warning - obj lives long enough + View v2 = Choose(true, v1, Identity(obj)); // No warning - all args are safe + v2.use(); // Safe usage +} + +void lifetimebound_partial_safety(bool cond) { + MyObj safe_obj; + View v = safe_obj; + + if (cond) { + MyObj temp_obj; + v = Choose(true, + safe_obj, + temp_obj); // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +// FIXME: Creating reference from lifetimebound call doesn't propagate loans. +const MyObj& GetObject(View v [[clang::lifetimebound]]); +void lifetimebound_return_reference() { + View v; + const MyObj* ptr; + { + MyObj obj; + View temp_v = obj; + const MyObj& ref = GetObject(temp_v); + ptr = &ref; + } + (void)*ptr; +} + +// FIXME: No warning for non gsl::Pointer types. Origin tracking is only supported for pointer types. +struct LifetimeBoundCtor { + LifetimeBoundCtor(); + LifetimeBoundCtor(const MyObj& obj [[clang::lifetimebound]]); +}; +void lifetimebound_ctor() { + LifetimeBoundCtor v; + { + MyObj obj; + v = obj; + } + (void)v; +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index bff5378c0a8a9..3821015f07fb1 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -68,6 +68,7 @@ class LifetimeTestRunner { LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } ASTContext &getASTContext() { return *ASTCtx; } + AnalysisDeclContext &getAnalysisContext() { return *AnalysisCtx; } ProgramPoint getProgramPoint(llvm::StringRef Annotation) { auto It = AnnotationToPointMap.find(Annotation); @@ -106,7 +107,7 @@ class LifetimeTestHelper { std::vector getLoansForVar(llvm::StringRef VarName) { auto *VD = findDecl(VarName); if (!VD) { - ADD_FAILURE() << "No VarDecl found for '" << VarName << "'"; + ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } std::vector LID = Analysis.getLoanIDForVar(VD); @@ -136,11 +137,20 @@ class LifetimeTestHelper { private: template DeclT *findDecl(llvm::StringRef Name) { auto &Ctx = Runner.getASTContext(); - auto Results = match(valueDecl(hasName(Name)).bind("v"), Ctx); + const auto *TargetFunc = Runner.getAnalysisContext().getDecl(); + auto Results = + match(valueDecl(hasName(Name), + hasAncestor(functionDecl(equalsNode(TargetFunc)))) + .bind("v"), + Ctx); if (Results.empty()) { ADD_FAILURE() << "Declaration '" << Name << "' not found in AST."; return nullptr; } + if (Results.size() > 1) { + ADD_FAILURE() << "Multiple declarations found for '" << Name << "'"; + return nullptr; + } return const_cast(selectFirst("v", Results)); } @@ -208,6 +218,19 @@ MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { ExpectedLoans.insert(ExpectedLoans.end(), ExpectedLIDs.begin(), ExpectedLIDs.end()); } + std::sort(ExpectedLoans.begin(), ExpectedLoans.end()); + std::sort(ActualLoans.begin(), ActualLoans.end()); + if (ExpectedLoans != ActualLoans) { + *result_listener << "Expected: "; + for (const auto &LoanID : ExpectedLoans) { + *result_listener << LoanID.Value << ", "; + } + *result_listener << "Actual: "; + for (const auto &LoanID : ActualLoans) { + *result_listener << LoanID.Value << ", "; + } + return false; + } return ExplainMatchResult(UnorderedElementsAreArray(ExpectedLoans), ActualLoans, result_listener); @@ -921,5 +944,235 @@ TEST_F(LifetimeAnalysisTest, GslPointerConversionOperator) { EXPECT_THAT(Origin("y"), HasLoansTo({"yl"}, "p1")); } +TEST_F(LifetimeAnalysisTest, LifetimeboundSimple) { + SetupTest(R"( + View Identity(View v [[clang::lifetimebound]]); + void target() { + MyObj a, b; + View v1 = a; + POINT(p1); + + View v2 = Identity(v1); + View v3 = Identity(b); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + // The origin of v2 should now contain the loan to 'o' from v1. + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"b"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMemberFunction) { + SetupTest(R"( + struct [[gsl::Pointer()]] MyView { + MyView(const MyObj& o) {} + MyView pass() [[clang::lifetimebound]] { return *this; } + }; + void target() { + MyObj o; + MyView v1 = o; + POINT(p1); + MyView v2 = v1.pass(); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o"}, "p1")); + // The call v1.pass() is bound to 'v1'. The origin of v2 should get the loans + // from v1. + EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMultipleArgs) { + SetupTest(R"( + View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); + void target() { + MyObj o1, o2; + View v1 = o1; + View v2 = o2; + POINT(p1); + + View v3 = Choose(true, v1, v2); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o1"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o2"}, "p2")); + // v3 should have loans from both v1 and v2, demonstrating the union of + // loans. + EXPECT_THAT(Origin("v3"), HasLoansTo({"o1", "o2"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMixedArgs) { + SetupTest(R"( + View Choose(bool cond, View a [[clang::lifetimebound]], View b); + void target() { + MyObj o1, o2; + View v1 = o1; + View v2 = o2; + POINT(p1); + + View v3 = Choose(true, v1, v2); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o1"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o2"}, "p1")); + // v3 should only have loans from v1, as v2 is not lifetimebound. + EXPECT_THAT(Origin("v3"), HasLoansTo({"o1"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundChainOfViews) { + SetupTest(R"( + View Identity(View v [[clang::lifetimebound]]); + View DoubleIdentity(View v [[clang::lifetimebound]]); + + void target() { + MyObj obj; + View v1 = obj; + POINT(p1); + View v2 = DoubleIdentity(Identity(v1)); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"obj"}, "p1")); + // v2 should inherit the loan from v1 through the chain of calls. + EXPECT_THAT(Origin("v2"), HasLoansTo({"obj"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundRawPointerParameter) { + SetupTest(R"( + View ViewFromPtr(const MyObj* p [[clang::lifetimebound]]); + MyObj* PtrFromPtr(const MyObj* p [[clang::lifetimebound]]); + MyObj* PtrFromView(View v [[clang::lifetimebound]]); + + void target() { + MyObj a; + View v = ViewFromPtr(&a); + POINT(p1); + + MyObj b; + MyObj* ptr1 = PtrFromPtr(&b); + MyObj* ptr2 = PtrFromPtr(PtrFromPtr(PtrFromPtr(ptr1))); + POINT(p2); + + MyObj c; + View v2 = ViewFromPtr(PtrFromView(c)); + POINT(p3); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("ptr1"), HasLoansTo({"b"}, "p2")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"b"}, "p2")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"c"}, "p3")); +} + +// FIXME: This can be controversial and may be revisited in the future. +TEST_F(LifetimeAnalysisTest, LifetimeboundConstRefViewParameter) { + SetupTest(R"( + View Identity(const View& v [[clang::lifetimebound]]); + void target() { + MyObj o; + View v1 = o; + View v2 = Identity(v1); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundConstRefObjParam) { + SetupTest(R"( + View Identity(const MyObj& o [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = Identity(a); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundReturnReference) { + SetupTest(R"( + const MyObj& Identity(View v [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = a; + POINT(p1); + + View v2 = Identity(v1); + + const MyObj& b = Identity(v1); + View v3 = Identity(b); + POINT(p2); + + MyObj c; + View v4 = Identity(c); + POINT(p3); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + + // FIXME: Handle reference types. 'v3' should have loan to 'a' instead of 'b'. + EXPECT_THAT(Origin("v3"), HasLoansTo({"b"}, "p2")); + + EXPECT_THAT(Origin("v4"), HasLoansTo({"c"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundTemplateFunction) { + SetupTest(R"( + template + const T& Identity(T&& v [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = Identity(a); + POINT(p1); + + View v2 = Identity(v1); + const View& v3 = Identity(v1); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"a"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundTemplateClass) { + SetupTest(R"( + template + struct [[gsl::Pointer()]] MyTemplateView { + MyTemplateView(const T& o) {} + MyTemplateView pass() [[clang::lifetimebound]] { return *this; } + }; + void target() { + MyObj o; + MyTemplateView v1 = o; + POINT(p1); + MyTemplateView v2 = v1.pass(); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundConversionOperator) { + SetupTest(R"( + struct MyOwner { + MyObj o; + operator View() const [[clang::lifetimebound]]; + }; + + void target() { + MyOwner owner; + View v = owner; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({"owner"}, "p1")); +} } // anonymous namespace } // namespace clang::lifetimes::internal From 34a8baba65c0559cc1a05b1fe22c7ae8cd0dbf35 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 7 Oct 2025 19:46:04 +0200 Subject: [PATCH 18/35] [LifetimeSafety] Introduce a liveness-based lifetime policy (#159991) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR replaces the forward `ExpiredLoansAnalysis` with a backward `LiveOriginAnalysis` that tracks which origins are live at each program point, along with confidence levels (Definite or Maybe). The new approach: - Tracks liveness of origins rather than expiration of loans - Uses a backward dataflow analysis to determine which origins are live at each point. - Provides more precise confidence levels for use-after-free warnings and avoids previous false-positives The `LifetimeChecker` now checks for use-after-free by examining if an origin is live when a loan expires, rather than checking if a loan is expired when an origin is used. More details describing the design flaw in using `ExpiredLoans` is mentioned in https://github.com/llvm/llvm-project/issues/156959#issuecomment-3281917701 Fixes: https://github.com/llvm/llvm-project/issues/156959 (With this, we can build LLVM with no false-positives 🎉 )
Benchmark report > Generated on: 2025-09-24 13:08:03 --- **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 50 | 54.12 ms | 80.80% | 0.00% | 80.42% | 0.00% | | 75 | 150.22 ms | 91.54% | 0.00% | 91.19% | 0.00% | | 100 | 317.12 ms | 94.90% | 0.00% | 94.77% | 0.00% | | 200 | 2.40 s | 98.58% | 0.00% | 98.54% | 0.03% | | 300 | 9.85 s | 99.25% | 0.00% | 99.24% | 0.01% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.47 ± 0.06) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.47 ± 0.06) | | LiveOrigins | (Negligible) | --- **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 400 | 105.22 ms | 72.61% | 0.68% | 71.38% | 0.52% | | 1000 | 610.74 ms | 88.88% | 0.33% | 88.32% | 0.23% | | 2000 | 2.50 s | 95.32% | 0.21% | 94.99% | 0.11% | | 5000 | 17.20 s | 98.20% | 0.14% | 98.01% | 0.05% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n2.14 ± 0.00) | | FactGenerator | O(n1.59 ± 0.05) | | LoanPropagation | O(n2.14 ± 0.00) | | LiveOrigins | O(n1.19 ± 0.04) | --- **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 50 | 141.95 ms | 91.14% | 0.00% | 90.99% | 0.00% | | 100 | 1.09 s | 98.17% | 0.00% | 98.13% | 0.00% | | 150 | 3.87 s | 99.28% | 0.00% | 99.27% | 0.00% | | 200 | 9.81 s | 99.61% | 0.00% | 99.60% | 0.00% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n3.23 ± 0.02) | | FactGenerator | (Negligible) | | LoanPropagation | O(n3.23 ± 0.02) | | LiveOrigins | (Negligible) | --- **Timing Results:** | N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) | |:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:| | 500 | 155.10 ms | 72.03% | 0.47% | 67.49% | 4.06% | | 1000 | 568.40 ms | 85.60% | 0.24% | 80.53% | 4.83% | | 2000 | 2.25 s | 93.00% | 0.13% | 86.99% | 5.88% | | 4000 | 9.06 s | 96.62% | 0.10% | 89.68% | 6.84% | **Complexity Analysis:** | Analysis Phase | Complexity O(nk) | |:------------------|:--------------------------| | Total Analysis | O(n2.07 ± 0.01) | | FactGenerator | O(n1.52 ± 0.13) | | LoanPropagation | O(n2.06 ± 0.01) | | LiveOrigins | O(n2.23 ± 0.00) | ---
--- .../clang/Analysis/Analyses/LifetimeSafety.h | 24 +- clang/lib/Analysis/LifetimeSafety.cpp | 506 +++++++++++------- clang/lib/Analysis/LifetimeSafety.md | 230 ++++++++ .../test/Analysis/LifetimeSafety/benchmark.py | 82 ++- clang/test/Sema/warn-lifetime-safety.cpp | 138 +++-- .../unittests/Analysis/LifetimeSafetyTest.cpp | 501 ++++++++--------- 6 files changed, 991 insertions(+), 490 deletions(-) create mode 100644 clang/lib/Analysis/LifetimeSafety.md diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 512cb76cd6349..e54fc26df28ea 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -29,12 +29,18 @@ namespace clang::lifetimes { /// Enum to track the confidence level of a potential error. -enum class Confidence { +enum class Confidence : uint8_t { None, Maybe, // Reported as a potential error (-Wlifetime-safety-strict) Definite // Reported as a definite error (-Wlifetime-safety-permissive) }; +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + class LifetimeSafetyReporter { public: LifetimeSafetyReporter() = default; @@ -55,6 +61,7 @@ class Fact; class FactManager; class LoanPropagationAnalysis; class ExpiredLoansAnalysis; +class LiveOriginAnalysis; struct LifetimeFactory; /// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. @@ -89,6 +96,7 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { // TODO(opt): Consider using a bitset to represent the set of loans. using LoanSet = llvm::ImmutableSet; using OriginSet = llvm::ImmutableSet; +using OriginLoanMap = llvm::ImmutableMap; /// A `ProgramPoint` identifies a location in the CFG by pointing to a specific /// `Fact`. identified by a lifetime-related event (`Fact`). @@ -110,8 +118,16 @@ class LifetimeSafetyAnalysis { /// Returns the set of loans an origin holds at a specific program point. LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; - /// Returns the set of loans that have expired at a specific program point. - std::vector getExpiredLoansAtPoint(ProgramPoint PP) const; + /// Returns the set of origins that are live at a specific program point, + /// along with the confidence level of their liveness. + /// + /// An origin is considered live if there are potential future uses of that + /// origin after the given program point. The confidence level indicates + /// whether the origin is definitely live (Definite) due to being domintated + /// by a set of uses or only possibly live (Maybe) only on some but not all + /// control flow paths. + std::vector> + getLiveOriginsAtPoint(ProgramPoint PP) const; /// Finds the OriginID for a given declaration. /// Returns a null optional if not found. @@ -138,7 +154,7 @@ class LifetimeSafetyAnalysis { std::unique_ptr Factory; std::unique_ptr FactMgr; std::unique_ptr LoanPropagation; - std::unique_ptr ExpiredLoans; + std::unique_ptr LiveOrigins; }; } // namespace internal } // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 02c7373c7444b..55129a0c831e9 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -19,12 +19,13 @@ #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" #include #include +#include namespace clang::lifetimes { namespace internal { @@ -872,22 +873,19 @@ class DataflowAnalysis { InStates[Start] = D.getInitialState(); W.enqueueBlock(Start); - llvm::SmallBitVector Visited(Cfg.getNumBlockIDs() + 1); - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = getInState(B); + Lattice StateIn = *getInState(B); Lattice StateOut = transferBlock(B, StateIn); OutStates[B] = StateOut; - Visited.set(B->getBlockID()); for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { if (!AdjacentB) continue; - Lattice OldInState = getInState(AdjacentB); - Lattice NewInState = D.join(OldInState, StateOut); + std::optional OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); // Enqueue the adjacent block if its in-state has changed or if we have - // never visited it. - if (!Visited.test(AdjacentB->getBlockID()) || - NewInState != OldInState) { + // never seen it. + if (!OldInState || NewInState != *OldInState) { InStates[AdjacentB] = NewInState; W.enqueueBlock(AdjacentB); } @@ -898,7 +896,12 @@ class DataflowAnalysis { protected: Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); } + std::optional getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } @@ -977,19 +980,21 @@ static llvm::ImmutableSet join(llvm::ImmutableSet A, return A; } -/// Checks if set A is a subset of set B. -template -static bool isSubsetOf(const llvm::ImmutableSet &A, - const llvm::ImmutableSet &B) { - // Empty set is a subset of all sets. - if (A.isEmpty()) - return true; - - for (const T &Elem : A) - if (!B.contains(Elem)) - return false; - return true; -} +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; /// Computes the key-wise union of two ImmutableMaps. // TODO(opt): This key-wise join is a performance bottleneck. A more @@ -997,23 +1002,29 @@ static bool isSubsetOf(const llvm::ImmutableSet &A, // instead of the current AVL-tree-based ImmutableMap. template static llvm::ImmutableMap -join(llvm::ImmutableMap A, llvm::ImmutableMap B, - typename llvm::ImmutableMap::Factory &F, Joiner JoinValues) { +join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, + JoinKind Kind) { if (A.getHeight() < B.getHeight()) - std::swap(A, B); + return join(B, A, F, JoinValues, Kind); // For each element in B, join it with the corresponding element in A // (or with an empty value if it doesn't exist in A). + llvm::ImmutableMap Res = A; for (const auto &Entry : B) { const K &Key = Entry.first; const V &ValB = Entry.second; - const V *ValA = A.lookup(Key); - if (!ValA) - A = F.add(A, Key, ValB); - else if (*ValA != ValB) - A = F.add(A, Key, JoinValues(*ValA, ValB)); + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } } - return A; + return Res; } } // namespace utils @@ -1021,19 +1032,6 @@ join(llvm::ImmutableMap A, llvm::ImmutableMap B, // Loan Propagation Analysis // ========================================================================= // -using OriginLoanMap = llvm::ImmutableMap; -using ExpiredLoanMap = llvm::ImmutableMap; - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - llvm::BumpPtrAllocator Allocator; - OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; - ExpiredLoanMap::Factory ExpiredLoanMapFactory{Allocator, - /*canonicalize=*/false}; -}; - /// Represents the dataflow lattice for loan propagation. /// /// This lattice tracks which loans each origin may hold at a given program @@ -1077,10 +1075,10 @@ class LoanPropagationAnalysis public: LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LifetimeFactory &LFactory) - : DataflowAnalysis(C, AC, F), - OriginLoanMapFactory(LFactory.OriginMapFactory), - LoanSetFactory(LFactory.LoanSetFactory) {} + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} using Base::transfer; @@ -1091,11 +1089,19 @@ class LoanPropagationAnalysis /// Merges two lattices by taking the union of loans for each origin. // TODO(opt): Keep the state small by removing origins which become dead. Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = - utils::join(A.Origins, B.Origins, OriginLoanMapFactory, - [&](LoanSet S1, LoanSet S2) { - return utils::join(S1, S2, LoanSetFactory); - }); + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); return Lattice(JoinedOrigins); } @@ -1124,12 +1130,12 @@ class LoanPropagationAnalysis OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); } - LoanSet getLoans(OriginID OID, ProgramPoint P) { + LoanSet getLoans(OriginID OID, ProgramPoint P) const { return getLoans(getState(P), OID); } private: - LoanSet getLoans(Lattice L, OriginID OID) { + LoanSet getLoans(Lattice L, OriginID OID) const { if (auto *Loans = L.Origins.lookup(OID)) return *Loans; return LoanSetFactory.getEmptySet(); @@ -1137,96 +1143,195 @@ class LoanPropagationAnalysis }; // ========================================================================= // -// Expired Loans Analysis +// Live Origins Analysis +// ========================================================================= // +// +// A backward dataflow analysis that determines which origins are "live" at each +// program point. An origin is "live" at a program point if there's a potential +// future use of the pointer it represents. Liveness is "generated" by a read of +// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being +// live) when its loan set is overwritten (e.g. a OriginFlow killing the +// destination origin). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. // ========================================================================= // -/// The dataflow lattice for tracking the set of expired loans. -struct ExpiredLattice { - /// Map from an expired `LoanID` to the `ExpireFact` that made it expire. - ExpiredLoanMap Expired; +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} - ExpiredLattice() : Expired(nullptr) {}; - explicit ExpiredLattice(ExpiredLoanMap M) : Expired(M) {} + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - bool operator==(const ExpiredLattice &Other) const { - return Expired == Other.Expired; + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); } - bool operator!=(const ExpiredLattice &Other) const { +}; + +using LivenessMap = llvm::ImmutableMap; + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct LivenessLattice { + LivenessMap LiveOrigins; + + LivenessLattice() : LiveOrigins(nullptr) {}; + + explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const LivenessLattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const LivenessLattice &Other) const { return !(*this == Other); } - void dump(llvm::raw_ostream &OS) const { - OS << "ExpiredLattice State:\n"; - if (Expired.isEmpty()) + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) OS << " \n"; - for (const auto &[ID, _] : Expired) - OS << " Loan " << ID << " is expired\n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } } }; -/// The analysis that tracks which loans have expired. -class ExpiredLoansAnalysis - : public DataflowAnalysis { - - ExpiredLoanMap::Factory &Factory; +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class LiveOriginAnalysis + : public DataflowAnalysis { + FactManager &FactMgr; + LivenessMap::Factory &Factory; public: - ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LifetimeFactory &Factory) - : DataflowAnalysis(C, AC, F), Factory(Factory.ExpiredLoanMapFactory) {} - - using Base::transfer; + LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis::transfer; - StringRef getAnalysisName() const { return "ExpiredLoans"; } + StringRef getAnalysisName() const { return "LiveOrigins"; } Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - /// Merges two lattices by taking the union of the two expired loans. - Lattice join(Lattice L1, Lattice L2) { - return Lattice( - utils::join(L1.Expired, L2.Expired, Factory, - // Take the last expiry fact to make this hermetic. - [](const ExpireFact *F1, const ExpireFact *F2) { - return F1->getExpiryLoc() > F2->getExpiryLoc() ? F1 : F2; - })); - } - - Lattice transfer(Lattice In, const ExpireFact &F) { - return Lattice(Factory.add(In.Expired, F.getLoanID(), &F)); - } - - // Removes the loan from the set of expired loans. - // - // When a loan is re-issued (e.g., in a loop), it is no longer considered - // expired. A loan can be in the expired set at the point of issue due to - // the dataflow state from a previous loop iteration being propagated along - // a backedge in the CFG. - // - // Note: This has a subtle false-negative though where a loan from previous - // iteration is not overwritten by a reissue. This needs careful tracking - // of loans "across iterations" which can be considered for future - // enhancements. - // - // void foo(int safe) { - // int* p = &safe; - // int* q = &safe; - // while (condition()) { - // int x = 1; - // p = &x; // A loan to 'x' is issued to 'p' in every iteration. - // if (condition()) { - // q = p; - // } - // (void)*p; // OK — 'p' points to 'x' from new iteration. - // (void)*q; // UaF - 'q' still points to 'x' from previous iteration - // // which is now destroyed. - // } - // } - Lattice transfer(Lattice In, const IssueFact &F) { - return Lattice(Factory.remove(In.Expired, F.getLoanID())); + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); } - ExpiredLoanMap getExpiredLoans(ProgramPoint P) { return getState(P).Expired; } + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOrigins(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : LSA.getTestPoints()) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } }; // ========================================================================= // @@ -1244,84 +1349,75 @@ class LifetimeChecker { private: llvm::DenseMap FinalWarningsMap; LoanPropagationAnalysis &LoanPropagation; - ExpiredLoansAnalysis &ExpiredLoans; + LiveOriginAnalysis &LiveOrigins; FactManager &FactMgr; AnalysisDeclContext &ADC; LifetimeSafetyReporter *Reporter; public: - LifetimeChecker(LoanPropagationAnalysis &LPA, ExpiredLoansAnalysis &ELA, + LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, FactManager &FM, AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) - : LoanPropagation(LPA), ExpiredLoans(ELA), FactMgr(FM), ADC(ADC), + : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), Reporter(Reporter) {} void run() { llvm::TimeTraceScope TimeProfile("LifetimeChecker"); for (const CFGBlock *B : *ADC.getAnalysis()) for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *UF = F->getAs()) - checkUse(UF); + if (const auto *EF = F->getAs()) + checkExpiry(EF); issuePendingWarnings(); } - /// Checks for use-after-free errors for a given use of an Origin. + /// Checks for use-after-free errors when a loan expires. /// - /// This method is called for each 'UseFact' identified in the control flow - /// graph. It determines if the loans held by the used origin have expired - /// at the point of use. - void checkUse(const UseFact *UF) { - if (UF->isWritten()) - return; - OriginID O = UF->getUsedOrigin(FactMgr.getOriginMgr()); - - // Get the set of loans that the origin might hold at this program point. - LoanSet HeldLoans = LoanPropagation.getLoans(O, UF); - - // Get the set of all loans that have expired at this program point. - ExpiredLoanMap AllExpiredLoans = ExpiredLoans.getExpiredLoans(UF); - - // If the pointer holds no loans or no loans have expired, there's nothing - // to check. - if (HeldLoans.isEmpty() || AllExpiredLoans.isEmpty()) - return; - - // Identify loans that which have expired but are held by the pointer. Using - // them is a use-after-free. - llvm::SmallVector DefaultedLoans; - // A definite UaF error occurs if all loans the origin might hold have - // expired. - bool IsDefiniteError = true; - for (LoanID L : HeldLoans) { - if (AllExpiredLoans.contains(L)) - DefaultedLoans.push_back(L); - else - // If at least one loan is not expired, this use is not a definite UaF. - IsDefiniteError = false; + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } } - // If there are no defaulted loans, the use is safe. - if (DefaultedLoans.empty()) + if (!BadUse) return; - - // Determine the confidence level of the error (definite or maybe). - Confidence CurrentConfidence = - IsDefiniteError ? Confidence::Definite : Confidence::Maybe; - - // For each expired loan, create a pending warning. - for (LoanID DefaultedLoan : DefaultedLoans) { - // If we already have a warning for this loan with a higher or equal - // confidence, skip this one. - if (FinalWarningsMap.count(DefaultedLoan) && - CurrentConfidence <= FinalWarningsMap[DefaultedLoan].ConfidenceLevel) - continue; - - auto *EF = AllExpiredLoans.lookup(DefaultedLoan); - assert(EF && "Could not find ExpireFact for an expired loan."); - - FinalWarningsMap[DefaultedLoan] = {/*ExpiryLoc=*/(*EF)->getExpiryLoc(), - /*UseExpr=*/UF->getUseExpr(), - /*ConfidenceLevel=*/CurrentConfidence}; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; } + llvm_unreachable("unknown liveness kind"); } void issuePendingWarnings() { @@ -1340,6 +1436,15 @@ class LifetimeChecker { // LifetimeSafetyAnalysis Class Implementation // ========================================================================= // +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + llvm::BumpPtrAllocator Allocator; + OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; +}; + // We need this here for unique_ptr with forward declared class. LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; @@ -1370,15 +1475,16 @@ void LifetimeSafetyAnalysis::run() { /// the analysis. /// 3. Collapse ExpireFacts belonging to same source location into a single /// Fact. - LoanPropagation = - std::make_unique(Cfg, AC, *FactMgr, *Factory); + LoanPropagation = std::make_unique( + Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); LoanPropagation->run(); - ExpiredLoans = - std::make_unique(Cfg, AC, *FactMgr, *Factory); - ExpiredLoans->run(); + LiveOrigins = std::make_unique( + Cfg, AC, *FactMgr, Factory->LivenessMapFactory); + LiveOrigins->run(); + DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); - LifetimeChecker Checker(*LoanPropagation, *ExpiredLoans, *FactMgr, AC, + LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, Reporter); Checker.run(); } @@ -1389,15 +1495,6 @@ LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, return LoanPropagation->getLoans(OID, PP); } -std::vector -LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const { - assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run."); - std::vector Result; - for (const auto &pair : ExpiredLoans->getExpiredLoans(PP)) - Result.push_back(pair.first); - return Result; -} - std::optional LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { assert(FactMgr && "FactManager not initialized"); @@ -1417,6 +1514,15 @@ LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { return Result; } +std::vector> +LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { + assert(LiveOrigins && "LiveOriginAnalysis has not been run."); + std::vector> Result; + for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) + Result.push_back({OID, Info.Kind}); + return Result; +} + llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { assert(FactMgr && "FactManager not initialized"); llvm::StringMap AnnotationToPointMap; diff --git a/clang/lib/Analysis/LifetimeSafety.md b/clang/lib/Analysis/LifetimeSafety.md new file mode 100644 index 0000000000000..3f3d03d729b46 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.md @@ -0,0 +1,230 @@ +Excellent! This is a very strong and logical structure for the white paper. It follows a clear narrative, starting from the high-level problem and progressively diving into the specifics of your solution. The sections on why a traditional borrow checker doesn't fit C++ and the open questions are particularly good, as they show a deep engagement with the problem space. + +Here is a draft of the white paper following your new skeleton, with the details filled in based on my analysis of your implementation and the provided reference documents. I've also incorporated some of my own suggestions to enhance the flow and clarity. + +*** + + + +# Lifetime Safety: An Intuitive Approach for Temporal Safety in C++ +**Author:** +[Utkarsh Saxena](mailto:usx@google.com) + +**Purpose:** This document serves as a live RFC for a new lifetime safety analysis in C++, with the ultimate goal of publication as a white paper. + +## Intended Audience + +This document is intended for C++ compiler developers (especially those working on Clang), developers of other systems languages with advanced memory safety models (like Rust and Carbon), and all C++ users interested in writing safer code. + +## Goal + +* To describe a new lifetime model for C++ that aims to maximize the compile-time detection of temporal memory safety issues. +* To explore a path toward incremental safety in C++, offering a spectrum of checks that can be adopted without requiring a full plunge into a restrictive ownership model. + +**Out of Scope** + +* **Rigorous Temporal Memory Safety:** This analysis aims to detect a large class of common errors, but it does not formally prove the absence of all temporal safety bugs. +* **Runtime Solutions:** This paper focuses exclusively on static, compile-time analysis and does not cover runtime solutions like MTE or AddressSanitizer. + +# Paper: C++ Lifetimes Safety Analysis + +**Subtitle: A Flow-Sensitive, Alias-based Approach to Preventing Dangling Pointers** + +## Abstract + +This paper introduces a new intra-procedural, flow-sensitive lifetime analysis for C++ implemented in Clang. The analysis is designed to detect a significant class of temporal memory safety violations, such as use-after-free and use-after-return, at compile time. It is based on a model of "Loans" and "Origins," inspired by the Polonius borrow checker in Rust, but adapted for the semantics and flexibility of C++. + +The analysis works by translating the Clang CFG into a series of lifetime-relevant "Facts." These facts are then processed by dataflow analyses to precisely determine the validity of pointers and references at each program point. This fact-based approach, combined with a configurable strictness model, allows for both high-confidence error reporting and the detection of more subtle, potential bugs, without requiring extensive new annotations. The ultimate goal is to provide a powerful, low-overhead tool that makes C++ safer by default. + +## The Anatomy of a Temporal Safety Error + +At its core, a temporal safety error is a bug where an operation is performed on an object at a time when it is no longer valid to do so ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=at+its+core+a+temporal+safety+error+is+a+bug+where+an+operation+is+performed+on+an+object+at+a+time+when+it+is+no+longer+valid+to+do+so)). These bugs are notoriously difficult to debug because they often manifest as unpredictable crashes or silent data corruption far from the root cause. However, we can argue that this wide and varied class of errors—from use-after-free to iterator invalidation—all stem from a single, canonical pattern. + +**Conjecture: Any temporal safety issue is a form of Use-After-Free.** + +All sub-categories of temporal safety issues, such as returning a reference to a stack variable (`return-stack-addr`), using a variable after its scope has ended (`use-after-scope`), using heap memory after it has been deleted (`heap-use-after-free`), or using an iterator after its container has been modified (`use-after-invalidation`), can be described by a single sequence of events. + +In C++, an *object* is a region of storage, and pointers and references are the mechanisms we use to refer to them. A use-after-free occurs when we access an object after its lifetime has ended. But how can an object be accessed after it has been destroyed? This is only possible through an **alias**—a pointer or reference—that was created while the object was alive and that survived the object's destruction. + +This insight allows us to define a canonical use-after-free with four distinct events that happen in a specific order: + +1. **`t0`: Creation.** An object `M` is created in some region of storage (on the stack, on the heap, etc.). +2. **`t1`: Alias Creation.** An alias `P` (a pointer or reference) is created that refers to the object `M`. +3. **`t2`: End of Lifetime.** The lifetime of object `M` ends (e.g., it is deallocated, or it goes out of scope). +4. **`t3`: Use of Alias.** The alias `P`, which now dangles, is used to access the memory where `M` once resided. + +Let's examine this with a simple piece of C++ code: + +```cpp +void use_after_scope_example() { + int* p; + { + int s = 10; // t0: Object `s` is created on the stack. + p = &s; // t1: Alias `p` is made to refer to object `s`. + } // t2: The lifetime of `s` ends. `p` now dangles. + *p = 42; // t3: The dangling alias `p` is used. This is a use-after-free. +} +``` + +The fundamental problem is that the alias `p` outlived the object `s` it referred to. The challenge for a static analysis is therefore clear: to prevent temporal safety errors, the compiler must be able to track aliases and understand the lifetime of the objects they refer to. It needs to know the "points-to" set for every alias at every point in the program and verify that, at the moment of use, the alias does not point to an object whose lifetime has ended. + +This alias-based perspective is powerful because it generalizes beautifully. The "end of lifetime" event at `t2` doesn't have to be a variable going out of scope. It could be: + +* A call to `delete`, which ends the lifetime of a heap object. +* A function `return`, which ends the lifetime of all its local variables. +* A container modification, like `std::vector::push_back()`, which may reallocate storage, ending the lifetime of the objects in the old buffer and invalidating all existing iterators (aliases). + +By focusing on tracking aliases and their validity, we can build a unified model to detect a wide range of temporal safety errors without imposing the heavy "aliasing XOR mutability" restrictions of a traditional borrow checker ([source](https://gist.github.com/nmsmith/cdaa94aa74e8e0611221e65db8e41f7b?content_ref=the+major+advancement+is+to+eliminate+the+aliasing+xor+mutability+restriction+amongst+references+and+replace+it+with+a+similar+restriction+applied+to+lifetime+parameters)). This provides a more intuitive and C++-idiomatic path to memory safety. + +## Relation with Thread safety + +This analysis does not address Thread Safety. Thread safety is concerned with data races that occur across multiple threads. While it is possible to create temporal safety issues in multi-threaded scenarios, this analysis is focused on the sequential lifetime of objects within a single function. + +## Quest for Safer Aliasing + +Is it possible to achieve memory safety without a restrictive model like Rust's borrow checker? We believe the answer is yes. The key is to shift our focus from *restricting aliases* to *understanding them*. Instead of forbidding programs that have aliased mutable pointers, we can build a model that understands what each pointer can point to at any given time. This approach, similar to the one proposed in P1179 for C++ and explored in modern lifetime systems like Mojo's, allows us to directly detect the root cause of the problem: using a pointer after its target has ceased to exist ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=this+approach+similar+to+the+one+proposed+in+p1179+for+c+and+explored+in+modern+lifetime+systems+like+mojo+s+allows+us+to+directly+detect+the+root+cause+of+the+problem+using+a+pointer+after+its+target+has+ceased+to+exist)). + +This paper proposes such a model for C++. Let's begin with a simple, yet illustrative, dangling pointer bug: + +```cpp +// Example 1: A simple use-after-free +void definite_simple_case() { + MyObj* p; + { + MyObj s; + p = &s; // 'p' now points to 's' + } // 's' is destroyed, 'p' is now dangling + (void)*p; // Use-after-free +} +``` + +How can a compiler understand that the use of `p` is an error? It needs to answer a series of questions: + +1. What does `p` point to? +2. When does the object `p` points to cease to be valid? +3. Is `p` used after that point? + +Our model is designed to answer precisely these questions. + +## Core Concepts + +Our model is built on a few core concepts that allow us to formally track the relationships between pointers and the data they point to. + +### Access Paths + +An **Access Path** is a symbolic representation of a storage location in the program ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp?content_ref=struct+accesspath+const+clang+valuedecl+d+accesspath+const+clang+valuedecl+d+d+d)). It provides a way to uniquely identify a variable or a sub-object. For now, we will consider simple paths that refer to top-level variables, but the model can be extended to include field accesses (`a.b`), array elements (`a[i]`), and pointer indirections (`p->field`). + +### Loans: The Act of Borrowing + +A **Loan** is created whenever a reference or pointer to an object is created. It represents the act of "borrowing" that object's storage location ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp?content_ref=information+about+a+single+borrow+or+loan+a+loan+is+created+when+a+reference+or+pointer+is+created)). Each loan is associated with a unique ID and the `AccessPath` of the object being borrowed. + +In our `definite_simple_case` example, the expression `&s` creates a loan. The `AccessPath` for this loan is the variable `s`. + +### Origins: The Provenance of a Pointer + +An **Origin** is a symbolic identifier that represents the *set of possible loans* a pointer-like object could hold at any given time ([source](http://docs.google.com/document/d/1JpJ3M9yeXX-BnC4oKXBvRWzxoFrwziN1RzI4DrMrSp8?content_ref=ime+is+a+symbolic+identifier+representing+a+set+of+loans+from+which+a+pointer+or+reference+could+have+originated)). Every pointer-like variable or expression in the program is associated with an origin. + +* A variable declaration like `MyObj* p` introduces an origin for `p`. +* An expression like `&s` also has an origin. +* The complexity of origins can grow with type complexity. For example: + * `int* p;` has a single origin. + * `int** p;` has two origins: one for the outer pointer and one for the inner pointer. This allows us to distinguish between `p` itself being modified and what `*p` points to being modified. + * `struct S { int* p; };` also has an origin associated with the member `p`. + +The central goal of our analysis is to determine, for each origin at each point in the program, which loans it might contain. + +## Subtyping Rules and Subset Constraints + +The relationships between origins are established through the program's semantics, particularly assignments. When a pointer is assigned to another, as in `p = q`, the set of loans that `q` holds must be a subset of the loans that `p` can now hold. This is a fundamental subtyping rule: for `T*'a` to be a subtype of `T*'b`, the set of loans represented by `'a` must be a subset of the loans represented by `'b`. + +This leads to the concept of **subset constraints**. An assignment `p = q` generates a constraint `Origin(q) ⊆ Origin(p)`. The analysis doesn't solve these as a global system of equations. Instead, as we will see, it propagates the *consequences* of these constraints—the loans themselves—through the control-flow graph. This is a key departure from the Polonius model, which focuses on propagating the constraints (`'a: 'b`) themselves. + +## Invalidations: When Loans Expire + +A loan expires when the object it refers to is no longer valid. In our model, this is an **invalidation** event. The most common invalidation is deallocation, which in C++ can mean: +* A stack variable going out of scope. +* A `delete` call on a heap-allocated object. +* A container modification that reallocates its internal storage. + +## An Event-Based Representation of the Function + +To analyze a function, we first transform its CFG into a sequence of atomic, lifetime-relevant **Events**, which we call **Facts**. These facts abstract away the complexities of C++ syntax and provide a clean input for our analysis. The main facts are: + +* `Issue(LoanID, OriginID)`: A new loan is created. For example, `&s` generates an `Issue` fact. +* `Expire(LoanID)`: A loan expires. This is generated at the end of a variable's scope. +* `OriginFlow(Dest, Src, Kill)`: Loans from a source origin flow to a destination origin, as in an assignment. `Kill` indicates whether the destination's old loans are cleared. +* `Use(OriginID)`: An origin is used, such as in a pointer dereference. + +Let's trace our `definite_simple_case` example with these facts: + +```cpp +void definite_simple_case() { + MyObj* p; // Origin for p is O_p + { + MyObj s; + // The expression `&s` generates: + // - IssueFact(L1, O_&s) (A new loan L1 on 's' is created) + // The assignment `p = &s` generates: + // - OriginFlowFact(O_p, O_&s, Kill=true) + p = &s; + } // The end of the scope for 's' generates: + // - ExpireFact(L1) + // The dereference `*p` generates: + // - UseFact(O_p) + (void)*p; +} +``` + +## Flow-Sensitive Lifetime Policy + +With the program represented as a stream of facts, we can now define a flow-sensitive policy to answer our three core questions. We do this by maintaining a map from `Origin` to `Set` at each program point. This map represents the state of our analysis. + +The analysis proceeds as follows: +1. **Forward Propagation of Loans:** We perform a forward dataflow analysis. + * When we encounter an `Issue` fact, we add the new loan to its origin's loan set. + * When we see an `OriginFlow` fact, we update the destination origin's loan set with the loans from the source. + * At control-flow merge points, we take the *union* of the loan sets from all incoming branches. + +2. **Backward Propagation of Liveness:** We then perform a backward dataflow analysis, starting from `Use` facts. + * A `Use` of an origin marks it as "live." + * This liveness information is propagated backward. If an origin `O_p` is live, and it received its loans from `O_q`, then `O_q` is also considered live at that point. + +3. **Error Detection:** An error is flagged when the analysis determines that a **live** origin contains a loan that has **expired**. + +In our `definite_simple_case` example: +* The forward analysis determines that at the point of use, `Origin(p)` contains `Loan(s)`. +* The backward analysis determines that at the point where `s` is destroyed, `Origin(p)` is live. +* The `ExpireFact` for `Loan(s)` occurs before the `UseFact`. +* The combination of these three conditions triggers a use-after-free error. + +## Without Functions, Our Work is Done Here! + +The model described so far works perfectly for a single, monolithic function. However, the moment we introduce function calls, the problem becomes more complex. How do we reason about lifetimes across function boundaries, especially when we can't see the implementation of the called function? + +### Effects of a Function Call + +A function call has inputs and outputs. From a lifetime perspective, the key challenge is to understand how the lifetimes of the outputs relate to the lifetimes of the inputs. + +### Outlives Constraints and Placeholder Origins + +When analyzing a function like `const char* get_prefix(const string& s, int len)`, we don't know the specific lifetime of the `s` that will be passed by the caller. To handle this, we introduce **placeholder origins** for the input parameters. These placeholders act as variables in our analysis. + +If a function returns a pointer or reference, its lifetime must be tied to one of its inputs. This is an **outlives constraint**. For example, the return value of `get_prefix` must "outlive" the input `s`. In our model, this means the origin of the return value will contain the placeholder loan associated with `s`. + +### Opaque Functions + +What if a function's implementation is not visible (e.g., it's in a separate translation unit), and it has no lifetime annotations? In this case, we must be conservative. If we pass a pointer to an opaque function, we have to assume it might have been invalidated. Our model handles this by associating a special **OPAQUE loan** with the pointer after the call, signifying that its lifetime is now unknown. + +## Why a Borrow Checker is Not the Right Fit for C++ + +The "aliasing XOR mutability" rule, while powerful, is fundamentally at odds with many idiomatic C++ patterns. +* **Observer Patterns:** It's common to have multiple non-owning pointers observing a mutable object. +* **Intrusive Data Structures:** Data structures like intrusive linked lists require objects to hold pointers to one another, creating cycles that are difficult for a traditional borrow checker to handle. +* **Iterator Invalidation:** The core problem in C++ is often not aliasing itself, but the fact that a mutation can invalidate an alias (e.g., resizing a vector). An alias-based analysis, like the one proposed here, directly models this problem, whereas a borrow checker can feel like an indirect and overly restrictive solution. + +By focusing on tracking what pointers can point to, our model avoids rejecting these safe and useful patterns, making it a more natural fit for the existing C++ ecosystem. + +## Open Questions + +* **When and if to introduce the term "lifetime"?** The term "lifetime" is heavily associated with Rust's model. This paper has intentionally focused on "Origins" and "Loans" to avoid confusion. Is there a point where introducing "lifetime" would be helpful, or should we stick to the new terminology? +* **Syntax for Annotations:** While this model is designed to work with minimal annotations, some will be necessary for complex cases. What should the syntax for these annotations look like? Can we build on existing attributes like `[[clang::lifetimebound]]`? diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py index d2e5f0b2122a3..cd5b30818a4a8 100644 --- a/clang/test/Analysis/LifetimeSafety/benchmark.py +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -145,6 +145,60 @@ def generate_cpp_nested_loop_test(n: int) -> str: return cpp_code +def generate_cpp_switch_fan_out_test(n: int) -> str: + """ + Generates C++ code with a switch statement with N branches. + Each branch 'i' 'uses' (reads) a single, unique pointer 'pi'. + This pattern creates a "fan-in" join point for the backward + liveness analysis, stressing the LivenessMap::join operation + by forcing it to merge N disjoint, single-element sets of live origins. + The resulting complexity for LiveOrigins should be O(n log n) or higher. + + Example (n=3): + struct MyObj { int id; ~MyObj() {} }; + + void switch_fan_out_3(int condition) { + MyObj v1{1}; MyObj v2{1}; MyObj v3{1}; + MyObj* p1 = &v1; MyObj* p2 = &v2; MyObj* p3 = &v3; + + switch (condition % 3) { + case 0: + p1->id = 1; + break; + case 1: + p2->id = 1; + break; + case 2: + p3->id = 1; + break; + } + } + """ + if n <= 0: + return "// Number of variables must be positive." + + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void switch_fan_out{n}(int condition) {{\n" + # Generate N distinct objects + for i in range(1, n + 1): + cpp_code += f" MyObj v{i}{{1}};\n" + cpp_code += "\n" + # Generate N distinct pointers, each as a separate variable + for i in range(1, n + 1): + cpp_code += f" MyObj* p{i} = &v{i};\n" + cpp_code += "\n" + + cpp_code += f" switch (condition % {n}) {{\n" + for case_num in range(n): + cpp_code += f" case {case_num}:\n" + cpp_code += f" p{case_num + 1}->id = 1;\n" + cpp_code += " break;\n" + + cpp_code += " }\n}\n" + cpp_code += f"\nint main() {{ switch_fan_out{n}(0); return 0; }}\n" + return cpp_code + + def analyze_trace_file(trace_path: str) -> dict: """ Parses the -ftime-trace JSON output to find durations for the lifetime @@ -156,14 +210,14 @@ def analyze_trace_file(trace_path: str) -> dict: "total_us": 0.0, "fact_gen_us": 0.0, "loan_prop_us": 0.0, - "expired_loans_us": 0.0, + "live_origins_us": 0.0, } event_name_map = { "LifetimeSafetyAnalysis": "lifetime_us", "ExecuteCompiler": "total_us", "FactGenerator": "fact_gen_us", "LoanPropagation": "loan_prop_us", - "ExpiredLoans": "expired_loans_us", + "LiveOrigins": "live_origins_us", } try: with open(trace_path, "r") as f: @@ -227,7 +281,7 @@ def generate_markdown_report(results: dict) -> str: # Table header report.append( - "| N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Expired Loans (%) |" + "| N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) |" ) report.append( "|:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:|" @@ -247,7 +301,7 @@ def generate_markdown_report(results: dict) -> str: f"{data['lifetime_ms'][i] / total_t * 100:>17.2f}% |", f"{data['fact_gen_ms'][i] / total_t * 100:>18.2f}% |", f"{data['loan_prop_ms'][i] / total_t * 100:>20.2f}% |", - f"{data['expired_loans_ms'][i] / total_t * 100:>17.2f}% |", + f"{data['live_origins_ms'][i] / total_t * 100:>17.2f}% |", ] report.append(" ".join(row)) @@ -259,7 +313,7 @@ def generate_markdown_report(results: dict) -> str: "Total Analysis": data["lifetime_ms"], "FactGenerator": data["fact_gen_ms"], "LoanPropagation": data["loan_prop_ms"], - "ExpiredLoans": data["expired_loans_ms"], + "LiveOrigins": data["live_origins_ms"], } for phase_name, y_data in analysis_phases.items(): @@ -302,7 +356,13 @@ def run_single_test( source_file, ] - result = subprocess.run(clang_command, capture_output=True, text=True, timeout=60) + try: + result = subprocess.run( + clang_command, capture_output=True, text=True, timeout=60 + ) + except subprocess.TimeoutExpired: + print(f"Compilation timed out for N={n}!", file=sys.stderr) + return {} if result.returncode != 0: print(f"Compilation failed for N={n}!", file=sys.stderr) @@ -354,6 +414,12 @@ def run_single_test( "generator_func": generate_cpp_nested_loop_test, "n_values": [50, 100, 150, 200], }, + { + "name": "switch_fan_out", + "title": "Switch Fan-out", + "generator_func": generate_cpp_switch_fan_out_test, + "n_values": [500, 1000, 2000, 4000], + }, ] results = {} @@ -368,7 +434,7 @@ def run_single_test( "total_ms": [], "fact_gen_ms": [], "loan_prop_ms": [], - "expired_loans_ms": [], + "live_origins_ms": [], } for n in config["n_values"]: durations_ms = run_single_test( @@ -387,7 +453,7 @@ def run_single_test( f" Total Analysis: {human_readable_time(durations_ms['lifetime_ms'])} | " f"FactGen: {human_readable_time(durations_ms['fact_gen_ms'])} | " f"LoanProp: {human_readable_time(durations_ms['loan_prop_ms'])} | " - f"ExpiredLoans: {human_readable_time(durations_ms['expired_loans_ms'])}" + f"LiveOrigins: {human_readable_time(durations_ms['live_origins_ms'])}" ) print("\n\n" + "=" * 80) diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index f6bec6e66c365..4f234f0ac6e2d 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -126,11 +126,15 @@ void definite_single_pointer_multiple_loans_gsl(bool cond) { v.use(); // expected-note 2 {{later used here}} } - -//===----------------------------------------------------------------------===// -// Potential (Maybe) Use-After-Free (-W...strict) -// These are cases where the pointer *may* become dangling, depending on the path taken. -//===----------------------------------------------------------------------===// +void definite_if_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} void potential_if_branch(bool cond) { MyObj safe; @@ -139,15 +143,18 @@ void potential_if_branch(bool cond) { MyObj temp; p = &temp; // expected-warning {{object whose reference is captured may not live long enough}} } // expected-note {{destroyed here}} - (void)*p; // expected-note {{later used here}} + if (!cond) + (void)*p; // expected-note {{later used here}} + else + p = &safe; } -void potential_if_branch_gsl(bool cond) { +void definite_if_branch_gsl(bool cond) { MyObj safe; View v = safe; if (cond) { MyObj temp; - v = temp; // expected-warning {{object whose reference is captured may not live long enough}} + v = temp; // expected-warning {{object whose reference is captured does not live long enough}} } // expected-note {{destroyed here}} v.use(); // expected-note {{later used here}} } @@ -159,13 +166,14 @@ void definite_potential_together(bool cond) { { MyObj s; - p_definite = &s; // expected-warning {{does not live long enough}} - if (cond) { - p_maybe = &s; // expected-warning {{may not live long enough}} - } - } // expected-note 2 {{destroyed here}} - (void)*p_definite; // expected-note {{later used here}} - (void)*p_maybe; // expected-note {{later used here}} + if (cond) + p_definite = &s; // expected-warning {{does not live long enough}} + if (cond) + p_maybe = &s; // expected-warning {{may not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p_definite; // expected-note {{later used here}} + if (!cond) + (void)*p_maybe; // expected-note {{later used here}} } void definite_overrides_potential(bool cond) { @@ -189,10 +197,19 @@ void definite_overrides_potential(bool cond) { (void)*q; } - -//===----------------------------------------------------------------------===// -// Control Flow Tests -//===----------------------------------------------------------------------===// +void potential_due_to_conditional_killing(bool cond) { + MyObj safe; + MyObj* q; + { + MyObj s; + q = &s; // expected-warning {{may not live long enough}} + } // expected-note {{destroyed here}} + if (cond) { + // 'q' is conditionally "rescued". 'p' is not. + q = &safe; + } + (void)*q; // expected-note {{later used here}} +} void potential_for_loop_use_after_loop_body(MyObj safe) { MyObj* p = &safe; @@ -215,34 +232,35 @@ void potential_for_loop_gsl() { void potential_for_loop_use_before_loop_body(MyObj safe) { MyObj* p = &safe; + // Prefer the earlier use for diagnsotics. for (int i = 0; i < 1; ++i) { (void)*p; // expected-note {{later used here}} MyObj s; - p = &s; // expected-warning {{may not live long enough}} + p = &s; // expected-warning {{does not live long enough}} } // expected-note {{destroyed here}} (void)*p; } -void potential_loop_with_break(bool cond) { +void definite_loop_with_break(bool cond) { MyObj safe; MyObj* p = &safe; for (int i = 0; i < 10; ++i) { if (cond) { MyObj temp; - p = &temp; // expected-warning {{may not live long enough}} + p = &temp; // expected-warning {{does not live long enough}} break; // expected-note {{destroyed here}} } } (void)*p; // expected-note {{later used here}} } -void potential_loop_with_break_gsl(bool cond) { +void definite_loop_with_break_gsl(bool cond) { MyObj safe; View v = safe; for (int i = 0; i < 10; ++i) { if (cond) { MyObj temp; - v = temp; // expected-warning {{object whose reference is captured may not live long enough}} + v = temp; // expected-warning {{object whose reference is captured does not live long enough}} break; // expected-note {{destroyed here}} } } @@ -250,37 +268,52 @@ void potential_loop_with_break_gsl(bool cond) { } void potential_multiple_expiry_of_same_loan(bool cond) { - // Choose the last expiry location for the loan. + // Choose the last expiry location for the loan (e.g., through scope-ends and break statements). MyObj safe; MyObj* p = &safe; for (int i = 0; i < 10; ++i) { MyObj unsafe; if (cond) { - p = &unsafe; // expected-warning {{may not live long enough}} - break; + p = &unsafe; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} } - } // expected-note {{destroyed here}} + } (void)*p; // expected-note {{later used here}} p = &safe; for (int i = 0; i < 10; ++i) { MyObj unsafe; if (cond) { - p = &unsafe; // expected-warning {{may not live long enough}} + p = &unsafe; // expected-warning {{does not live long enough}} if (cond) - break; + break; // expected-note {{destroyed here}} } - } // expected-note {{destroyed here}} + } (void)*p; // expected-note {{later used here}} p = &safe; for (int i = 0; i < 10; ++i) { if (cond) { MyObj unsafe2; - p = &unsafe2; // expected-warning {{may not live long enough}} + p = &unsafe2; // expected-warning {{does not live long enough}} break; // expected-note {{destroyed here}} } } + + // TODO: This can be argued to be a "maybe" warning. This is because + // we only check for confidence of liveness and not the confidence of + // the loan contained in an origin. To deal with this, we can introduce + // a confidence in loan propagation analysis as well like liveness. + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) + p = &unsafe; // expected-warning {{does not live long enough}} + if (cond) + break; // expected-note {{destroyed here}} + } (void)*p; // expected-note {{later used here}} } @@ -298,13 +331,14 @@ void potential_switch(int mode) { break; } } - (void)*p; // expected-note {{later used here}} + if (mode == 2) + (void)*p; // expected-note {{later used here}} } void definite_switch(int mode) { MyObj safe; MyObj* p = &safe; - // All cases are UaF --> Definite error. + // A use domintates all the loan expires --> all definite error. switch (mode) { case 1: { MyObj temp1; @@ -347,6 +381,21 @@ void definite_switch_gsl(int mode) { v.use(); // expected-note 3 {{later used here}} } +void loan_from_previous_iteration(MyObj safe, bool condition) { + MyObj* p = &safe; + MyObj* q = &safe; + + while (condition) { + MyObj x; + p = &x; // expected-warning {{may not live long enough}} + + if (condition) + q = p; + (void)*p; + (void)*q; // expected-note {{later used here}} + } // expected-note {{destroyed here}} +} + //===----------------------------------------------------------------------===// // No-Error Cases //===----------------------------------------------------------------------===// @@ -372,6 +421,19 @@ void no_error_if_dangle_then_rescue_gsl() { v.use(); // This is safe. } +void no_error_loan_from_current_iteration(bool cond) { + // See https://github.com/llvm/llvm-project/issues/156959. + MyObj b; + while (cond) { + MyObj a; + View p = b; + if (cond) { + p = a; + } + (void)p; + } +} + //===----------------------------------------------------------------------===// // Lifetimebound Attribute Tests @@ -415,9 +477,9 @@ void lifetimebound_multiple_args_potential(bool cond) { MyObj obj1; if (cond) { MyObj obj2; - v = Choose(true, - obj1, // expected-warning {{object whose reference is captured may not live long enough}} - obj2); // expected-warning {{object whose reference is captured may not live long enough}} + v = Choose(true, + obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); // expected-warning {{object whose reference is captured does not live long enough}} } // expected-note {{destroyed here}} } // expected-note {{destroyed here}} v.use(); // expected-note 2 {{later used here}} @@ -488,7 +550,7 @@ void lifetimebound_partial_safety(bool cond) { MyObj temp_obj; v = Choose(true, safe_obj, - temp_obj); // expected-warning {{object whose reference is captured may not live long enough}} + temp_obj); // expected-warning {{object whose reference is captured does not live long enough}} } // expected-note {{destroyed here}} v.use(); // expected-note {{later used here}} } diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 3821015f07fb1..169b2d2d5e9b4 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -126,12 +126,12 @@ class LifetimeTestHelper { return Analysis.getLoansAtPoint(OID, PP); } - std::optional> - getExpiredLoansAtPoint(llvm::StringRef Annotation) { + std::optional>> + getLiveOriginsAtPoint(llvm::StringRef Annotation) { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getExpiredLoansAtPoint(PP); + return Analysis.getLiveOriginsAtPoint(PP); } private: @@ -180,6 +180,15 @@ class OriginInfo { LifetimeTestHelper &Helper; }; +// A helper class to represent a set of origins, identified by variable names. +class OriginsInfo { +public: + OriginsInfo(const std::vector &Vars, LifetimeTestHelper &H) + : OriginVars(Vars), Helper(H) {} + std::vector OriginVars; + LifetimeTestHelper &Helper; +}; + /// Matcher to verify the set of loans held by an origin at a specific /// program point. /// @@ -221,14 +230,15 @@ MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { std::sort(ExpectedLoans.begin(), ExpectedLoans.end()); std::sort(ActualLoans.begin(), ActualLoans.end()); if (ExpectedLoans != ActualLoans) { - *result_listener << "Expected: "; + *result_listener << "Expected: {"; for (const auto &LoanID : ExpectedLoans) { *result_listener << LoanID.Value << ", "; } - *result_listener << "Actual: "; + *result_listener << "} Actual: {"; for (const auto &LoanID : ActualLoans) { *result_listener << LoanID.Value << ", "; } + *result_listener << "}"; return false; } @@ -236,32 +246,71 @@ MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { ActualLoans, result_listener); } -/// Matcher to verify that the complete set of expired loans at a program point -/// matches the expected loan set. -MATCHER_P(AreExpiredAt, Annotation, "") { - const LoanSetInfo &Info = arg; - auto &Helper = Info.Helper; +enum class LivenessKindFilter { Maybe, Must, All }; - auto ActualExpiredSetOpt = Helper.getExpiredLoansAtPoint(Annotation); - if (!ActualExpiredSetOpt) { - *result_listener << "could not get a valid expired loan set at point '" +/// Matcher to verify the complete set of live origins at a program point. +MATCHER_P2(AreLiveAtImpl, Annotation, ConfFilter, "") { + const OriginsInfo &Info = arg; + auto &Helper = Info.Helper; + auto ActualLiveSetOpt = Helper.getLiveOriginsAtPoint(Annotation); + if (!ActualLiveSetOpt) { + *result_listener << "could not get a valid live origin set at point '" << Annotation << "'"; return false; } - std::vector ActualExpiredLoans = *ActualExpiredSetOpt; - std::vector ExpectedExpiredLoans; - for (const auto &VarName : Info.LoanVars) { - auto LoanIDs = Helper.getLoansForVar(VarName); - if (LoanIDs.empty()) { - *result_listener << "could not find a loan for variable '" << VarName + std::vector ActualLiveOrigins; + for (const auto [OID, ActualConfidence] : ActualLiveSetOpt.value()) { + if (ConfFilter == LivenessKindFilter::All) + ActualLiveOrigins.push_back(OID); + if (ActualConfidence == LivenessKind::Maybe && + ConfFilter == LivenessKindFilter::Maybe) + ActualLiveOrigins.push_back(OID); + if (ActualConfidence == LivenessKind::Must && + ConfFilter == LivenessKindFilter::Must) + ActualLiveOrigins.push_back(OID); + } + + std::vector ExpectedLiveOrigins; + for (const auto &VarName : Info.OriginVars) { + auto OriginIDOpt = Helper.getOriginForDecl(VarName); + if (!OriginIDOpt) { + *result_listener << "could not find an origin for variable '" << VarName << "'"; return false; } - ExpectedExpiredLoans.insert(ExpectedExpiredLoans.end(), LoanIDs.begin(), - LoanIDs.end()); + ExpectedLiveOrigins.push_back(*OriginIDOpt); } - return ExplainMatchResult(UnorderedElementsAreArray(ExpectedExpiredLoans), - ActualExpiredLoans, result_listener); + std::sort(ExpectedLiveOrigins.begin(), ExpectedLiveOrigins.end()); + std::sort(ActualLiveOrigins.begin(), ActualLiveOrigins.end()); + if (ExpectedLiveOrigins != ActualLiveOrigins) { + *result_listener << "Expected: {"; + for (const auto &OriginID : ExpectedLiveOrigins) { + *result_listener << OriginID.Value << ", "; + } + *result_listener << "} Actual: {"; + for (const auto &OriginID : ActualLiveOrigins) { + *result_listener << OriginID.Value << ", "; + } + *result_listener << "}"; + return false; + } + return true; +} + +MATCHER_P(MustBeLiveAt, Annotation, "") { + return ExplainMatchResult(AreLiveAtImpl(Annotation, LivenessKindFilter::Must), + arg, result_listener); +} + +MATCHER_P(MaybeLiveAt, Annotation, "") { + return ExplainMatchResult( + AreLiveAtImpl(Annotation, LivenessKindFilter::Maybe), arg, + result_listener); +} + +MATCHER_P(AreLiveAt, Annotation, "") { + return ExplainMatchResult(AreLiveAtImpl(Annotation, LivenessKindFilter::All), + arg, result_listener); } // Base test fixture to manage the runner and helper. @@ -276,6 +325,13 @@ class LifetimeAnalysisTest : public ::testing::Test { return OriginInfo(OriginVar, *Helper); } + /// Factory function that hides the std::vector creation. + OriginsInfo Origins(std::initializer_list OriginVars) { + return OriginsInfo({OriginVars}, *Helper); + } + + OriginsInfo NoOrigins() { return Origins({}); } + /// Factory function that hides the std::vector creation. LoanSetInfo LoansTo(std::initializer_list LoanVars) { return LoanSetInfo({LoanVars}, *Helper); @@ -428,29 +484,6 @@ TEST_F(LifetimeAnalysisTest, AssignInSwitch) { EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2", "s3"}, "after_switch")); } -TEST_F(LifetimeAnalysisTest, LoanInLoop) { - SetupTest(R"( - void target(bool condition) { - MyObj* p = nullptr; - while (condition) { - POINT(start_loop); - MyObj inner; - p = &inner; - POINT(end_loop); - } - POINT(after_loop); - } - )"); - EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "start_loop")); - EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("start_loop")); - - EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "end_loop")); - EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop")); - - EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_loop")); - EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("after_loop")); -} - TEST_F(LifetimeAnalysisTest, LoopWithBreak) { SetupTest(R"( void target(int count) { @@ -528,20 +561,16 @@ TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) { )"); EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while")); EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while")); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_while")); EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "temp"}, "in_loop_before_temp")); EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_before_temp")); - EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("in_loop_before_temp")); EXPECT_THAT(Origin("p1"), HasLoansTo({"temp"}, "in_loop_after_temp")); EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_after_temp")); - EXPECT_THAT(NoLoans(), AreExpiredAt("in_loop_after_temp")); EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "temp"}, "after_loop")); EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "after_loop")); - EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("after_loop")); } TEST_F(LifetimeAnalysisTest, InfiniteLoopPrunesEdges) { @@ -585,178 +614,6 @@ TEST_F(LifetimeAnalysisTest, NestedScopes) { EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_inner_scope")); } -TEST_F(LifetimeAnalysisTest, SimpleExpiry) { - SetupTest(R"( - void target() { - MyObj* p = nullptr; - { - MyObj s; - p = &s; - POINT(before_expiry); - } // s goes out of scope here - POINT(after_expiry); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry")); - EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_expiry")); -} - -TEST_F(LifetimeAnalysisTest, NestedExpiry) { - SetupTest(R"( - void target() { - MyObj s1; - MyObj* p = &s1; - POINT(before_inner); - { - MyObj s2; - p = &s2; - POINT(in_inner); - } // s2 expires - POINT(after_inner); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_inner")); - EXPECT_THAT(NoLoans(), AreExpiredAt("in_inner")); - EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_inner")); -} - -TEST_F(LifetimeAnalysisTest, ConditionalExpiry) { - SetupTest(R"( - void target(bool cond) { - MyObj s1; - MyObj* p = &s1; - POINT(before_if); - if (cond) { - MyObj s2; - p = &s2; - POINT(then_block); - } // s2 expires here - POINT(after_if); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_if")); - EXPECT_THAT(NoLoans(), AreExpiredAt("then_block")); - EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_if")); -} - -TEST_F(LifetimeAnalysisTest, LoopExpiry) { - SetupTest(R"( - void target() { - MyObj *p = nullptr; - for (int i = 0; i < 2; ++i) { - POINT(start_loop); - MyObj s; - p = &s; - POINT(end_loop); - } // s expires here on each iteration - POINT(after_loop); - } - )"); - EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("start_loop")); - EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop")); - EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_loop")); -} - -TEST_F(LifetimeAnalysisTest, MultipleExpiredLoans) { - SetupTest(R"( - void target() { - MyObj *p1, *p2, *p3; - { - MyObj s1; - p1 = &s1; - POINT(p1); - } // s1 expires - POINT(p2); - { - MyObj s2; - p2 = &s2; - MyObj s3; - p3 = &s3; - POINT(p3); - } // s2, s3 expire - POINT(p4); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("p1")); - EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p2")); - EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p3")); - EXPECT_THAT(LoansTo({"s1", "s2", "s3"}), AreExpiredAt("p4")); -} - -TEST_F(LifetimeAnalysisTest, GotoJumpsOutOfScope) { - SetupTest(R"( - void target(bool cond) { - MyObj *p = nullptr; - { - MyObj s; - p = &s; - POINT(before_goto); - if (cond) { - goto end; - } - } // `s` expires here on the path that doesn't jump - POINT(after_scope); - end: - POINT(after_goto); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_goto")); - EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_scope")); - EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_goto")); -} - -TEST_F(LifetimeAnalysisTest, ContinueInLoop) { - SetupTest(R"( - void target(int count) { - MyObj *p = nullptr; - MyObj outer; - p = &outer; - POINT(before_loop); - - for (int i = 0; i < count; ++i) { - if (i % 2 == 0) { - MyObj s_even; - p = &s_even; - POINT(in_even_iter); - continue; - } - MyObj s_odd; - p = &s_odd; - POINT(in_odd_iter); - } - POINT(after_loop); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("before_loop")); - EXPECT_THAT(LoansTo({"s_odd"}), AreExpiredAt("in_even_iter")); - EXPECT_THAT(LoansTo({"s_even"}), AreExpiredAt("in_odd_iter")); - EXPECT_THAT(LoansTo({"s_even", "s_odd"}), AreExpiredAt("after_loop")); -} - -TEST_F(LifetimeAnalysisTest, ReassignedPointerThenOriginalExpires) { - SetupTest(R"( - void target() { - MyObj* p = nullptr; - { - MyObj s1; - p = &s1; - POINT(p_has_s1); - { - MyObj s2; - p = &s2; - POINT(p_has_s2); - } - POINT(p_after_s2_expires); - } // s1 expires here. - POINT(p_after_s1_expires); - } - )"); - EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s1")); - EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s2")); - EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("p_after_s2_expires")); - EXPECT_THAT(LoansTo({"s1", "s2"}), AreExpiredAt("p_after_s1_expires")); -} - TEST_F(LifetimeAnalysisTest, NoDuplicateLoansForImplicitCastToConst) { SetupTest(R"( void target() { @@ -880,23 +737,6 @@ TEST_F(LifetimeAnalysisTest, GslPointerPropagation) { EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p3")); } -TEST_F(LifetimeAnalysisTest, GslPointerLoanExpiration) { - SetupTest(R"( - void target() { - View x; - { - MyObj a; - x = a; - POINT(before_expiry); - } // `a` is destroyed here. - POINT(after_expiry); - } - )"); - - EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry")); - EXPECT_THAT(LoansTo({"a"}), AreExpiredAt("after_expiry")); -} - TEST_F(LifetimeAnalysisTest, GslPointerReassignment) { SetupTest(R"( void target() { @@ -916,7 +756,6 @@ TEST_F(LifetimeAnalysisTest, GslPointerReassignment) { EXPECT_THAT(Origin("v"), HasLoansTo({"safe"}, "p1")); EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p2")); EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p3")); - EXPECT_THAT(LoansTo({"unsafe"}), AreExpiredAt("p3")); } TEST_F(LifetimeAnalysisTest, GslPointerConversionOperator) { @@ -1174,5 +1013,187 @@ TEST_F(LifetimeAnalysisTest, LifetimeboundConversionOperator) { )"); EXPECT_THAT(Origin("v"), HasLoansTo({"owner"}, "p1")); } + +TEST_F(LifetimeAnalysisTest, LivenessDeadPointer) { + SetupTest(R"( + void target() { + POINT(p1); + MyObj s; + MyObj* p = &s; + POINT(p2); + } + )"); + EXPECT_THAT(NoOrigins(), AreLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessSimpleReturn) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessKilledByReassignment) { + SetupTest(R"( + MyObj* target() { + MyObj s1, s2; + MyObj* p = &s1; + POINT(p1); + p = &s2; + POINT(p2); + return p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessAcrossBranches) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj x, y; + MyObj* p = nullptr; + POINT(p1); + if (c) { + p = &x; + POINT(p2); + } else { + p = &y; + POINT(p3); + } + return p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p3")); + // Before the `if`, the value of `p` (`nullptr`) is always overwritten before. + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoop) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1, s2; + MyObj* p = &s1; + MyObj* q = &s2; + POINT(p1); + while(c) { + POINT(p2); + + p = q; + POINT(p3); + } + POINT(p4); + return p; + } + )"); + + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p4")); + EXPECT_THAT(NoOrigins(), MaybeLiveAt("p4")); + + EXPECT_THAT(Origins({"p", "q"}), MaybeLiveAt("p3")); + + EXPECT_THAT(Origins({"q"}), MustBeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), MaybeLiveAt("p2")); + + EXPECT_THAT(Origins({"p", "q"}), MaybeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoopAndIf) { + // See https://github.com/llvm/llvm-project/issues/156959. + SetupTest(R"( + void target(bool cond) { + MyObj b; + while (cond) { + POINT(p1); + + MyObj a; + View p = b; + + POINT(p2); + + if (cond) { + POINT(p3); + p = a; + } + POINT(p4); + (void)p; + POINT(p5); + } + } + )"); + EXPECT_THAT(NoOrigins(), AreLiveAt("p5")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p4")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p3")); + EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoopAndIf2) { + SetupTest(R"( + void target(MyObj safe, bool condition) { + MyObj* p = &safe; + MyObj* q = &safe; + POINT(p1); + + while (condition) { + POINT(p2); + MyObj x; + p = &x; + + POINT(p3); + + if (condition) { + q = p; + POINT(p4); + } + + POINT(p5); + (void)*p; + (void)*q; + POINT(p6); + } + } + )"); + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p6")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p6")); + + EXPECT_THAT(Origins({"p", "q"}), MustBeLiveAt("p5")); + + EXPECT_THAT(Origins({"p", "q"}), MustBeLiveAt("p4")); + + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p3")); + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p3")); + + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p2")); + + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p1")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessOutsideLoop) { + SetupTest(R"( + void target(MyObj safe) { + MyObj* p = &safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + p = &s; + POINT(p1); + } + POINT(p2); + (void)*p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p1")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From 0ca9dba53c90066a8cd19f6d8354865d81e21fbf Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Fri, 10 Oct 2025 13:43:39 +0200 Subject: [PATCH 19/35] [LifetimeSafety] Reorganize code into modular components (#162474) Restructure the C++ Lifetime Safety Analysis into modular components with clear separation of concerns. This PR reorganizes the C++ Lifetime Safety Analysis code by: 1. Breaking up the monolithic `LifetimeSafety.cpp` (1500+ lines) into multiple smaller, focused files 2. Creating a dedicated `LifetimeSafety` directory with a clean component structure 3. Introducing header files for each component with proper documentation 4. Moving existing code into the appropriate component files: - `Checker.h/cpp`: Core lifetime checking logic - `Dataflow.h`: Generic dataflow analysis framework - `Facts.h`: Lifetime-relevant events and fact management - `FactsGenerator.h/cpp`: AST traversal for fact generation - `LiveOrigins.h/cpp`: Backward dataflow analysis for origin liveness - `LoanPropagation.h/cpp`: Forward dataflow analysis for loan tracking - `Loans.h`: Loan and access path definitions - `Origins.h`: Origin management - `Reporter.h`: Interface for reporting lifetime violations - `Utils.h`: Common utilities for the analysis The code functionality remains the same, but is now better organized with clearer interfaces between components. --- .github/new-prs-labeler.yml | 4 +- .../clang/Analysis/Analyses/LifetimeSafety.h | 183 -- .../Analyses/LifetimeSafety/Checker.h | 35 + .../Analysis/Analyses/LifetimeSafety/Facts.h | 232 +++ .../Analyses/LifetimeSafety/FactsGenerator.h | 106 ++ .../LifetimeAnnotations.h | 7 +- .../Analyses/LifetimeSafety/LifetimeSafety.h | 87 + .../Analyses/LifetimeSafety/LiveOrigins.h | 97 ++ .../Analyses/LifetimeSafety/LoanPropagation.h | 48 + .../Analysis/Analyses/LifetimeSafety/Loans.h | 80 + .../Analyses/LifetimeSafety/Origins.h | 91 + .../Analysis/Analyses/LifetimeSafety/Utils.h | 118 ++ clang/lib/Analysis/CMakeLists.txt | 3 +- clang/lib/Analysis/LifetimeSafety.cpp | 1549 ----------------- .../Analysis/LifetimeSafety/CMakeLists.txt | 12 + clang/lib/Analysis/LifetimeSafety/Checker.cpp | 130 ++ clang/lib/Analysis/LifetimeSafety/Dataflow.h | 188 ++ clang/lib/Analysis/LifetimeSafety/Facts.cpp | 102 ++ .../LifetimeSafety/FactsGenerator.cpp | 348 ++++ .../LifetimeAnnotations.cpp | 8 +- .../LifetimeSafety/LifetimeSafety.cpp | 77 + .../Analysis/LifetimeSafety/LiveOrigins.cpp | 180 ++ .../LifetimeSafety/LoanPropagation.cpp | 138 ++ clang/lib/Analysis/LifetimeSafety/Loans.cpp | 18 + clang/lib/Analysis/LifetimeSafety/Origins.cpp | 89 + clang/lib/Sema/AnalysisBasedWarnings.cpp | 2 +- clang/lib/Sema/CMakeLists.txt | 1 + clang/lib/Sema/CheckExprLifetime.cpp | 2 +- clang/lib/Sema/SemaAPINotes.cpp | 2 +- .../unittests/Analysis/LifetimeSafetyTest.cpp | 25 +- llvm/include/llvm/ADT/ImmutableSet.h | 2 + 31 files changed, 2207 insertions(+), 1757 deletions(-) delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h rename clang/include/clang/Analysis/Analyses/{ => LifetimeSafety}/LifetimeAnnotations.h (95%) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h delete mode 100644 clang/lib/Analysis/LifetimeSafety.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/CMakeLists.txt create mode 100644 clang/lib/Analysis/LifetimeSafety/Checker.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Dataflow.h create mode 100644 clang/lib/Analysis/LifetimeSafety/Facts.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp rename clang/lib/Analysis/{ => LifetimeSafety}/LifetimeAnnotations.cpp (94%) create mode 100644 clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Loans.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Origins.cpp diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index f38d27a85d39a..786f6f5b215c9 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1083,8 +1083,8 @@ clang:openmp: - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety* - - clang/lib/Analysis/LifetimeSafety* + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** - clang/unittests/Analysis/LifetimeSafety* - clang/test/Sema/*lifetime-safety* - clang/test/Sema/*lifetime-analysis* diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h deleted file mode 100644 index e54fc26df28ea..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ /dev/null @@ -1,183 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the entry point for a dataflow-based static analysis -// that checks for C++ lifetime violations. -// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/StringMap.h" -#include - -namespace clang::lifetimes { - -/// Enum to track the confidence level of a potential error. -enum class Confidence : uint8_t { - None, - Maybe, // Reported as a potential error (-Wlifetime-safety-strict) - Definite // Reported as a definite error (-Wlifetime-safety-permissive) -}; - -enum class LivenessKind : uint8_t { - Dead, // Not alive - Maybe, // Live on some path but not all paths (may-be-live) - Must // Live on all paths (must-be-live) -}; - -class LifetimeSafetyReporter { -public: - LifetimeSafetyReporter() = default; - virtual ~LifetimeSafetyReporter() = default; - - virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, - SourceLocation FreeLoc, - Confidence Confidence) {} -}; - -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - -namespace internal { -// Forward declarations of internal types. -class Fact; -class FactManager; -class LoanPropagationAnalysis; -class ExpiredLoansAnalysis; -class LiveOriginAnalysis; -struct LifetimeFactory; - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template struct ID { - uint32_t Value = 0; - - bool operator==(const ID &Other) const { return Value == Other.Value; } - bool operator!=(const ID &Other) const { return !(*this == Other); } - bool operator<(const ID &Other) const { return Value < Other.Value; } - ID operator++(int) { - ID Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -using LoanID = ID; -using OriginID = ID; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { - return OS << ID.Value; -} -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { - return OS << ID.Value; -} - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet; -using OriginSet = llvm::ImmutableSet; -using OriginLoanMap = llvm::ImmutableMap; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// Running the lifetime safety analysis and querying its results. It -/// encapsulates the various dataflow analyses. -class LifetimeSafetyAnalysis { -public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - ~LifetimeSafetyAnalysis(); - - void run(); - - /// Returns the set of loans an origin holds at a specific program point. - LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; - - /// Returns the set of origins that are live at a specific program point, - /// along with the confidence level of their liveness. - /// - /// An origin is considered live if there are potential future uses of that - /// origin after the given program point. The confidence level indicates - /// whether the origin is definitely live (Definite) due to being domintated - /// by a set of uses or only possibly live (Maybe) only on some but not all - /// control flow paths. - std::vector> - getLiveOriginsAtPoint(ProgramPoint PP) const; - - /// Finds the OriginID for a given declaration. - /// Returns a null optional if not found. - std::optional getOriginIDForDecl(const ValueDecl *D) const; - - /// Finds the LoanID's for the loan created with the specific variable as - /// their Path. - std::vector getLoanIDForVar(const VarDecl *VD) const; - - /// Retrieves program points that were specially marked in the source code - /// for testing. - /// - /// The analysis recognizes special function calls of the form - /// `void("__lifetime_test_point_")` as test points. This method returns - /// a map from the annotation string () to the corresponding - /// `ProgramPoint`. This allows test harnesses to query the analysis state at - /// user-defined locations in the code. - /// \note This is intended for testing only. - llvm::StringMap getTestPoints() const; - -private: - AnalysisDeclContext &AC; - LifetimeSafetyReporter *Reporter; - std::unique_ptr Factory; - std::unique_ptr FactMgr; - std::unique_ptr LoanPropagation; - std::unique_ptr LiveOrigins; -}; -} // namespace internal -} // namespace clang::lifetimes - -namespace llvm { -template -struct DenseMapInfo> { - using ID = clang::lifetimes::internal::ID; - - static inline ID getEmptyKey() { - return {DenseMapInfo::getEmptyKey()}; - } - - static inline ID getTombstoneKey() { - return {DenseMapInfo::getTombstoneKey()}; - } - - static unsigned getHashValue(const ID &Val) { - return DenseMapInfo::getHashValue(Val.Value); - } - - static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } -}; -} // namespace llvm - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h new file mode 100644 index 0000000000000..03636be7d00c3 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h @@ -0,0 +1,35 @@ +//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines and enforces the lifetime safety policy. It detects +// use-after-free errors by examining loan expiration points and checking if +// any live origins hold the expired loans. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" + +namespace clang::lifetimes::internal { + +/// Runs the lifetime checker, which detects use-after-free errors by +/// examining loan expiration points and checking if any live origins hold +/// the expired loan. +void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter); + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h new file mode 100644 index 0000000000000..6a90aeb01e638 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -0,0 +1,232 @@ +//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Facts, which are atomic lifetime-relevant events (such as +// loan issuance, loan expiration, origin flow, and use), and the FactManager, +// which manages the storage and retrieval of facts for each CFG block. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include + +namespace clang::lifetimes::internal { +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin escapes the function by flowing into the return value. + ReturnOfOrigin, + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. + TestPoint, + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template const T *getAs() const { + if (T::classof(this)) + return static_cast(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const; +}; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override; +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override; +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. + bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class UseFact : public Fact { + const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. + return const_cast(OM).get(*UseExpr); + } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override; +}; + +class FactManager { +public: + llvm::ArrayRef getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate(); + return new (Mem) FactType(std::forward(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_")` as test points. This method returns + /// a map from the annotation string () to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. + llvm::StringMap getTestPoints() const; + + LoanManager &getLoanMgr() { return LoanMgr; } + const LoanManager &getLoanMgr() const { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + const OriginManager &getOriginMgr() const { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h new file mode 100644 index 0000000000000..5e58abee2bbb3 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -0,0 +1,106 @@ +//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FactsGenerator, which traverses the AST to generate +// lifetime-relevant facts (such as loan issuance, expiration, origin flow, +// and use) from CFG statements. These facts are used by the dataflow analyses +// to track pointer lifetimes and detect use-after-free errors. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H + +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::lifetimes::internal { + +class FactsGenerator : public ConstStmtVisitor { + using Base = ConstStmtVisitor; + +public: + FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run(); + + void VisitDeclStmt(const DeclStmt *DS); + void VisitDeclRefExpr(const DeclRefExpr *DRE); + void VisitCXXConstructExpr(const CXXConstructExpr *CCE); + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); + void VisitCallExpr(const CallExpr *CE); + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); + void VisitUnaryOperator(const UnaryOperator *UO); + void VisitReturnStmt(const ReturnStmt *RS); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); + void VisitInitListExpr(const InitListExpr *ILE); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); + +private: + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE); + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction = false); + + template + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool handleTestPoint(const CXXFunctionalCastExpr *FCE); + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE); + + void markUseAsWrite(const DeclRefExpr *DRE); + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap UseFacts; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h similarity index 95% rename from clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h rename to clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h index 229d16c20b0f8..f02969e0a9563 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h @@ -12,8 +12,7 @@ #include "clang/AST/DeclCXX.h" -namespace clang { -namespace lifetimes { +namespace clang ::lifetimes { /// Returns the most recent declaration of the method to ensure all /// lifetime-bound attributes from redeclarations are considered. @@ -38,7 +37,7 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// lifetimebound, either due to an explicit lifetimebound attribute on the /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); -} // namespace lifetimes -} // namespace clang + +} // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h new file mode 100644 index 0000000000000..91ffbb169f947 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -0,0 +1,87 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the main entry point and orchestrator for the C++ Lifetime +// Safety Analysis. It coordinates the entire analysis pipeline: fact +// generation, loan propagation, live origins analysis, and enforcement of +// lifetime safety policy. +// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" + +namespace clang::lifetimes { + +/// Enum to track the confidence level of a potential error. +enum class Confidence : uint8_t { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} +}; + +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + +namespace internal { +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; +}; + +/// Running the lifetime safety analysis and querying its results. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + + void run(); + + /// \note These are provided only for testing purposes. + LoanPropagationAnalysis &getLoanPropagation() const { + return *LoanPropagation; + } + LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } + FactManager &getFactManager() { return FactMgr; } + +private: + AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; + LifetimeFactory Factory; + FactManager FactMgr; + std::unique_ptr LiveOrigins; + std::unique_ptr LoanPropagation; +}; +} // namespace internal +} // namespace clang::lifetimes + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h new file mode 100644 index 0000000000000..c4f5f0e9ae46c --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h @@ -0,0 +1,97 @@ +//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveOriginAnalysis, a backward dataflow analysis that +// determines which origins are "live" at each program point. An origin is +// "live" at a program point if there's a potential future use of a pointer it +// is associated with. Liveness is "generated" by a use of an origin (e.g., a +// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being +// live) when the origin is replaced by flowing a different origin into it +// (e.g., an OriginFlow from an assignment that kills the destination). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/Support/Debug.h" + +namespace clang::lifetimes::internal { + +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap; + +class LiveOriginsAnalysis { +public: + LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF); + ~LiveOriginsAnalysis(); + + /// Returns the set of origins that are live at a specific program point, + /// along with the the details of the liveness. + LivenessMap getLiveOriginsAt(ProgramPoint P) const; + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h new file mode 100644 index 0000000000000..447d05ca898fd --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h @@ -0,0 +1,48 @@ +//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoanPropagationAnalysis, a forward dataflow analysis +// that tracks which loans each origin holds at each program point. Loans +// represent borrows of storage locations and are propagated through the +// program as pointers are copied or assigned. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal { + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet; +using OriginLoanMap = llvm::ImmutableMap; + +class LoanPropagationAnalysis { +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory); + ~LoanPropagationAnalysis(); + + LoanSet getLoans(OriginID OID, ProgramPoint P) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h new file mode 100644 index 0000000000000..7f5cf03fd3e5f --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h @@ -0,0 +1,80 @@ +//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Loan and AccessPath structures, which represent +// borrows of storage locations, and the LoanManager, which manages the +// creation and retrieval of loans during lifetime analysis. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang::lifetimes::internal { + +using LoanID = utils::ID; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; + + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const; +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllLoans; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h new file mode 100644 index 0000000000000..ba138b078b379 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -0,0 +1,91 @@ +//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Origins, which represent the set of possible loans a +// pointer-like object could hold, and the OriginManager, which manages the +// creation, storage, and retrieval of origins for variables and expressions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" + +namespace clang::lifetimes::internal { + +using OriginID = utils::ID; + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast(); + } +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); + Origin &addOrigin(OriginID ID, const clang::Expr &E); + + // TODO: Mark this method as const once we remove the call to getOrCreate. + OriginID get(const Expr &E); + + OriginID get(const ValueDecl &D); + + OriginID getOrCreate(const Expr &E); + + const Origin &getOrigin(OriginID ID) const; + + llvm::ArrayRef getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D); + + void dump(OriginID OID, llvm::raw_ostream &OS) const; + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllOrigins; + llvm::DenseMap DeclToOriginID; + llvm::DenseMap ExprToOriginID; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h new file mode 100644 index 0000000000000..4183cabe860a7 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h @@ -0,0 +1,118 @@ +//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file provides utilities for the lifetime safety analysis, including +// join operations for LLVM's immutable data structures. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H + +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal::utils { + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +/// Computes the union of two ImmutableSets. +template +static llvm::ImmutableSet join(llvm::ImmutableSet A, + llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template +static llvm::ImmutableMap +join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). + llvm::ImmutableMap Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace clang::lifetimes::internal::utils + +namespace llvm { +template +struct DenseMapInfo> { + using ID = clang::lifetimes::internal::utils::ID; + + static inline ID getEmptyKey() { + return {DenseMapInfo::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 5a26f3eeea418..1dbd4153d856f 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,8 +21,6 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -51,3 +49,4 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) +add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp deleted file mode 100644 index 55129a0c831e9..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ /dev/null @@ -1,1549 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include -#include -#include - -namespace clang::lifetimes { -namespace internal { - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. - const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; - } -}; - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast(); - } -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllLoans; -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); - } - Origin &addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); - } - - // TODO: Mark this method as const once we remove the call to getOrCreate. - OriginID get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); - } - - OriginID get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; - } - - OriginID getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; - } - - const Origin &getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; - } - - llvm::ArrayRef getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; - } - - void dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; - } - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllOrigins; - llvm::DenseMap DeclToOriginID; - llvm::DenseMap ExprToOriginID; -}; - -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. - TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template const T *getAs() const { - if (T::classof(this)) - return static_cast(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast(K) << ")\n"; - } -}; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; - } -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const override { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; - } -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? "" : ", Merge"); - OS << ")\n"; - } -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; - } -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. - bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; - } -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; - } -}; - -class FactManager { -public: - llvm::ArrayRef getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate(); - return new (Mem) FactType(std::forward(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } - } - - LoanManager &getLoanMgr() { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; - -class FactGenerator : public ConstStmtVisitor { - using Base = ConstStmtVisitor; - -public: - FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional CS = Element.getAs()) - Visit(CS->getStmt()); - else if (std::optional DtorOpt = - Element.getAs()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } - } - - void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); - } - - void VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. - if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact(L->ID, ExprOID)); - } - } - } - - void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } - } - - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. - llvm::SmallVector Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } - } - - void VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); - } - - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); - } - - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - killAndFlowOrigin(*ICE, *ICE->getSubExpr()); - } - - void VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } - } - - void VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back( - FactMgr.createFact(OID)); - } - } - } - - void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); - } - - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); - } - - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. - if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); - } - - void VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); - } - - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. - killAndFlowOrigin(*MTE, *MTE->getSubExpr()); - } - - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } - } - -private: - static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr(); - return RD->hasAttr(); - } - return false; - } - - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); - } - // Check if a type has an origin. - static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); - } - - static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); - } - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); - } - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef Args, - bool IsGslConstruction = false) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. - PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } - } - - /// Creates a loan for the storage path of a given declaration reference. - /// This function should be called whenever a DeclRefExpr represents a borrow. - /// \param DRE The declaration reference expression that initiates the borrow. - /// \return The new Loan on success, nullptr otherwise. - const Loan *createLoan(const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; - } - - template - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. - bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact(Annotation)); - return true; - } - } - return false; - } - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } - } - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). - void handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } - } - - void markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); - } - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap UseFacts; -}; - -// ========================================================================= // -// Generic Dataflow Analysis -// ========================================================================= // - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. -/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -template -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &AllFacts; - - explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F) - : Cfg(C), AC(AC), AllFacts(F) {} - -public: - void run() { - Derived &D = static_cast(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. - if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. - Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; - -namespace utils { - -/// Computes the union of two ImmutableSets. -template -static llvm::ImmutableSet join(llvm::ImmutableSet A, - llvm::ImmutableSet B, - typename llvm::ImmutableSet::Factory &F) { - if (A == B) - return A; - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - if (!A.contains(E)) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. -enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. - Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template -static llvm::ImmutableMap -join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, - typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace utils - -// ========================================================================= // -// Loan Propagation Analysis -// ========================================================================= // - -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. -struct LoanPropagationLattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} - LoanPropagationLattice() = default; - - bool operator==(const LoanPropagationLattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const LoanPropagationLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " \n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -/// The analysis that tracks which loans belong to which origins. -class LoanPropagationAnalysis - : public DataflowAnalysis { - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; - -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. - Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return LoanPropagationLattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return LoanPropagationLattice( - OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } -}; - -// ========================================================================= // -// Live Origins Analysis -// ========================================================================= // -// -// A backward dataflow analysis that determines which origins are "live" at each -// program point. An origin is "live" at a program point if there's a potential -// future use of the pointer it represents. Liveness is "generated" by a read of -// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being -// live) when its loan set is overwritten (e.g. a OriginFlow killing the -// destination origin). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// ========================================================================= // - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). - /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap; - -/// The dataflow lattice for origin liveness analysis. -/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. -struct LivenessLattice { - LivenessMap LiveOrigins; - - LivenessLattice() : LiveOrigins(nullptr) {}; - - explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const LivenessLattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const LivenessLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " \n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class LiveOriginAnalysis - : public DataflowAnalysis { - FactManager &FactMgr; - LivenessMap::Factory &Factory; - -public: - LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. - Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. - Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOrigins(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : LSA.getTestPoints()) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } -}; - -// ========================================================================= // -// Lifetime checker and Error reporter -// ========================================================================= // - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. - Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap FinalWarningsMap; - LoanPropagationAnalysis &LoanPropagation; - LiveOriginAnalysis &LiveOrigins; - FactManager &FactMgr; - AnalysisDeclContext &ADC; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, - FactManager &FM, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) - : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), - Reporter(Reporter) {} - - void run() { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - for (const CFGBlock *B : *ADC.getAnalysis()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. - Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; - -// ========================================================================= // -// LifetimeSafetyAnalysis Class Implementation -// ========================================================================= // - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - llvm::BumpPtrAllocator Allocator; - OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; -}; - -// We need this here for unique_ptr with forward declared class. -LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter), Factory(std::make_unique()), - FactMgr(std::make_unique()) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactGenerator FactGen(*FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. - LoanPropagation = std::make_unique( - Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); - LoanPropagation->run(); - - LiveOrigins = std::make_unique( - Cfg, AC, *FactMgr, Factory->LivenessMapFactory); - LiveOrigins->run(); - DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); - - LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, - Reporter); - Checker.run(); -} - -LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, - ProgramPoint PP) const { - assert(LoanPropagation && "Analysis has not been run."); - return LoanPropagation->getLoans(OID, PP); -} - -std::optional -LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { - assert(FactMgr && "FactManager not initialized"); - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` logic - // in a const-query context if that becomes an issue. - return FactMgr->getOriginMgr().get(*D); -} - -std::vector -LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { - assert(FactMgr && "FactManager not initialized"); - std::vector Result; - for (const Loan &L : FactMgr->getLoanMgr().getLoans()) - if (L.Path.D == VD) - Result.push_back(L.ID); - return Result; -} - -std::vector> -LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { - assert(LiveOrigins && "LiveOriginAnalysis has not been run."); - std::vector> Result; - for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) - Result.push_back({OID, Info.Kind}); - return Result; -} - -llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { - assert(FactMgr && "FactManager not initialized"); - llvm::StringMap AnnotationToPointMap; - for (const CFGBlock *Block : *AC.getCFG()) { - for (const Fact *F : FactMgr->getFacts(Block)) { - if (const auto *TPF = F->getAs()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000000000..85842928770cd --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,12 @@ +add_clang_library(clangAnalysisLifetimeSafety + Checker.cpp + Facts.cpp + FactsGenerator.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp + LiveOrigins.cpp + Loans.cpp + LoanPropagation.cpp + Origins.cpp + ) + diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp new file mode 100644 index 0000000000000..c443c3a5d4f9b --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -0,0 +1,130 @@ +//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LifetimeChecker, which detects use-after-free +// errors by checking if live origins hold loans that have expired. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); +} + +namespace { + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap FinalWarningsMap; + const LoanPropagationAnalysis &LoanPropagation; + const LiveOriginsAnalysis &LiveOrigins; + const FactManager &FactMgr; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, + AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) + : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), + Reporter(Reporter) { + for (const CFGBlock *B : *ADC.getAnalysis()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; +} // namespace + +void runLifetimeChecker(const LoanPropagationAnalysis &LP, + const LiveOriginsAnalysis &LO, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h new file mode 100644 index 0000000000000..2f7bcb6e5dc81 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -0,0 +1,188 @@ +//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic, policy-based driver for dataflow analyses. +// It provides a flexible framework that combines the dataflow runner and +// transfer functions, allowing derived classes to implement specific analyses +// by defining their lattice, join, and transfer functions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang::lifetimes::internal { + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &FactMgr; + + explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, + FactManager &FactMgr) + : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} + +public: + void run() { + Derived &D = static_cast(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = FactMgr.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; +} // namespace clang::lifetimes::internal +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp new file mode 100644 index 0000000000000..1aea64f864367 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -0,0 +1,102 @@ +//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" + +namespace clang::lifetimes::internal { + +void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast(K) << ")\n"; +} + +void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; +} + +void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; +} + +void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; +} + +void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; +} + +void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; +} + +void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; +} + +llvm::StringMap FactManager::getTestPoints() const { + llvm::StringMap AnnotationToPointMap; + for (const CFGBlock *Block : BlockToFactsMap.keys()) { + for (const Fact *F : getFacts(Block)) { + if (const auto *TPF = F->getAs()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} + +void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp new file mode 100644 index 0000000000000..485308f5b2c3b --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -0,0 +1,348 @@ +//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + return false; +} + +static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); +} +// Check if a type has an origin. +static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); +} + +static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); +} + +/// Creates a loan for the storage path of a given declaration reference. +/// This function should be called whenever a DeclRefExpr represents a borrow. +/// \param DRE The declaration reference expression that initiates the borrow. +/// \return The new Loan on success, nullptr otherwise. +static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; +} + +void FactsGenerator::run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional CS = Element.getAs()) + Visit(CS->getStmt()); + else if (std::optional DtorOpt = + Element.getAs()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } +} + +void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); +} + +void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(FactMgr, DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact(L->ID, ExprOID)); + } + } +} + +void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } +} + +void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } +} + +void FactsGenerator::VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); +} + +void FactsGenerator::VisitCXXNullPtrLiteralExpr( + const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); +} + +void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); +} + +void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } +} + +void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back(FactMgr.createFact(OID)); + } + } +} + +void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); +} + +void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); +} + +void FactsGenerator::VisitCXXFunctionalCastExpr( + const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); +} + +void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); +} + +void FactsGenerator::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); +} + +void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } +} + +void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); +} + +/// Checks if a call-like expression creates a borrow by passing a value to a +/// reference parameter, creating an IssueFact if it does. +/// \param IsGslConstruction True if this is a GSL construction where all +/// argument origins should flow to the returned origin. +void FactsGenerator::handleFunctionCall(const Expr *Call, + const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } +} + +/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. +/// If so, creates a `TestPointFact` and returns true. +bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact(Annotation)); + return true; + } + } + return false; +} + +void FactsGenerator::handleAssignment(const Expr *LHSExpr, + const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } +} + +// A DeclRefExpr will be treated as a use of the referenced decl. It will be +// checked for use-after-free unless it is later marked as being written to +// (e.g. on the left-hand side of an assignment). +void FactsGenerator::handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } +} + +void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp similarity index 94% rename from clang/lib/Analysis/LifetimeAnnotations.cpp rename to clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp index e79122475625e..ad61a42c0eaeb 100644 --- a/clang/lib/Analysis/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,8 +13,7 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang { -namespace lifetimes { +namespace clang::lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -71,5 +70,4 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } -} // namespace lifetimes -} // namespace clang +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp new file mode 100644 index 0000000000000..00c7ed90503e7 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -0,0 +1,77 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the main LifetimeSafetyAnalysis class, which coordinates +// the various components (fact generation, loan propagation, live origins +// analysis, and checking) to detect lifetime safety violations in C++ code. +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang::lifetimes { +namespace internal { + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter) {} + +void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactsGenerator FactGen(FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. + LoanPropagation = std::make_unique( + Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); + + LiveOrigins = std::make_unique( + Cfg, AC, FactMgr, Factory.LivenessMapFactory); + DEBUG_WITH_TYPE("LiveOrigins", + LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); + + runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp new file mode 100644 index 0000000000000..cddb3f3ce4c1c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -0,0 +1,180 @@ +//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "Dataflow.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang::lifetimes::internal { +namespace { + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct Lattice { + LivenessMap LiveOrigins; + + Lattice() : LiveOrigins(nullptr) {}; + + explicit Lattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const Lattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " \n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class AnalysisImpl + : public DataflowAnalysis { + +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOriginsAt(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : TestPoints) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } + +private: + FactManager &FactMgr; + LivenessMap::Factory &Factory; +}; +} // namespace + +// PImpl wrapper implementation +class LiveOriginsAnalysis::Impl : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F, + LivenessMap::Factory &SF) + : PImpl(std::make_unique(C, AC, F, SF)) { + PImpl->run(); +} + +LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; + +LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { + return PImpl->getLiveOriginsAt(P); +} + +void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const { + PImpl->dump(OS, TestPoints); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp new file mode 100644 index 0000000000000..387097e705f94 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -0,0 +1,138 @@ +//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "Dataflow.h" +#include + +namespace clang::lifetimes::internal { +namespace { +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct Lattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit Lattice(const OriginLoanMap &S) : Origins(S) {} + Lattice() = default; + + bool operator==(const Lattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " \n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +class AnalysisImpl + : public DataflowAnalysis { +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. + Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return Lattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } + + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; +}; +} // namespace + +class LoanPropagationAnalysis::Impl final : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LoanPropagationAnalysis::LoanPropagationAnalysis( + const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : PImpl(std::make_unique(C, AC, F, OriginLoanMapFactory, + LoanSetFactory)) { + PImpl->run(); +} + +LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; + +LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { + return PImpl->getLoans(OID, P); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp new file mode 100644 index 0000000000000..2c85a3c6083f3 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp @@ -0,0 +1,18 @@ +//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" + +namespace clang::lifetimes::internal { + +void Loan::dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp new file mode 100644 index 0000000000000..ea51a75324e06 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -0,0 +1,89 @@ +//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" + +namespace clang::lifetimes::internal { + +void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); +} + +// TODO: Mark this method as const once we remove the call to getOrCreate. +OriginID OriginManager::get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); +} + +OriginID OriginManager::get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; +} + +OriginID OriginManager::getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; +} + +const Origin &OriginManager::getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; +} + +OriginID OriginManager::getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 7fd88a9ca73d6..4c1fe60dd2db9 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -31,7 +31,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 20155d94900ee..9da50ecf1823c 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -119,6 +119,7 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e8a7ad3bd355a..8aebf53c0e76f 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index d0acb27f6a360..57a95fc39dca4 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 169b2d2d5e9b4..0c051847f4d47 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" @@ -63,7 +63,7 @@ class LifetimeTestRunner { Analysis = std::make_unique(*AnalysisCtx, nullptr); Analysis->run(); - AnnotationToPointMap = Analysis->getTestPoints(); + AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); } LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } @@ -98,10 +98,11 @@ class LifetimeTestHelper { auto *VD = findDecl(VarName); if (!VD) return std::nullopt; - auto OID = Analysis.getOriginIDForDecl(VD); - if (!OID) - ADD_FAILURE() << "Origin for '" << VarName << "' not found."; - return OID; + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` + // logic in a const-query context if that becomes an issue. + return const_cast(Analysis.getFactManager().getOriginMgr()) + .get(*VD); } std::vector getLoansForVar(llvm::StringRef VarName) { @@ -110,7 +111,10 @@ class LifetimeTestHelper { ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } - std::vector LID = Analysis.getLoanIDForVar(VD); + std::vector LID; + for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) + if (L.Path.D == VD) + LID.push_back(L.ID); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {}; @@ -123,7 +127,7 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLoansAtPoint(OID, PP); + return Analysis.getLoanPropagation().getLoans(OID, PP); } std::optional>> @@ -131,7 +135,10 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLiveOriginsAtPoint(PP); + std::vector> Result; + for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) + Result.push_back({OID, Info.Kind}); + return Result; } private: diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index ac86f43b2048e..b737d5b227340 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,7 +21,9 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Signals.h" #include #include #include From 0f9a415345332e74e7e9f7301660b404f2aa17dd Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 10 Oct 2025 11:49:49 -0400 Subject: [PATCH 20/35] Revert "[LifetimeSafety] Reorganize code into modular components (#162474)" This reverts commit 2eb8c47b88e83cf310b86480b5b8d0f45a1f91af since it completely breaks dylib build. --- .github/new-prs-labeler.yml | 4 +- .../LifetimeAnnotations.h | 7 +- .../clang/Analysis/Analyses/LifetimeSafety.h | 183 ++ .../Analyses/LifetimeSafety/Checker.h | 35 - .../Analysis/Analyses/LifetimeSafety/Facts.h | 232 --- .../Analyses/LifetimeSafety/FactsGenerator.h | 106 -- .../Analyses/LifetimeSafety/LifetimeSafety.h | 87 - .../Analyses/LifetimeSafety/LiveOrigins.h | 97 -- .../Analyses/LifetimeSafety/LoanPropagation.h | 48 - .../Analysis/Analyses/LifetimeSafety/Loans.h | 80 - .../Analyses/LifetimeSafety/Origins.h | 91 - .../Analysis/Analyses/LifetimeSafety/Utils.h | 118 -- clang/lib/Analysis/CMakeLists.txt | 3 +- .../LifetimeAnnotations.cpp | 8 +- clang/lib/Analysis/LifetimeSafety.cpp | 1546 +++++++++++++++++ .../Analysis/LifetimeSafety/CMakeLists.txt | 12 - clang/lib/Analysis/LifetimeSafety/Checker.cpp | 130 -- clang/lib/Analysis/LifetimeSafety/Dataflow.h | 188 -- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 102 -- .../LifetimeSafety/FactsGenerator.cpp | 348 ---- .../LifetimeSafety/LifetimeSafety.cpp | 77 - .../Analysis/LifetimeSafety/LiveOrigins.cpp | 180 -- .../LifetimeSafety/LoanPropagation.cpp | 138 -- clang/lib/Analysis/LifetimeSafety/Loans.cpp | 18 - clang/lib/Analysis/LifetimeSafety/Origins.cpp | 89 - clang/lib/Sema/AnalysisBasedWarnings.cpp | 2 +- clang/lib/Sema/CMakeLists.txt | 1 - clang/lib/Sema/CheckExprLifetime.cpp | 2 +- clang/lib/Sema/SemaAPINotes.cpp | 2 +- .../unittests/Analysis/LifetimeSafetyTest.cpp | 25 +- llvm/include/llvm/ADT/ImmutableSet.h | 2 - 31 files changed, 1754 insertions(+), 2207 deletions(-) rename clang/include/clang/Analysis/Analyses/{LifetimeSafety => }/LifetimeAnnotations.h (95%) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h rename clang/lib/Analysis/{LifetimeSafety => }/LifetimeAnnotations.cpp (94%) create mode 100644 clang/lib/Analysis/LifetimeSafety.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/CMakeLists.txt delete mode 100644 clang/lib/Analysis/LifetimeSafety/Checker.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/Dataflow.h delete mode 100644 clang/lib/Analysis/LifetimeSafety/Facts.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/Loans.cpp delete mode 100644 clang/lib/Analysis/LifetimeSafety/Origins.cpp diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index 786f6f5b215c9..f38d27a85d39a 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1083,8 +1083,8 @@ clang:openmp: - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety/** - - clang/lib/Analysis/LifetimeSafety/** + - clang/include/clang/Analysis/Analyses/LifetimeSafety* + - clang/lib/Analysis/LifetimeSafety* - clang/unittests/Analysis/LifetimeSafety* - clang/test/Sema/*lifetime-safety* - clang/test/Sema/*lifetime-analysis* diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h similarity index 95% rename from clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h rename to clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h index f02969e0a9563..229d16c20b0f8 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h @@ -12,7 +12,8 @@ #include "clang/AST/DeclCXX.h" -namespace clang ::lifetimes { +namespace clang { +namespace lifetimes { /// Returns the most recent declaration of the method to ensure all /// lifetime-bound attributes from redeclarations are considered. @@ -37,7 +38,7 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// lifetimebound, either due to an explicit lifetimebound attribute on the /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); - -} // namespace clang::lifetimes +} // namespace lifetimes +} // namespace clang #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000000000..e54fc26df28ea --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,183 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the entry point for a dataflow-based static analysis +// that checks for C++ lifetime violations. +// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/StringMap.h" +#include + +namespace clang::lifetimes { + +/// Enum to track the confidence level of a potential error. +enum class Confidence : uint8_t { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} +}; + +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + +namespace internal { +// Forward declarations of internal types. +class Fact; +class FactManager; +class LoanPropagationAnalysis; +class ExpiredLoansAnalysis; +class LiveOriginAnalysis; +struct LifetimeFactory; + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +using LoanID = ID; +using OriginID = ID; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet; +using OriginSet = llvm::ImmutableSet; +using OriginLoanMap = llvm::ImmutableMap; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// Running the lifetime safety analysis and querying its results. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + ~LifetimeSafetyAnalysis(); + + void run(); + + /// Returns the set of loans an origin holds at a specific program point. + LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; + + /// Returns the set of origins that are live at a specific program point, + /// along with the confidence level of their liveness. + /// + /// An origin is considered live if there are potential future uses of that + /// origin after the given program point. The confidence level indicates + /// whether the origin is definitely live (Definite) due to being domintated + /// by a set of uses or only possibly live (Maybe) only on some but not all + /// control flow paths. + std::vector> + getLiveOriginsAtPoint(ProgramPoint PP) const; + + /// Finds the OriginID for a given declaration. + /// Returns a null optional if not found. + std::optional getOriginIDForDecl(const ValueDecl *D) const; + + /// Finds the LoanID's for the loan created with the specific variable as + /// their Path. + std::vector getLoanIDForVar(const VarDecl *VD) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_")` as test points. This method returns + /// a map from the annotation string () to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. + llvm::StringMap getTestPoints() const; + +private: + AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; + std::unique_ptr Factory; + std::unique_ptr FactMgr; + std::unique_ptr LoanPropagation; + std::unique_ptr LiveOrigins; +}; +} // namespace internal +} // namespace clang::lifetimes + +namespace llvm { +template +struct DenseMapInfo> { + using ID = clang::lifetimes::internal::ID; + + static inline ID getEmptyKey() { + return {DenseMapInfo::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h deleted file mode 100644 index 03636be7d00c3..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h +++ /dev/null @@ -1,35 +0,0 @@ -//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines and enforces the lifetime safety policy. It detects -// use-after-free errors by examining loan expiration points and checking if -// any live origins hold the expired loans. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" - -namespace clang::lifetimes::internal { - -/// Runs the lifetime checker, which detects use-after-free errors by -/// examining loan expiration points and checking if any live origins hold -/// the expired loan. -void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, - const LiveOriginsAnalysis &LiveOrigins, - const FactManager &FactMgr, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter); - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h deleted file mode 100644 index 6a90aeb01e638..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h +++ /dev/null @@ -1,232 +0,0 @@ -//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines Facts, which are atomic lifetime-relevant events (such as -// loan issuance, loan expiration, origin flow, and use), and the FactManager, -// which manages the storage and retrieval of facts for each CFG block. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include - -namespace clang::lifetimes::internal { -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. - TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template const T *getAs() const { - if (T::classof(this)) - return static_cast(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const; -}; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override; -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const override; -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. - bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override; -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override; -}; - -class FactManager { -public: - llvm::ArrayRef getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate(); - return new (Mem) FactType(std::forward(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; - - /// Retrieves program points that were specially marked in the source code - /// for testing. - /// - /// The analysis recognizes special function calls of the form - /// `void("__lifetime_test_point_")` as test points. This method returns - /// a map from the annotation string () to the corresponding - /// `ProgramPoint`. This allows test harnesses to query the analysis state at - /// user-defined locations in the code. - /// \note This is intended for testing only. - llvm::StringMap getTestPoints() const; - - LoanManager &getLoanMgr() { return LoanMgr; } - const LoanManager &getLoanMgr() const { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - const OriginManager &getOriginMgr() const { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h deleted file mode 100644 index 5e58abee2bbb3..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ /dev/null @@ -1,106 +0,0 @@ -//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the FactsGenerator, which traverses the AST to generate -// lifetime-relevant facts (such as loan issuance, expiration, origin flow, -// and use) from CFG statements. These facts are used by the dataflow analyses -// to track pointer lifetimes and detect use-after-free errors. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H - -#include "clang/AST/StmtVisitor.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/SmallVector.h" - -namespace clang::lifetimes::internal { - -class FactsGenerator : public ConstStmtVisitor { - using Base = ConstStmtVisitor; - -public: - FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run(); - - void VisitDeclStmt(const DeclStmt *DS); - void VisitDeclRefExpr(const DeclRefExpr *DRE); - void VisitCXXConstructExpr(const CXXConstructExpr *CCE); - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); - void VisitCallExpr(const CallExpr *CE); - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); - void VisitUnaryOperator(const UnaryOperator *UO); - void VisitReturnStmt(const ReturnStmt *RS); - void VisitBinaryOperator(const BinaryOperator *BO); - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); - void VisitInitListExpr(const InitListExpr *ILE); - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); - -private: - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE); - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef Args, - bool IsGslConstruction = false); - - template - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. - bool handleTestPoint(const CXXFunctionalCastExpr *FCE); - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). - void handleUse(const DeclRefExpr *DRE); - - void markUseAsWrite(const DeclRefExpr *DRE); - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap UseFacts; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h deleted file mode 100644 index 91ffbb169f947..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h +++ /dev/null @@ -1,87 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the main entry point and orchestrator for the C++ Lifetime -// Safety Analysis. It coordinates the entire analysis pipeline: fact -// generation, loan propagation, live origins analysis, and enforcement of -// lifetime safety policy. -// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/AnalysisDeclContext.h" - -namespace clang::lifetimes { - -/// Enum to track the confidence level of a potential error. -enum class Confidence : uint8_t { - None, - Maybe, // Reported as a potential error (-Wlifetime-safety-strict) - Definite // Reported as a definite error (-Wlifetime-safety-permissive) -}; - -class LifetimeSafetyReporter { -public: - LifetimeSafetyReporter() = default; - virtual ~LifetimeSafetyReporter() = default; - - virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, - SourceLocation FreeLoc, - Confidence Confidence) {} -}; - -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - -namespace internal { -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; -}; - -/// Running the lifetime safety analysis and querying its results. It -/// encapsulates the various dataflow analyses. -class LifetimeSafetyAnalysis { -public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - - void run(); - - /// \note These are provided only for testing purposes. - LoanPropagationAnalysis &getLoanPropagation() const { - return *LoanPropagation; - } - LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } - FactManager &getFactManager() { return FactMgr; } - -private: - AnalysisDeclContext &AC; - LifetimeSafetyReporter *Reporter; - LifetimeFactory Factory; - FactManager FactMgr; - std::unique_ptr LiveOrigins; - std::unique_ptr LoanPropagation; -}; -} // namespace internal -} // namespace clang::lifetimes - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h deleted file mode 100644 index c4f5f0e9ae46c..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h +++ /dev/null @@ -1,97 +0,0 @@ -//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the LiveOriginAnalysis, a backward dataflow analysis that -// determines which origins are "live" at each program point. An origin is -// "live" at a program point if there's a potential future use of a pointer it -// is associated with. Liveness is "generated" by a use of an origin (e.g., a -// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being -// live) when the origin is replaced by flowing a different origin into it -// (e.g., an OriginFlow from an assignment that kills the destination). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/Support/Debug.h" - -namespace clang::lifetimes::internal { - -enum class LivenessKind : uint8_t { - Dead, // Not alive - Maybe, // Live on some path but not all paths (may-be-live) - Must // Live on all paths (must-be-live) -}; - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). - /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap; - -class LiveOriginsAnalysis { -public: - LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF); - ~LiveOriginsAnalysis(); - - /// Returns the set of origins that are live at a specific program point, - /// along with the the details of the liveness. - LivenessMap getLiveOriginsAt(ProgramPoint P) const; - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, - llvm::StringMap TestPoints) const; - -private: - class Impl; - std::unique_ptr PImpl; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h deleted file mode 100644 index 447d05ca898fd..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h +++ /dev/null @@ -1,48 +0,0 @@ -//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the LoanPropagationAnalysis, a forward dataflow analysis -// that tracks which loans each origin holds at each program point. Loans -// represent borrows of storage locations and are propagated through the -// program as pointers are copied or assigned. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" - -namespace clang::lifetimes::internal { - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet; -using OriginLoanMap = llvm::ImmutableMap; - -class LoanPropagationAnalysis { -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory); - ~LoanPropagationAnalysis(); - - LoanSet getLoans(OriginID OID, ProgramPoint P) const; - -private: - class Impl; - std::unique_ptr PImpl; -}; - -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h deleted file mode 100644 index 7f5cf03fd3e5f..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h +++ /dev/null @@ -1,80 +0,0 @@ -//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Loan and AccessPath structures, which represent -// borrows of storage locations, and the LoanManager, which manages the -// creation and retrieval of loans during lifetime analysis. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H - -#include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" -#include "llvm/Support/raw_ostream.h" - -namespace clang::lifetimes::internal { - -using LoanID = utils::ID; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { - return OS << ID.Value; -} - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. - const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const; -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllLoans; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h deleted file mode 100644 index ba138b078b379..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ /dev/null @@ -1,91 +0,0 @@ -//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines Origins, which represent the set of possible loans a -// pointer-like object could hold, and the OriginManager, which manages the -// creation, storage, and retrieval of origins for variables and expressions. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H - -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" - -namespace clang::lifetimes::internal { - -using OriginID = utils::ID; - -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { - return OS << ID.Value; -} - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast(); - } -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); - Origin &addOrigin(OriginID ID, const clang::Expr &E); - - // TODO: Mark this method as const once we remove the call to getOrCreate. - OriginID get(const Expr &E); - - OriginID get(const ValueDecl &D); - - OriginID getOrCreate(const Expr &E); - - const Origin &getOrigin(OriginID ID) const; - - llvm::ArrayRef getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D); - - void dump(OriginID OID, llvm::raw_ostream &OS) const; - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllOrigins; - llvm::DenseMap DeclToOriginID; - llvm::DenseMap ExprToOriginID; -}; -} // namespace clang::lifetimes::internal - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h deleted file mode 100644 index 4183cabe860a7..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h +++ /dev/null @@ -1,118 +0,0 @@ -//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// This file provides utilities for the lifetime safety analysis, including -// join operations for LLVM's immutable data structures. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H - -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" - -namespace clang::lifetimes::internal::utils { - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template struct ID { - uint32_t Value = 0; - - bool operator==(const ID &Other) const { return Value == Other.Value; } - bool operator!=(const ID &Other) const { return !(*this == Other); } - bool operator<(const ID &Other) const { return Value < Other.Value; } - ID operator++(int) { - ID Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -/// Computes the union of two ImmutableSets. -template -static llvm::ImmutableSet join(llvm::ImmutableSet A, - llvm::ImmutableSet B, - typename llvm::ImmutableSet::Factory &F) { - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. -enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. - Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template -static llvm::ImmutableMap -join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, - typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace clang::lifetimes::internal::utils - -namespace llvm { -template -struct DenseMapInfo> { - using ID = clang::lifetimes::internal::utils::ID; - - static inline ID getEmptyKey() { - return {DenseMapInfo::getEmptyKey()}; - } - - static inline ID getTombstoneKey() { - return {DenseMapInfo::getTombstoneKey()}; - } - - static unsigned getHashValue(const ID &Val) { - return DenseMapInfo::getHashValue(Val.Value); - } - - static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } -}; -} // namespace llvm - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 1dbd4153d856f..5a26f3eeea418 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,8 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -49,4 +51,3 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) -add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeAnnotations.cpp similarity index 94% rename from clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp rename to clang/lib/Analysis/LifetimeAnnotations.cpp index ad61a42c0eaeb..e79122475625e 100644 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,7 +13,8 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang::lifetimes { +namespace clang { +namespace lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -70,4 +71,5 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } -} // namespace clang::lifetimes +} // namespace lifetimes +} // namespace clang diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000000000..6196ec31fce1c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,1546 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include +#include +#include + +namespace clang::lifetimes { +namespace internal { + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; + + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; + } +}; + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast(); + } +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllLoans; +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); + } + Origin &addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); + } + + // TODO: Mark this method as const once we remove the call to getOrCreate. + OriginID get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); + } + + OriginID get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; + } + + OriginID getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; + } + + const Origin &getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; + } + + llvm::ArrayRef getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; + } + + void dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; + } + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllOrigins; + llvm::DenseMap DeclToOriginID; + llvm::DenseMap ExprToOriginID; +}; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin escapes the function by flowing into the return value. + ReturnOfOrigin, + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. + TestPoint, + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template const T *getAs() const { + if (T::classof(this)) + return static_cast(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast(K) << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; + } +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. + bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; + } +}; + +class UseFact : public Fact { + const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. + return const_cast(OM).get(*UseExpr); + } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; + } +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; + } +}; + +class FactManager { +public: + llvm::ArrayRef getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate(); + return new (Mem) FactType(std::forward(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + + LoanManager &getLoanMgr() { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +class FactGenerator : public ConstStmtVisitor { + using Base = ConstStmtVisitor; + +public: + FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional CS = Element.getAs()) + Visit(CS->getStmt()); + else if (std::optional DtorOpt = + Element.getAs()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); + } + + void VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact(L->ID, ExprOID)); + } + } + } + + void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } + } + + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } + } + + void VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); + } + + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back( + FactMgr.createFact(OID)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); + } + + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); + } + + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); + } + + void VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); + } + + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); + } + + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } + } + +private: + static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + return false; + } + + static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); + } + // Check if a type has an origin. + static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); + } + + static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); + } + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); + } + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction = false) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } + } + + /// Creates a loan for the storage path of a given declaration reference. + /// This function should be called whenever a DeclRefExpr represents a borrow. + /// \param DRE The declaration reference expression that initiates the borrow. + /// \return The new Loan on success, nullptr otherwise. + const Loan *createLoan(const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; + } + + template + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact(Annotation)); + return true; + } + } + return false; + } + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } + } + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } + } + + void markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); + } + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap UseFacts; +}; + +// ========================================================================= // +// Generic Dataflow Analysis +// ========================================================================= // + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &AllFacts; + + explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F) + : Cfg(C), AC(AC), AllFacts(F) {} + +public: + void run() { + Derived &D = static_cast(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = AllFacts.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; + +namespace utils { + +/// Computes the union of two ImmutableSets. +template +static llvm::ImmutableSet join(llvm::ImmutableSet A, + llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template +static llvm::ImmutableMap +join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). + llvm::ImmutableMap Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace utils + +// ========================================================================= // +// Loan Propagation Analysis +// ========================================================================= // + +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct LoanPropagationLattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} + LoanPropagationLattice() = default; + + bool operator==(const LoanPropagationLattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const LoanPropagationLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " \n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +/// The analysis that tracks which loans belong to which origins. +class LoanPropagationAnalysis + : public DataflowAnalysis { + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; + +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. + Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return LoanPropagationLattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return LoanPropagationLattice( + OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } +}; + +// ========================================================================= // +// Live Origins Analysis +// ========================================================================= // +// +// A backward dataflow analysis that determines which origins are "live" at each +// program point. An origin is "live" at a program point if there's a potential +// future use of the pointer it represents. Liveness is "generated" by a read of +// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being +// live) when its loan set is overwritten (e.g. a OriginFlow killing the +// destination origin). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// ========================================================================= // + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap; + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct LivenessLattice { + LivenessMap LiveOrigins; + + LivenessLattice() : LiveOrigins(nullptr) {}; + + explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const LivenessLattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const LivenessLattice &Other) const { + return !(*this == Other); + } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " \n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class LiveOriginAnalysis + : public DataflowAnalysis { + FactManager &FactMgr; + LivenessMap::Factory &Factory; + +public: + LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOrigins(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : LSA.getTestPoints()) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } +}; + +// ========================================================================= // +// Lifetime checker and Error reporter +// ========================================================================= // + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap FinalWarningsMap; + LoanPropagationAnalysis &LoanPropagation; + LiveOriginAnalysis &LiveOrigins; + FactManager &FactMgr; + AnalysisDeclContext &ADC; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, + FactManager &FM, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) + : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), + Reporter(Reporter) {} + + void run() { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + for (const CFGBlock *B : *ADC.getAnalysis()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; + +// ========================================================================= // +// LifetimeSafetyAnalysis Class Implementation +// ========================================================================= // + +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + llvm::BumpPtrAllocator Allocator; + OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; +}; + +// We need this here for unique_ptr with forward declared class. +LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter), Factory(std::make_unique()), + FactMgr(std::make_unique()) {} + +void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactGenerator FactGen(*FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. + LoanPropagation = std::make_unique( + Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); + LoanPropagation->run(); + + LiveOrigins = std::make_unique( + Cfg, AC, *FactMgr, Factory->LivenessMapFactory); + LiveOrigins->run(); + DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); + + LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, + Reporter); + Checker.run(); +} + +LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, + ProgramPoint PP) const { + assert(LoanPropagation && "Analysis has not been run."); + return LoanPropagation->getLoans(OID, PP); +} + +std::optional +LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { + assert(FactMgr && "FactManager not initialized"); + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` logic + // in a const-query context if that becomes an issue. + return FactMgr->getOriginMgr().get(*D); +} + +std::vector +LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { + assert(FactMgr && "FactManager not initialized"); + std::vector Result; + for (const Loan &L : FactMgr->getLoanMgr().getLoans()) + if (L.Path.D == VD) + Result.push_back(L.ID); + return Result; +} + +std::vector> +LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { + assert(LiveOrigins && "LiveOriginAnalysis has not been run."); + std::vector> Result; + for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) + Result.push_back({OID, Info.Kind}); + return Result; +} + +llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { + assert(FactMgr && "FactManager not initialized"); + llvm::StringMap AnnotationToPointMap; + for (const CFGBlock *Block : *AC.getCFG()) { + for (const Fact *F : FactMgr->getFacts(Block)) { + if (const auto *TPF = F->getAs()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt deleted file mode 100644 index 85842928770cd..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_clang_library(clangAnalysisLifetimeSafety - Checker.cpp - Facts.cpp - FactsGenerator.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp - LiveOrigins.cpp - Loans.cpp - LoanPropagation.cpp - Origins.cpp - ) - diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp deleted file mode 100644 index c443c3a5d4f9b..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/Checker.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the LifetimeChecker, which detects use-after-free -// errors by checking if live origins hold loans that have expired. -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" -#include "clang/AST/Expr.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" - -namespace clang::lifetimes::internal { - -static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); -} - -namespace { - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. - Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap FinalWarningsMap; - const LoanPropagationAnalysis &LoanPropagation; - const LiveOriginsAnalysis &LiveOrigins; - const FactManager &FactMgr; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, - const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, - AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) - : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), - Reporter(Reporter) { - for (const CFGBlock *B : *ADC.getAnalysis()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. - Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; -} // namespace - -void runLifetimeChecker(const LoanPropagationAnalysis &LP, - const LiveOriginsAnalysis &LO, - const FactManager &FactMgr, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h deleted file mode 100644 index 2f7bcb6e5dc81..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/Dataflow.h +++ /dev/null @@ -1,188 +0,0 @@ -//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a generic, policy-based driver for dataflow analyses. -// It provides a flexible framework that combines the dataflow runner and -// transfer functions, allowing derived classes to implement specific analyses -// by defining their lattice, join, and transfer functions. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include - -namespace clang::lifetimes::internal { - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. -/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -template -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &FactMgr; - - explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, - FactManager &FactMgr) - : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} - -public: - void run() { - Derived &D = static_cast(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. - if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. - Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = FactMgr.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; -} // namespace clang::lifetimes::internal -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp deleted file mode 100644 index 1aea64f864367..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ /dev/null @@ -1,102 +0,0 @@ -//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" - -namespace clang::lifetimes::internal { - -void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast(K) << ")\n"; -} - -void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; -} - -void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; -} - -void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? "" : ", Merge"); - OS << ")\n"; -} - -void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; -} - -void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; -} - -void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; -} - -llvm::StringMap FactManager::getTestPoints() const { - llvm::StringMap AnnotationToPointMap; - for (const CFGBlock *Block : BlockToFactsMap.keys()) { - for (const Fact *F : getFacts(Block)) { - if (const auto *TPF = F->getAs()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} - -void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp deleted file mode 100644 index 485308f5b2c3b..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ /dev/null @@ -1,348 +0,0 @@ -//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "llvm/Support/TimeProfiler.h" - -namespace clang::lifetimes::internal { - -static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr(); - return RD->hasAttr(); - } - return false; -} - -static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); -} -// Check if a type has an origin. -static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); -} - -static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); -} - -/// Creates a loan for the storage path of a given declaration reference. -/// This function should be called whenever a DeclRefExpr represents a borrow. -/// \param DRE The declaration reference expression that initiates the borrow. -/// \return The new Loan on success, nullptr otherwise. -static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; -} - -void FactsGenerator::run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional CS = Element.getAs()) - Visit(CS->getStmt()); - else if (std::optional DtorOpt = - Element.getAs()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } -} - -void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); -} - -void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. - if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(FactMgr, DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact(L->ID, ExprOID)); - } - } -} - -void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } -} - -void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. - llvm::SmallVector Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } -} - -void FactsGenerator::VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); -} - -void FactsGenerator::VisitCXXNullPtrLiteralExpr( - const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); -} - -void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - killAndFlowOrigin(*ICE, *ICE->getSubExpr()); -} - -void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } -} - -void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back(FactMgr.createFact(OID)); - } - } -} - -void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); -} - -void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); -} - -void FactsGenerator::VisitCXXFunctionalCastExpr( - const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. - if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); -} - -void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); -} - -void FactsGenerator::VisitMaterializeTemporaryExpr( - const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. - killAndFlowOrigin(*MTE, *MTE->getSubExpr()); -} - -void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } -} - -void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); -} - -/// Checks if a call-like expression creates a borrow by passing a value to a -/// reference parameter, creating an IssueFact if it does. -/// \param IsGslConstruction True if this is a GSL construction where all -/// argument origins should flow to the returned origin. -void FactsGenerator::handleFunctionCall(const Expr *Call, - const FunctionDecl *FD, - ArrayRef Args, - bool IsGslConstruction) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. - PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } -} - -/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. -/// If so, creates a `TestPointFact` and returns true. -bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact(Annotation)); - return true; - } - } - return false; -} - -void FactsGenerator::handleAssignment(const Expr *LHSExpr, - const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } -} - -// A DeclRefExpr will be treated as a use of the referenced decl. It will be -// checked for use-after-free unless it is later marked as being written to -// (e.g. on the left-hand side of an assignment). -void FactsGenerator::handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } -} - -void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp deleted file mode 100644 index 00c7ed90503e7..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the main LifetimeSafetyAnalysis class, which coordinates -// the various components (fact generation, loan propagation, live origins -// analysis, and checking) to detect lifetime safety violations in C++ code. -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" -#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" -#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include - -namespace clang::lifetimes { -namespace internal { - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactsGenerator FactGen(FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. - LoanPropagation = std::make_unique( - Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); - - LiveOrigins = std::make_unique( - Cfg, AC, FactMgr, Factory.LivenessMapFactory); - DEBUG_WITH_TYPE("LiveOrigins", - LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); - - runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp deleted file mode 100644 index cddb3f3ce4c1c..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" -#include "Dataflow.h" -#include "llvm/Support/ErrorHandling.h" - -namespace clang::lifetimes::internal { -namespace { - -/// The dataflow lattice for origin liveness analysis. -/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. -struct Lattice { - LivenessMap LiveOrigins; - - Lattice() : LiveOrigins(nullptr) {}; - - explicit Lattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const Lattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const Lattice &Other) const { return !(*this == Other); } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " \n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class AnalysisImpl - : public DataflowAnalysis { - -public: - AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. - Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. - Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOriginsAt(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, - llvm::StringMap TestPoints) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : TestPoints) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } - -private: - FactManager &FactMgr; - LivenessMap::Factory &Factory; -}; -} // namespace - -// PImpl wrapper implementation -class LiveOriginsAnalysis::Impl : public AnalysisImpl { - using AnalysisImpl::AnalysisImpl; -}; - -LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F, - LivenessMap::Factory &SF) - : PImpl(std::make_unique(C, AC, F, SF)) { - PImpl->run(); -} - -LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; - -LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { - return PImpl->getLiveOriginsAt(P); -} - -void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, - llvm::StringMap TestPoints) const { - PImpl->dump(OS, TestPoints); -} -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp deleted file mode 100644 index 387097e705f94..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "Dataflow.h" -#include - -namespace clang::lifetimes::internal { -namespace { -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. -struct Lattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit Lattice(const OriginLoanMap &S) : Origins(S) {} - Lattice() = default; - - bool operator==(const Lattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const Lattice &Other) const { return !(*this == Other); } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " \n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -class AnalysisImpl - : public DataflowAnalysis { -public: - AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. - Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return Lattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } - - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; -}; -} // namespace - -class LoanPropagationAnalysis::Impl final : public AnalysisImpl { - using AnalysisImpl::AnalysisImpl; -}; - -LoanPropagationAnalysis::LoanPropagationAnalysis( - const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : PImpl(std::make_unique(C, AC, F, OriginLoanMapFactory, - LoanSetFactory)) { - PImpl->run(); -} - -LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; - -LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { - return PImpl->getLoans(OID, P); -} -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp deleted file mode 100644 index 2c85a3c6083f3..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/Loans.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" - -namespace clang::lifetimes::internal { - -void Loan::dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp deleted file mode 100644 index ea51a75324e06..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" - -namespace clang::lifetimes::internal { - -void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; -} - -Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); -} - -Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); -} - -// TODO: Mark this method as const once we remove the call to getOrCreate. -OriginID OriginManager::get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); -} - -OriginID OriginManager::get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; -} - -OriginID OriginManager::getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; -} - -const Origin &OriginManager::getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; -} - -OriginID OriginManager::getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; -} - -} // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 4c1fe60dd2db9..7fd88a9ca73d6 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -31,7 +31,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 9da50ecf1823c..20155d94900ee 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -119,7 +119,6 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis - clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 8aebf53c0e76f..e8a7ad3bd355a 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 57a95fc39dca4..d0acb27f6a360 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 0c051847f4d47..169b2d2d5e9b4 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" @@ -63,7 +63,7 @@ class LifetimeTestRunner { Analysis = std::make_unique(*AnalysisCtx, nullptr); Analysis->run(); - AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); + AnnotationToPointMap = Analysis->getTestPoints(); } LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } @@ -98,11 +98,10 @@ class LifetimeTestHelper { auto *VD = findDecl(VarName); if (!VD) return std::nullopt; - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` - // logic in a const-query context if that becomes an issue. - return const_cast(Analysis.getFactManager().getOriginMgr()) - .get(*VD); + auto OID = Analysis.getOriginIDForDecl(VD); + if (!OID) + ADD_FAILURE() << "Origin for '" << VarName << "' not found."; + return OID; } std::vector getLoansForVar(llvm::StringRef VarName) { @@ -111,10 +110,7 @@ class LifetimeTestHelper { ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } - std::vector LID; - for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) - if (L.Path.D == VD) - LID.push_back(L.ID); + std::vector LID = Analysis.getLoanIDForVar(VD); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {}; @@ -127,7 +123,7 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLoanPropagation().getLoans(OID, PP); + return Analysis.getLoansAtPoint(OID, PP); } std::optional>> @@ -135,10 +131,7 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - std::vector> Result; - for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) - Result.push_back({OID, Info.Kind}); - return Result; + return Analysis.getLiveOriginsAtPoint(PP); } private: diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index b737d5b227340..ac86f43b2048e 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,9 +21,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Signals.h" #include #include #include From 255a6dd4b58c43a42e2d1c448ee11e9c8432c57a Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Sat, 11 Oct 2025 18:21:01 +0000 Subject: [PATCH 21/35] Reapply "[LifetimeSafety] Reorganize code into modular components (#162474)" This reverts commit 24a5d8a9cae9b766351d2f500a190471aa23cb13. Add link libs to the new clang library --- .github/new-prs-labeler.yml | 4 +- .../clang/Analysis/Analyses/LifetimeSafety.h | 183 -- .../Analyses/LifetimeSafety/Checker.h | 35 + .../Analysis/Analyses/LifetimeSafety/Facts.h | 232 +++ .../Analyses/LifetimeSafety/FactsGenerator.h | 106 ++ .../LifetimeAnnotations.h | 7 +- .../Analyses/LifetimeSafety/LifetimeSafety.h | 87 + .../Analyses/LifetimeSafety/LiveOrigins.h | 97 ++ .../Analyses/LifetimeSafety/LoanPropagation.h | 48 + .../Analysis/Analyses/LifetimeSafety/Loans.h | 80 + .../Analyses/LifetimeSafety/Origins.h | 91 + .../Analysis/Analyses/LifetimeSafety/Utils.h | 118 ++ clang/lib/Analysis/CMakeLists.txt | 3 +- clang/lib/Analysis/LifetimeSafety.cpp | 1546 ----------------- .../Analysis/LifetimeSafety/CMakeLists.txt | 17 + clang/lib/Analysis/LifetimeSafety/Checker.cpp | 130 ++ clang/lib/Analysis/LifetimeSafety/Dataflow.h | 188 ++ clang/lib/Analysis/LifetimeSafety/Facts.cpp | 102 ++ .../LifetimeSafety/FactsGenerator.cpp | 348 ++++ .../LifetimeAnnotations.cpp | 8 +- .../LifetimeSafety/LifetimeSafety.cpp | 77 + .../Analysis/LifetimeSafety/LiveOrigins.cpp | 180 ++ .../LifetimeSafety/LoanPropagation.cpp | 138 ++ clang/lib/Analysis/LifetimeSafety/Loans.cpp | 18 + clang/lib/Analysis/LifetimeSafety/Origins.cpp | 89 + clang/lib/Sema/AnalysisBasedWarnings.cpp | 2 +- clang/lib/Sema/CMakeLists.txt | 1 + clang/lib/Sema/CheckExprLifetime.cpp | 2 +- clang/lib/Sema/SemaAPINotes.cpp | 2 +- .../unittests/Analysis/LifetimeSafetyTest.cpp | 25 +- llvm/include/llvm/ADT/ImmutableSet.h | 2 + 31 files changed, 2212 insertions(+), 1754 deletions(-) delete mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h rename clang/include/clang/Analysis/Analyses/{ => LifetimeSafety}/LifetimeAnnotations.h (95%) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h delete mode 100644 clang/lib/Analysis/LifetimeSafety.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/CMakeLists.txt create mode 100644 clang/lib/Analysis/LifetimeSafety/Checker.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Dataflow.h create mode 100644 clang/lib/Analysis/LifetimeSafety/Facts.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp rename clang/lib/Analysis/{ => LifetimeSafety}/LifetimeAnnotations.cpp (94%) create mode 100644 clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Loans.cpp create mode 100644 clang/lib/Analysis/LifetimeSafety/Origins.cpp diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index f38d27a85d39a..786f6f5b215c9 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1083,8 +1083,8 @@ clang:openmp: - llvm/test/Transforms/OpenMP/** clang:temporal-safety: - - clang/include/clang/Analysis/Analyses/LifetimeSafety* - - clang/lib/Analysis/LifetimeSafety* + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** - clang/unittests/Analysis/LifetimeSafety* - clang/test/Sema/*lifetime-safety* - clang/test/Sema/*lifetime-analysis* diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h deleted file mode 100644 index e54fc26df28ea..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ /dev/null @@ -1,183 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the entry point for a dataflow-based static analysis -// that checks for C++ lifetime violations. -// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Basic/SourceLocation.h" -#include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/StringMap.h" -#include - -namespace clang::lifetimes { - -/// Enum to track the confidence level of a potential error. -enum class Confidence : uint8_t { - None, - Maybe, // Reported as a potential error (-Wlifetime-safety-strict) - Definite // Reported as a definite error (-Wlifetime-safety-permissive) -}; - -enum class LivenessKind : uint8_t { - Dead, // Not alive - Maybe, // Live on some path but not all paths (may-be-live) - Must // Live on all paths (must-be-live) -}; - -class LifetimeSafetyReporter { -public: - LifetimeSafetyReporter() = default; - virtual ~LifetimeSafetyReporter() = default; - - virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, - SourceLocation FreeLoc, - Confidence Confidence) {} -}; - -/// The main entry point for the analysis. -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - -namespace internal { -// Forward declarations of internal types. -class Fact; -class FactManager; -class LoanPropagationAnalysis; -class ExpiredLoansAnalysis; -class LiveOriginAnalysis; -struct LifetimeFactory; - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template struct ID { - uint32_t Value = 0; - - bool operator==(const ID &Other) const { return Value == Other.Value; } - bool operator!=(const ID &Other) const { return !(*this == Other); } - bool operator<(const ID &Other) const { return Value < Other.Value; } - ID operator++(int) { - ID Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -using LoanID = ID; -using OriginID = ID; -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { - return OS << ID.Value; -} -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { - return OS << ID.Value; -} - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. -using LoanSet = llvm::ImmutableSet; -using OriginSet = llvm::ImmutableSet; -using OriginLoanMap = llvm::ImmutableMap; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// Running the lifetime safety analysis and querying its results. It -/// encapsulates the various dataflow analyses. -class LifetimeSafetyAnalysis { -public: - LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter); - ~LifetimeSafetyAnalysis(); - - void run(); - - /// Returns the set of loans an origin holds at a specific program point. - LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const; - - /// Returns the set of origins that are live at a specific program point, - /// along with the confidence level of their liveness. - /// - /// An origin is considered live if there are potential future uses of that - /// origin after the given program point. The confidence level indicates - /// whether the origin is definitely live (Definite) due to being domintated - /// by a set of uses or only possibly live (Maybe) only on some but not all - /// control flow paths. - std::vector> - getLiveOriginsAtPoint(ProgramPoint PP) const; - - /// Finds the OriginID for a given declaration. - /// Returns a null optional if not found. - std::optional getOriginIDForDecl(const ValueDecl *D) const; - - /// Finds the LoanID's for the loan created with the specific variable as - /// their Path. - std::vector getLoanIDForVar(const VarDecl *VD) const; - - /// Retrieves program points that were specially marked in the source code - /// for testing. - /// - /// The analysis recognizes special function calls of the form - /// `void("__lifetime_test_point_")` as test points. This method returns - /// a map from the annotation string () to the corresponding - /// `ProgramPoint`. This allows test harnesses to query the analysis state at - /// user-defined locations in the code. - /// \note This is intended for testing only. - llvm::StringMap getTestPoints() const; - -private: - AnalysisDeclContext &AC; - LifetimeSafetyReporter *Reporter; - std::unique_ptr Factory; - std::unique_ptr FactMgr; - std::unique_ptr LoanPropagation; - std::unique_ptr LiveOrigins; -}; -} // namespace internal -} // namespace clang::lifetimes - -namespace llvm { -template -struct DenseMapInfo> { - using ID = clang::lifetimes::internal::ID; - - static inline ID getEmptyKey() { - return {DenseMapInfo::getEmptyKey()}; - } - - static inline ID getTombstoneKey() { - return {DenseMapInfo::getTombstoneKey()}; - } - - static unsigned getHashValue(const ID &Val) { - return DenseMapInfo::getHashValue(Val.Value); - } - - static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } -}; -} // namespace llvm - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h new file mode 100644 index 0000000000000..03636be7d00c3 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h @@ -0,0 +1,35 @@ +//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines and enforces the lifetime safety policy. It detects +// use-after-free errors by examining loan expiration points and checking if +// any live origins hold the expired loans. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" + +namespace clang::lifetimes::internal { + +/// Runs the lifetime checker, which detects use-after-free errors by +/// examining loan expiration points and checking if any live origins hold +/// the expired loan. +void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter); + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h new file mode 100644 index 0000000000000..6a90aeb01e638 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -0,0 +1,232 @@ +//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Facts, which are atomic lifetime-relevant events (such as +// loan issuance, loan expiration, origin flow, and use), and the FactManager, +// which manages the storage and retrieval of facts for each CFG block. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include + +namespace clang::lifetimes::internal { +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin escapes the function by flowing into the return value. + ReturnOfOrigin, + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. + TestPoint, + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template const T *getAs() const { + if (T::classof(this)) + return static_cast(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const; +}; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override; +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override; +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. + bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class UseFact : public Fact { + const Expr *UseExpr; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + + OriginID getUsedOrigin(const OriginManager &OM) const { + // TODO: Remove const cast and make OriginManager::get as const. + return const_cast(OM).get(*UseExpr); + } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. +class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override; +}; + +class FactManager { +public: + llvm::ArrayRef getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { + if (!NewFacts.empty()) + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + + template + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate(); + return new (Mem) FactType(std::forward(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_")` as test points. This method returns + /// a map from the annotation string () to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. + llvm::StringMap getTestPoints() const; + + LoanManager &getLoanMgr() { return LoanMgr; } + const LoanManager &getLoanMgr() const { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + const OriginManager &getOriginMgr() const { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h new file mode 100644 index 0000000000000..5e58abee2bbb3 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -0,0 +1,106 @@ +//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FactsGenerator, which traverses the AST to generate +// lifetime-relevant facts (such as loan issuance, expiration, origin flow, +// and use) from CFG statements. These facts are used by the dataflow analyses +// to track pointer lifetimes and detect use-after-free errors. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H + +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::lifetimes::internal { + +class FactsGenerator : public ConstStmtVisitor { + using Base = ConstStmtVisitor; + +public: + FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run(); + + void VisitDeclStmt(const DeclStmt *DS); + void VisitDeclRefExpr(const DeclRefExpr *DRE); + void VisitCXXConstructExpr(const CXXConstructExpr *CCE); + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); + void VisitCallExpr(const CallExpr *CE); + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); + void VisitUnaryOperator(const UnaryOperator *UO); + void VisitReturnStmt(const ReturnStmt *RS); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); + void VisitInitListExpr(const InitListExpr *ILE); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); + +private: + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE); + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. + /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction = false); + + template + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool handleTestPoint(const CXXFunctionalCastExpr *FCE); + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE); + + void markUseAsWrite(const DeclRefExpr *DRE); + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector CurrentBlockFacts; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap UseFacts; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h similarity index 95% rename from clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h rename to clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h index 229d16c20b0f8..f02969e0a9563 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h @@ -12,8 +12,7 @@ #include "clang/AST/DeclCXX.h" -namespace clang { -namespace lifetimes { +namespace clang ::lifetimes { /// Returns the most recent declaration of the method to ensure all /// lifetime-bound attributes from redeclarations are considered. @@ -38,7 +37,7 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// lifetimebound, either due to an explicit lifetimebound attribute on the /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); -} // namespace lifetimes -} // namespace clang + +} // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h new file mode 100644 index 0000000000000..91ffbb169f947 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -0,0 +1,87 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the main entry point and orchestrator for the C++ Lifetime +// Safety Analysis. It coordinates the entire analysis pipeline: fact +// generation, loan propagation, live origins analysis, and enforcement of +// lifetime safety policy. +// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" + +namespace clang::lifetimes { + +/// Enum to track the confidence level of a potential error. +enum class Confidence : uint8_t { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} +}; + +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + +namespace internal { +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; +}; + +/// Running the lifetime safety analysis and querying its results. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + + void run(); + + /// \note These are provided only for testing purposes. + LoanPropagationAnalysis &getLoanPropagation() const { + return *LoanPropagation; + } + LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } + FactManager &getFactManager() { return FactMgr; } + +private: + AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; + LifetimeFactory Factory; + FactManager FactMgr; + std::unique_ptr LiveOrigins; + std::unique_ptr LoanPropagation; +}; +} // namespace internal +} // namespace clang::lifetimes + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h new file mode 100644 index 0000000000000..c4f5f0e9ae46c --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h @@ -0,0 +1,97 @@ +//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveOriginAnalysis, a backward dataflow analysis that +// determines which origins are "live" at each program point. An origin is +// "live" at a program point if there's a potential future use of a pointer it +// is associated with. Liveness is "generated" by a use of an origin (e.g., a +// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being +// live) when the origin is replaced by flowing a different origin into it +// (e.g., an OriginFlow from an assignment that kills the destination). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/Support/Debug.h" + +namespace clang::lifetimes::internal { + +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + const UseFact *CausingUseFact; + + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). + /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(const UseFact *UF, LivenessKind K) + : CausingUseFact(UF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingUseFact); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap; + +class LiveOriginsAnalysis { +public: + LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF); + ~LiveOriginsAnalysis(); + + /// Returns the set of origins that are live at a specific program point, + /// along with the the details of the liveness. + LivenessMap getLiveOriginsAt(ProgramPoint P) const; + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h new file mode 100644 index 0000000000000..447d05ca898fd --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h @@ -0,0 +1,48 @@ +//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoanPropagationAnalysis, a forward dataflow analysis +// that tracks which loans each origin holds at each program point. Loans +// represent borrows of storage locations and are propagated through the +// program as pointers are copied or assigned. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal { + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet; +using OriginLoanMap = llvm::ImmutableMap; + +class LoanPropagationAnalysis { +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory); + ~LoanPropagationAnalysis(); + + LoanSet getLoans(OriginID OID, ProgramPoint P) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h new file mode 100644 index 0000000000000..7f5cf03fd3e5f --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h @@ -0,0 +1,80 @@ +//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Loan and AccessPath structures, which represent +// borrows of storage locations, and the LoanManager, which manages the +// creation and retrieval of loans during lifetime analysis. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang::lifetimes::internal { + +using LoanID = utils::ID; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is created. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + /// The expression that creates the loan, e.g., &x. + const Expr *IssueExpr; + + Loan(LoanID id, AccessPath path, const Expr *IssueExpr) + : ID(id), Path(path), IssueExpr(IssueExpr) {} + + void dump(llvm::raw_ostream &OS) const; +}; + +/// Manages the creation, storage and retrieval of loans. +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { + AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllLoans; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h new file mode 100644 index 0000000000000..ba138b078b379 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -0,0 +1,91 @@ +//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Origins, which represent the set of possible loans a +// pointer-like object could hold, and the OriginManager, which manages the +// creation, storage, and retrieval of origins for variables and expressions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" + +namespace clang::lifetimes::internal { + +using OriginID = utils::ID; + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast(); + } +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); + Origin &addOrigin(OriginID ID, const clang::Expr &E); + + // TODO: Mark this method as const once we remove the call to getOrCreate. + OriginID get(const Expr &E); + + OriginID get(const ValueDecl &D); + + OriginID getOrCreate(const Expr &E); + + const Origin &getOrigin(OriginID ID) const; + + llvm::ArrayRef getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D); + + void dump(OriginID OID, llvm::raw_ostream &OS) const; + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector AllOrigins; + llvm::DenseMap DeclToOriginID; + llvm::DenseMap ExprToOriginID; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h new file mode 100644 index 0000000000000..4183cabe860a7 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h @@ -0,0 +1,118 @@ +//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file provides utilities for the lifetime safety analysis, including +// join operations for LLVM's immutable data structures. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H + +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal::utils { + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +/// Computes the union of two ImmutableSets. +template +static llvm::ImmutableSet join(llvm::ImmutableSet A, + llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. +template +static llvm::ImmutableMap +join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). + llvm::ImmutableMap Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace clang::lifetimes::internal::utils + +namespace llvm { +template +struct DenseMapInfo> { + using ID = clang::lifetimes::internal::utils::ID; + + static inline ID getEmptyKey() { + return {DenseMapInfo::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 5a26f3eeea418..1dbd4153d856f 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,8 +21,6 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp - LifetimeAnnotations.cpp - LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -51,3 +49,4 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) +add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp deleted file mode 100644 index 6196ec31fce1c..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ /dev/null @@ -1,1546 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TimeProfiler.h" -#include -#include -#include - -namespace clang::lifetimes { -namespace internal { - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. - /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - /// The expression that creates the loan, e.g., &x. - const Expr *IssueExpr; - - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} - - void dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; - OS << Path.D->getNameAsString() << ")"; - } -}; - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast(); - } -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllLoans; -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); - } - Origin &addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); - } - - // TODO: Mark this method as const once we remove the call to getOrCreate. - OriginID get(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - // If the expression itself has no specific origin, and it's a reference - // to a declaration, its origin is that of the declaration it refers to. - // For pointer types, where we don't pre-emptively create an origin for the - // DeclRefExpr itself. - if (const auto *DRE = dyn_cast(&E)) - return get(*DRE->getDecl()); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - return getOrCreate(E); - } - - OriginID get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; - } - - OriginID getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; - } - - const Origin &getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; - } - - llvm::ArrayRef getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; - } - - void dump(OriginID OID, llvm::raw_ostream &OS) const { - OS << OID << " ("; - Origin O = getOrigin(OID); - if (const ValueDecl *VD = O.getDecl()) - OS << "Decl: " << VD->getNameAsString(); - else if (const Expr *E = O.getExpr()) - OS << "Expr: " << E->getStmtClassName(); - else - OS << "Unknown"; - OS << ")"; - } - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllOrigins; - llvm::DenseMap DeclToOriginID; - llvm::DenseMap ExprToOriginID; -}; - -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - /// This can also optionally kill the destination origin before flowing into - /// it. Otherwise, the source's loan set is merged into the destination's - /// loan set. - OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, - /// An origin is used (eg. appears as l-value expression like DeclRefExpr). - Use, - /// A marker for a specific point in the code, for testing. - TestPoint, - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template const T *getAs() const { - if (T::classof(this)) - return static_cast(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const { - OS << "Fact (Kind: " << static_cast(K) << ")\n"; - } -}; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &OM) const override { - OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ", ToOrigin: "; - OM.dump(getOriginID(), OS); - OS << ")\n"; - } -}; - -class ExpireFact : public Fact { - LoanID LID; - SourceLocation ExpiryLoc; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID, SourceLocation ExpiryLoc) - : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} - - LoanID getLoanID() const { return LID; } - SourceLocation getExpiryLoc() const { return ExpiryLoc; } - - void dump(llvm::raw_ostream &OS, const LoanManager &LM, - const OriginManager &) const override { - OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); - OS << ")\n"; - } -}; - -class OriginFlowFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - // True if the destination origin should be killed (i.e., its current loans - // cleared) before the source origin's loans are flowed into it. - bool KillDest; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::OriginFlow; - } - - OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) - : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), - KillDest(KillDest) {} - - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - bool getKillDest() const { return KillDest; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "OriginFlow (Dest: "; - OM.dump(getDestOriginID(), OS); - OS << ", Src: "; - OM.dump(getSrcOriginID(), OS); - OS << (getKillDest() ? "" : ", Merge"); - OS << ")\n"; - } -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); - OS << ")\n"; - } -}; - -class UseFact : public Fact { - const Expr *UseExpr; - // True if this use is a write operation (e.g., left-hand side of assignment). - // Write operations are exempted from use-after-free checks. - bool IsWritten = false; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} - - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast(OM).get(*UseExpr); - } - const Expr *getUseExpr() const { return UseExpr; } - void markAsWritten() { IsWritten = true; } - bool isWritten() const { return IsWritten; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const override { - OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); - OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; - } -}; - -/// A dummy-fact used to mark a specific point in the code for testing. -/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. -class TestPointFact : public Fact { - StringRef Annotation; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } - - explicit TestPointFact(StringRef Annotation) - : Fact(Kind::TestPoint), Annotation(Annotation) {} - - StringRef getAnnotation() const { return Annotation; } - - void dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &) const override { - OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; - } -}; - -class FactManager { -public: - llvm::ArrayRef getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate(); - return new (Mem) FactType(std::forward(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. - for (const CFGBlock *B : *AC.getAnalysis()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } - } - llvm::dbgs() << " End of Block\n"; - } - } - - LoanManager &getLoanMgr() { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; - -class FactGenerator : public ConstStmtVisitor { - using Base = ConstStmtVisitor; - -public: - FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional CS = Element.getAs()) - Visit(CS->getStmt()); - else if (std::optional DtorOpt = - Element.getAs()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } - } - - void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast(D)) - if (hasOrigin(VD)) - if (const Expr *InitExpr = VD->getInit()) - killAndFlowOrigin(*VD, *InitExpr); - } - - void VisitDeclRefExpr(const DeclRefExpr *DRE) { - handleUse(DRE); - // For non-pointer/non-view types, a reference to the variable's storage - // is a borrow. We create a loan for it. - // For pointer/view types, we stick to the existing model for now and do - // not create an extra origin for the l-value expression itself. - - // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is - // not sufficient to model the different levels of indirection. The current - // single-origin model cannot distinguish between a loan to the variable's - // storage and a loan to what it points to. A multi-origin model would be - // required for this. - if (!isPointerType(DRE->getType())) { - if (const Loan *L = createLoan(DRE)) { - OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); - CurrentBlockFacts.push_back( - FactMgr.createFact(L->ID, ExprOID)); - } - } - } - - void VisitCXXConstructExpr(const CXXConstructExpr *CCE) { - if (isGslPointerType(CCE->getType())) { - handleGSLPointerConstruction(CCE); - return; - } - } - - void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { - // Specifically for conversion operators, - // like `std::string_view p = std::string{};` - if (isGslPointerType(MCE->getType()) && - isa(MCE->getCalleeDecl())) { - // The argument is the implicit object itself. - handleFunctionCall(MCE, MCE->getMethodDecl(), - {MCE->getImplicitObjectArgument()}, - /*IsGslConstruction=*/true); - } - if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { - // Construct the argument list, with the implicit 'this' object as the - // first argument. - llvm::SmallVector Args; - Args.push_back(MCE->getImplicitObjectArgument()); - Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); - - handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); - } - } - - void VisitCallExpr(const CallExpr *CE) { - handleFunctionCall(CE, CE->getDirectCallee(), - {CE->getArgs(), CE->getNumArgs()}); - } - - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. - FactMgr.getOriginMgr().getOrCreate(*N); - } - - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE)) - return; - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - killAndFlowOrigin(*ICE, *ICE->getSubExpr()); - } - - void VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - // Taking address of a pointer-type expression is not yet supported and - // will be supported in multi-origin model. - if (isPointerType(SubExpr->getType())) - return; - // The origin of an address-of expression (e.g., &x) is the origin of - // its sub-expression (x). This fact will cause the dataflow analysis - // to propagate any loans held by the sub-expression's origin to the - // origin of this UnaryOperator expression. - killAndFlowOrigin(*UO, *SubExpr); - } - } - - void VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr)) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back( - FactMgr.createFact(OID)); - } - } - } - - void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) - handleAssignment(BO->getLHS(), BO->getRHS()); - } - - void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { - // Assignment operators have special "kill-then-propagate" semantics - // and are handled separately. - if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { - handleAssignment(OCE->getArg(0), OCE->getArg(1)); - return; - } - handleFunctionCall(OCE, OCE->getDirectCallee(), - {OCE->getArgs(), OCE->getNumArgs()}, - /*IsGslConstruction=*/false); - } - - void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) { - // Check if this is a test point marker. If so, we are done with this - // expression. - if (handleTestPoint(FCE)) - return; - if (isGslPointerType(FCE->getType())) - killAndFlowOrigin(*FCE, *FCE->getSubExpr()); - } - - void VisitInitListExpr(const InitListExpr *ILE) { - if (!hasOrigin(ILE)) - return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) - killAndFlowOrigin(*ILE, *ILE->getInit(0)); - } - - void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE) { - if (!hasOrigin(MTE)) - return; - // A temporary object's origin is the same as the origin of the - // expression that initializes it. - killAndFlowOrigin(*MTE, *MTE->getSubExpr()); - } - - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); - } - } - -private: - static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr(); - return RD->hasAttr(); - } - return false; - } - - static bool isPointerType(QualType QT) { - return QT->isPointerOrReferenceType() || isGslPointerType(QT); - } - // Check if a type has an origin. - static bool hasOrigin(const Expr *E) { - return E->isGLValue() || isPointerType(E->getType()); - } - - static bool hasOrigin(const VarDecl *VD) { - return isPointerType(VD->getType()); - } - - void handleGSLPointerConstruction(const CXXConstructExpr *CCE) { - assert(isGslPointerType(CCE->getType())); - if (CCE->getNumArgs() != 1) - return; - if (hasOrigin(CCE->getArg(0))) - killAndFlowOrigin(*CCE, *CCE->getArg(0)); - else - // This could be a new borrow. - handleFunctionCall(CCE, CCE->getConstructor(), - {CCE->getArgs(), CCE->getNumArgs()}, - /*IsGslConstruction=*/true); - } - - /// Checks if a call-like expression creates a borrow by passing a value to a - /// reference parameter, creating an IssueFact if it does. - /// \param IsGslConstruction True if this is a GSL construction where all - /// argument origins should flow to the returned origin. - void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, - ArrayRef Args, - bool IsGslConstruction = false) { - // Ignore functions returning values with no origin. - if (!FD || !hasOrigin(Call)) - return; - auto IsArgLifetimeBound = [FD](unsigned I) -> bool { - const ParmVarDecl *PVD = nullptr; - if (const auto *Method = dyn_cast(FD); - Method && Method->isInstance()) { - if (I == 0) - // For the 'this' argument, the attribute is on the method itself. - return implicitObjectParamIsLifetimeBound(Method); - if ((I - 1) < Method->getNumParams()) - // For explicit arguments, find the corresponding parameter - // declaration. - PVD = Method->getParamDecl(I - 1); - } else if (I < FD->getNumParams()) - // For free functions or static methods. - PVD = FD->getParamDecl(I); - return PVD ? PVD->hasAttr() : false; - }; - if (Args.empty()) - return; - bool killedSrc = false; - for (unsigned I = 0; I < Args.size(); ++I) - if (IsGslConstruction || IsArgLifetimeBound(I)) { - if (!killedSrc) { - killedSrc = true; - killAndFlowOrigin(*Call, *Args[I]); - } else - flowOrigin(*Call, *Args[I]); - } - } - - /// Creates a loan for the storage path of a given declaration reference. - /// This function should be called whenever a DeclRefExpr represents a borrow. - /// \param DRE The declaration reference expression that initiates the borrow. - /// \return The new Loan on success, nullptr otherwise. - const Loan *createLoan(const DeclRefExpr *DRE) { - if (const auto *VD = dyn_cast(DRE->getDecl())) { - AccessPath Path(VD); - // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); - } - return nullptr; - } - - template - void flowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back(FactMgr.createFact( - DestOID, SrcOID, /*KillDest=*/false)); - } - - template - void killAndFlowOrigin(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); - } - - /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. - /// If so, creates a `TestPointFact` and returns true. - bool handleTestPoint(const CXXFunctionalCastExpr *FCE) { - if (!FCE->getType()->isVoidType()) - return false; - - const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); - if (const auto *SL = dyn_cast(SubExpr)) { - llvm::StringRef LiteralValue = SL->getString(); - const std::string Prefix = "__lifetime_test_point_"; - - if (LiteralValue.starts_with(Prefix)) { - StringRef Annotation = LiteralValue.drop_front(Prefix.length()); - CurrentBlockFacts.push_back( - FactMgr.createFact(Annotation)); - return true; - } - } - return false; - } - - void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { - if (!hasOrigin(LHSExpr)) - return; - // Find the underlying variable declaration for the left-hand side. - if (const auto *DRE_LHS = - dyn_cast(LHSExpr->IgnoreParenImpCasts())) { - markUseAsWrite(DRE_LHS); - if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { - // Kill the old loans of the destination origin and flow the new loans - // from the source origin. - killAndFlowOrigin(*VD_LHS, *RHSExpr); - } - } - } - - // A DeclRefExpr will be treated as a use of the referenced decl. It will be - // checked for use-after-free unless it is later marked as being written to - // (e.g. on the left-hand side of an assignment). - void handleUse(const DeclRefExpr *DRE) { - if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact(DRE); - CurrentBlockFacts.push_back(UF); - assert(!UseFacts.contains(DRE)); - UseFacts[DRE] = UF; - } - } - - void markUseAsWrite(const DeclRefExpr *DRE) { - if (!isPointerType(DRE->getType())) - return; - assert(UseFacts.contains(DRE)); - UseFacts[DRE]->markAsWritten(); - } - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector CurrentBlockFacts; - // To distinguish between reads and writes for use-after-free checks, this map - // stores the `UseFact` for each `DeclRefExpr`. We initially identify all - // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use - // corresponding to the left-hand side is updated to be a "write", thereby - // exempting it from the check. - llvm::DenseMap UseFacts; -}; - -// ========================================================================= // -// Generic Dataflow Analysis -// ========================================================================= // - -enum class Direction { Forward, Backward }; - -/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific -/// `Fact`. identified by a lifetime-related event (`Fact`). -/// -/// A `ProgramPoint` has "after" semantics: it represents the location -/// immediately after its corresponding `Fact`. -using ProgramPoint = const Fact *; - -/// A generic, policy-based driver for dataflow analyses. It combines -/// the dataflow runner and the transferer logic into a single class hierarchy. -/// -/// The derived class is expected to provide: -/// - A `Lattice` type. -/// - `StringRef getAnalysisName() const` -/// - `Lattice getInitialState();` The initial state of the analysis. -/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. -/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single -/// lifetime-relevant `Fact` transforms the lattice state. Only overloads -/// for facts relevant to the analysis need to be implemented. -/// -/// \tparam Derived The CRTP derived class that implements the specific -/// analysis. -/// \tparam LatticeType The dataflow lattice used by the analysis. -/// \tparam Dir The direction of the analysis (Forward or Backward). -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -template -class DataflowAnalysis { -public: - using Lattice = LatticeType; - using Base = DataflowAnalysis; - -private: - const CFG &Cfg; - AnalysisDeclContext &AC; - - /// The dataflow state before a basic block is processed. - llvm::DenseMap InStates; - /// The dataflow state after a basic block is processed. - llvm::DenseMap OutStates; - /// The dataflow state at a Program Point. - /// In a forward analysis, this is the state after the Fact at that point has - /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap PerPointStates; - - static constexpr bool isForward() { return Dir == Direction::Forward; } - -protected: - FactManager &AllFacts; - - explicit DataflowAnalysis(const CFG &C, AnalysisDeclContext &AC, - FactManager &F) - : Cfg(C), AC(AC), AllFacts(F) {} - -public: - void run() { - Derived &D = static_cast(*this); - llvm::TimeTraceScope Time(D.getAnalysisName()); - - using Worklist = - std::conditional_t; - Worklist W(Cfg, AC); - - const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); - InStates[Start] = D.getInitialState(); - W.enqueueBlock(Start); - - while (const CFGBlock *B = W.dequeue()) { - Lattice StateIn = *getInState(B); - Lattice StateOut = transferBlock(B, StateIn); - OutStates[B] = StateOut; - for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { - if (!AdjacentB) - continue; - std::optional OldInState = getInState(AdjacentB); - Lattice NewInState = - !OldInState ? StateOut : D.join(*OldInState, StateOut); - // Enqueue the adjacent block if its in-state has changed or if we have - // never seen it. - if (!OldInState || NewInState != *OldInState) { - InStates[AdjacentB] = NewInState; - W.enqueueBlock(AdjacentB); - } - } - } - } - -protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } - - std::optional getInState(const CFGBlock *B) const { - auto It = InStates.find(B); - if (It == InStates.end()) - return std::nullopt; - return It->second; - } - - Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } - - void dump() const { - const Derived *D = static_cast(this); - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << D->getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); - getOutState(&B).dump(llvm::dbgs()); - } - -private: - /// Computes the state at one end of a block by applying all its facts - /// sequentially to a given state from the other end. - Lattice transferBlock(const CFGBlock *Block, Lattice State) { - auto Facts = AllFacts.getFacts(Block); - if constexpr (isForward()) { - for (const Fact *F : Facts) { - State = transferFact(State, F); - PerPointStates[F] = State; - } - } else { - for (const Fact *F : llvm::reverse(Facts)) { - // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; - State = transferFact(State, F); - } - } - return State; - } - - Lattice transferFact(Lattice In, const Fact *F) { - assert(F); - Derived *D = static_cast(this); - switch (F->getKind()) { - case Fact::Kind::Issue: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Expire: - return D->transfer(In, *F->getAs()); - case Fact::Kind::OriginFlow: - return D->transfer(In, *F->getAs()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs()); - case Fact::Kind::Use: - return D->transfer(In, *F->getAs()); - case Fact::Kind::TestPoint: - return D->transfer(In, *F->getAs()); - } - llvm_unreachable("Unknown fact kind"); - } - -public: - Lattice transfer(Lattice In, const IssueFact &) { return In; } - Lattice transfer(Lattice In, const ExpireFact &) { return In; } - Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } - Lattice transfer(Lattice In, const UseFact &) { return In; } - Lattice transfer(Lattice In, const TestPointFact &) { return In; } -}; - -namespace utils { - -/// Computes the union of two ImmutableSets. -template -static llvm::ImmutableSet join(llvm::ImmutableSet A, - llvm::ImmutableSet B, - typename llvm::ImmutableSet::Factory &F) { - if (A.getHeight() < B.getHeight()) - std::swap(A, B); - for (const T &E : B) - A = F.add(A, E); - return A; -} - -/// Describes the strategy for joining two `ImmutableMap` instances, primarily -/// differing in how they handle keys that are unique to one of the maps. -/// -/// A `Symmetric` join is universally correct, while an `Asymmetric` join -/// serves as a performance optimization. The latter is applicable only when the -/// join operation possesses a left identity element, allowing for a more -/// efficient, one-sided merge. -enum class JoinKind { - /// A symmetric join applies the `JoinValues` operation to keys unique to - /// either map, ensuring that values from both maps contribute to the result. - Symmetric, - /// An asymmetric join preserves keys unique to the first map as-is, while - /// applying the `JoinValues` operation only to keys unique to the second map. - Asymmetric, -}; - -/// Computes the key-wise union of two ImmutableMaps. -// TODO(opt): This key-wise join is a performance bottleneck. A more -// efficient merge could be implemented using a Patricia Trie or HAMT -// instead of the current AVL-tree-based ImmutableMap. -template -static llvm::ImmutableMap -join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, - typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, - JoinKind Kind) { - if (A.getHeight() < B.getHeight()) - return join(B, A, F, JoinValues, Kind); - - // For each element in B, join it with the corresponding element in A - // (or with an empty value if it doesn't exist in A). - llvm::ImmutableMap Res = A; - for (const auto &Entry : B) { - const K &Key = Entry.first; - const V &ValB = Entry.second; - Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); - } - if (Kind == JoinKind::Symmetric) { - for (const auto &Entry : A) { - const K &Key = Entry.first; - const V &ValA = Entry.second; - if (!B.contains(Key)) - Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); - } - } - return Res; -} -} // namespace utils - -// ========================================================================= // -// Loan Propagation Analysis -// ========================================================================= // - -/// Represents the dataflow lattice for loan propagation. -/// -/// This lattice tracks which loans each origin may hold at a given program -/// point.The lattice has a finite height: An origin's loan set is bounded by -/// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. -struct LoanPropagationLattice { - /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit LoanPropagationLattice(const OriginLoanMap &S) : Origins(S) {} - LoanPropagationLattice() = default; - - bool operator==(const LoanPropagationLattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const LoanPropagationLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS) const { - OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) - OS << " \n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -/// The analysis that tracks which loans belong to which origins. -class LoanPropagationAnalysis - : public DataflowAnalysis { - OriginLoanMap::Factory &OriginLoanMapFactory; - LoanSet::Factory &LoanSetFactory; - -public: - LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - OriginLoanMap::Factory &OriginLoanMapFactory, - LoanSet::Factory &LoanSetFactory) - : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} - - using Base::transfer; - - StringRef getAnalysisName() const { return "LoanPropagation"; } - - Lattice getInitialState() { return Lattice{}; } - - /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. - Lattice join(Lattice A, Lattice B) { - OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, - [&](const LoanSet *S1, const LoanSet *S2) { - assert((S1 || S2) && "unexpectedly merging 2 empty sets"); - if (!S1) - return *S2; - if (!S2) - return *S1; - return utils::join(*S1, *S2, LoanSetFactory); - }, - // Asymmetric join is a performance win. For origins present only on one - // branch, the loan set can be carried over as-is. - utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); - } - - /// A new loan is issued to the origin. Old loans are erased. - Lattice transfer(Lattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return LoanPropagationLattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); - } - - /// A flow from source to destination. If `KillDest` is true, this replaces - /// the destination's loans with the source's. Otherwise, the source's loans - /// are merged into the destination's. - Lattice transfer(Lattice In, const OriginFlowFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - - LoanSet DestLoans = - F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); - LoanSet SrcLoans = getLoans(In, SrcOID); - LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - - return LoanPropagationLattice( - OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); - } - - LoanSet getLoans(OriginID OID, ProgramPoint P) const { - return getLoans(getState(P), OID); - } - -private: - LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) - return *Loans; - return LoanSetFactory.getEmptySet(); - } -}; - -// ========================================================================= // -// Live Origins Analysis -// ========================================================================= // -// -// A backward dataflow analysis that determines which origins are "live" at each -// program point. An origin is "live" at a program point if there's a potential -// future use of the pointer it represents. Liveness is "generated" by a read of -// origin's loan set (e.g., a `UseFact`) and is "killed" (i.e., it stops being -// live) when its loan set is overwritten (e.g. a OriginFlow killing the -// destination origin). -// -// This information is used for detecting use-after-free errors, as it allows us -// to check if a live origin holds a loan to an object that has already expired. -// ========================================================================= // - -/// Information about why an origin is live at a program point. -struct LivenessInfo { - /// The use that makes the origin live. If liveness is propagated from - /// multiple uses along different paths, this will point to the use appearing - /// earlier in the translation unit. - /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; - /// The kind of liveness of the origin. - /// `Must`: The origin is live on all control-flow paths from the current - /// point to the function's exit (i.e. the current point is dominated by a set - /// of uses). - /// `Maybe`: indicates it is live on some but not all paths. - /// - /// This determines the diagnostic's confidence level. - /// `Must`-be-alive at expiration implies a definite use-after-free, - /// while `Maybe`-be-alive suggests a potential one on some paths. - LivenessKind Kind; - - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} - - bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; - } - bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } - - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); - IDBuilder.Add(Kind); - } -}; - -using LivenessMap = llvm::ImmutableMap; - -/// The dataflow lattice for origin liveness analysis. -/// It tracks which origins are live, why they're live (which UseFact), -/// and the confidence level of that liveness. -struct LivenessLattice { - LivenessMap LiveOrigins; - - LivenessLattice() : LiveOrigins(nullptr) {}; - - explicit LivenessLattice(LivenessMap L) : LiveOrigins(L) {} - - bool operator==(const LivenessLattice &Other) const { - return LiveOrigins == Other.LiveOrigins; - } - - bool operator!=(const LivenessLattice &Other) const { - return !(*this == Other); - } - - void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { - if (LiveOrigins.isEmpty()) - OS << " \n"; - for (const auto &Entry : LiveOrigins) { - OriginID OID = Entry.first; - const LivenessInfo &Info = Entry.second; - OS << " "; - OM.dump(OID, OS); - OS << " is "; - switch (Info.Kind) { - case LivenessKind::Must: - OS << "definitely"; - break; - case LivenessKind::Maybe: - OS << "maybe"; - break; - case LivenessKind::Dead: - llvm_unreachable("liveness kind of live origins should not be dead."); - } - OS << " live at this point\n"; - } - } -}; - -/// The analysis that tracks which origins are live, with granular information -/// about the causing use fact and confidence level. This is a backward -/// analysis. -class LiveOriginAnalysis - : public DataflowAnalysis { - FactManager &FactMgr; - LivenessMap::Factory &Factory; - -public: - LiveOriginAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, - LivenessMap::Factory &SF) - : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} - using DataflowAnalysis::transfer; - - StringRef getAnalysisName() const { return "LiveOrigins"; } - - Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } - - /// Merges two lattices by combining liveness information. - /// When the same origin has different confidence levels, we take the lower - /// one. - Lattice join(Lattice L1, Lattice L2) const { - LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; - }; - auto CombineLivenessKind = [](LivenessKind K1, - LivenessKind K2) -> LivenessKind { - assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); - assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); - // Only return "Must" if both paths are "Must", otherwise Maybe. - if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) - return LivenessKind::Must; - return LivenessKind::Maybe; - }; - auto CombineLivenessInfo = [&](const LivenessInfo *L1, - const LivenessInfo *L2) -> LivenessInfo { - assert((L1 || L2) && "unexpectedly merging 2 empty sets"); - if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); - if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); - }; - return Lattice(utils::join( - L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, - // A symmetric join is required here. If an origin is live on one - // branch but not the other, its confidence must be demoted to `Maybe`. - utils::JoinKind::Symmetric)); - } - - /// A read operation makes the origin live with definite confidence, as it - /// dominates this program point. A write operation kills the liveness of - /// the origin since it overwrites the value. - Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); - // Write kills liveness. - if (UF.isWritten()) - return Lattice(Factory.remove(In.LiveOrigins, OID)); - // Read makes origin live with definite confidence (dominates this point). - return Lattice(Factory.add(In.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } - - /// Issuing a new loan to an origin kills its liveness. - Lattice transfer(Lattice In, const IssueFact &IF) { - return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); - } - - /// An OriginFlow kills the liveness of the destination origin if `KillDest` - /// is true. Otherwise, it propagates liveness from destination to source. - Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); - } - - LivenessMap getLiveOrigins(ProgramPoint P) const { - return getState(P).LiveOrigins; - } - - // Dump liveness values on all test points in the program. - void dump(llvm::raw_ostream &OS, const LifetimeSafetyAnalysis &LSA) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << getAnalysisName() << " results:\n"; - llvm::dbgs() << "==========================================\n"; - for (const auto &Entry : LSA.getTestPoints()) { - OS << "TestPoint: " << Entry.getKey() << "\n"; - getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); - } - } -}; - -// ========================================================================= // -// Lifetime checker and Error reporter -// ========================================================================= // - -/// Struct to store the complete context for a potential lifetime violation. -struct PendingWarning { - SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. - Confidence ConfidenceLevel; -}; - -class LifetimeChecker { -private: - llvm::DenseMap FinalWarningsMap; - LoanPropagationAnalysis &LoanPropagation; - LiveOriginAnalysis &LiveOrigins; - FactManager &FactMgr; - AnalysisDeclContext &ADC; - LifetimeSafetyReporter *Reporter; - -public: - LifetimeChecker(LoanPropagationAnalysis &LPA, LiveOriginAnalysis &LOA, - FactManager &FM, AnalysisDeclContext &ADC, - LifetimeSafetyReporter *Reporter) - : LoanPropagation(LPA), LiveOrigins(LOA), FactMgr(FM), ADC(ADC), - Reporter(Reporter) {} - - void run() { - llvm::TimeTraceScope TimeProfile("LifetimeChecker"); - for (const CFGBlock *B : *ADC.getAnalysis()) - for (const Fact *F : FactMgr.getFacts(B)) - if (const auto *EF = F->getAs()) - checkExpiry(EF); - issuePendingWarnings(); - } - - /// Checks for use-after-free errors when a loan expires. - /// - /// This method examines all live origins at the expiry point and determines - /// if any of them hold the expiring loan. If so, it creates a pending - /// warning with the appropriate confidence level based on the liveness - /// information. The confidence reflects whether the origin is definitely - /// or maybe live at this point. - /// - /// Note: This implementation considers only the confidence of origin - /// liveness. Future enhancements could also consider the confidence of loan - /// propagation (e.g., a loan may only be held on some execution paths). - void checkExpiry(const ExpireFact *EF) { - LoanID ExpiredLoan = EF->getLoanID(); - LivenessMap Origins = LiveOrigins.getLiveOrigins(EF); - Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; - for (auto &[OID, LiveInfo] : Origins) { - LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); - if (!HeldLoans.contains(ExpiredLoan)) - continue; - // Loan is defaulted. - Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); - if (CurConfidence < NewConfidence) { - CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; - } - } - if (!BadUse) - return; - // We have a use-after-free. - Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; - if (LastConf >= CurConfidence) - return; - FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), - /*ConfidenceLevel=*/CurConfidence}; - } - - static Confidence livenessKindToConfidence(LivenessKind K) { - switch (K) { - case LivenessKind::Must: - return Confidence::Definite; - case LivenessKind::Maybe: - return Confidence::Maybe; - case LivenessKind::Dead: - return Confidence::None; - } - llvm_unreachable("unknown liveness kind"); - } - - void issuePendingWarnings() { - if (!Reporter) - return; - for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); - } - } -}; - -// ========================================================================= // -// LifetimeSafetyAnalysis Class Implementation -// ========================================================================= // - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - llvm::BumpPtrAllocator Allocator; - OriginLoanMap::Factory OriginMapFactory{Allocator, /*canonicalize=*/false}; - LoanSet::Factory LoanSetFactory{Allocator, /*canonicalize=*/false}; - LivenessMap::Factory LivenessMapFactory{Allocator, /*canonicalize=*/false}; -}; - -// We need this here for unique_ptr with forward declared class. -LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default; - -LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) - : AC(AC), Reporter(Reporter), Factory(std::make_unique()), - FactMgr(std::make_unique()) {} - -void LifetimeSafetyAnalysis::run() { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - - const CFG &Cfg = *AC.getCFG(); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - - FactGenerator FactGen(*FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - /// 3. Collapse ExpireFacts belonging to same source location into a single - /// Fact. - LoanPropagation = std::make_unique( - Cfg, AC, *FactMgr, Factory->OriginMapFactory, Factory->LoanSetFactory); - LoanPropagation->run(); - - LiveOrigins = std::make_unique( - Cfg, AC, *FactMgr, Factory->LivenessMapFactory); - LiveOrigins->run(); - DEBUG_WITH_TYPE("LiveOrigins", LiveOrigins->dump(llvm::dbgs(), *this)); - - LifetimeChecker Checker(*LoanPropagation, *LiveOrigins, *FactMgr, AC, - Reporter); - Checker.run(); -} - -LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID, - ProgramPoint PP) const { - assert(LoanPropagation && "Analysis has not been run."); - return LoanPropagation->getLoans(OID, PP); -} - -std::optional -LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const { - assert(FactMgr && "FactManager not initialized"); - // This assumes the OriginManager's `get` can find an existing origin. - // We might need a `find` method on OriginManager to avoid `getOrCreate` logic - // in a const-query context if that becomes an issue. - return FactMgr->getOriginMgr().get(*D); -} - -std::vector -LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const { - assert(FactMgr && "FactManager not initialized"); - std::vector Result; - for (const Loan &L : FactMgr->getLoanMgr().getLoans()) - if (L.Path.D == VD) - Result.push_back(L.ID); - return Result; -} - -std::vector> -LifetimeSafetyAnalysis::getLiveOriginsAtPoint(ProgramPoint PP) const { - assert(LiveOrigins && "LiveOriginAnalysis has not been run."); - std::vector> Result; - for (auto &[OID, Info] : LiveOrigins->getLiveOrigins(PP)) - Result.push_back({OID, Info.Kind}); - return Result; -} - -llvm::StringMap LifetimeSafetyAnalysis::getTestPoints() const { - assert(FactMgr && "FactManager not initialized"); - llvm::StringMap AnnotationToPointMap; - for (const CFGBlock *Block : *AC.getCFG()) { - for (const Fact *F : FactMgr->getFacts(Block)) { - if (const auto *TPF = F->getAs()) { - StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && - "more than one test points with the same name"); - AnnotationToPointMap[PointName] = F; - } - } - } - return AnnotationToPointMap; -} -} // namespace internal - -void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, - LifetimeSafetyReporter *Reporter) { - internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); - Analysis.run(); -} -} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000000000..5874e8405baf6 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,17 @@ +add_clang_library(clangAnalysisLifetimeSafety + Checker.cpp + Facts.cpp + FactsGenerator.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp + LiveOrigins.cpp + Loans.cpp + LoanPropagation.cpp + Origins.cpp + + LINK_LIBS + clangAST + clangAnalysis + clangBasic + ) + diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp new file mode 100644 index 0000000000000..c443c3a5d4f9b --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -0,0 +1,130 @@ +//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LifetimeChecker, which detects use-after-free +// errors by checking if live origins hold loans that have expired. +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); +} + +namespace { + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. + const Expr *UseExpr; // Where the origin holding this loan was used. + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap FinalWarningsMap; + const LoanPropagationAnalysis &LoanPropagation; + const LiveOriginsAnalysis &LiveOrigins; + const FactManager &FactMgr; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, + AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) + : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), + Reporter(Reporter) { + for (const CFGBlock *B : *ADC.getAnalysis()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs()) + checkExpiry(EF); + issuePendingWarnings(); + } + + /// Checks for use-after-free errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. + /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); + Confidence CurConfidence = Confidence::None; + const UseFact *BadUse = nullptr; + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BadUse = LiveInfo.CausingUseFact; + } + } + if (!BadUse) + return; + // We have a use-after-free. + Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*UseExpr=*/BadUse->getUseExpr(), + /*ConfidenceLevel=*/CurConfidence}; + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan &L = FactMgr.getLoanMgr().getLoan(LID); + const Expr *IssueExpr = L.IssueExpr; + Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, + Warning.ExpiryLoc, Warning.ConfidenceLevel); + } + } +}; +} // namespace + +void runLifetimeChecker(const LoanPropagationAnalysis &LP, + const LiveOriginsAnalysis &LO, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h new file mode 100644 index 0000000000000..2f7bcb6e5dc81 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -0,0 +1,188 @@ +//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic, policy-based driver for dataflow analyses. +// It provides a flexible framework that combines the dataflow runner and +// transfer functions, allowing derived classes to implement specific analyses +// by defining their lattice, join, and transfer functions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang::lifetimes::internal { + +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state. Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap OutStates; + /// The dataflow state at a Program Point. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::DenseMap PerPointStates; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &FactMgr; + + explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, + FactManager &FactMgr) + : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} + +public: + void run() { + Derived &D = static_cast(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + using Worklist = + std::conditional_t; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + + std::optional getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = FactMgr.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PerPointStates[F] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. + PerPointStates[F] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs()); + case Fact::Kind::ReturnOfOrigin: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; +} // namespace clang::lifetimes::internal +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp new file mode 100644 index 0000000000000..1aea64f864367 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -0,0 +1,102 @@ +//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" + +namespace clang::lifetimes::internal { + +void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast(K) << ")\n"; +} + +void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const { + OS << "Issue ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; +} + +void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const { + OS << "Expire ("; + LM.getLoan(getLoanID()).dump(OS); + OS << ")\n"; +} + +void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; +} + +void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "ReturnOfOrigin ("; + OM.dump(getReturnedOriginID(), OS); + OS << ")\n"; +} + +void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "Use ("; + OM.dump(getUsedOrigin(OM), OS); + OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; +} + +void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; +} + +llvm::StringMap FactManager::getTestPoints() const { + llvm::StringMap AnnotationToPointMap; + for (const CFGBlock *Block : BlockToFactsMap.keys()) { + for (const Fact *F : getFacts(Block)) { + if (const auto *TPF = F->getAs()) { + StringRef PointName = TPF->getAnnotation(); + assert(AnnotationToPointMap.find(PointName) == + AnnotationToPointMap.end() && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} + +void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + } + llvm::dbgs() << " End of Block\n"; + } +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp new file mode 100644 index 0000000000000..485308f5b2c3b --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -0,0 +1,348 @@ +//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static bool isGslPointerType(QualType QT) { + if (const auto *RD = QT->getAsCXXRecordDecl()) { + // We need to check the template definition for specializations. + if (auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + return false; +} + +static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); +} +// Check if a type has an origin. +static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); +} + +static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); +} + +/// Creates a loan for the storage path of a given declaration reference. +/// This function should be called whenever a DeclRefExpr represents a borrow. +/// \param DRE The declaration reference expression that initiates the borrow. +/// \return The new Loan on success, nullptr otherwise. +static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return &FactMgr.getLoanMgr().addLoan(Path, DRE); + } + return nullptr; +} + +void FactsGenerator::run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional CS = Element.getAs()) + Visit(CS->getStmt()); + else if (std::optional DtorOpt = + Element.getAs()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } +} + +void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); +} + +void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. + // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(FactMgr, DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact(L->ID, ExprOID)); + } + } +} + +void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } +} + +void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } +} + +void FactsGenerator::VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); +} + +void FactsGenerator::VisitCXXNullPtrLiteralExpr( + const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); +} + +void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); +} + +void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } +} + +void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back(FactMgr.createFact(OID)); + } + } +} + +void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); +} + +void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. + if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); +} + +void FactsGenerator::VisitCXXFunctionalCastExpr( + const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); +} + +void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); +} + +void FactsGenerator::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); +} + +void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact( + L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + } +} + +void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); +} + +/// Checks if a call-like expression creates a borrow by passing a value to a +/// reference parameter, creating an IssueFact if it does. +/// \param IsGslConstruction True if this is a GSL construction where all +/// argument origins should flow to the returned origin. +void FactsGenerator::handleFunctionCall(const Expr *Call, + const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ? PVD->hasAttr() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } +} + +/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. +/// If so, creates a `TestPointFact` and returns true. +bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact(Annotation)); + return true; + } + } + return false; +} + +void FactsGenerator::handleAssignment(const Expr *LHSExpr, + const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } +} + +// A DeclRefExpr will be treated as a use of the referenced decl. It will be +// checked for use-after-free unless it is later marked as being written to +// (e.g. on the left-hand side of an assignment). +void FactsGenerator::handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact(DRE); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } +} + +void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp similarity index 94% rename from clang/lib/Analysis/LifetimeAnnotations.cpp rename to clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp index e79122475625e..ad61a42c0eaeb 100644 --- a/clang/lib/Analysis/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -13,8 +13,7 @@ #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" -namespace clang { -namespace lifetimes { +namespace clang::lifetimes { const FunctionDecl * getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { @@ -71,5 +70,4 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } -} // namespace lifetimes -} // namespace clang +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp new file mode 100644 index 0000000000000..00c7ed90503e7 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -0,0 +1,77 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the main LifetimeSafetyAnalysis class, which coordinates +// the various components (fact generation, loan propagation, live origins +// analysis, and checking) to detect lifetime safety violations in C++ code. +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang::lifetimes { +namespace internal { + +LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) + : AC(AC), Reporter(Reporter) {} + +void LifetimeSafetyAnalysis::run() { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); + + const CFG &Cfg = *AC.getCFG(); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + + FactsGenerator FactGen(FactMgr, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + /// 3. Collapse ExpireFacts belonging to same source location into a single + /// Fact. + LoanPropagation = std::make_unique( + Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory); + + LiveOrigins = std::make_unique( + Cfg, AC, FactMgr, Factory.LivenessMapFactory); + DEBUG_WITH_TYPE("LiveOrigins", + LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints())); + + runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter); +} +} // namespace internal + +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter) { + internal::LifetimeSafetyAnalysis Analysis(AC, Reporter); + Analysis.run(); +} +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp new file mode 100644 index 0000000000000..cddb3f3ce4c1c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -0,0 +1,180 @@ +//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "Dataflow.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang::lifetimes::internal { +namespace { + +/// The dataflow lattice for origin liveness analysis. +/// It tracks which origins are live, why they're live (which UseFact), +/// and the confidence level of that liveness. +struct Lattice { + LivenessMap LiveOrigins; + + Lattice() : LiveOrigins(nullptr) {}; + + explicit Lattice(LivenessMap L) : LiveOrigins(L) {} + + bool operator==(const Lattice &Other) const { + return LiveOrigins == Other.LiveOrigins; + } + + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS, const OriginManager &OM) const { + if (LiveOrigins.isEmpty()) + OS << " \n"; + for (const auto &Entry : LiveOrigins) { + OriginID OID = Entry.first; + const LivenessInfo &Info = Entry.second; + OS << " "; + OM.dump(OID, OS); + OS << " is "; + switch (Info.Kind) { + case LivenessKind::Must: + OS << "definitely"; + break; + case LivenessKind::Maybe: + OS << "maybe"; + break; + case LivenessKind::Dead: + llvm_unreachable("liveness kind of live origins should not be dead."); + } + OS << " live at this point\n"; + } + } +}; + +/// The analysis that tracks which origins are live, with granular information +/// about the causing use fact and confidence level. This is a backward +/// analysis. +class AnalysisImpl + : public DataflowAnalysis { + +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF) + : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {} + using DataflowAnalysis::transfer; + + StringRef getAnalysisName() const { return "LiveOrigins"; } + + Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); } + + /// Merges two lattices by combining liveness information. + /// When the same origin has different confidence levels, we take the lower + /// one. + Lattice join(Lattice L1, Lattice L2) const { + LivenessMap Merged = L1.LiveOrigins; + // Take the earliest UseFact to make the join hermetic and commutative. + auto CombineUseFact = [](const UseFact &A, + const UseFact &B) -> const UseFact * { + return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A + : &B; + }; + auto CombineLivenessKind = [](LivenessKind K1, + LivenessKind K2) -> LivenessKind { + assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead."); + assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead."); + // Only return "Must" if both paths are "Must", otherwise Maybe. + if (K1 == LivenessKind::Must && K2 == LivenessKind::Must) + return LivenessKind::Must; + return LivenessKind::Maybe; + }; + auto CombineLivenessInfo = [&](const LivenessInfo *L1, + const LivenessInfo *L2) -> LivenessInfo { + assert((L1 || L2) && "unexpectedly merging 2 empty sets"); + if (!L1) + return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + if (!L2) + return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo( + CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), + CombineLivenessKind(L1->Kind, L2->Kind)); + }; + return Lattice(utils::join( + L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, + // A symmetric join is required here. If an origin is live on one + // branch but not the other, its confidence must be demoted to `Maybe`. + utils::JoinKind::Symmetric)); + } + + /// A read operation makes the origin live with definite confidence, as it + /// dominates this program point. A write operation kills the liveness of + /// the origin since it overwrites the value. + Lattice transfer(Lattice In, const UseFact &UF) { + OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + // Write kills liveness. + if (UF.isWritten()) + return Lattice(Factory.remove(In.LiveOrigins, OID)); + // Read makes origin live with definite confidence (dominates this point). + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); + } + + /// Issuing a new loan to an origin kills its liveness. + Lattice transfer(Lattice In, const IssueFact &IF) { + return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); + } + + /// An OriginFlow kills the liveness of the destination origin if `KillDest` + /// is true. Otherwise, it propagates liveness from destination to source. + Lattice transfer(Lattice In, const OriginFlowFact &OF) { + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + } + + LivenessMap getLiveOriginsAt(ProgramPoint P) const { + return getState(P).LiveOrigins; + } + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + for (const auto &Entry : TestPoints) { + OS << "TestPoint: " << Entry.getKey() << "\n"; + getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr()); + } + } + +private: + FactManager &FactMgr; + LivenessMap::Factory &Factory; +}; +} // namespace + +// PImpl wrapper implementation +class LiveOriginsAnalysis::Impl : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, + FactManager &F, + LivenessMap::Factory &SF) + : PImpl(std::make_unique(C, AC, F, SF)) { + PImpl->run(); +} + +LiveOriginsAnalysis::~LiveOriginsAnalysis() = default; + +LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const { + return PImpl->getLiveOriginsAt(P); +} + +void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const { + PImpl->dump(OS, TestPoints); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp new file mode 100644 index 0000000000000..387097e705f94 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -0,0 +1,138 @@ +//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "Dataflow.h" +#include + +namespace clang::lifetimes::internal { +namespace { +/// Represents the dataflow lattice for loan propagation. +/// +/// This lattice tracks which loans each origin may hold at a given program +/// point.The lattice has a finite height: An origin's loan set is bounded by +/// the total number of loans in the function. +/// TODO(opt): To reduce the lattice size, propagate origins of declarations, +/// not expressions, because expressions are not visible across blocks. +struct Lattice { + /// The map from an origin to the set of loans it contains. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit Lattice(const OriginLoanMap &S) : Origins(S) {} + Lattice() = default; + + bool operator==(const Lattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const Lattice &Other) const { return !(*this == Other); } + + void dump(llvm::raw_ostream &OS) const { + OS << "LoanPropagationLattice State:\n"; + if (Origins.isEmpty()) + OS << " \n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +class AnalysisImpl + : public DataflowAnalysis { +public: + AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), + LoanSetFactory(LoanSetFactory) {} + + using Base::transfer; + + StringRef getAnalysisName() const { return "LoanPropagation"; } + + Lattice getInitialState() { return Lattice{}; } + + /// Merges two lattices by taking the union of loans for each origin. + // TODO(opt): Keep the state small by removing origins which become dead. + Lattice join(Lattice A, Lattice B) { + OriginLoanMap JoinedOrigins = utils::join( + A.Origins, B.Origins, OriginLoanMapFactory, + [&](const LoanSet *S1, const LoanSet *S2) { + assert((S1 || S2) && "unexpectedly merging 2 empty sets"); + if (!S1) + return *S2; + if (!S2) + return *S1; + return utils::join(*S1, *S2, LoanSetFactory); + }, + // Asymmetric join is a performance win. For origins present only on one + // branch, the loan set can be carried over as-is. + utils::JoinKind::Asymmetric); + return Lattice(JoinedOrigins); + } + + /// A new loan is issued to the origin. Old loans are erased. + Lattice transfer(Lattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return Lattice(OriginLoanMapFactory.add( + In.Origins, OID, + LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + } + + /// A flow from source to destination. If `KillDest` is true, this replaces + /// the destination's loans with the source's. Otherwise, the source's loans + /// are merged into the destination's. + Lattice transfer(Lattice In, const OriginFlowFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + + LoanSet DestLoans = + F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID); + LoanSet SrcLoans = getLoans(In, SrcOID); + LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); + + return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + } + + LoanSet getLoans(OriginID OID, ProgramPoint P) const { + return getLoans(getState(P), OID); + } + +private: + LoanSet getLoans(Lattice L, OriginID OID) const { + if (auto *Loans = L.Origins.lookup(OID)) + return *Loans; + return LoanSetFactory.getEmptySet(); + } + + OriginLoanMap::Factory &OriginLoanMapFactory; + LoanSet::Factory &LoanSetFactory; +}; +} // namespace + +class LoanPropagationAnalysis::Impl final : public AnalysisImpl { + using AnalysisImpl::AnalysisImpl; +}; + +LoanPropagationAnalysis::LoanPropagationAnalysis( + const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory) + : PImpl(std::make_unique(C, AC, F, OriginLoanMapFactory, + LoanSetFactory)) { + PImpl->run(); +} + +LoanPropagationAnalysis::~LoanPropagationAnalysis() = default; + +LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const { + return PImpl->getLoans(OID, P); +} +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp new file mode 100644 index 0000000000000..2c85a3c6083f3 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp @@ -0,0 +1,18 @@ +//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" + +namespace clang::lifetimes::internal { + +void Loan::dump(llvm::raw_ostream &OS) const { + OS << ID << " (Path: "; + OS << Path.D->getNameAsString() << ")"; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp new file mode 100644 index 0000000000000..ea51a75324e06 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -0,0 +1,89 @@ +//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" + +namespace clang::lifetimes::internal { + +void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); +} + +// TODO: Mark this method as const once we remove the call to getOrCreate. +OriginID OriginManager::get(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); +} + +OriginID OriginManager::get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; +} + +OriginID OriginManager::getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; +} + +const Origin &OriginManager::getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; +} + +OriginID OriginManager::getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 7fd88a9ca73d6..4c1fe60dd2db9 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -31,7 +31,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 20155d94900ee..9da50ecf1823c 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -119,6 +119,7 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e8a7ad3bd355a..8aebf53c0e76f 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,7 +10,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index d0acb27f6a360..57a95fc39dca4 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -17,7 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" -#include "clang/Analysis/Analyses/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 169b2d2d5e9b4..0c051847f4d47 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Testing/TestAST.h" @@ -63,7 +63,7 @@ class LifetimeTestRunner { Analysis = std::make_unique(*AnalysisCtx, nullptr); Analysis->run(); - AnnotationToPointMap = Analysis->getTestPoints(); + AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); } LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } @@ -98,10 +98,11 @@ class LifetimeTestHelper { auto *VD = findDecl(VarName); if (!VD) return std::nullopt; - auto OID = Analysis.getOriginIDForDecl(VD); - if (!OID) - ADD_FAILURE() << "Origin for '" << VarName << "' not found."; - return OID; + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` + // logic in a const-query context if that becomes an issue. + return const_cast(Analysis.getFactManager().getOriginMgr()) + .get(*VD); } std::vector getLoansForVar(llvm::StringRef VarName) { @@ -110,7 +111,10 @@ class LifetimeTestHelper { ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; return {}; } - std::vector LID = Analysis.getLoanIDForVar(VD); + std::vector LID; + for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) + if (L.Path.D == VD) + LID.push_back(L.ID); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {}; @@ -123,7 +127,7 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLoansAtPoint(OID, PP); + return Analysis.getLoanPropagation().getLoans(OID, PP); } std::optional>> @@ -131,7 +135,10 @@ class LifetimeTestHelper { ProgramPoint PP = Runner.getProgramPoint(Annotation); if (!PP) return std::nullopt; - return Analysis.getLiveOriginsAtPoint(PP); + std::vector> Result; + for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) + Result.push_back({OID, Info.Kind}); + return Result; } private: diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index ac86f43b2048e..b737d5b227340 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,7 +21,9 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Signals.h" #include #include #include From 5119f73deb4ab98b592aad31f2dad4bea39369c3 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 11 Oct 2025 18:45:33 -0400 Subject: [PATCH 22/35] [LifetimeSafety] Fix unittest shared build. NFC --- clang/unittests/Analysis/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 52e7d2854633d..e0acf436b37c7 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -11,6 +11,7 @@ add_clang_unittest(ClangAnalysisTests clangAST clangASTMatchers clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangFrontend clangLex From e055ca582f15e5f858a1d365308e09de3e2f918e Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 14 Oct 2025 16:26:44 +0000 Subject: [PATCH 23/35] [LifetimeSafety] Fix a crash caused by nullptr to llvm::isa --- clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 485308f5b2c3b..9b68de107e314 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -9,9 +9,11 @@ #include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/TimeProfiler.h" namespace clang::lifetimes::internal { +using llvm::isa_and_present; static bool isGslPointerType(QualType QT) { if (const auto *RD = QT->getAsCXXRecordDecl()) { @@ -108,7 +110,7 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { // Specifically for conversion operators, // like `std::string_view p = std::string{};` if (isGslPointerType(MCE->getType()) && - isa(MCE->getCalleeDecl())) { + isa_and_present(MCE->getCalleeDecl())) { // The argument is the implicit object itself. handleFunctionCall(MCE, MCE->getMethodDecl(), {MCE->getImplicitObjectArgument()}, From 288479518c8a31b7ebc9226bf972e91de124d3b5 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 4 Nov 2025 12:53:58 -0500 Subject: [PATCH 24/35] [LifetimeSafety] Fix Python path for Windows compatibility (#166291) Fix Python virtual environment paths for Windows in the Lifetime Safety Analysis benchmark ### What changed? - Added conditional path setting for the Python executable in the virtual environment based on the platform - For Windows, use `Scripts/python` path - For other platforms, use `bin/python` path - Updated the commands that use the Python virtual environment to use the platform-specific path ### How to test? `ninja benchmark_lifetime_safety_analysis` Fixes #166143 --- clang/test/Analysis/LifetimeSafety/CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt index ce37a29655668..2f9c2ac247497 100644 --- a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt +++ b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt @@ -15,6 +15,13 @@ set(LIFETIME_BENCHMARK_REQUIREMENTS set(LIFETIME_BENCHMARK_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark_results") +if(WIN32) + set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE + "${LIFETIME_BENCHMARK_VENV_DIR}/Scripts/python") +else() + set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE + "${LIFETIME_BENCHMARK_VENV_DIR}/bin/python") +endif() if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS}) @@ -22,7 +29,7 @@ if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREME add_custom_command( OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR} - COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} + COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS} COMMENT "Creating Python virtual environment and installing dependencies for benchmark..." ) @@ -32,7 +39,7 @@ if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREME # Main benchmark target add_custom_target(benchmark_lifetime_safety_analysis - COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT} + COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} ${LIFETIME_BENCHMARK_SCRIPT} --clang-binary ${LLVM_BINARY_DIR}/bin/clang --output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR} From ded247627a9a2384e0c619e85496666b3969695b Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 6 Nov 2025 22:02:39 -0500 Subject: [PATCH 25/35] [LifetimeSafety] Optimize loan propagation by separating persistent and block-local origins (#165789) ## Summary Optimizes the lifetime analysis loan propagation by preventing block-local origins from participating in expensive join operations at block boundaries. ## Problem The lifetime analysis currently performs join operations on all origins at every block boundary. However, many origins are block-local: they exist only to propagate loans between persistent origins across temporary declarations and expressions within a single block. Including them in joins is unnecessary and expensive. ## Solution This PR classifies origins into two categories: - **Persistent origins**: referenced in multiple basic blocks, must participate in joins - **Block-local origins**: confined to a single block, can be discarded at block boundaries ### Implementation 1. **Pre-pass** (`computePersistentOrigins`): Analyzes all facts in the CFG to identify which origins appear in multiple blocks 2. **Split lattice**: Maintains two separate `OriginLoanMap`s: - `PersistentOrigins`: participates in join operations - `BlockLocalOrigins`: used during block execution, discarded at boundaries 3. **Optimized join**: Only merges persistent origins; block-local map is reset to empty ### Benefits - Significantly reduces join complexity, especially in functions with many temporary locals - Performance scales with the ratio of temporary to persistent origins - Correctness preserved: block-local loan propagation still works within blocks Fixes: https://github.com/llvm/llvm-project/issues/165780 Fixes: https://github.com/llvm/llvm-project/issues/164625. It brings down regression from **150% to 2%**. --- --- .../Analysis/Analyses/LifetimeSafety/Facts.h | 9 +- .../Analyses/LifetimeSafety/Origins.h | 2 + clang/lib/Analysis/LifetimeSafety/Facts.cpp | 2 +- .../LifetimeSafety/FactsGenerator.cpp | 2 +- .../Analysis/LifetimeSafety/LiveOrigins.cpp | 2 +- .../LifetimeSafety/LoanPropagation.cpp | 135 +++++++++++++++--- .../unittests/Analysis/LifetimeSafetyTest.cpp | 9 +- 7 files changed, 132 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h index 6a90aeb01e638..063cb5c2d42ab 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -144,6 +144,7 @@ class ReturnOfOriginFact : public Fact { class UseFact : public Fact { const Expr *UseExpr; + OriginID OID; // True if this use is a write operation (e.g., left-hand side of assignment). // Write operations are exempted from use-after-free checks. bool IsWritten = false; @@ -151,12 +152,10 @@ class UseFact : public Fact { public: static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } - UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {} + UseFact(const Expr *UseExpr, OriginManager &OM) + : Fact(Kind::Use), UseExpr(UseExpr), OID(OM.get(*UseExpr)) {} - OriginID getUsedOrigin(const OriginManager &OM) const { - // TODO: Remove const cast and make OriginManager::get as const. - return const_cast(OM).get(*UseExpr); - } + OriginID getUsedOrigin() const { return OID; } const Expr *getUseExpr() const { return UseExpr; } void markAsWritten() { IsWritten = true; } bool isWritten() const { return IsWritten; } diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h index ba138b078b379..56b9010f41fa2 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -74,6 +74,8 @@ class OriginManager { OriginID getOrCreate(const ValueDecl &D); + unsigned getNumOrigins() const { return NextOriginID.Value; } + void dump(OriginID OID, llvm::raw_ostream &OS) const; private: diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index 1aea64f864367..ecde390cd6ca3 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -53,7 +53,7 @@ void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, const OriginManager &OM) const { OS << "Use ("; - OM.dump(getUsedOrigin(OM), OS); + OM.dump(getUsedOrigin(), OS); OS << ", " << (isWritten() ? "Write" : "Read") << ")\n"; } diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 9b68de107e314..bec8e1dabb0b5 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -333,7 +333,7 @@ void FactsGenerator::handleAssignment(const Expr *LHSExpr, // (e.g. on the left-hand side of an assignment). void FactsGenerator::handleUse(const DeclRefExpr *DRE) { if (isPointerType(DRE->getType())) { - UseFact *UF = FactMgr.createFact(DRE); + UseFact *UF = FactMgr.createFact(DRE, FactMgr.getOriginMgr()); CurrentBlockFacts.push_back(UF); assert(!UseFacts.contains(DRE)); UseFacts[DRE] = UF; diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp index cddb3f3ce4c1c..59f594e50fb46 100644 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -111,7 +111,7 @@ class AnalysisImpl /// dominates this program point. A write operation kills the liveness of /// the origin since it overwrites the value. Lattice transfer(Lattice In, const UseFact &UF) { - OriginID OID = UF.getUsedOrigin(FactMgr.getOriginMgr()); + OriginID OID = UF.getUsedOrigin(); // Write kills liveness. if (UF.isWritten()) return Lattice(Factory.remove(In.LiveOrigins, OID)); diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp index 387097e705f94..0e6c194123df8 100644 --- a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -5,36 +5,114 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" -#include "Dataflow.h" +#include #include +#include "Dataflow.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/raw_ostream.h" + namespace clang::lifetimes::internal { + +// Prepass to find persistent origins. An origin is persistent if it is +// referenced in more than one basic block. +static llvm::BitVector computePersistentOrigins(const FactManager &FactMgr, + const CFG &C) { + llvm::TimeTraceScope("ComputePersistentOrigins"); + unsigned NumOrigins = FactMgr.getOriginMgr().getNumOrigins(); + llvm::BitVector PersistentOrigins(NumOrigins); + + llvm::SmallVector OriginToFirstSeenBlock(NumOrigins, + nullptr); + for (const CFGBlock *B : C) { + for (const Fact *F : FactMgr.getFacts(B)) { + auto CheckOrigin = [&](OriginID OID) { + if (PersistentOrigins.test(OID.Value)) + return; + auto &FirstSeenBlock = OriginToFirstSeenBlock[OID.Value]; + if (FirstSeenBlock == nullptr) + FirstSeenBlock = B; + if (FirstSeenBlock != B) { + // We saw this origin in more than one block. + PersistentOrigins.set(OID.Value); + } + }; + + switch (F->getKind()) { + case Fact::Kind::Issue: + CheckOrigin(F->getAs()->getOriginID()); + break; + case Fact::Kind::OriginFlow: { + const auto *OF = F->getAs(); + CheckOrigin(OF->getDestOriginID()); + CheckOrigin(OF->getSrcOriginID()); + break; + } + case Fact::Kind::ReturnOfOrigin: + CheckOrigin(F->getAs()->getReturnedOriginID()); + break; + case Fact::Kind::Use: + CheckOrigin(F->getAs()->getUsedOrigin()); + break; + case Fact::Kind::Expire: + case Fact::Kind::TestPoint: + break; + } + } + } + return PersistentOrigins; +} + namespace { + /// Represents the dataflow lattice for loan propagation. /// /// This lattice tracks which loans each origin may hold at a given program /// point.The lattice has a finite height: An origin's loan set is bounded by /// the total number of loans in the function. -/// TODO(opt): To reduce the lattice size, propagate origins of declarations, -/// not expressions, because expressions are not visible across blocks. struct Lattice { /// The map from an origin to the set of loans it contains. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit Lattice(const OriginLoanMap &S) : Origins(S) {} + /// Origins that appear in multiple blocks. Participates in join operations. + OriginLoanMap PersistentOrigins = OriginLoanMap(nullptr); + /// Origins confined to a single block. Discarded at block boundaries. + OriginLoanMap BlockLocalOrigins = OriginLoanMap(nullptr); + + explicit Lattice(const OriginLoanMap &Persistent, + const OriginLoanMap &BlockLocal) + : PersistentOrigins(Persistent), BlockLocalOrigins(BlockLocal) {} Lattice() = default; bool operator==(const Lattice &Other) const { - return Origins == Other.Origins; + return PersistentOrigins == Other.PersistentOrigins && + BlockLocalOrigins == Other.BlockLocalOrigins; } bool operator!=(const Lattice &Other) const { return !(*this == Other); } void dump(llvm::raw_ostream &OS) const { OS << "LoanPropagationLattice State:\n"; - if (Origins.isEmpty()) + OS << " Persistent Origins:\n"; + if (PersistentOrigins.isEmpty()) OS << " \n"; - for (const auto &Entry : Origins) { + for (const auto &Entry : PersistentOrigins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + OS << " Block-Local Origins:\n"; + if (BlockLocalOrigins.isEmpty()) + OS << " \n"; + for (const auto &Entry : BlockLocalOrigins) { if (Entry.second.isEmpty()) OS << " Origin " << Entry.first << " contains no loans\n"; for (const LoanID &LID : Entry.second) @@ -50,7 +128,8 @@ class AnalysisImpl OriginLoanMap::Factory &OriginLoanMapFactory, LoanSet::Factory &LoanSetFactory) : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory), - LoanSetFactory(LoanSetFactory) {} + LoanSetFactory(LoanSetFactory), + PersistentOrigins(computePersistentOrigins(F, C)) {} using Base::transfer; @@ -59,10 +138,10 @@ class AnalysisImpl Lattice getInitialState() { return Lattice{}; } /// Merges two lattices by taking the union of loans for each origin. - // TODO(opt): Keep the state small by removing origins which become dead. + /// Only persistent origins are joined; block-local origins are discarded. Lattice join(Lattice A, Lattice B) { OriginLoanMap JoinedOrigins = utils::join( - A.Origins, B.Origins, OriginLoanMapFactory, + A.PersistentOrigins, B.PersistentOrigins, OriginLoanMapFactory, [&](const LoanSet *S1, const LoanSet *S2) { assert((S1 || S2) && "unexpectedly merging 2 empty sets"); if (!S1) @@ -74,16 +153,15 @@ class AnalysisImpl // Asymmetric join is a performance win. For origins present only on one // branch, the loan set can be carried over as-is. utils::JoinKind::Asymmetric); - return Lattice(JoinedOrigins); + return Lattice(JoinedOrigins, OriginLoanMapFactory.getEmptyMap()); } /// A new loan is issued to the origin. Old loans are erased. Lattice transfer(Lattice In, const IssueFact &F) { OriginID OID = F.getOriginID(); LoanID LID = F.getLoanID(); - return Lattice(OriginLoanMapFactory.add( - In.Origins, OID, - LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID))); + LoanSet NewLoans = LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID); + return setLoans(In, OID, NewLoans); } /// A flow from source to destination. If `KillDest` is true, this replaces @@ -98,7 +176,7 @@ class AnalysisImpl LoanSet SrcLoans = getLoans(In, SrcOID); LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory); - return Lattice(OriginLoanMapFactory.add(In.Origins, DestOID, MergedLoans)); + return setLoans(In, DestOID, MergedLoans); } LoanSet getLoans(OriginID OID, ProgramPoint P) const { @@ -106,14 +184,33 @@ class AnalysisImpl } private: + /// Returns true if the origin is persistent (referenced in multiple blocks). + bool isPersistent(OriginID OID) const { + return PersistentOrigins.test(OID.Value); + } + + Lattice setLoans(Lattice L, OriginID OID, LoanSet Loans) { + if (isPersistent(OID)) + return Lattice(OriginLoanMapFactory.add(L.PersistentOrigins, OID, Loans), + L.BlockLocalOrigins); + return Lattice(L.PersistentOrigins, + OriginLoanMapFactory.add(L.BlockLocalOrigins, OID, Loans)); + } + LoanSet getLoans(Lattice L, OriginID OID) const { - if (auto *Loans = L.Origins.lookup(OID)) + const OriginLoanMap *Map = + isPersistent(OID) ? &L.PersistentOrigins : &L.BlockLocalOrigins; + if (auto *Loans = Map->lookup(OID)) return *Loans; return LoanSetFactory.getEmptySet(); } OriginLoanMap::Factory &OriginLoanMapFactory; LoanSet::Factory &LoanSetFactory; + /// Boolean vector indexed by origin ID. If true, the origin appears in + /// multiple basic blocks and must participate in join operations. If false, + /// the origin is block-local and can be discarded at block boundaries. + llvm::BitVector PersistentOrigins; }; } // namespace diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 0c051847f4d47..f27b56eb518aa 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -529,7 +529,8 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) { MyObj* temp = p1; p1 = p2; p2 = p3; - p3 = temp; + p3 = temp;B + POINT(in_loop); } POINT(after_loop); } @@ -543,7 +544,11 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) { EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); EXPECT_THAT(Origin("p2"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); EXPECT_THAT(Origin("p3"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); - EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + + EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "in_loop")); + // 'temp' is a block-local origin and it's loans are not tracked outside the + // block. + EXPECT_THAT(Origin("temp"), HasLoansTo({}, "after_loop")); } TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) { From f3a92d66842bdd3717556dc684de783d9dc976b9 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 6 Nov 2025 22:07:56 -0500 Subject: [PATCH 26/35] [LifetimeSafety] Fix typo which breaks the test Fixes test failure introduced by a typo in https://github.com/llvm/llvm-project/pull/165789 --- clang/unittests/Analysis/LifetimeSafetyTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index f27b56eb518aa..34af476843c0d 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -529,7 +529,7 @@ TEST_F(LifetimeAnalysisTest, PointersInACycle) { MyObj* temp = p1; p1 = p2; p2 = p3; - p3 = temp;B + p3 = temp; POINT(in_loop); } POINT(after_loop); From 10492e4b511d1f7274fb466aee041a622544c9e9 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 6 Nov 2025 23:27:33 -0500 Subject: [PATCH 27/35] [LifetimeSafety] Optimize fact storage with IDs and vector-based lookup (#165963) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize the FactManager and DataflowAnalysis classes by using vector-based storage with ID-based lookups instead of maps. - Added a `FactID` type using the `utils::ID` template to uniquely identify facts - Modified `Fact` class to store and manage IDs - Changed `FactManager` to use vector-based storage indexed by block ID instead of a map - Updated `DataflowAnalysis` to use vector-based storage for states instead of maps - Modified lookups to use ID-based indexing for better performance Improves compile time hit on long-tail targets like `tools/clang/lib/CodeGen/CMakeFiles/obj.clangCodeGen.dir/TargetBuiltins/RISCV.cpp.o`​ from [21%](http://llvm-compile-time-tracker.com/compare_clang.php?from=6e25a04027ca786b7919657c7df330a33985ceea&to=20b42efa277c8b1915db757863e1fc26531cfd53&stat=instructions%3Au&sortBy=absolute-difference) to [3.2%](http://llvm-compile-time-tracker.com/compare_clang.php?from=6e25a04027ca786b7919657c7df330a33985ceea&to=d2d1cd1109c3a85344457bfff6f092ae7b96b211&stat=instructions%3Au&sortBy=absolute-difference) --- .../Analysis/Analyses/LifetimeSafety/Facts.h | 31 ++++++++++++++----- clang/lib/Analysis/LifetimeSafety/Dataflow.h | 14 ++++++--- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 13 +++----- .../LifetimeSafety/LifetimeSafety.cpp | 1 + 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h index 063cb5c2d42ab..b9cad5340c940 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -16,6 +16,7 @@ #include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" #include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" #include "llvm/ADT/SmallVector.h" @@ -23,6 +24,9 @@ #include namespace clang::lifetimes::internal { + +using FactID = utils::ID; + /// An abstract base class for a single, atomic lifetime-relevant event. class Fact { @@ -48,6 +52,7 @@ class Fact { private: Kind K; + FactID ID; protected: Fact(Kind K) : K(K) {} @@ -56,6 +61,9 @@ class Fact { virtual ~Fact() = default; Kind getKind() const { return K; } + void setID(FactID ID) { this->ID = ID; } + FactID getID() const { return ID; } + template const T *getAs() const { if (T::classof(this)) return static_cast(this); @@ -183,22 +191,26 @@ class TestPointFact : public Fact { class FactManager { public: + void init(const CFG &Cfg) { + assert(BlockToFacts.empty() && "FactManager already initialized"); + BlockToFacts.resize(Cfg.getNumBlockIDs()); + } + llvm::ArrayRef getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; + return BlockToFacts[B->getBlockID()]; } void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + BlockToFacts[B->getBlockID()].assign(NewFacts.begin(), NewFacts.end()); } template FactType *createFact(Args &&...args) { void *Mem = FactAllocator.Allocate(); - return new (Mem) FactType(std::forward(args)...); + FactType *Res = new (Mem) FactType(std::forward(args)...); + Res->setID(NextFactID++); + return Res; } void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; @@ -214,16 +226,19 @@ class FactManager { /// \note This is intended for testing only. llvm::StringMap getTestPoints() const; + unsigned getNumFacts() const { return NextFactID.Value; } + LoanManager &getLoanMgr() { return LoanMgr; } const LoanManager &getLoanMgr() const { return LoanMgr; } OriginManager &getOriginMgr() { return OriginMgr; } const OriginManager &getOriginMgr() const { return OriginMgr; } private: + FactID NextFactID{0}; LoanManager LoanMgr; OriginManager OriginMgr; - llvm::DenseMap> - BlockToFactsMap; + /// Facts for each CFG block, indexed by block ID. + llvm::SmallVector> BlockToFacts; llvm::BumpPtrAllocator FactAllocator; }; } // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h index 2f7bcb6e5dc81..de821bb17eb6b 100644 --- a/clang/lib/Analysis/LifetimeSafety/Dataflow.h +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -67,10 +67,10 @@ class DataflowAnalysis { llvm::DenseMap InStates; /// The dataflow state after a basic block is processed. llvm::DenseMap OutStates; - /// The dataflow state at a Program Point. + /// Dataflow state at each program point, indexed by Fact ID. /// In a forward analysis, this is the state after the Fact at that point has /// been applied, while in a backward analysis, it is the state before. - llvm::DenseMap PerPointStates; + llvm::SmallVector PointToState; static constexpr bool isForward() { return Dir == Direction::Forward; } @@ -86,6 +86,8 @@ class DataflowAnalysis { Derived &D = static_cast(*this); llvm::TimeTraceScope Time(D.getAnalysisName()); + PointToState.resize(FactMgr.getNumFacts()); + using Worklist = std::conditional_t; @@ -116,7 +118,9 @@ class DataflowAnalysis { } protected: - Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); } + Lattice getState(ProgramPoint P) const { + return PointToState[P->getID().Value]; + } std::optional getInState(const CFGBlock *B) const { auto It = InStates.find(B); @@ -144,12 +148,12 @@ class DataflowAnalysis { if constexpr (isForward()) { for (const Fact *F : Facts) { State = transferFact(State, F); - PerPointStates[F] = State; + PointToState[F->getID().Value] = State; } } else { for (const Fact *F : llvm::reverse(Facts)) { // In backward analysis, capture the state before applying the fact. - PerPointStates[F] = State; + PointToState[F->getID().Value] = State; State = transferFact(State, F); } } diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index ecde390cd6ca3..4a4172fe55bf3 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -64,8 +64,8 @@ void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, llvm::StringMap FactManager::getTestPoints() const { llvm::StringMap AnnotationToPointMap; - for (const CFGBlock *Block : BlockToFactsMap.keys()) { - for (const Fact *F : getFacts(Block)) { + for (const auto &BlockFacts : BlockToFacts) { + for (const Fact *F : BlockFacts) { if (const auto *TPF = F->getAs()) { StringRef PointName = TPF->getAnnotation(); assert(AnnotationToPointMap.find(PointName) == @@ -88,12 +88,9 @@ void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { // Print blocks in the order as they appear in code for a stable ordering. for (const CFGBlock *B : *AC.getAnalysis()) { llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs(), LoanMgr, OriginMgr); - } + for (const Fact *F : getFacts(B)) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); } llvm::dbgs() << " End of Block\n"; } diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp index 00c7ed90503e7..a51ba4280f284 100644 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -41,6 +41,7 @@ void LifetimeSafetyAnalysis::run() { const CFG &Cfg = *AC.getCFG(); DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), /*ShowColors=*/true)); + FactMgr.init(Cfg); FactsGenerator FactGen(FactMgr, AC); FactGen.run(); From 68b6c60966918578159eb087e741110e34b16824 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 8 Nov 2025 22:30:09 -0800 Subject: [PATCH 28/35] [LifetimeSafety] Use StringMap::contains (NFC) (#167186) Identified with readability-container-contains. --- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index 4a4172fe55bf3..190c038f46401 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -68,8 +68,7 @@ llvm::StringMap FactManager::getTestPoints() const { for (const Fact *F : BlockFacts) { if (const auto *TPF = F->getAs()) { StringRef PointName = TPF->getAnnotation(); - assert(AnnotationToPointMap.find(PointName) == - AnnotationToPointMap.end() && + assert(!AnnotationToPointMap.contains(PointName) && "more than one test points with the same name"); AnnotationToPointMap[PointName] = F; } From 9786377dae9051568b42bc5314c825bdf52800ae Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 12 Nov 2025 16:58:16 +0100 Subject: [PATCH 29/35] [LifetimeSafety] Add support for conditional operators (#167240) Added support for conditional operators in the lifetime safety analysis. Added a `VisitConditionalOperator` method to the `FactsGenerator` class to handle the ternary operator (`?:`) in lifetime safety analysis. Fixes https://github.com/llvm/llvm-project/issues/157108 --- .../Analyses/LifetimeSafety/FactsGenerator.h | 1 + .../LifetimeSafety/FactsGenerator.cpp | 9 +++ .../Sema/warn-lifetime-safety-dataflow.cpp | 17 +++++ clang/test/Sema/warn-lifetime-safety.cpp | 73 +++++++++++++++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 3 +- 5 files changed, 101 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 5e58abee2bbb3..4c8ab3f859a49 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -43,6 +43,7 @@ class FactsGenerator : public ConstStmtVisitor { void VisitUnaryOperator(const UnaryOperator *UO); void VisitReturnStmt(const ReturnStmt *RS); void VisitBinaryOperator(const BinaryOperator *BO); + void VisitConditionalOperator(const ConditionalOperator *CO); void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); void VisitInitListExpr(const InitListExpr *ILE); diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index bec8e1dabb0b5..381ff99aae420 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -176,6 +176,15 @@ void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { handleAssignment(BO->getLHS(), BO->getRHS()); } +void FactsGenerator::VisitConditionalOperator(const ConditionalOperator *CO) { + if (hasOrigin(CO)) { + // Merge origins from both branches of the conditional operator. + // We kill to clear the initial state and merge both origins into it. + killAndFlowOrigin(*CO, *CO->getTrueExpr()); + flowOrigin(*CO, *CO->getFalseExpr()); + } +} + void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { // Assignment operators have special "kill-then-propagate" semantics // and are handled separately. diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index f34c1e95454d7..60650f295305b 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -414,3 +414,20 @@ void test_use_lifetimebound_call() { // CHECK: Expire ([[L_Y]] (Path: y)) // CHECK: Expire ([[L_X]] (Path: x)) } +// CHECK-LABEL: Function: test_conditional_operator +void test_conditional_operator(bool cond) { + MyObj x, y; + MyObj *p = cond ? &x : &y; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: OriginFlow (Dest: [[O_COND_OP:[0-9]+]] (Expr: ConditionalOperator), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_COND_OP]] (Expr: ConditionalOperator), Src: [[O_ADDR_Y]] (Expr: UnaryOperator), Merge) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_COND_OP]] (Expr: ConditionalOperator)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: Expire ([[L_X]] (Path: x)) +} diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 4f234f0ac6e2d..3460a8675bf04 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -440,6 +440,7 @@ void no_error_loan_from_current_iteration(bool cond) { //===----------------------------------------------------------------------===// View Identity(View v [[clang::lifetimebound]]); +MyObj* Identity(MyObj* v [[clang::lifetimebound]]); View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); MyObj* GetPointer(const MyObj& obj [[clang::lifetimebound]]); @@ -582,3 +583,75 @@ void lifetimebound_ctor() { } (void)v; } + +// Conditional operator. +void conditional_operator_one_unsafe_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + { + MyObj temp; + p = cond ? &temp // expected-warning {{object whose reference is captured may not live long enough}} + : &safe; + } // expected-note {{destroyed here}} + + // This is not a use-after-free for any value of `cond` but the analysis + // cannot reason this and marks the above as a false positive. This + // ensures safety regardless of cond's value. + if (cond) + p = &safe; + (void)*p; // expected-note {{later used here}} +} + +void conditional_operator_two_unsafe_branches(bool cond) { + MyObj* p; + { + MyObj a, b; + p = cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_nested(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = cond ? cond ? &a // expected-warning {{object whose reference is captured does not live long enough}}. + : &b // expected-warning {{object whose reference is captured does not live long enough}}. + : cond ? &c // expected-warning {{object whose reference is captured does not live long enough}}. + : &d; // expected-warning {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} + +void conditional_operator_lifetimebound(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? Identity(&a) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(&b)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested_deep(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = Identity(cond ? Identity(cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(cond ? &c // expected-warning {{object whose reference is captured does not live long enough}} + : &d)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 34af476843c0d..9d61d56e078e3 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -689,7 +689,6 @@ TEST_F(LifetimeAnalysisTest, GslPointerConstructFromView) { EXPECT_THAT(Origin("q"), HasLoansTo({"a"}, "p1")); } -// FIXME: Handle loans in ternary operator! TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { SetupTest(R"( void target(bool cond) { @@ -698,7 +697,7 @@ TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { POINT(p1); } )"); - EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); + EXPECT_THAT(Origin("v"), HasLoansTo({"a", "b"}, "p1")); } // FIXME: Handle temporaries. From a6beb1221194353f1bc1437083a8e72338db0901 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 13 Nov 2025 00:13:57 +0100 Subject: [PATCH 30/35] [LifetimeSafety] Ignore parentheses when tracking expressions (#167245) Add support for handling parenthesized expressions in lifetime safety analysis. Modified the `OriginManager::get` method to ignore parentheses when retrieving origins by recursively calling itself on the unparenthesized expression. This ensures that expressions with extra parentheses are properly analyzed for lifetime safety issues. --- clang/lib/Analysis/LifetimeSafety/Origins.cpp | 2 ++ clang/test/Sema/warn-lifetime-safety.cpp | 31 +++++++++++++++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 17 ++++++++++ 3 files changed, 50 insertions(+) diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index ea51a75324e06..0f2eaa94a5987 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -34,6 +34,8 @@ Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { // TODO: Mark this method as const once we remove the call to getOrCreate. OriginID OriginManager::get(const Expr &E) { + if (auto *ParenIgnored = E.IgnoreParens(); ParenIgnored != &E) + return get(*ParenIgnored); auto It = ExprToOriginID.find(&E); if (It != ExprToOriginID.end()) return It->second; diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 3460a8675bf04..b9368db550805 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -655,3 +655,34 @@ void conditional_operator_lifetimebound_nested_deep(bool cond) { } // expected-note 4 {{destroyed here}} (void)*p; // expected-note 4 {{later used here}} } + +void parentheses(bool cond) { + MyObj* p; + { + MyObj a; + p = &((((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a; + p = ((GetPointer((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a, b, c, d; + p = &(cond ? (cond ? a // expected-warning {{object whose reference is captured does not live long enough}}. + : b) // expected-warning {{object whose reference is captured does not live long enough}}. + : (cond ? c // expected-warning {{object whose reference is captured does not live long enough}}. + : d)); // expected-warning {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} + + { + MyObj a, b, c, d; + p = ((cond ? (((cond ? &a : &b))) // expected-warning 2 {{object whose reference is captured does not live long enough}}. + : &(((cond ? c : d))))); // expected-warning 2 {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 9d61d56e078e3..601308c53f9a9 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -700,6 +700,23 @@ TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { EXPECT_THAT(Origin("v"), HasLoansTo({"a", "b"}, "p1")); } +TEST_F(LifetimeAnalysisTest, ExtraParenthesis) { + SetupTest(R"( + void target() { + MyObj a; + View x = ((View((((a)))))); + View y = ((View{(((x)))})); + View z = ((View(((y))))); + View p = ((View{((x))})); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); +} + // FIXME: Handle temporaries. TEST_F(LifetimeAnalysisTest, ViewFromTemporary) { SetupTest(R"( From 33d1123b75e144dd87f4596195c8bc82e4bd8288 Mon Sep 17 00:00:00 2001 From: Kashika Akhouri <77923634+kashika0112@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:38:02 +0530 Subject: [PATCH 31/35] [LifetimeSafety] Detect use-after-return (#165370) Adding "use-after-return" in Lifetime Analysis. Detecting when a function returns a reference to its own stack memory: [UAR Design Doc](https://docs.google.com/document/d/1Wxjn_rJD_tuRdejP81dlb9VOckTkCq5-aE1nGcerb_o/edit?usp=sharing) Consider the following example: ```cpp std::string_view foo() { std::string_view a; std::string str = "small scoped string"; a = str; return a; } ``` The code adds a new Fact "OriginEscape" in the end of the CFG to determine any loan that is escaping the function as shown below: ``` Function: foo Block B2: End of Block Block B1: OriginFlow (Dest: 0 (Decl: a), Src: 1 (Expr: CXXConstructExpr)) OriginFlow (Dest: 2 (Expr: ImplicitCastExpr), Src: 3 (Expr: StringLiteral)) Issue (0 (Path: operator=), ToOrigin: 4 (Expr: DeclRefExpr)) OriginFlow (Dest: 5 (Expr: ImplicitCastExpr), Src: 4 (Expr: DeclRefExpr)) Use (0 (Decl: a), Write) Issue (1 (Path: str), ToOrigin: 6 (Expr: DeclRefExpr)) OriginFlow (Dest: 7 (Expr: ImplicitCastExpr), Src: 6 (Expr: DeclRefExpr)) OriginFlow (Dest: 8 (Expr: CXXMemberCallExpr), Src: 7 (Expr: ImplicitCastExpr)) OriginFlow (Dest: 9 (Expr: ImplicitCastExpr), Src: 8 (Expr: CXXMemberCallExpr)) OriginFlow (Dest: 10 (Expr: ImplicitCastExpr), Src: 9 (Expr: ImplicitCastExpr)) OriginFlow (Dest: 11 (Expr: MaterializeTemporaryExpr), Src: 10 (Expr: ImplicitCastExpr)) OriginFlow (Dest: 0 (Decl: a), Src: 11 (Expr: MaterializeTemporaryExpr)) Use (0 (Decl: a), Read) OriginFlow (Dest: 12 (Expr: ImplicitCastExpr), Src: 0 (Decl: a)) OriginFlow (Dest: 13 (Expr: CXXConstructExpr), Src: 12 (Expr: ImplicitCastExpr)) Expire (1 (Path: str)) OriginEscapes (13 (Expr: CXXConstructExpr)) End of Block Block B0: End of Block ``` The confidence of the report is determined by checking if at least one of the loans returned is not expired (strict). If all loans are expired it is considered permissive. More information [UAR Design Doc](https://docs.google.com/document/d/1Wxjn_rJD_tuRdejP81dlb9VOckTkCq5-aE1nGcerb_o/edit?usp=sharing) --- .../Analysis/Analyses/LifetimeSafety/Facts.h | 18 +- .../Analyses/LifetimeSafety/FactsGenerator.h | 4 + .../Analyses/LifetimeSafety/LifetimeSafety.h | 5 + .../Analyses/LifetimeSafety/LiveOrigins.h | 14 +- .../clang/Basic/DiagnosticSemaKinds.td | 11 + clang/lib/Analysis/LifetimeSafety/Checker.cpp | 32 +- clang/lib/Analysis/LifetimeSafety/Dataflow.h | 6 +- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 18 +- .../LifetimeSafety/FactsGenerator.cpp | 6 +- .../Analysis/LifetimeSafety/LiveOrigins.cpp | 38 ++- .../LifetimeSafety/LoanPropagation.cpp | 4 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 12 + .../Sema/warn-lifetime-safety-dataflow.cpp | 4 +- clang/test/Sema/warn-lifetime-safety.cpp | 156 +++++++++- .../unittests/Analysis/LifetimeSafetyTest.cpp | 283 ++++++++++++++++++ 15 files changed, 567 insertions(+), 44 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h index b9cad5340c940..b5f7f8746186a 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -42,12 +42,12 @@ class Fact { /// it. Otherwise, the source's loan set is merged into the destination's /// loan set. OriginFlow, - /// An origin escapes the function by flowing into the return value. - ReturnOfOrigin, /// An origin is used (eg. appears as l-value expression like DeclRefExpr). Use, /// A marker for a specific point in the code, for testing. TestPoint, + /// An origin that escapes the function scope (e.g., via return). + OriginEscapes, }; private: @@ -136,16 +136,19 @@ class OriginFlowFact : public Fact { const OriginManager &OM) const override; }; -class ReturnOfOriginFact : public Fact { +class OriginEscapesFact : public Fact { OriginID OID; + const Expr *EscapeExpr; public: static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; + return F->getKind() == Kind::OriginEscapes; } - ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } + OriginEscapesFact(OriginID OID, const Expr *EscapeExpr) + : Fact(Kind::OriginEscapes), OID(OID), EscapeExpr(EscapeExpr) {} + OriginID getEscapedOriginID() const { return OID; } + const Expr *getEscapeExpr() const { return EscapeExpr; }; void dump(llvm::raw_ostream &OS, const LoanManager &, const OriginManager &OM) const override; }; @@ -225,6 +228,9 @@ class FactManager { /// user-defined locations in the code. /// \note This is intended for testing only. llvm::StringMap getTestPoints() const; + /// Retrieves all the facts in the block containing Program Point P. + /// \note This is intended for testing only. + llvm::ArrayRef getBlockContaining(ProgramPoint P) const; unsigned getNumFacts() const { return NextFactID.Value; } diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 4c8ab3f859a49..8ea37259c570b 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -94,6 +94,10 @@ class FactsGenerator : public ConstStmtVisitor { FactManager &FactMgr; AnalysisDeclContext &AC; llvm::SmallVector CurrentBlockFacts; + // Collect origins that escape the function in this block (OriginEscapesFact), + // appended at the end of CurrentBlockFacts to ensure they appear after + // ExpireFact entries. + llvm::SmallVector EscapesInCurrentBlock; // To distinguish between reads and writes for use-after-free checks, this map // stores the `UseFact` for each `DeclRefExpr`. We initially identify all // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h index 91ffbb169f947..b34a7f18b5809 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -42,6 +42,11 @@ class LifetimeSafetyReporter { virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, SourceLocation FreeLoc, Confidence Confidence) {} + + virtual void reportUseAfterReturn(const Expr *IssueExpr, + const Expr *EscapeExpr, + SourceLocation ExpiryLoc, + Confidence Confidence) {} }; /// The main entry point for the analysis. diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h index c4f5f0e9ae46c..8ad17db83499d 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h @@ -31,6 +31,9 @@ namespace clang::lifetimes::internal { +using CausingFactType = + ::llvm::PointerUnion; + enum class LivenessKind : uint8_t { Dead, // Not alive Maybe, // Live on some path but not all paths (may-be-live) @@ -43,7 +46,7 @@ struct LivenessInfo { /// multiple uses along different paths, this will point to the use appearing /// earlier in the translation unit. /// This is 'null' when the origin is not live. - const UseFact *CausingUseFact; + CausingFactType CausingFact; /// The kind of liveness of the origin. /// `Must`: The origin is live on all control-flow paths from the current @@ -56,17 +59,16 @@ struct LivenessInfo { /// while `Maybe`-be-alive suggests a potential one on some paths. LivenessKind Kind; - LivenessInfo() : CausingUseFact(nullptr), Kind(LivenessKind::Dead) {} - LivenessInfo(const UseFact *UF, LivenessKind K) - : CausingUseFact(UF), Kind(K) {} + LivenessInfo() : CausingFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(CausingFactType CF, LivenessKind K) : CausingFact(CF), Kind(K) {} bool operator==(const LivenessInfo &Other) const { - return CausingUseFact == Other.CausingUseFact && Kind == Other.Kind; + return CausingFact == Other.CausingFact && Kind == Other.Kind; } bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddPointer(CausingUseFact); + IDBuilder.AddPointer(CausingFact.getOpaqueValue()); IDBuilder.Add(Kind); } }; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 218465f66ca43..3a11589345d35 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10734,8 +10734,19 @@ def warn_lifetime_safety_loan_expires_permissive : Warning< def warn_lifetime_safety_loan_expires_strict : Warning< "object whose reference is captured may not live long enough">, InGroup, DefaultIgnore; + +def warn_lifetime_safety_return_stack_addr_permissive + : Warning<"address of stack memory is returned later">, + InGroup, + DefaultIgnore; +def warn_lifetime_safety_return_stack_addr_strict + : Warning<"address of stack memory may be returned later">, + InGroup, + DefaultIgnore; + def note_lifetime_safety_used_here : Note<"later used here">; def note_lifetime_safety_destroyed_here : Note<"destroyed here">; +def note_lifetime_safety_returned_here : Note<"returned here">; // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp index c443c3a5d4f9b..1f7c282dadac2 100644 --- a/clang/lib/Analysis/LifetimeSafety/Checker.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -43,7 +43,7 @@ namespace { /// Struct to store the complete context for a potential lifetime violation. struct PendingWarning { SourceLocation ExpiryLoc; // Where the loan expired. - const Expr *UseExpr; // Where the origin holding this loan was used. + llvm::PointerUnion CausingFact; Confidence ConfidenceLevel; }; @@ -68,7 +68,7 @@ class LifetimeChecker { issuePendingWarnings(); } - /// Checks for use-after-free errors when a loan expires. + /// Checks for use-after-free & use-after-return errors when a loan expires. /// /// This method examines all live origins at the expiry point and determines /// if any of them hold the expiring loan. If so, it creates a pending @@ -83,7 +83,11 @@ class LifetimeChecker { LoanID ExpiredLoan = EF->getLoanID(); LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); Confidence CurConfidence = Confidence::None; - const UseFact *BadUse = nullptr; + // The UseFact or OriginEscapesFact most indicative of a lifetime error, + // prioritized by earlier source location. + llvm::PointerUnion + BestCausingFact = nullptr; + for (auto &[OID, LiveInfo] : Origins) { LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); if (!HeldLoans.contains(ExpiredLoan)) @@ -92,17 +96,17 @@ class LifetimeChecker { Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); if (CurConfidence < NewConfidence) { CurConfidence = NewConfidence; - BadUse = LiveInfo.CausingUseFact; + BestCausingFact = LiveInfo.CausingFact; } } - if (!BadUse) + if (!BestCausingFact) return; // We have a use-after-free. Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; if (LastConf >= CurConfidence) return; FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), - /*UseExpr=*/BadUse->getUseExpr(), + /*BestCausingFact=*/BestCausingFact, /*ConfidenceLevel=*/CurConfidence}; } @@ -112,8 +116,20 @@ class LifetimeChecker { for (const auto &[LID, Warning] : FinalWarningsMap) { const Loan &L = FactMgr.getLoanMgr().getLoan(LID); const Expr *IssueExpr = L.IssueExpr; - Reporter->reportUseAfterFree(IssueExpr, Warning.UseExpr, - Warning.ExpiryLoc, Warning.ConfidenceLevel); + llvm::PointerUnion + CausingFact = Warning.CausingFact; + Confidence Confidence = Warning.ConfidenceLevel; + SourceLocation ExpiryLoc = Warning.ExpiryLoc; + + if (const auto *UF = CausingFact.dyn_cast()) + Reporter->reportUseAfterFree(IssueExpr, UF->getUseExpr(), ExpiryLoc, + Confidence); + else if (const auto *OEF = + CausingFact.dyn_cast()) + Reporter->reportUseAfterReturn(IssueExpr, OEF->getEscapeExpr(), + ExpiryLoc, Confidence); + else + llvm_unreachable("Unhandled CausingFact type"); } } }; diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h index de821bb17eb6b..05c20d6385368 100644 --- a/clang/lib/Analysis/LifetimeSafety/Dataflow.h +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -170,8 +170,8 @@ class DataflowAnalysis { return D->transfer(In, *F->getAs()); case Fact::Kind::OriginFlow: return D->transfer(In, *F->getAs()); - case Fact::Kind::ReturnOfOrigin: - return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginEscapes: + return D->transfer(In, *F->getAs()); case Fact::Kind::Use: return D->transfer(In, *F->getAs()); case Fact::Kind::TestPoint: @@ -184,7 +184,7 @@ class DataflowAnalysis { Lattice transfer(Lattice In, const IssueFact &) { return In; } Lattice transfer(Lattice In, const ExpireFact &) { return In; } Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } - Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; } + Lattice transfer(Lattice In, const OriginEscapesFact &) { return In; } Lattice transfer(Lattice In, const UseFact &) { return In; } Lattice transfer(Lattice In, const TestPointFact &) { return In; } }; diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index 190c038f46401..0ae7111c489e8 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -43,10 +43,10 @@ void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, OS << ")\n"; } -void ReturnOfOriginFact::dump(llvm::raw_ostream &OS, const LoanManager &, - const OriginManager &OM) const { - OS << "ReturnOfOrigin ("; - OM.dump(getReturnedOriginID(), OS); +void OriginEscapesFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginEscapes ("; + OM.dump(getEscapedOriginID(), OS); OS << ")\n"; } @@ -95,4 +95,14 @@ void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { } } +llvm::ArrayRef +FactManager::getBlockContaining(ProgramPoint P) const { + for (const auto &BlockToFactsVec : BlockToFacts) { + for (const Fact *F : BlockToFactsVec) + if (F == P) + return BlockToFactsVec; + } + return {}; +} + } // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 381ff99aae420..cb9a202b08968 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -58,6 +58,7 @@ void FactsGenerator::run() { // initializations and destructions are processed in the correct sequence. for (const CFGBlock *Block : *AC.getAnalysis()) { CurrentBlockFacts.clear(); + EscapesInCurrentBlock.clear(); for (unsigned I = 0; I < Block->size(); ++I) { const CFGElement &Element = Block->Elements[I]; if (std::optional CS = Element.getAs()) @@ -66,6 +67,8 @@ void FactsGenerator::run() { Element.getAs()) handleDestructor(*DtorOpt); } + CurrentBlockFacts.append(EscapesInCurrentBlock.begin(), + EscapesInCurrentBlock.end()); FactMgr.addBlockFacts(Block, CurrentBlockFacts); } } @@ -166,7 +169,8 @@ void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { if (const Expr *RetExpr = RS->getRetValue()) { if (hasOrigin(RetExpr)) { OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back(FactMgr.createFact(OID)); + EscapesInCurrentBlock.push_back( + FactMgr.createFact(OID, RetExpr)); } } } diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp index 59f594e50fb46..57338122b4440 100644 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -53,6 +53,14 @@ struct Lattice { } }; +static SourceLocation GetFactLoc(CausingFactType F) { + if (const auto *UF = F.dyn_cast()) + return UF->getUseExpr()->getExprLoc(); + if (const auto *OEF = F.dyn_cast()) + return OEF->getEscapeExpr()->getExprLoc(); + llvm_unreachable("unhandled causing fact in PointerUnion"); +} + /// The analysis that tracks which origins are live, with granular information /// about the causing use fact and confidence level. This is a backward /// analysis. @@ -74,11 +82,14 @@ class AnalysisImpl /// one. Lattice join(Lattice L1, Lattice L2) const { LivenessMap Merged = L1.LiveOrigins; - // Take the earliest UseFact to make the join hermetic and commutative. - auto CombineUseFact = [](const UseFact &A, - const UseFact &B) -> const UseFact * { - return A.getUseExpr()->getExprLoc() < B.getUseExpr()->getExprLoc() ? &A - : &B; + // Take the earliest Fact to make the join hermetic and commutative. + auto CombineCausingFact = [](CausingFactType A, + CausingFactType B) -> CausingFactType { + if (!A) + return B; + if (!B) + return A; + return GetFactLoc(A) < GetFactLoc(B) ? A : B; }; auto CombineLivenessKind = [](LivenessKind K1, LivenessKind K2) -> LivenessKind { @@ -93,12 +104,11 @@ class AnalysisImpl const LivenessInfo *L2) -> LivenessInfo { assert((L1 || L2) && "unexpectedly merging 2 empty sets"); if (!L1) - return LivenessInfo(L2->CausingUseFact, LivenessKind::Maybe); + return LivenessInfo(L2->CausingFact, LivenessKind::Maybe); if (!L2) - return LivenessInfo(L1->CausingUseFact, LivenessKind::Maybe); - return LivenessInfo( - CombineUseFact(*L1->CausingUseFact, *L2->CausingUseFact), - CombineLivenessKind(L1->Kind, L2->Kind)); + return LivenessInfo(L1->CausingFact, LivenessKind::Maybe); + return LivenessInfo(CombineCausingFact(L1->CausingFact, L2->CausingFact), + CombineLivenessKind(L1->Kind, L2->Kind)); }; return Lattice(utils::join( L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo, @@ -120,6 +130,14 @@ class AnalysisImpl LivenessInfo(&UF, LivenessKind::Must))); } + /// An escaping origin (e.g., via return) makes the origin live with definite + /// confidence, as it dominates this program point. + Lattice transfer(Lattice In, const OriginEscapesFact &OEF) { + OriginID OID = OEF.getEscapedOriginID(); + return Lattice(Factory.add(In.LiveOrigins, OID, + LivenessInfo(&OEF, LivenessKind::Must))); + } + /// Issuing a new loan to an origin kills its liveness. Lattice transfer(Lattice In, const IssueFact &IF) { return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID())); diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp index 0e6c194123df8..23ce1b78dfde2 100644 --- a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp @@ -58,12 +58,10 @@ static llvm::BitVector computePersistentOrigins(const FactManager &FactMgr, CheckOrigin(OF->getSrcOriginID()); break; } - case Fact::Kind::ReturnOfOrigin: - CheckOrigin(F->getAs()->getReturnedOriginID()); - break; case Fact::Kind::Use: CheckOrigin(F->getAs()->getUsedOrigin()); break; + case Fact::Kind::OriginEscapes: case Fact::Kind::Expire: case Fact::Kind::TestPoint: break; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 4c1fe60dd2db9..a2ac3ca48ce9a 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3517,6 +3517,18 @@ class LifetimeSafetyReporterImpl : public LifetimeSafetyReporter { << UseExpr->getEndLoc(); } + void reportUseAfterReturn(const Expr *IssueExpr, const Expr *EscapeExpr, + SourceLocation ExpiryLoc, Confidence C) override { + S.Diag(IssueExpr->getExprLoc(), + C == Confidence::Definite + ? diag::warn_lifetime_safety_return_stack_addr_permissive + : diag::warn_lifetime_safety_return_stack_addr_strict) + << IssueExpr->getEndLoc(); + + S.Diag(EscapeExpr->getExprLoc(), diag::note_lifetime_safety_returned_here) + << EscapeExpr->getEndLoc(); + } + private: Sema &S; }; diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 60650f295305b..d14479d43fc1c 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -18,8 +18,8 @@ MyObj* return_local_addr() { return p; // CHECK: Use ([[O_P]] (Decl: p), Read) // CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) -// CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) // CHECK: Expire ([[L_X]] (Path: x)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) } @@ -49,8 +49,8 @@ MyObj* assign_and_return_local_addr() { return ptr2; // CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) // CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) -// CHECK: ReturnOfOrigin ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) // CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) } // Return of Non-Pointer Type diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index b9368db550805..2803e73b5aee2 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -verify %s +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -Wno-dangling -verify %s struct MyObj { int id; @@ -396,6 +396,131 @@ void loan_from_previous_iteration(MyObj safe, bool condition) { } // expected-note {{destroyed here}} } +//===----------------------------------------------------------------------===// +// Basic Definite Use-After-Return (Return-Stack-Address) (-W...permissive) +// These are cases where the pointer is guaranteed to be dangling at the use site. +//===----------------------------------------------------------------------===// + +MyObj* simple_return_stack_address() { + MyObj s; + MyObj* p = &s; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} +} + +MyObj* direct_return() { + MyObj s; + return &s; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const MyObj* conditional_assign_unconditional_return(const MyObj& safe, bool c) { + MyObj s; + const MyObj* p = &safe; + if (c) { + p = &s; // expected-warning {{address of stack memory is returned later}} + } + return p; // expected-note {{returned here}} +} + +View conditional_assign_both_branches(const MyObj& safe, bool c) { + MyObj s; + View p; + if (c) { + p = s; // expected-warning {{address of stack memory is returned later}} + } + else { + p = safe; + } + return p; // expected-note {{returned here}} + +} + +View reassign_safe_to_local(const MyObj& safe) { + MyObj local; + View p = safe; + p = local; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} +} + +View pointer_chain_to_local() { + MyObj local; + View p1 = local; // expected-warning {{address of stack memory is returned later}} + View p2 = p1; + return p2; // expected-note {{returned here}} +} + +View multiple_assign_multiple_return(const MyObj& safe, bool c1, bool c2) { + MyObj local1; + MyObj local2; + View p; + if (c1) { + p = local1; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} + } + else if (c2) { + p = local2; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} + } + p = safe; + return p; +} + +View multiple_assign_single_return(const MyObj& safe, bool c1, bool c2) { + MyObj local1; + MyObj local2; + View p; + if (c1) { + p = local1; // expected-warning {{address of stack memory is returned later}} + } + else if (c2) { + p = local2; // expected-warning {{address of stack memory is returned later}} + } + else { + p = safe; + } + return p; // expected-note 2 {{returned here}} +} + +View direct_return_of_local() { + MyObj stack; + return stack; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +MyObj& reference_return_of_local() { + MyObj stack; + return stack; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +//===----------------------------------------------------------------------===// +// Use-After-Scope & Use-After-Return (Return-Stack-Address) Combined +// These are cases where the diagnostic kind is determined by location +//===----------------------------------------------------------------------===// + +MyObj* uaf_before_uar() { + MyObj* p; + { + MyObj local_obj; + p = &local_obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + return p; // expected-note {{later used here}} +} + +View uar_before_uaf(const MyObj& safe, bool c) { + View p; + { + MyObj local_obj; + p = local_obj; // expected-warning {{address of stack memory is returned later}} + if (c) { + return p; // expected-note {{returned here}} + } + } + p.use(); + p = safe; + return p; +} + //===----------------------------------------------------------------------===// // No-Error Cases //===----------------------------------------------------------------------===// @@ -434,12 +559,19 @@ void no_error_loan_from_current_iteration(bool cond) { } } +View safe_return(const MyObj& safe) { + MyObj local; + View p = local; + p = safe; // p has been reassigned + return p; // This is safe +} //===----------------------------------------------------------------------===// // Lifetimebound Attribute Tests //===----------------------------------------------------------------------===// View Identity(View v [[clang::lifetimebound]]); +const MyObj& IdentityRef(const MyObj& obj [[clang::lifetimebound]]); MyObj* Identity(MyObj* v [[clang::lifetimebound]]); View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); MyObj* GetPointer(const MyObj& obj [[clang::lifetimebound]]); @@ -584,6 +716,28 @@ void lifetimebound_ctor() { (void)v; } +View lifetimebound_return_of_local() { + MyObj stack; + return Identity(stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const MyObj& lifetimebound_return_ref_to_local() { + MyObj stack; + return IdentityRef(stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +// FIXME: Fails to diagnose UAR when a reference to a by-value param escapes via the return value. +View lifetimebound_return_of_by_value_param(MyObj stack_param) { + return Identity(stack_param); +} + +// FIXME: Fails to diagnose UAF when a reference to a by-value param escapes via an out-param. +void uaf_from_by_value_param_failing(MyObj param, View* out_p) { + *out_p = Identity(param); +} + // Conditional operator. void conditional_operator_one_unsafe_branch(bool cond) { MyObj safe; diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 601308c53f9a9..558a22af72572 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -20,6 +20,7 @@ namespace clang::lifetimes::internal { namespace { using namespace ast_matchers; +using ::testing::Not; using ::testing::SizeIs; using ::testing::UnorderedElementsAreArray; @@ -122,6 +123,40 @@ class LifetimeTestHelper { return LID; } + // Gets the set of loans that are live at the given program point. A loan is + // considered live at point P if there is a live origin which contains this + // loan. + std::optional getLiveLoansAtPoint(ProgramPoint P) const { + const auto &LiveOriginsAnalysis = Runner.getAnalysis().getLiveOrigins(); + const auto &LoanPropagation = Runner.getAnalysis().getLoanPropagation(); + + LivenessMap LiveOriginsMap = LiveOriginsAnalysis.getLiveOriginsAt(P); + + LoanSet::Factory F; + LoanSet Result = F.getEmptySet(); + + for (const auto &[OID, LI] : LiveOriginsMap) { + LoanSet Loans = LoanPropagation.getLoans(OID, P); + Result = clang::lifetimes::internal::utils::join(Result, Loans, F); + } + + if (Result.isEmpty()) + return std::nullopt; + + return Result; + } + + const ExpireFact * + getExpireFactFromAllFacts(const llvm::ArrayRef &FactsInBlock, + const LoanID &loanID) { + for (const Fact *F : FactsInBlock) { + if (auto const *CurrentEF = F->getAs()) + if (CurrentEF->getLoanID() == loanID) + return CurrentEF; + } + return nullptr; + } + std::optional getLoansAtPoint(OriginID OID, llvm::StringRef Annotation) { ProgramPoint PP = Runner.getProgramPoint(Annotation); @@ -141,6 +176,14 @@ class LifetimeTestHelper { return Result; } + ProgramPoint getProgramPoint(llvm::StringRef Annotation) { + return Runner.getProgramPoint(Annotation); + } + + llvm::ArrayRef getBlockContaining(ProgramPoint P) { + return Runner.getAnalysis().getFactManager().getBlockContaining(P); + } + private: template DeclT *findDecl(llvm::StringRef Name) { auto &Ctx = Runner.getASTContext(); @@ -304,6 +347,43 @@ MATCHER_P2(AreLiveAtImpl, Annotation, ConfFilter, "") { return true; } +MATCHER_P2(HasLiveLoanAtExpiryImpl, HelperPtr, Annotation, "") { + llvm::StringRef VarName = arg; + LifetimeTestHelper &Helper = *HelperPtr; + + std::vector Loans = Helper.getLoansForVar(VarName); + if (Loans.empty()) { + *result_listener << "No loans found for variable" << VarName.str(); + return false; + } + + ProgramPoint PP = Helper.getProgramPoint(Annotation); + llvm::ArrayRef AllFactsInBlock = Helper.getBlockContaining(PP); + + bool NoExpireFactLive = false; + for (const LoanID CurrentLoanID : Loans) { + const ExpireFact *EF = + Helper.getExpireFactFromAllFacts(AllFactsInBlock, CurrentLoanID); + if (!EF) { + NoExpireFactLive = true; + continue; + } + std::optional LiveLoans = Helper.getLiveLoansAtPoint(EF); + if (!LiveLoans.has_value()) { + *result_listener << "No Live Loans At Expiry Location."; + continue; + } + if (LiveLoans->contains({CurrentLoanID})) + return true; + } + if (NoExpireFactLive) { + *result_listener << "No Expire Fact for loan of " << VarName.str(); + return false; + } + *result_listener << "No loans of " << VarName.str() << " are live"; + return false; +} + MATCHER_P(MustBeLiveAt, Annotation, "") { return ExplainMatchResult(AreLiveAtImpl(Annotation, LivenessKindFilter::Must), arg, result_listener); @@ -353,6 +433,10 @@ class LifetimeAnalysisTest : public ::testing::Test { return HasLoansToImpl(std::vector(LoanVars), Annotation); } + auto HasLiveLoanAtExpiry(const char *Annotation) { + return HasLiveLoanAtExpiryImpl(Helper.get(), Annotation); + } + std::unique_ptr Runner; std::unique_ptr Helper; }; @@ -1223,5 +1307,204 @@ TEST_F(LifetimeAnalysisTest, LivenessOutsideLoop) { EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p1")); } +TEST_F(LifetimeAnalysisTest, SimpleReturnStackAddress) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + +TEST_F(LifetimeAnalysisTest, DirectReturn) { + SetupTest(R"( + MyObj* target() { + MyObj s; + POINT(P); + return &s; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalAssignUnconditionalReturn) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1; + MyObj* p = nullptr; + if (c) { + p = &s1; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("s1", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignments) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p1 = &s; + MyObj* p2 = &s; + POINT(P); + return p2; + } + )"); + // Test if atleast one loan to "s" is live; + EXPECT_THAT("s", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalAssignBothBranches) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1; + static MyObj s2; + MyObj* p = nullptr; + if (c) { + p = &s1; + } else { + p = &s2; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("s1", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ReassignFromSafeToLocalThenReturn) { + SetupTest(R"( + MyObj* target() { + static MyObj safe_obj; + MyObj local_obj; + MyObj* p = &safe_obj; + + p = &local_obj; + POINT(P); + return p; + } + )"); + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, PointerChainToLocal) { + SetupTest(R"( + MyObj* target() { + MyObj local_obj; + MyObj* p1 = &local_obj; + MyObj* p2 = p1; + POINT(P); + return p2; + } + )"); + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignmentMultipleReturn) { + SetupTest(R"( + MyObj* target(bool c1, bool c2) { + static MyObj global_obj; + MyObj local_obj1; + MyObj local_obj2; + MyObj* p = nullptr; + if(c1){ + p = &local_obj1; + POINT(C1); + return p; + } + else if(c2){ + p = &local_obj2; + POINT(C2); + return p; + } + p = &global_obj; + POINT(C3); + return p; + } + )"); + + EXPECT_THAT("local_obj1", HasLiveLoanAtExpiry("C1")); + EXPECT_THAT("local_obj2", HasLiveLoanAtExpiry("C2")); + + EXPECT_THAT("local_obj1", Not(HasLiveLoanAtExpiry("C3"))); + EXPECT_THAT("local_obj2", Not(HasLiveLoanAtExpiry("C3"))); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignmentsSingleReturn) { + SetupTest(R"( + MyObj* target(bool c1, bool c2) { + static MyObj global_obj; + MyObj local_obj1; + MyObj local_obj2; + MyObj* p = nullptr; + if(c1){ + p = &local_obj1; + } + else if(c2){ + p = &local_obj2; + } + else{ + p = &global_obj; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("local_obj1", HasLiveLoanAtExpiry("P")); + EXPECT_THAT("local_obj2", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, UseAfterScopeThenReturn) { + SetupTest(R"( + MyObj* target() { + MyObj* p; + { + MyObj local_obj; + p = &local_obj; + POINT(p1); + } + POINT(p2); + return p; + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p2")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + + EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p1")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); + + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("p2")); +} + +TEST_F(LifetimeAnalysisTest, ReturnBeforeUseAfterScope) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj* p; + static MyObj global_obj; + { + MyObj local_obj; + p = &local_obj; + if(c){ + POINT(p1); + return p; + } + } + POINT(p2); + return &global_obj; + } + )"); + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("p1")); + + EXPECT_THAT(NoOrigins(), AreLiveAt("p2")); + + EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p1")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From 361f176adfcf3cbc7485b24df1dfccb1633ad3b2 Mon Sep 17 00:00:00 2001 From: Kashika Akhouri Date: Fri, 21 Nov 2025 18:51:21 +0530 Subject: [PATCH 32/35] [LifetimeSafety] Detect expiry of loans to trivially destructed types (#168855) Handling Trivially Destructed Types This PR uses `AddLifetime` to handle expiry of loans to trivially destructed types. Example: ```cpp int * trivial_uar(){ int *ptr; int x = 1; ptr = &x; return ptr; } ``` The CFG created now has an Expire Fact for trivially destructed types: ``` Function: trivial_uar Block B2: End of Block Block B1: Issue (0 (Path: x), ToOrigin: 0 (Expr: DeclRefExpr)) OriginFlow (Dest: 1 (Expr: UnaryOperator), Src: 0 (Expr: DeclRefExpr)) Use (2 (Decl: ptr), Write) OriginFlow (Dest: 2 (Decl: ptr), Src: 1 (Expr: UnaryOperator)) Use (2 (Decl: ptr), Read) OriginFlow (Dest: 3 (Expr: ImplicitCastExpr), Src: 2 (Decl: ptr)) Expire (0 (Path: x)) OriginEscapes (3 (Expr: ImplicitCastExpr)) End of Block Block B0: End of Block ``` This Expire Fact issues UAR and UAF warnings. Fixes https://github.com/llvm/llvm-project/issues/162862 --- .../Analyses/LifetimeSafety/FactsGenerator.h | 2 +- .../LifetimeSafety/FactsGenerator.cpp | 26 +++---- clang/lib/Sema/AnalysisBasedWarnings.cpp | 3 + .../Sema/warn-lifetime-safety-dataflow.cpp | 24 ++++++- clang/test/Sema/warn-lifetime-safety.cpp | 68 +++++++++++++++++-- .../unittests/Analysis/LifetimeSafetyTest.cpp | 66 ++++++++++++++++++ 6 files changed, 165 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 8ea37259c570b..878cb90b685f9 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -50,7 +50,7 @@ class FactsGenerator : public ConstStmtVisitor { void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); private: - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt); + void handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds); void handleGSLPointerConstruction(const CXXConstructExpr *CCE); diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index cb9a202b08968..f7be472ed15b5 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -63,9 +63,9 @@ void FactsGenerator::run() { const CFGElement &Element = Block->Elements[I]; if (std::optional CS = Element.getAs()) Visit(CS->getStmt()); - else if (std::optional DtorOpt = - Element.getAs()) - handleDestructor(*DtorOpt); + else if (std::optional LifetimeEnds = + Element.getAs()) + handleLifetimeEnds(*LifetimeEnds); } CurrentBlockFacts.append(EscapesInCurrentBlock.begin(), EscapesInCurrentBlock.end()); @@ -229,25 +229,19 @@ void FactsGenerator::VisitMaterializeTemporaryExpr( killAndFlowOrigin(*MTE, *MTE->getSubExpr()); } -void FactsGenerator::handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. +void FactsGenerator::handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds) { /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) + const VarDecl *LifetimeEndsVD = LifetimeEnds.getVarDecl(); + if (!LifetimeEndsVD) return; // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; + for (const auto &Loan : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = Loan.Path; // Check if the loan is for a stack variable and if that variable // is the one being destructed. - if (LoanPath.D == DestructedVD) + if (LoanPath.D == LifetimeEndsVD) CurrentBlockFacts.push_back(FactMgr.createFact( - L.ID, DtorOpt.getTriggerStmt()->getEndLoc())); + Loan.ID, LifetimeEnds.getTriggerStmt()->getEndLoc())); } } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index a2ac3ca48ce9a..69151f77e4fcf 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3639,6 +3639,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety; + if (EnableLifetimeSafetyAnalysis) + AC.getCFGBuildOptions().AddLifetime = true; + // Force that certain expressions appear as CFGElements in the CFG. This // is used to speed up various analyses. // FIXME: This isn't the right factoring. This is here for initial diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index d14479d43fc1c..b5ab9f91c9b84 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -59,6 +59,7 @@ int return_int_val() { int x = 10; // CHECK: Block B{{[0-9]+}}: // CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: {{[0-9]+}} (Expr: DeclRefExpr)) +// CHECK: Expire ([[L_X:[0-9]+]] (Path: x)) return x; } // CHECK-NEXT: End of Block @@ -77,7 +78,6 @@ void loan_expires_cpp() { } -// FIXME: No expire for Trivial Destructors // CHECK-LABEL: Function: loan_expires_trivial void loan_expires_trivial() { int trivial_obj = 1; @@ -86,11 +86,29 @@ void loan_expires_trivial() { // CHECK: OriginFlow (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) int* pTrivialObj = &trivial_obj; // CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) -// CHECK-NOT: Expire +// CHECK: Expire ([[L_TRIVIAL_OBJ:[0-9]+]] (Path: trivial_obj)) // CHECK-NEXT: End of Block - // FIXME: Add check for Expire once trivial destructors are handled for expiration. } +// Trivial Destructors +// CHECK-LABEL: Function: return_int_pointer +int* return_int_pointer() { + int* ptr; +// CHECK: Block B{{[0-9]+}}: + int x = 1; +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) + ptr = &x; +// CHECK: Use ([[O_PTR:[0-9]+]] (Decl: ptr), Write) +// CHECK: OriginFlow (Dest: [[O_PTR]] (Decl: ptr), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: Use ([[O_PTR]] (Decl: ptr), Read) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR]] (Decl: ptr)) +// CHECK: Expire ([[L_X]] (Path: x)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) + return ptr; +} +// CHECK-NEXT: End of Block + // CHECK-LABEL: Function: conditional void conditional(bool condition) { int a = 5; diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 2803e73b5aee2..e80a05860389c 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -12,6 +12,10 @@ struct [[gsl::Pointer()]] View { void use() const; }; +class TriviallyDestructedClass { + View a, b; +}; + //===----------------------------------------------------------------------===// // Basic Definite Use-After-Free (-W...permissive) // These are cases where the pointer is guaranteed to be dangling at the use site. @@ -396,6 +400,24 @@ void loan_from_previous_iteration(MyObj safe, bool condition) { } // expected-note {{destroyed here}} } +void trivial_int_uaf() { + int * a; + { + int b = 1; + a = &b; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*a; // expected-note {{later used here}} +} + +void trivial_class_uaf() { + TriviallyDestructedClass* ptr; + { + TriviallyDestructedClass s; + ptr = &s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)ptr; // expected-note {{later used here}} +} + //===----------------------------------------------------------------------===// // Basic Definite Use-After-Return (Return-Stack-Address) (-W...permissive) // These are cases where the pointer is guaranteed to be dangling at the use site. @@ -493,6 +515,43 @@ MyObj& reference_return_of_local() { // expected-note@-1 {{returned here}} } +int* trivial_int_uar() { + int *a; + int b = 1; + a = &b; // expected-warning {{address of stack memory is returned later}} + return a; // expected-note {{returned here}} +} + +TriviallyDestructedClass* trivial_class_uar () { + TriviallyDestructedClass *ptr; + TriviallyDestructedClass s; + ptr = &s; // expected-warning {{address of stack memory is returned later}} + return ptr; // expected-note {{returned here}} +} + +// FIXME: No lifetime warning for this as no expire facts are generated for parameters +const int& return_parameter(int a) { + return a; +} + +// FIXME: No lifetime warning for this as no expire facts are generated for parameters +int* return_pointer_to_parameter(int a) { + return &a; +} + +const int& return_reference_to_parameter(int a) +{ + const int &b = a; + return b; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const int& get_ref_to_local() { + int a = 42; + return a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + //===----------------------------------------------------------------------===// // Use-After-Scope & Use-After-Return (Return-Stack-Address) Combined // These are cases where the diagnostic kind is determined by location @@ -688,7 +747,8 @@ void lifetimebound_partial_safety(bool cond) { v.use(); // expected-note {{later used here}} } -// FIXME: Creating reference from lifetimebound call doesn't propagate loans. +// FIXME: Warning should be on the 'GetObject' call, not the assignment to 'ptr'. +// The loan from the lifetimebound argument is not propagated to the call expression itself. const MyObj& GetObject(View v [[clang::lifetimebound]]); void lifetimebound_return_reference() { View v; @@ -697,9 +757,9 @@ void lifetimebound_return_reference() { MyObj obj; View temp_v = obj; const MyObj& ref = GetObject(temp_v); - ptr = &ref; - } - (void)*ptr; + ptr = &ref; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*ptr; // expected-note {{later used here}} } // FIXME: No warning for non gsl::Pointer types. Origin tracking is only supported for pointer types. diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index 558a22af72572..a895475013c98 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -59,6 +59,7 @@ class LifetimeTestRunner { BuildOptions.setAllAlwaysAdd(); BuildOptions.AddImplicitDtors = true; BuildOptions.AddTemporaryDtors = true; + BuildOptions.AddLifetime = true; // Run the main analysis. Analysis = std::make_unique(*AnalysisCtx, nullptr); @@ -1307,6 +1308,42 @@ TEST_F(LifetimeAnalysisTest, LivenessOutsideLoop) { EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p1")); } +TEST_F(LifetimeAnalysisTest, TrivialDestructorsUAF) { + SetupTest(R"( + void target() { + int *ptr; + { + int s = 1; + ptr = &s; + } + POINT(p1); + (void)*ptr; + } + )"); + EXPECT_THAT(Origin("ptr"), HasLoansTo({"s"}, "p1")); + EXPECT_THAT(Origins({"ptr"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, TrivialClassDestructorsUAF) { + SetupTest(R"( + class S { + View a, b; + }; + + void target() { + S* ptr; + { + S s; + ptr = &s; + } + POINT(p1); + (void)ptr; + } + )"); + EXPECT_THAT(Origin("ptr"), HasLoansTo({"s"}, "p1")); + EXPECT_THAT(Origins({"ptr"}), MustBeLiveAt("p1")); +} + TEST_F(LifetimeAnalysisTest, SimpleReturnStackAddress) { SetupTest(R"( MyObj* target() { @@ -1506,5 +1543,34 @@ TEST_F(LifetimeAnalysisTest, ReturnBeforeUseAfterScope) { EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); } +TEST_F(LifetimeAnalysisTest, TrivialDestructorsUAR) { + SetupTest(R"( + int* target() { + int s = 10; + int* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + +TEST_F(LifetimeAnalysisTest, TrivialClassDestructorsUAR) { + SetupTest(R"( + class S { + View a, b; + }; + + S* target() { + S *ptr; + S s; + ptr = &s; + POINT(p1); + return ptr; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + } // anonymous namespace } // namespace clang::lifetimes::internal From fadaf5d2ec619b9bc044e943f616e2df0d0e9eb8 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 25 Nov 2025 23:36:42 +0530 Subject: [PATCH 33/35] [LifetimeSafety] Add parameter lifetime tracking in CFG (#169320) This PR enhances the CFG builder to properly handle function parameters in lifetime analysis: 1. Added code to include parameters in the initial scope during CFG construction for both `FunctionDecl` and `BlockDecl` types 2. Added a special case to skip reference parameters, as they don't need automatic destruction 3. Fixed several test cases that were previously marked as "FIXME" due to missing parameter lifetime tracking Previously, Clang's lifetime analysis was not properly tracking the lifetime of function parameters, causing it to miss important use-after-return bugs when parameter values were returned by reference or address. This change ensures that parameters are properly tracked in the CFG, allowing the analyzer to correctly identify when stack memory associated with parameters is returned. Fixes https://github.com/llvm/llvm-project/issues/169014 --- clang/lib/Analysis/CFG.cpp | 20 ++++++- clang/test/Analysis/lifetime-cfg-output.cpp | 28 +++++++++ clang/test/Analysis/scopes-cfg-output.cpp | 2 + clang/test/Sema/warn-lifetime-safety.cpp | 57 ++++++++++++++++--- .../Analysis/FlowSensitive/LoggerTest.cpp | 11 +++- 5 files changed, 109 insertions(+), 9 deletions(-) diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 88fc191923aa2..0e720ea41e5cb 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1675,6 +1675,12 @@ std::unique_ptr CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) { assert(Succ == &cfg->getExit()); Block = nullptr; // the EXIT block is empty. Create all other blocks lazily. + // Add parameters to the initial scope to handle their dtos and lifetime ends. + LocalScope *paramScope = nullptr; + if (const auto *FD = dyn_cast_or_null(D)) + for (ParmVarDecl *PD : FD->parameters()) + paramScope = addLocalScopeForVarDecl(PD, paramScope); + if (BuildOpts.AddImplicitDtors) if (const CXXDestructorDecl *DD = dyn_cast_or_null(D)) addImplicitDtorsForDestructor(DD); @@ -2255,6 +2261,11 @@ LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD, if (!VD->hasLocalStorage()) return Scope; + // Reference parameters are aliases to objects that live elsewhere, so they + // don't require automatic destruction or lifetime tracking. + if (isa(VD) && VD->getType()->isReferenceType()) + return Scope; + if (!BuildOpts.AddLifetime && !BuildOpts.AddScopes && !needsAutomaticDestruction(VD)) { assert(BuildOpts.AddImplicitDtors); @@ -5680,8 +5691,15 @@ class StmtPrinterHelper : public PrinterHelper { bool handleDecl(const Decl *D, raw_ostream &OS) { DeclMapTy::iterator I = DeclMap.find(D); - if (I == DeclMap.end()) + if (I == DeclMap.end()) { + // ParmVarDecls are not declared in the CFG itself, so they do not appear + // in DeclMap. + if (auto *PVD = dyn_cast_or_null(D)) { + OS << "[Parm: " << PVD->getNameAsString() << "]"; + return true; + } return false; + } if (currentBlock >= 0 && I->second.first == (unsigned) currentBlock && I->second.second == currStmt) { diff --git a/clang/test/Analysis/lifetime-cfg-output.cpp b/clang/test/Analysis/lifetime-cfg-output.cpp index 0a75c5bcc0bcc..36b36eddc440c 100644 --- a/clang/test/Analysis/lifetime-cfg-output.cpp +++ b/clang/test/Analysis/lifetime-cfg-output.cpp @@ -935,3 +935,31 @@ int backpatched_goto() { goto label; i++; } + +// CHECK: [B2 (ENTRY)] +// CHECK-NEXT: Succs (1): B1 +// CHECK: [B1] +// CHECK-NEXT: 1: a +// CHECK-NEXT: 2: [B1.1] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 3: b +// CHECK-NEXT: 4: [B1.3] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 5: [B1.2] + [B1.4] +// CHECK-NEXT: 6: c +// CHECK-NEXT: 7: [B1.6] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 8: [B1.5] + [B1.7] +// CHECK-NEXT: 9: int res = a + b + c; +// CHECK-NEXT: 10: res +// CHECK-NEXT: 11: [B1.10] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 12: return [B1.11]; +// CHECK-NEXT: 13: [B1.9] (Lifetime ends) +// CHECK-NEXT: 14: [Parm: c] (Lifetime ends) +// CHECK-NEXT: 15: [Parm: b] (Lifetime ends) +// CHECK-NEXT: 16: [Parm: a] (Lifetime ends) +// CHECK-NEXT: Preds (1): B2 +// CHECK-NEXT: Succs (1): B0 +// CHECK: [B0 (EXIT)] +// CHECK-NEXT: Preds (1): B1 +int test_param_scope_end_order(int a, int b, int c) { + int res = a + b + c; + return res; +} diff --git a/clang/test/Analysis/scopes-cfg-output.cpp b/clang/test/Analysis/scopes-cfg-output.cpp index 6ed6f3638f75b..9c75492c33a42 100644 --- a/clang/test/Analysis/scopes-cfg-output.cpp +++ b/clang/test/Analysis/scopes-cfg-output.cpp @@ -1437,12 +1437,14 @@ void test_cleanup_functions() { // CHECK-NEXT: 4: return; // CHECK-NEXT: 5: CleanupFunction (cleanup_int) // CHECK-NEXT: 6: CFGScopeEnd(i) +// CHECK-NEXT: 7: CFGScopeEnd(m) // CHECK-NEXT: Preds (1): B3 // CHECK-NEXT: Succs (1): B0 // CHECK: [B2] // CHECK-NEXT: 1: return; // CHECK-NEXT: 2: CleanupFunction (cleanup_int) // CHECK-NEXT: 3: CFGScopeEnd(i) +// CHECK-NEXT: 4: CFGScopeEnd(m) // CHECK-NEXT: Preds (1): B3 // CHECK-NEXT: Succs (1): B0 // CHECK: [B3] diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index e80a05860389c..1191469e23df1 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -529,14 +529,14 @@ TriviallyDestructedClass* trivial_class_uar () { return ptr; // expected-note {{returned here}} } -// FIXME: No lifetime warning for this as no expire facts are generated for parameters const int& return_parameter(int a) { - return a; + return a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} } -// FIXME: No lifetime warning for this as no expire facts are generated for parameters int* return_pointer_to_parameter(int a) { - return &a; + return &a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} } const int& return_reference_to_parameter(int a) @@ -788,9 +788,52 @@ const MyObj& lifetimebound_return_ref_to_local() { // expected-note@-1 {{returned here}} } -// FIXME: Fails to diagnose UAR when a reference to a by-value param escapes via the return value. -View lifetimebound_return_of_by_value_param(MyObj stack_param) { - return Identity(stack_param); +View lifetimebound_return_by_value_param(MyObj stack_param) { + return Identity(stack_param); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +View lifetimebound_return_by_value_multiple_param(int cond, MyObj a, MyObj b, MyObj c) { + if (cond == 1) + return Identity(a); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} + if (cond == 2) + return Identity(b); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} + return Identity(c); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +template +View lifetimebound_return_by_value_param_template(T t) { + return Identity(t); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} +void use_lifetimebound_return_by_value_param_template() { + lifetimebound_return_by_value_param_template(MyObj{}); // expected-note {{in instantiation of}} +} + +void lambda_uar_param() { + auto lambda = [](MyObj stack_param) { + return Identity(stack_param); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} + }; + lambda(MyObj{}); +} + +// FIXME: This should be detected. We see correct destructors but origin flow breaks somewhere. +namespace VariadicTemplatedParamsUAR{ + +template +View Max(Args... args [[clang::lifetimebound]]); + +template +View lifetimebound_return_of_variadic_param(Args... args) { + return Max(args...); +} +void test_variadic() { + lifetimebound_return_of_variadic_param(MyObj{1}, MyObj{2}, MyObj{3}); +} } // FIXME: Fails to diagnose UAF when a reference to a by-value param escapes via an out-param. diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp index 88630119ba8a1..609255437fe82 100644 --- a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp @@ -149,9 +149,18 @@ recordState(Elements=8, Branches=2, Joins=1) enterElement(return b ? p : q;) transfer() recordState(Elements=9, Branches=2, Joins=1) +enterElement([Parm: q] (Lifetime ends)) +transfer() +recordState(Elements=10, Branches=2, Joins=1) +enterElement([Parm: p] (Lifetime ends)) +transfer() +recordState(Elements=11, Branches=2, Joins=1) +enterElement([Parm: b] (Lifetime ends)) +transfer() +recordState(Elements=12, Branches=2, Joins=1) enterBlock(0, false) -recordState(Elements=9, Branches=2, Joins=1) +recordState(Elements=12, Branches=2, Joins=1) endAnalysis() )"); From 978ce83a0272344f0a152f5d5e4b4126ab7b3a42 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Wed, 26 Nov 2025 16:43:03 +0530 Subject: [PATCH 34/35] [LifetimeSafety] Move GSL pointer/owner type detection to LifetimeAnnotations (#169620) Refactored GSL pointer and owner type detection functions to improve code organization and reusability. --- .../LifetimeSafety/LifetimeAnnotations.h | 5 ++ .../LifetimeSafety/FactsGenerator.cpp | 12 ---- .../LifetimeSafety/LifetimeAnnotations.cpp | 31 +++++++++ clang/lib/Sema/CheckExprLifetime.cpp | 69 +++++-------------- clang/lib/Sema/CheckExprLifetime.h | 3 - clang/lib/Sema/SemaAttr.cpp | 4 +- 6 files changed, 57 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h index f02969e0a9563..1a16fb82f9a84 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h @@ -38,6 +38,11 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); /// method or because it's a normal assignment operator. bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); +// Tells whether the type is annotated with [[gsl::Pointer]]. +bool isGslPointerType(QualType QT); +// Tells whether the type is annotated with [[gsl::Owner]]. +bool isGslOwnerType(QualType QT); + } // namespace clang::lifetimes #endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index f7be472ed15b5..00870c3fd4086 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -15,18 +15,6 @@ namespace clang::lifetimes::internal { using llvm::isa_and_present; -static bool isGslPointerType(QualType QT) { - if (const auto *RD = QT->getAsCXXRecordDecl()) { - // We need to check the template definition for specializations. - if (auto *CTSD = dyn_cast(RD)) - return CTSD->getSpecializedTemplate() - ->getTemplatedDecl() - ->hasAttr(); - return RD->hasAttr(); - } - return false; -} - static bool isPointerType(QualType QT) { return QT->isPointerOrReferenceType() || isGslPointerType(QT); } diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp index ad61a42c0eaeb..54e343fc2ee5e 100644 --- a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -10,6 +10,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" @@ -70,4 +71,34 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { return isNormalAssignmentOperator(FD); } +template static bool isRecordWithAttr(QualType Type) { + auto *RD = Type->getAsCXXRecordDecl(); + if (!RD) + return false; + // Generally, if a primary template class declaration is annotated with an + // attribute, all its specializations generated from template instantiations + // should inherit the attribute. + // + // However, since lifetime analysis occurs during parsing, we may encounter + // cases where a full definition of the specialization is not required. In + // such cases, the specialization declaration remains incomplete and lacks the + // attribute. Therefore, we fall back to checking the primary template class. + // + // Note: it is possible for a specialization declaration to have an attribute + // even if the primary template does not. + // + // FIXME: What if the primary template and explicit specialization + // declarations have conflicting attributes? We should consider diagnosing + // this scenario. + bool Result = RD->hasAttr(); + + if (auto *CTSD = dyn_cast(RD)) + Result |= CTSD->getSpecializedTemplate()->getTemplatedDecl()->hasAttr(); + + return Result; +} + +bool isGslPointerType(QualType QT) { return isRecordWithAttr(QT); } +bool isGslOwnerType(QualType QT) { return isRecordWithAttr(QT); } + } // namespace clang::lifetimes diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index 8aebf53c0e76f..17bd89ba972f4 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -17,6 +17,9 @@ #include "llvm/ADT/PointerIntPair.h" namespace clang::sema { +using lifetimes::isGslOwnerType; +using lifetimes::isGslPointerType; + namespace { enum LifetimeKind { /// The lifetime of a temporary bound to this entity ends at the end of the @@ -256,38 +259,8 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, Expr *Init, ReferenceKind RK, LocalVisitor Visit); -template static bool isRecordWithAttr(QualType Type) { - auto *RD = Type->getAsCXXRecordDecl(); - if (!RD) - return false; - // Generally, if a primary template class declaration is annotated with an - // attribute, all its specializations generated from template instantiations - // should inherit the attribute. - // - // However, since lifetime analysis occurs during parsing, we may encounter - // cases where a full definition of the specialization is not required. In - // such cases, the specialization declaration remains incomplete and lacks the - // attribute. Therefore, we fall back to checking the primary template class. - // - // Note: it is possible for a specialization declaration to have an attribute - // even if the primary template does not. - // - // FIXME: What if the primary template and explicit specialization - // declarations have conflicting attributes? We should consider diagnosing - // this scenario. - bool Result = RD->hasAttr(); - - if (auto *CTSD = dyn_cast(RD)) - Result |= CTSD->getSpecializedTemplate()->getTemplatedDecl()->hasAttr(); - - return Result; -} - -// Tells whether the type is annotated with [[gsl::Pointer]]. -bool isGLSPointerType(QualType QT) { return isRecordWithAttr(QT); } - static bool isPointerLikeType(QualType QT) { - return isGLSPointerType(QT) || QT->isPointerType() || QT->isNullPtrType(); + return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType(); } // Decl::isInStdNamespace will return false for iterators in some STL @@ -330,7 +303,7 @@ static bool isContainerOfOwner(const RecordDecl *Container) { return false; const auto &TAs = CTSD->getTemplateArgs(); return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && - isRecordWithAttr(TAs[0].getAsType()); + isGslOwnerType(TAs[0].getAsType()); } // Returns true if the given Record is `std::initializer_list`. @@ -348,14 +321,13 @@ static bool isStdInitializerListOfPointer(const RecordDecl *RD) { static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { if (auto *Conv = dyn_cast_or_null(Callee)) - if (isRecordWithAttr(Conv->getConversionType()) && + if (isGslPointerType(Conv->getConversionType()) && Callee->getParent()->hasAttr()) return true; if (!isInStlNamespace(Callee->getParent())) return false; - if (!isRecordWithAttr( - Callee->getFunctionObjectParameterType()) && - !isRecordWithAttr(Callee->getFunctionObjectParameterType())) + if (!isGslPointerType(Callee->getFunctionObjectParameterType()) && + !isGslOwnerType(Callee->getFunctionObjectParameterType())) return false; if (isPointerLikeType(Callee->getReturnType())) { if (!Callee->getIdentifier()) @@ -392,7 +364,7 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { if (!RD->hasAttr() && !RD->hasAttr()) return false; if (FD->getReturnType()->isPointerType() || - isRecordWithAttr(FD->getReturnType())) { + isGslPointerType(FD->getReturnType())) { return llvm::StringSwitch(FD->getName()) .Cases("begin", "rbegin", "cbegin", "crbegin", true) .Cases("end", "rend", "cend", "crend", true) @@ -464,7 +436,7 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { return true; // RHS must be an owner. - if (!isRecordWithAttr(RHSArgType)) + if (!isGslOwnerType(RHSArgType)) return false; // Bail out if the RHS is Owner. @@ -546,7 +518,7 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, // Once we initialized a value with a non gsl-owner reference, it can no // longer dangle. if (ReturnType->isReferenceType() && - !isRecordWithAttr(ReturnType->getPointeeType())) { + !isGslOwnerType(ReturnType->getPointeeType())) { for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) { if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit || PE.Kind == IndirectLocalPathEntry::LifetimeBoundCall) @@ -1157,8 +1129,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // auto p2 = Temp().owner; // Here p2 is dangling. if (const auto *FD = llvm::dyn_cast_or_null(E.D); FD && !FD->getType()->isReferenceType() && - isRecordWithAttr(FD->getType()) && - LK != LK_MemInitializer) { + isGslOwnerType(FD->getType()) && LK != LK_MemInitializer) { return Report; } return Abandon; @@ -1190,10 +1161,9 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // const GSLOwner& func(const Foo& foo [[clang::lifetimebound]]) // GSLOwner* func(cosnt Foo& foo [[clang::lifetimebound]]) // GSLPointer func(const Foo& foo [[clang::lifetimebound]]) - if (FD && - ((FD->getReturnType()->isPointerOrReferenceType() && - isRecordWithAttr(FD->getReturnType()->getPointeeType())) || - isGLSPointerType(FD->getReturnType()))) + if (FD && ((FD->getReturnType()->isPointerOrReferenceType() && + isGslOwnerType(FD->getReturnType()->getPointeeType())) || + isGslPointerType(FD->getReturnType()))) return Report; return Abandon; @@ -1205,7 +1175,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // int &p = *localUniquePtr; // someContainer.add(std::move(localUniquePtr)); // return p; - if (!pathContainsInit(Path) && isRecordWithAttr(L->getType())) + if (!pathContainsInit(Path) && isGslOwnerType(L->getType())) return Report; return Abandon; } @@ -1214,8 +1184,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, auto *MTE = dyn_cast(L); bool IsGslPtrValueFromGslTempOwner = - MTE && !MTE->getExtendingDecl() && - isRecordWithAttr(MTE->getType()); + MTE && !MTE->getExtendingDecl() && isGslOwnerType(MTE->getType()); // Skipping a chain of initializing gsl::Pointer annotated objects. // We are looking only for the final source to find out if it was // a local or temporary owner or the address of a local @@ -1230,7 +1199,7 @@ static bool shouldRunGSLAssignmentAnalysis(const Sema &SemaRef, bool EnableGSLAssignmentWarnings = !SemaRef.getDiagnostics().isIgnored( diag::warn_dangling_lifetime_pointer_assignment, SourceLocation()); return (EnableGSLAssignmentWarnings && - (isRecordWithAttr(Entity.LHS->getType()) || + (isGslPointerType(Entity.LHS->getType()) || lifetimes::isAssignmentOperatorLifetimeBound( Entity.AssignmentOperator))); } @@ -1399,7 +1368,7 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity, // Suppress false positives for code like the one below: // Ctor(unique_ptr up) : pointer(up.get()), owner(move(up)) {} // FIXME: move this logic to analyzePathForGSLPointer. - if (DRE && isRecordWithAttr(DRE->getType())) + if (DRE && isGslOwnerType(DRE->getType())) return false; auto *VD = DRE ? dyn_cast(DRE->getDecl()) : nullptr; diff --git a/clang/lib/Sema/CheckExprLifetime.h b/clang/lib/Sema/CheckExprLifetime.h index 16595d0ca1b36..38b7061988dc7 100644 --- a/clang/lib/Sema/CheckExprLifetime.h +++ b/clang/lib/Sema/CheckExprLifetime.h @@ -18,9 +18,6 @@ namespace clang::sema { -// Tells whether the type is annotated with [[gsl::Pointer]]. -bool isGLSPointerType(QualType QT); - /// Describes an entity that is being assigned. struct AssignedEntity { // The left-hand side expression of the assignment. diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 3eed6ad7fe6b3..f7175d264ce23 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "CheckExprLifetime.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" @@ -289,7 +289,7 @@ void Sema::inferLifetimeCaptureByAttribute(FunctionDecl *FD) { // We only apply the lifetime_capture_by attribute to parameters of // pointer-like reference types (`const T&`, `T&&`). if (PVD->getType()->isReferenceType() && - sema::isGLSPointerType(PVD->getType().getNonReferenceType())) { + lifetimes::isGslPointerType(PVD->getType().getNonReferenceType())) { int CaptureByThis[] = {LifetimeCaptureByAttr::This}; PVD->addAttr( LifetimeCaptureByAttr::CreateImplicit(Context, CaptureByThis, 1)); From 1eaef91213a0ff46a8f61775f42c590006c6544d Mon Sep 17 00:00:00 2001 From: Kashika Akhouri Date: Fri, 5 Dec 2025 15:28:43 +0530 Subject: [PATCH 35/35] [LifetimeSafety] Suggest lifetime annotations (#169767) Add lifetime annotation suggestion in lifetime analysis. This PR introduces a new feature to Clang's lifetime analysis to detect and suggest missing `[[clang::lifetimebound]]` annotations on function parameters. It introduces the concept of `placeholder loans`. At the entry of a function, a special placeholder loan is created for each pointer or reference parameter. The analysis then tracks these loans using `OriginFlow` facts. If an `OriginEscapesFact` shows that an origin holding a placeholder loan escapes the function's scope (e.g., via a return statement), a new warning is issued. This warning, controlled by the warning flag `-Wexperimental-lifetime-safety-suggestions`, suggests adding the `[[clang::lifetimebound]]` attribute to the corresponding parameter. Example: ```cpp std::string_view foo(std::string_view a) { return a; } ``` Facts: ``` Function: foo Block B2: Issue (0 (Placeholder loan) , ToOrigin: 0 (Decl: a)) End of Block Block B1: Use (0 (Decl: a), Read) OriginFlow (Dest: 1 (Expr: ImplicitCastExpr), Src: 0 (Decl: a)) OriginFlow (Dest: 2 (Expr: CXXConstructExpr), Src: 1 (Expr: ImplicitCastExpr)) OriginEscapes (2 (Expr: CXXConstructExpr)) End of Block Block B0: End of Block ``` Sample warning: ``` o.cpp:61:39: warning: param should be marked [[clang::lifetimebound]] [-Wexperimental-lifetime-safety-suggestions] 61 | std::string_view foo(std::string_view a) { | ~~~~~~~~~~~~~~~~~^ | [[clang::lifetimebound]] o.cpp:62:9: note: param escapes here 62 | return a; ``` Fixes: https://github.com/llvm/llvm-project/issues/169939 --- .../Analyses/LifetimeSafety/FactsGenerator.h | 1 + .../Analyses/LifetimeSafety/LifetimeSafety.h | 4 + .../Analysis/Analyses/LifetimeSafety/Loans.h | 95 ++++++++++++--- clang/include/clang/Basic/DiagnosticGroups.td | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 7 ++ clang/lib/Analysis/LifetimeSafety/Checker.cpp | 32 ++++- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 4 +- .../LifetimeSafety/FactsGenerator.cpp | 47 ++++++-- clang/lib/Analysis/LifetimeSafety/Loans.cpp | 8 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 13 +++ .../Sema/warn-lifetime-safety-suggestions.cpp | 109 ++++++++++++++++++ .../unittests/Analysis/LifetimeSafetyTest.cpp | 7 +- 12 files changed, 297 insertions(+), 32 deletions(-) create mode 100644 clang/test/Sema/warn-lifetime-safety-suggestions.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 878cb90b685f9..a1acd8615afdd 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -91,6 +91,7 @@ class FactsGenerator : public ConstStmtVisitor { void markUseAsWrite(const DeclRefExpr *DRE); + llvm::SmallVector issuePlaceholderLoans(); FactManager &FactMgr; AnalysisDeclContext &AC; llvm::SmallVector CurrentBlockFacts; diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h index b34a7f18b5809..31fae55f60486 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -47,6 +47,10 @@ class LifetimeSafetyReporter { const Expr *EscapeExpr, SourceLocation ExpiryLoc, Confidence Confidence) {} + + // Suggests lifetime bound annotations for function paramters + virtual void suggestAnnotation(const ParmVarDecl *PVD, + const Expr *EscapeExpr) {} }; /// The main entry point for the analysis. diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h index 7f5cf03fd3e5f..e9bccd4773622 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h @@ -34,21 +34,81 @@ struct AccessPath { AccessPath(const clang::ValueDecl *D) : D(D) {} }; -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { +/// An abstract base class for a single "Loan" which represents lending a +/// storage in memory. +class Loan { /// TODO: Represent opaque loans. /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it /// is represented as empty LoanSet - LoanID ID; +public: + enum class Kind : uint8_t { + /// A loan with an access path to a storage location. + Path, + /// A non-expiring placeholder loan for a parameter, representing a borrow + /// from the function's caller. + Placeholder + }; + + Loan(Kind K, LoanID ID) : K(K), ID(ID) {} + virtual ~Loan() = default; + + Kind getKind() const { return K; } + LoanID getID() const { return ID; } + + virtual void dump(llvm::raw_ostream &OS) const = 0; + +private: + const Kind K; + const LoanID ID; +}; + +/// PathLoan represents lending a storage location that is visible within the +/// function's scope (e.g., a local variable on stack). +class PathLoan : public Loan { AccessPath Path; /// The expression that creates the loan, e.g., &x. const Expr *IssueExpr; - Loan(LoanID id, AccessPath path, const Expr *IssueExpr) - : ID(id), Path(path), IssueExpr(IssueExpr) {} +public: + PathLoan(LoanID ID, AccessPath Path, const Expr *IssueExpr) + : Loan(Kind::Path, ID), Path(Path), IssueExpr(IssueExpr) {} + + const AccessPath &getAccessPath() const { return Path; } + const Expr *getIssueExpr() const { return IssueExpr; } - void dump(llvm::raw_ostream &OS) const; + void dump(llvm::raw_ostream &OS) const override; + + static bool classof(const Loan *L) { return L->getKind() == Kind::Path; } +}; + +/// A placeholder loan held by a function parameter, representing a borrow from +/// the caller's scope. +/// +/// Created at function entry for each pointer or reference parameter with an +/// origin. Unlike PathLoan, placeholder loans: +/// - Have no IssueExpr (created at function entry, not at a borrow site) +/// - Have no AccessPath (the borrowed object is not visible to the function) +/// - Do not currently expire, but may in the future when modeling function +/// invalidations (e.g., vector::push_back) +/// +/// When a placeholder loan escapes the function (e.g., via return), it +/// indicates the parameter should be marked [[clang::lifetimebound]], enabling +/// lifetime annotation suggestions. +class PlaceholderLoan : public Loan { + /// The function parameter that holds this placeholder loan. + const ParmVarDecl *PVD; + +public: + PlaceholderLoan(LoanID ID, const ParmVarDecl *PVD) + : Loan(Kind::Placeholder, ID), PVD(PVD) {} + + const ParmVarDecl *getParmVarDecl() const { return PVD; } + + void dump(llvm::raw_ostream &OS) const override; + + static bool classof(const Loan *L) { + return L->getKind() == Kind::Placeholder; + } }; /// Manages the creation, storage and retrieval of loans. @@ -56,16 +116,24 @@ class LoanManager { public: LoanManager() = default; - Loan &addLoan(AccessPath Path, const Expr *IssueExpr) { - AllLoans.emplace_back(getNextLoanID(), Path, IssueExpr); - return AllLoans.back(); + template + LoanType *createLoan(Args &&...args) { + static_assert( + std::is_same_v || + std::is_same_v, + "createLoan can only be used with PathLoan or PlaceholderLoan"); + void *Mem = LoanAllocator.Allocate(); + auto *NewLoan = + new (Mem) LoanType(getNextLoanID(), std::forward(args)...); + AllLoans.push_back(NewLoan); + return NewLoan; } - const Loan &getLoan(LoanID ID) const { + const Loan *getLoan(LoanID ID) const { assert(ID.Value < AllLoans.size()); return AllLoans[ID.Value]; } - llvm::ArrayRef getLoans() const { return AllLoans; } + llvm::ArrayRef getLoans() const { return AllLoans; } private: LoanID getNextLoanID() { return NextLoanID++; } @@ -73,7 +141,8 @@ class LoanManager { LoanID NextLoanID{0}; /// TODO(opt): Profile and evaluate the usefullness of small buffer /// optimisation. - llvm::SmallVector AllLoans; + llvm::SmallVector AllLoans; + llvm::BumpPtrAllocator LoanAllocator; }; } // namespace clang::lifetimes::internal diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 1e3c0e852e8e2..d5b6fc7aa43fb 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -543,6 +543,8 @@ def LifetimeSafety : DiagGroup<"experimental-lifetime-safety", Experimental warnings to detect use-after-free and related temporal safety bugs based on lifetime safety analysis. }]; } +def LifetimeSafetySuggestions + : DiagGroup<"experimental-lifetime-safety-suggestions">; def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3a11589345d35..d3ec2385e9783 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10748,6 +10748,13 @@ def note_lifetime_safety_used_here : Note<"later used here">; def note_lifetime_safety_destroyed_here : Note<"destroyed here">; def note_lifetime_safety_returned_here : Note<"returned here">; +def warn_lifetime_safety_suggest_lifetimebound + : Warning<"param should be marked [[clang::lifetimebound]]">, + InGroup, + DefaultIgnore; + +def note_lifetime_safety_suggestion_returned_here : Note<"param returned here">; + // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. // Array comparisons have similar warnings diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp index 1f7c282dadac2..74792768e2c57 100644 --- a/clang/lib/Analysis/LifetimeSafety/Checker.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -50,6 +50,7 @@ struct PendingWarning { class LifetimeChecker { private: llvm::DenseMap FinalWarningsMap; + llvm::DenseMap AnnotationWarningsMap; const LoanPropagationAnalysis &LoanPropagation; const LiveOriginsAnalysis &LiveOrigins; const FactManager &FactMgr; @@ -65,7 +66,26 @@ class LifetimeChecker { for (const Fact *F : FactMgr.getFacts(B)) if (const auto *EF = F->getAs()) checkExpiry(EF); + else if (const auto *OEF = F->getAs()) + checkAnnotations(OEF); issuePendingWarnings(); + suggestAnnotations(); + } + + /// Checks if an escaping origin holds a placeholder loan, indicating a + /// missing [[clang::lifetimebound]] annotation. + void checkAnnotations(const OriginEscapesFact *OEF) { + OriginID EscapedOID = OEF->getEscapedOriginID(); + LoanSet EscapedLoans = LoanPropagation.getLoans(EscapedOID, OEF); + for (LoanID LID : EscapedLoans) { + const Loan *L = FactMgr.getLoanMgr().getLoan(LID); + if (const auto *PL = dyn_cast(L)) { + const ParmVarDecl *PVD = PL->getParmVarDecl(); + if (PVD->hasAttr()) + continue; + AnnotationWarningsMap.try_emplace(PVD, OEF->getEscapeExpr()); + } + } } /// Checks for use-after-free & use-after-return errors when a loan expires. @@ -114,8 +134,9 @@ class LifetimeChecker { if (!Reporter) return; for (const auto &[LID, Warning] : FinalWarningsMap) { - const Loan &L = FactMgr.getLoanMgr().getLoan(LID); - const Expr *IssueExpr = L.IssueExpr; + const Loan *L = FactMgr.getLoanMgr().getLoan(LID); + const auto *BL = cast(L); + const Expr *IssueExpr = BL->getIssueExpr(); llvm::PointerUnion CausingFact = Warning.CausingFact; Confidence Confidence = Warning.ConfidenceLevel; @@ -132,6 +153,13 @@ class LifetimeChecker { llvm_unreachable("Unhandled CausingFact type"); } } + + void suggestAnnotations() { + if (!Reporter) + return; + for (const auto &[PVD, EscapeExpr] : AnnotationWarningsMap) + Reporter->suggestAnnotation(PVD, EscapeExpr); + } }; } // namespace diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index 0ae7111c489e8..68317318ff4e2 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -20,7 +20,7 @@ void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, const OriginManager &OM) const { OS << "Issue ("; - LM.getLoan(getLoanID()).dump(OS); + LM.getLoan(getLoanID())->dump(OS); OS << ", ToOrigin: "; OM.dump(getOriginID(), OS); OS << ")\n"; @@ -29,7 +29,7 @@ void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, const OriginManager &) const { OS << "Expire ("; - LM.getLoan(getLoanID()).dump(OS); + LM.getLoan(getLoanID())->dump(OS); OS << ")\n"; } diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 00870c3fd4086..2f270b03996f2 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -31,22 +31,28 @@ static bool hasOrigin(const VarDecl *VD) { /// This function should be called whenever a DeclRefExpr represents a borrow. /// \param DRE The declaration reference expression that initiates the borrow. /// \return The new Loan on success, nullptr otherwise. -static const Loan *createLoan(FactManager &FactMgr, const DeclRefExpr *DRE) { +static const PathLoan *createLoan(FactManager &FactMgr, + const DeclRefExpr *DRE) { if (const auto *VD = dyn_cast(DRE->getDecl())) { AccessPath Path(VD); // The loan is created at the location of the DeclRefExpr. - return &FactMgr.getLoanMgr().addLoan(Path, DRE); + return FactMgr.getLoanMgr().createLoan(Path, DRE); } return nullptr; } void FactsGenerator::run() { llvm::TimeTraceScope TimeProfile("FactGenerator"); + const CFG &Cfg = *AC.getCFG(); + llvm::SmallVector PlaceholderLoanFacts = issuePlaceholderLoans(); // Iterate through the CFG blocks in reverse post-order to ensure that // initializations and destructions are processed in the correct sequence. for (const CFGBlock *Block : *AC.getAnalysis()) { CurrentBlockFacts.clear(); EscapesInCurrentBlock.clear(); + if (Block == &Cfg.getEntry()) + CurrentBlockFacts.append(PlaceholderLoanFacts.begin(), + PlaceholderLoanFacts.end()); for (unsigned I = 0; I < Block->size(); ++I) { const CFGElement &Element = Block->Elements[I]; if (std::optional CS = Element.getAs()) @@ -85,7 +91,7 @@ void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { if (const Loan *L = createLoan(FactMgr, DRE)) { OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); CurrentBlockFacts.push_back( - FactMgr.createFact(L->ID, ExprOID)); + FactMgr.createFact(L->getID(), ExprOID)); } } } @@ -223,13 +229,14 @@ void FactsGenerator::handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds) { if (!LifetimeEndsVD) return; // Iterate through all loans to see if any expire. - for (const auto &Loan : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = Loan.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == LifetimeEndsVD) - CurrentBlockFacts.push_back(FactMgr.createFact( - Loan.ID, LifetimeEnds.getTriggerStmt()->getEndLoc())); + for (const auto *Loan : FactMgr.getLoanMgr().getLoans()) { + if (const auto *BL = dyn_cast(Loan)) { + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (BL->getAccessPath().D == LifetimeEndsVD) + CurrentBlockFacts.push_back(FactMgr.createFact( + BL->getID(), LifetimeEnds.getTriggerStmt()->getEndLoc())); + } } } @@ -342,4 +349,24 @@ void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { UseFacts[DRE]->markAsWritten(); } +// Creates an IssueFact for a new placeholder loan for each pointer or reference +// parameter at the function's entry. +llvm::SmallVector FactsGenerator::issuePlaceholderLoans() { + const auto *FD = dyn_cast(AC.getDecl()); + if (!FD) + return {}; + + llvm::SmallVector PlaceholderLoanFacts; + for (const ParmVarDecl *PVD : FD->parameters()) { + if (hasOrigin(PVD)) { + const PlaceholderLoan *L = + FactMgr.getLoanMgr().createLoan(PVD); + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*PVD); + PlaceholderLoanFacts.push_back( + FactMgr.createFact(L->getID(), OID)); + } + } + return PlaceholderLoanFacts; +} + } // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp index 2c85a3c6083f3..fdfdbb40a2a46 100644 --- a/clang/lib/Analysis/LifetimeSafety/Loans.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp @@ -10,9 +10,13 @@ namespace clang::lifetimes::internal { -void Loan::dump(llvm::raw_ostream &OS) const { - OS << ID << " (Path: "; +void PathLoan::dump(llvm::raw_ostream &OS) const { + OS << getID() << " (Path: "; OS << Path.D->getNameAsString() << ")"; } +void PlaceholderLoan::dump(llvm::raw_ostream &OS) const { + OS << getID() << " (Placeholder loan)"; +} + } // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 69151f77e4fcf..e769b23ee0bcf 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -3529,6 +3529,19 @@ class LifetimeSafetyReporterImpl : public LifetimeSafetyReporter { << EscapeExpr->getEndLoc(); } + void suggestAnnotation(const ParmVarDecl *PVD, + const Expr *EscapeExpr) override { + SourceLocation InsertionPoint = Lexer::getLocForEndOfToken( + PVD->getEndLoc(), 0, S.getSourceManager(), S.getLangOpts()); + S.Diag(PVD->getBeginLoc(), diag::warn_lifetime_safety_suggest_lifetimebound) + << PVD->getSourceRange() + << FixItHint::CreateInsertion(InsertionPoint, + " [[clang::lifetimebound]]"); + S.Diag(EscapeExpr->getBeginLoc(), + diag::note_lifetime_safety_suggestion_returned_here) + << EscapeExpr->getSourceRange(); + } + private: Sema &S; }; diff --git a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp new file mode 100644 index 0000000000000..c0f675a301d14 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety-suggestions -verify %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor + MyObj operator+(MyObj); +}; + +struct [[gsl::Pointer()]] View { + View(const MyObj&); // Borrows from MyObj + View(); + void use() const; +}; + +View return_view_directly (View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +View conditional_return_view ( + View a, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + View b, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + bool c) { + View res; + if (c) + res = a; + else + res = b; + return res; // expected-note 2 {{param returned here}} +} + +// FIXME: Fails to generate lifetime suggestion for reference types as these are not handled currently. +MyObj& return_reference (MyObj& a, MyObj& b, bool c) { + if(c) { + return a; + } + return b; +} + +// FIXME: Fails to generate lifetime suggestion for reference types as these are not handled currently. +View return_view_from_reference (MyObj& p) { + return p; +} + +int* return_pointer_directly (int* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +MyObj* return_pointer_object (MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +View only_one_paramter_annotated (View a [[clang::lifetimebound]], + View b, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + bool c) { + if(c) + return a; + return b; // expected-note {{param returned here}} +} + +View reassigned_to_another_parameter ( + View a, + View b) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + a = b; + return a; // expected-note {{param returned here}} +} + +struct ReturnsSelf { + const ReturnsSelf& get() const { + return *this; + } +}; + +struct ViewProvider { + MyObj data; + View getView() const { + return data; + } +}; + +// FIXME: Fails to generate lifetime suggestions for the implicit 'this' parameter, as this feature is not yet implemented. +void test_get_on_temporary() { + const ReturnsSelf& s_ref = ReturnsSelf().get(); + (void)s_ref; +} + +// FIXME: Fails to generate lifetime suggestions for the implicit 'this' parameter, as this feature is not yet implemented. +void test_getView_on_temporary() { + View sv = ViewProvider{1}.getView(); + (void)sv; +} + +//===----------------------------------------------------------------------===// +// Negative Test Cases +//===----------------------------------------------------------------------===// + +View already_annotated(View a [[clang::lifetimebound]]) { + return a; +} + +MyObj return_obj_by_value(MyObj& p) { + return p; +} + +MyObj GlobalMyObj; +View Global = GlobalMyObj; +View Reassigned(View a) { + a = Global; + return a; +} diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp index a895475013c98..fee4e79e27d03 100644 --- a/clang/unittests/Analysis/LifetimeSafetyTest.cpp +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -114,9 +114,10 @@ class LifetimeTestHelper { return {}; } std::vector LID; - for (const Loan &L : Analysis.getFactManager().getLoanMgr().getLoans()) - if (L.Path.D == VD) - LID.push_back(L.ID); + for (const Loan *L : Analysis.getFactManager().getLoanMgr().getLoans()) + if (const auto *BL = dyn_cast(L)) + if (BL->getAccessPath().D == VD) + LID.push_back(L->getID()); if (LID.empty()) { ADD_FAILURE() << "Loan for '" << VarName << "' not found."; return {};