diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index b05e9c6c56ed0..786f6f5b215c9 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -1082,6 +1082,14 @@ clang:openmp: - llvm/unittests/Frontend/OpenMP* - llvm/test/Transforms/OpenMP/** +clang:temporal-safety: + - clang/include/clang/Analysis/Analyses/LifetimeSafety/** + - clang/lib/Analysis/LifetimeSafety/** + - clang/unittests/Analysis/LifetimeSafety* + - clang/test/Sema/*lifetime-safety* + - clang/test/Sema/*lifetime-analysis* + - clang/test/Analysis/LifetimeSafety/** + clang:as-a-library: - clang/tools/libclang/** - clang/bindings/** diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h deleted file mode 100644 index 9998702a41cab..0000000000000 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the entry point for a dataflow-based static analysis -// that checks for C++ lifetime violations. -// -// The analysis is based on the concepts of "origins" and "loans" to track -// pointer lifetimes and detect issues like use-after-free and dangling -// pointers. 
See the RFC for more details: -// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H -#include "clang/AST/DeclBase.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -namespace clang { - -void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC); - -} // namespace clang - -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h new file mode 100644 index 0000000000000..03636be7d00c3 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Checker.h @@ -0,0 +1,35 @@ +//===- Checker.h - C++ Lifetime Safety Analysis -*----------- C++-*-=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines and enforces the lifetime safety policy. It detects +// use-after-free errors by examining loan expiration points and checking if +// any live origins hold the expired loans. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" + +namespace clang::lifetimes::internal { + +/// Runs the lifetime checker, which detects use-after-free errors by +/// examining loan expiration points and checking if any live origins hold +/// the expired loan. +void runLifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter); + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_CHECKER_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h new file mode 100644 index 0000000000000..b5f7f8746186a --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Facts.h @@ -0,0 +1,252 @@ +//===- Facts.h - Lifetime Analysis Facts and Fact Manager ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Facts, which are atomic lifetime-relevant events (such as +// loan issuance, loan expiration, origin flow, and use), and the FactManager, +// which manages the storage and retrieval of facts for each CFG block. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include + +namespace clang::lifetimes::internal { + +using FactID = utils::ID; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + /// This can also optionally kill the destination origin before flowing into + /// it. Otherwise, the source's loan set is merged into the destination's + /// loan set. + OriginFlow, + /// An origin is used (eg. appears as l-value expression like DeclRefExpr). + Use, + /// A marker for a specific point in the code, for testing. + TestPoint, + /// An origin that escapes the function scope (e.g., via return). + OriginEscapes, + }; + +private: + Kind K; + FactID ID; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + void setID(FactID ID) { this->ID = ID; } + FactID getID() const { return ID; } + + template const T *getAs() const { + if (T::classof(this)) + return static_cast(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const; +}; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// `Fact`. 
Each program point is identified by a lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const override; +}; + +class ExpireFact : public Fact { + LoanID LID; + SourceLocation ExpiryLoc; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID, SourceLocation ExpiryLoc) + : Fact(Kind::Expire), LID(LID), ExpiryLoc(ExpiryLoc) {} + + LoanID getLoanID() const { return LID; } + SourceLocation getExpiryLoc() const { return ExpiryLoc; } + + void dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const override; +}; + +class OriginFlowFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + // True if the destination origin should be killed (i.e., its current loans + // cleared) before the source origin's loans are flowed into it. 
+ bool KillDest; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginFlow; + } + + OriginFlowFact(OriginID OIDDest, OriginID OIDSrc, bool KillDest) + : Fact(Kind::OriginFlow), OIDDest(OIDDest), OIDSrc(OIDSrc), + KillDest(KillDest) {} + + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + bool getKillDest() const { return KillDest; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class OriginEscapesFact : public Fact { + OriginID OID; + const Expr *EscapeExpr; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::OriginEscapes; + } + + OriginEscapesFact(OriginID OID, const Expr *EscapeExpr) + : Fact(Kind::OriginEscapes), OID(OID), EscapeExpr(EscapeExpr) {} + OriginID getEscapedOriginID() const { return OID; } + const Expr *getEscapeExpr() const { return EscapeExpr; }; + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +class UseFact : public Fact { + const Expr *UseExpr; + OriginID OID; + // True if this use is a write operation (e.g., left-hand side of assignment). + // Write operations are exempted from use-after-free checks. + bool IsWritten = false; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Use; } + + UseFact(const Expr *UseExpr, OriginManager &OM) + : Fact(Kind::Use), UseExpr(UseExpr), OID(OM.get(*UseExpr)) {} + + OriginID getUsedOrigin() const { return OID; } + const Expr *getUseExpr() const { return UseExpr; } + void markAsWritten() { IsWritten = true; } + bool isWritten() const { return IsWritten; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const override; +}; + +/// A dummy-fact used to mark a specific point in the code for testing. +/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast. 
+class TestPointFact : public Fact { + StringRef Annotation; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; } + + explicit TestPointFact(StringRef Annotation) + : Fact(Kind::TestPoint), Annotation(Annotation) {} + + StringRef getAnnotation() const { return Annotation; } + + void dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const override; +}; + +class FactManager { +public: + void init(const CFG &Cfg) { + assert(BlockToFacts.empty() && "FactManager already initialized"); + BlockToFacts.resize(Cfg.getNumBlockIDs()); + } + + llvm::ArrayRef getFacts(const CFGBlock *B) const { + return BlockToFacts[B->getBlockID()]; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { + if (!NewFacts.empty()) + BlockToFacts[B->getBlockID()].assign(NewFacts.begin(), NewFacts.end()); + } + + template + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate(); + FactType *Res = new (Mem) FactType(std::forward(args)...); + Res->setID(NextFactID++); + return Res; + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const; + + /// Retrieves program points that were specially marked in the source code + /// for testing. + /// + /// The analysis recognizes special function calls of the form + /// `void("__lifetime_test_point_")` as test points. This method returns + /// a map from the annotation string () to the corresponding + /// `ProgramPoint`. This allows test harnesses to query the analysis state at + /// user-defined locations in the code. + /// \note This is intended for testing only. + llvm::StringMap getTestPoints() const; + /// Retrieves all the facts in the block containing Program Point P. + /// \note This is intended for testing only. 
+ llvm::ArrayRef getBlockContaining(ProgramPoint P) const; + + unsigned getNumFacts() const { return NextFactID.Value; } + + LoanManager &getLoanMgr() { return LoanMgr; } + const LoanManager &getLoanMgr() const { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + const OriginManager &getOriginMgr() const { return OriginMgr; } + +private: + FactID NextFactID{0}; + LoanManager LoanMgr; + OriginManager OriginMgr; + /// Facts for each CFG block, indexed by block ID. + llvm::SmallVector> BlockToFacts; + llvm::BumpPtrAllocator FactAllocator; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h new file mode 100644 index 0000000000000..a1acd8615afdd --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -0,0 +1,112 @@ +//===- FactsGenerator.h - Lifetime Facts Generation -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FactsGenerator, which traverses the AST to generate +// lifetime-relevant facts (such as loan issuance, expiration, origin flow, +// and use) from CFG statements. These facts are used by the dataflow analyses +// to track pointer lifetimes and detect use-after-free errors. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H + +#include "clang/AST/StmtVisitor.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang::lifetimes::internal { + +class FactsGenerator : public ConstStmtVisitor { + using Base = ConstStmtVisitor; + +public: + FactsGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run(); + + void VisitDeclStmt(const DeclStmt *DS); + void VisitDeclRefExpr(const DeclRefExpr *DRE); + void VisitCXXConstructExpr(const CXXConstructExpr *CCE); + void VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE); + void VisitCallExpr(const CallExpr *CE); + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N); + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE); + void VisitUnaryOperator(const UnaryOperator *UO); + void VisitReturnStmt(const ReturnStmt *RS); + void VisitBinaryOperator(const BinaryOperator *BO); + void VisitConditionalOperator(const ConditionalOperator *CO); + void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE); + void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE); + void VisitInitListExpr(const InitListExpr *ILE); + void VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *MTE); + +private: + void handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds); + + void handleGSLPointerConstruction(const CXXConstructExpr *CCE); + + /// Checks if a call-like expression creates a borrow by passing a value to a + /// reference parameter, creating an IssueFact if it does. 
+ /// \param IsGslConstruction True if this is a GSL construction where all + /// argument origins should flow to the returned origin. + void handleFunctionCall(const Expr *Call, const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction = false); + + template + void flowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back(FactMgr.createFact( + DestOID, SrcOID, /*KillDest=*/false)); + } + + template + void killAndFlowOrigin(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact(DestOID, SrcOID, /*KillDest=*/true)); + } + + /// Checks if the expression is a `void("__lifetime_test_point_...")` cast. + /// If so, creates a `TestPointFact` and returns true. + bool handleTestPoint(const CXXFunctionalCastExpr *FCE); + + void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr); + + // A DeclRefExpr will be treated as a use of the referenced decl. It will be + // checked for use-after-free unless it is later marked as being written to + // (e.g. on the left-hand side of an assignment). + void handleUse(const DeclRefExpr *DRE); + + void markUseAsWrite(const DeclRefExpr *DRE); + + llvm::SmallVector issuePlaceholderLoans(); + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector CurrentBlockFacts; + // Collect origins that escape the function in this block (OriginEscapesFact), + // appended at the end of CurrentBlockFacts to ensure they appear after + // ExpireFact entries. + llvm::SmallVector EscapesInCurrentBlock; + // To distinguish between reads and writes for use-after-free checks, this map + // stores the `UseFact` for each `DeclRefExpr`. We initially identify all + // `DeclRefExpr`s as "read" uses. 
When an assignment is processed, the use + // corresponding to the left-hand side is updated to be a "write", thereby + // exempting it from the check. + llvm::DenseMap UseFacts; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_FACTSGENERATOR_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h new file mode 100644 index 0000000000000..1a16fb82f9a84 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h @@ -0,0 +1,48 @@ +//===- LifetimeAnnotations.h - -*--------------- C++--------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Helper functions to inspect and infer lifetime annotations. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H + +#include "clang/AST/DeclCXX.h" + +namespace clang ::lifetimes { + +/// Returns the most recent declaration of the method to ensure all +/// lifetime-bound attributes from redeclarations are considered. +const FunctionDecl *getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD); + +/// Returns the most recent declaration of the method to ensure all +/// lifetime-bound attributes from redeclarations are considered. +const CXXMethodDecl * +getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD); + +// Return true if this is an "normal" assignment operator. 
+// We assume that a normal assignment operator always returns *this, that is, +// an lvalue reference that is the same type as the implicit object parameter +// (or the LHS for a non-member compound assignment such as operator+=). +bool isNormalAssignmentOperator(const FunctionDecl *FD); + +/// Returns true if this is an assignment operator where the parameter +/// has the lifetimebound attribute. +bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD); + +/// Returns true if the implicit object parameter (this) should be considered +/// lifetimebound, either due to an explicit lifetimebound attribute on the +/// method or because it's a normal assignment operator. +bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); + +// Tells whether the type is annotated with [[gsl::Pointer]]. +bool isGslPointerType(QualType QT); +// Tells whether the type is annotated with [[gsl::Owner]]. +bool isGslOwnerType(QualType QT); + +} // namespace clang::lifetimes + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMEANNOTATIONS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h new file mode 100644 index 0000000000000..31fae55f60486 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h @@ -0,0 +1,96 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the main entry point and orchestrator for the C++ Lifetime +// Safety Analysis. It coordinates the entire analysis pipeline: fact +// generation, loan propagation, live origins analysis, and enforcement of +// lifetime safety policy. 
+// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/AnalysisDeclContext.h" + +namespace clang::lifetimes { + +/// Enum to track the confidence level of a potential error. +enum class Confidence : uint8_t { + None, + Maybe, // Reported as a potential error (-Wlifetime-safety-strict) + Definite // Reported as a definite error (-Wlifetime-safety-permissive) +}; + +class LifetimeSafetyReporter { +public: + LifetimeSafetyReporter() = default; + virtual ~LifetimeSafetyReporter() = default; + + virtual void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, + Confidence Confidence) {} + + virtual void reportUseAfterReturn(const Expr *IssueExpr, + const Expr *EscapeExpr, + SourceLocation ExpiryLoc, + Confidence Confidence) {} + + // Suggests lifetime bound annotations for function parameters + virtual void suggestAnnotation(const ParmVarDecl *PVD, + const Expr *EscapeExpr) {} +}; + +/// The main entry point for the analysis. +void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + +namespace internal { +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. 
+struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFactory{/*canonicalize=*/false}; + LoanSet::Factory LoanSetFactory{/*canonicalize=*/false}; + LivenessMap::Factory LivenessMapFactory{/*canonicalize=*/false}; +}; + +/// Running the lifetime safety analysis and querying its results. It +/// encapsulates the various dataflow analyses. +class LifetimeSafetyAnalysis { +public: + LifetimeSafetyAnalysis(AnalysisDeclContext &AC, + LifetimeSafetyReporter *Reporter); + + void run(); + + /// \note These are provided only for testing purposes. + LoanPropagationAnalysis &getLoanPropagation() const { + return *LoanPropagation; + } + LiveOriginsAnalysis &getLiveOrigins() const { return *LiveOrigins; } + FactManager &getFactManager() { return FactMgr; } + +private: + AnalysisDeclContext &AC; + LifetimeSafetyReporter *Reporter; + LifetimeFactory Factory; + FactManager FactMgr; + std::unique_ptr LiveOrigins; + std::unique_ptr LoanPropagation; +}; +} // namespace internal +} // namespace clang::lifetimes + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h new file mode 100644 index 0000000000000..8ad17db83499d --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h @@ -0,0 +1,99 @@ +//===- LiveOrigins.h - Live Origins Analysis -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveOriginAnalysis, a backward dataflow analysis that +// determines which origins are "live" at each program point. 
An origin is +// "live" at a program point if there's a potential future use of a pointer it +// is associated with. Liveness is "generated" by a use of an origin (e.g., a +// `UseFact` from a read of a pointer) and is "killed" (i.e., it stops being +// live) when the origin is replaced by flowing a different origin into it +// (e.g., an OriginFlow from an assignment that kills the destination). +// +// This information is used for detecting use-after-free errors, as it allows us +// to check if a live origin holds a loan to an object that has already expired. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/Support/Debug.h" + +namespace clang::lifetimes::internal { + +using CausingFactType = + ::llvm::PointerUnion; + +enum class LivenessKind : uint8_t { + Dead, // Not alive + Maybe, // Live on some path but not all paths (may-be-live) + Must // Live on all paths (must-be-live) +}; + +/// Information about why an origin is live at a program point. +struct LivenessInfo { + /// The use that makes the origin live. If liveness is propagated from + /// multiple uses along different paths, this will point to the use appearing + /// earlier in the translation unit. + /// This is 'null' when the origin is not live. + CausingFactType CausingFact; + + /// The kind of liveness of the origin. + /// `Must`: The origin is live on all control-flow paths from the current + /// point to the function's exit (i.e. the current point is dominated by a set + /// of uses). 
+ /// `Maybe`: indicates it is live on some but not all paths. + /// + /// This determines the diagnostic's confidence level. + /// `Must`-be-alive at expiration implies a definite use-after-free, + /// while `Maybe`-be-alive suggests a potential one on some paths. + LivenessKind Kind; + + LivenessInfo() : CausingFact(nullptr), Kind(LivenessKind::Dead) {} + LivenessInfo(CausingFactType CF, LivenessKind K) : CausingFact(CF), Kind(K) {} + + bool operator==(const LivenessInfo &Other) const { + return CausingFact == Other.CausingFact && Kind == Other.Kind; + } + bool operator!=(const LivenessInfo &Other) const { return !(*this == Other); } + + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddPointer(CausingFact.getOpaqueValue()); + IDBuilder.Add(Kind); + } +}; + +using LivenessMap = llvm::ImmutableMap; + +class LiveOriginsAnalysis { +public: + LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + LivenessMap::Factory &SF); + ~LiveOriginsAnalysis(); + + /// Returns the set of origins that are live at a specific program point, + /// along with the details of the liveness. + LivenessMap getLiveOriginsAt(ProgramPoint P) const; + + // Dump liveness values on all test points in the program. + void dump(llvm::raw_ostream &OS, + llvm::StringMap TestPoints) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LIVE_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h new file mode 100644 index 0000000000000..447d05ca898fd --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h @@ -0,0 +1,48 @@ +//===- LoanPropagation.h - Loan Propagation Analysis -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoanPropagationAnalysis, a forward dataflow analysis +// that tracks which loans each origin holds at each program point. Loans +// represent borrows of storage locations and are propagated through the +// program as pointers are copied or assigned. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal { + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. 
+using LoanSet = llvm::ImmutableSet; +using OriginLoanMap = llvm::ImmutableMap; + +class LoanPropagationAnalysis { +public: + LoanPropagationAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F, + OriginLoanMap::Factory &OriginLoanMapFactory, + LoanSet::Factory &LoanSetFactory); + ~LoanPropagationAnalysis(); + + LoanSet getLoans(OriginID OID, ProgramPoint P) const; + +private: + class Impl; + std::unique_ptr PImpl; +}; + +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOAN_PROPAGATION_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h new file mode 100644 index 0000000000000..e9bccd4773622 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Loans.h @@ -0,0 +1,149 @@ +//===- Loans.h - Loan and Access Path Definitions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Loan and AccessPath structures, which represent +// borrows of storage locations, and the LoanManager, which manages the +// creation and retrieval of loans during lifetime analysis. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H + +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang::lifetimes::internal { + +using LoanID = utils::ID; +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, LoanID ID) { + return OS << ID.Value; +} + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. +struct AccessPath { + const clang::ValueDecl *D; + + AccessPath(const clang::ValueDecl *D) : D(D) {} +}; + +/// An abstract base class for a single "Loan" which represents lending a +/// storage in memory. +class Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet +public: + enum class Kind : uint8_t { + /// A loan with an access path to a storage location. + Path, + /// A non-expiring placeholder loan for a parameter, representing a borrow + /// from the function's caller. + Placeholder + }; + + Loan(Kind K, LoanID ID) : K(K), ID(ID) {} + virtual ~Loan() = default; + + Kind getKind() const { return K; } + LoanID getID() const { return ID; } + + virtual void dump(llvm::raw_ostream &OS) const = 0; + +private: + const Kind K; + const LoanID ID; +}; + +/// PathLoan represents lending a storage location that is visible within the +/// function's scope (e.g., a local variable on stack). +class PathLoan : public Loan { + AccessPath Path; + /// The expression that creates the loan, e.g., &x. 
+ const Expr *IssueExpr; + +public: + PathLoan(LoanID ID, AccessPath Path, const Expr *IssueExpr) + : Loan(Kind::Path, ID), Path(Path), IssueExpr(IssueExpr) {} + + const AccessPath &getAccessPath() const { return Path; } + const Expr *getIssueExpr() const { return IssueExpr; } + + void dump(llvm::raw_ostream &OS) const override; + + static bool classof(const Loan *L) { return L->getKind() == Kind::Path; } +}; + +/// A placeholder loan held by a function parameter, representing a borrow from +/// the caller's scope. +/// +/// Created at function entry for each pointer or reference parameter with an +/// origin. Unlike PathLoan, placeholder loans: +/// - Have no IssueExpr (created at function entry, not at a borrow site) +/// - Have no AccessPath (the borrowed object is not visible to the function) +/// - Do not currently expire, but may in the future when modeling function +/// invalidations (e.g., vector::push_back) +/// +/// When a placeholder loan escapes the function (e.g., via return), it +/// indicates the parameter should be marked [[clang::lifetimebound]], enabling +/// lifetime annotation suggestions. +class PlaceholderLoan : public Loan { + /// The function parameter that holds this placeholder loan. + const ParmVarDecl *PVD; + +public: + PlaceholderLoan(LoanID ID, const ParmVarDecl *PVD) + : Loan(Kind::Placeholder, ID), PVD(PVD) {} + + const ParmVarDecl *getParmVarDecl() const { return PVD; } + + void dump(llvm::raw_ostream &OS) const override; + + static bool classof(const Loan *L) { + return L->getKind() == Kind::Placeholder; + } +}; + +/// Manages the creation, storage and retrieval of loans. 
+class LoanManager { +public: + LoanManager() = default; + + template + LoanType *createLoan(Args &&...args) { + static_assert( + std::is_same_v || + std::is_same_v, + "createLoan can only be used with PathLoan or PlaceholderLoan"); + void *Mem = LoanAllocator.Allocate(); + auto *NewLoan = + new (Mem) LoanType(getNextLoanID(), std::forward(args)...); + AllLoans.push_back(NewLoan); + return NewLoan; + } + + const Loan *getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefulness of small buffer + /// optimisation. + llvm::SmallVector AllLoans; + llvm::BumpPtrAllocator LoanAllocator; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_LOANS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h new file mode 100644 index 0000000000000..56b9010f41fa2 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -0,0 +1,93 @@ +//===- Origins.h - Origin and Origin Management ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines Origins, which represent the set of possible loans a +// pointer-like object could hold, and the OriginManager, which manages the +// creation, storage, and retrieval of origins for variables and expressions.
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" + +namespace clang::lifetimes::internal { + +using OriginID = utils::ID; + +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OriginID ID) { + return OS << ID.Value; +} + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + /// A pointer to the AST node that this origin represents. This union + /// distinguishes between origins from declarations (variables or parameters) + /// and origins from expressions. + llvm::PointerUnion Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast(); + } +}; + +/// Manages the creation, storage, and retrieval of origins for pointer-like +/// variables and expressions. +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D); + Origin &addOrigin(OriginID ID, const clang::Expr &E); + + // TODO: Mark this method as const once we remove the call to getOrCreate. 
+ OriginID get(const Expr &E); + + OriginID get(const ValueDecl &D); + + OriginID getOrCreate(const Expr &E); + + const Origin &getOrigin(OriginID ID) const; + + llvm::ArrayRef getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D); + + unsigned getNumOrigins() const { return NextOriginID.Value; } + + void dump(OriginID OID, llvm::raw_ostream &OS) const; + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefulness of small buffer + /// optimisation. + llvm::SmallVector AllOrigins; + llvm::DenseMap DeclToOriginID; + llvm::DenseMap ExprToOriginID; +}; +} // namespace clang::lifetimes::internal + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_ORIGINS_H diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h new file mode 100644 index 0000000000000..4183cabe860a7 --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Utils.h @@ -0,0 +1,118 @@ +//===- Utils.h - Utility Functions for Lifetime Safety --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file provides utilities for the lifetime safety analysis, including +// join operations for LLVM's immutable data structures. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H + +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" + +namespace clang::lifetimes::internal::utils { + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins.
+template struct ID { + uint32_t Value = 0; + + bool operator==(const ID &Other) const { return Value == Other.Value; } + bool operator!=(const ID &Other) const { return !(*this == Other); } + bool operator<(const ID &Other) const { return Value < Other.Value; } + ID operator++(int) { + ID Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +/// Computes the union of two ImmutableSets. +template +static llvm::ImmutableSet join(llvm::ImmutableSet A, + llvm::ImmutableSet B, + typename llvm::ImmutableSet::Factory &F) { + if (A.getHeight() < B.getHeight()) + std::swap(A, B); + for (const T &E : B) + A = F.add(A, E); + return A; +} + +/// Describes the strategy for joining two `ImmutableMap` instances, primarily +/// differing in how they handle keys that are unique to one of the maps. +/// +/// A `Symmetric` join is universally correct, while an `Asymmetric` join +/// serves as a performance optimization. The latter is applicable only when the +/// join operation possesses a left identity element, allowing for a more +/// efficient, one-sided merge. +enum class JoinKind { + /// A symmetric join applies the `JoinValues` operation to keys unique to + /// either map, ensuring that values from both maps contribute to the result. + Symmetric, + /// An asymmetric join preserves keys unique to the first map as-is, while + /// applying the `JoinValues` operation only to keys unique to the second map. + Asymmetric, +}; + +/// Computes the key-wise union of two ImmutableMaps. +// TODO(opt): This key-wise join is a performance bottleneck. A more +// efficient merge could be implemented using a Patricia Trie or HAMT +// instead of the current AVL-tree-based ImmutableMap. 
+template +static llvm::ImmutableMap +join(const llvm::ImmutableMap &A, const llvm::ImmutableMap &B, + typename llvm::ImmutableMap::Factory &F, Joiner JoinValues, + JoinKind Kind) { + if (A.getHeight() < B.getHeight()) + return join(B, A, F, JoinValues, Kind); + + // For each element in B, join it with the corresponding element in A + // (or with an empty value if it doesn't exist in A). + llvm::ImmutableMap Res = A; + for (const auto &Entry : B) { + const K &Key = Entry.first; + const V &ValB = Entry.second; + Res = F.add(Res, Key, JoinValues(A.lookup(Key), &ValB)); + } + if (Kind == JoinKind::Symmetric) { + for (const auto &Entry : A) { + const K &Key = Entry.first; + const V &ValA = Entry.second; + if (!B.contains(Key)) + Res = F.add(Res, Key, JoinValues(&ValA, nullptr)); + } + } + return Res; +} +} // namespace clang::lifetimes::internal::utils + +namespace llvm { +template +struct DenseMapInfo> { + using ID = clang::lifetimes::internal::utils::ID; + + static inline ID getEmptyKey() { + return {DenseMapInfo::getEmptyKey()}; + } + + static inline ID getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey()}; + } + + static unsigned getHashValue(const ID &Val) { + return DenseMapInfo::getHashValue(Val.Value); + } + + static bool isEqual(const ID &LHS, const ID &RHS) { return LHS == RHS; } +}; +} // namespace llvm + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_UTILS_H diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b986f93200452..d5b6fc7aa43fb 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -535,7 +535,16 @@ def Dangling : DiagGroup<"dangling", [DanglingAssignment, DanglingGsl, ReturnStackAddress]>; -def LifetimeSafety : DiagGroup<"experimental-lifetime-safety">; +def LifetimeSafetyPermissive : DiagGroup<"experimental-lifetime-safety-permissive">; +def LifetimeSafetyStrict : 
DiagGroup<"experimental-lifetime-safety-strict">; +def LifetimeSafety : DiagGroup<"experimental-lifetime-safety", + [LifetimeSafetyPermissive, LifetimeSafetyStrict]> { + code Documentation = [{ + Experimental warnings to detect use-after-free and related temporal safety bugs based on lifetime safety analysis. + }]; +} +def LifetimeSafetySuggestions + : DiagGroup<"experimental-lifetime-safety-suggestions">; def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6749e2fd3502f..d3ec2385e9783 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10727,9 +10727,33 @@ def warn_dangling_reference_captured_by_unknown : Warning< "object whose reference is captured will be destroyed at the end of " "the full-expression">, InGroup; -def warn_experimental_lifetime_safety_dummy_warning : Warning< - "todo: remove this warning after we have atleast one warning based on the lifetime analysis">, - InGroup, DefaultIgnore; +// Diagnostics based on the Lifetime safety analysis. 
+def warn_lifetime_safety_loan_expires_permissive : Warning< + "object whose reference is captured does not live long enough">, + InGroup, DefaultIgnore; +def warn_lifetime_safety_loan_expires_strict : Warning< + "object whose reference is captured may not live long enough">, + InGroup, DefaultIgnore; + +def warn_lifetime_safety_return_stack_addr_permissive + : Warning<"address of stack memory is returned later">, + InGroup, + DefaultIgnore; +def warn_lifetime_safety_return_stack_addr_strict + : Warning<"address of stack memory may be returned later">, + InGroup, + DefaultIgnore; + +def note_lifetime_safety_used_here : Note<"later used here">; +def note_lifetime_safety_destroyed_here : Note<"destroyed here">; +def note_lifetime_safety_returned_here : Note<"returned here">; + +def warn_lifetime_safety_suggest_lifetimebound + : Warning<"param should be marked [[clang::lifetimebound]]">, + InGroup, + DefaultIgnore; + +def note_lifetime_safety_suggestion_returned_here : Note<"param returned here">; // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 88fc191923aa2..0e720ea41e5cb 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1675,6 +1675,12 @@ std::unique_ptr CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) { assert(Succ == &cfg->getExit()); Block = nullptr; // the EXIT block is empty. Create all other blocks lazily. + // Add parameters to the initial scope to handle their dtors and lifetime ends.
+ LocalScope *paramScope = nullptr; + if (const auto *FD = dyn_cast_or_null(D)) + for (ParmVarDecl *PD : FD->parameters()) + paramScope = addLocalScopeForVarDecl(PD, paramScope); + if (BuildOpts.AddImplicitDtors) if (const CXXDestructorDecl *DD = dyn_cast_or_null(D)) addImplicitDtorsForDestructor(DD); @@ -2255,6 +2261,11 @@ LocalScope* CFGBuilder::addLocalScopeForVarDecl(VarDecl *VD, if (!VD->hasLocalStorage()) return Scope; + // Reference parameters are aliases to objects that live elsewhere, so they + // don't require automatic destruction or lifetime tracking. + if (isa(VD) && VD->getType()->isReferenceType()) + return Scope; + if (!BuildOpts.AddLifetime && !BuildOpts.AddScopes && !needsAutomaticDestruction(VD)) { assert(BuildOpts.AddImplicitDtors); @@ -5680,8 +5691,15 @@ class StmtPrinterHelper : public PrinterHelper { bool handleDecl(const Decl *D, raw_ostream &OS) { DeclMapTy::iterator I = DeclMap.find(D); - if (I == DeclMap.end()) + if (I == DeclMap.end()) { + // ParmVarDecls are not declared in the CFG itself, so they do not appear + // in DeclMap. 
+ if (auto *PVD = dyn_cast_or_null(D)) { + OS << "[Parm: " << PVD->getNameAsString() << "]"; + return true; + } return false; + } if (currentBlock >= 0 && I->second.first == (unsigned) currentBlock && I->second.second == currStmt) { diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 0523d92480cb3..1dbd4153d856f 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,7 +21,6 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp - LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp @@ -50,3 +49,4 @@ add_clang_library(clangAnalysis add_subdirectory(plugins) add_subdirectory(FlowSensitive) +add_subdirectory(LifetimeSafety) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp deleted file mode 100644 index bf67bea6c9933..0000000000000 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ /dev/null @@ -1,762 +0,0 @@ -//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#include "clang/Analysis/Analyses/LifetimeSafety.h" -#include "clang/AST/Decl.h" -#include "clang/AST/Expr.h" -#include "clang/AST/StmtVisitor.h" -#include "clang/AST/Type.h" -#include "clang/Analysis/Analyses/PostOrderCFGView.h" -#include "clang/Analysis/AnalysisDeclContext.h" -#include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TimeProfiler.h" -#include - -namespace clang { -namespace { - -/// Represents the storage location being borrowed, e.g., a specific stack -/// variable. -/// TODO: Model access paths of other types, e.g., s.field, heap and globals. -struct AccessPath { - const clang::ValueDecl *D; - - AccessPath(const clang::ValueDecl *D) : D(D) {} -}; - -/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. -/// Used for giving ID to loans and origins. -template struct ID { - uint32_t Value = 0; - - bool operator==(const ID &Other) const { return Value == Other.Value; } - bool operator!=(const ID &Other) const { return !(*this == Other); } - bool operator<(const ID &Other) const { return Value < Other.Value; } - ID operator++(int) { - ID Tmp = *this; - ++Value; - return Tmp; - } - void Profile(llvm::FoldingSetNodeID &IDBuilder) const { - IDBuilder.AddInteger(Value); - } -}; - -template -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID ID) { - return OS << ID.Value; -} - -using LoanID = ID; -using OriginID = ID; - -/// Information about a single borrow, or "Loan". A loan is created when a -/// reference or pointer is created. -struct Loan { - /// TODO: Represent opaque loans. 
- /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it - /// is represented as empty LoanSet - LoanID ID; - AccessPath Path; - SourceLocation IssueLoc; - - Loan(LoanID id, AccessPath path, SourceLocation loc) - : ID(id), Path(path), IssueLoc(loc) {} -}; - -/// An Origin is a symbolic identifier that represents the set of possible -/// loans a pointer-like object could hold at any given time. -/// TODO: Enhance the origin model to handle complex types, pointer -/// indirection and reborrowing. The plan is to move from a single origin per -/// variable/expression to a "list of origins" governed by the Type. -/// For example, the type 'int**' would have two origins. -/// See discussion: -/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 -struct Origin { - OriginID ID; - /// A pointer to the AST node that this origin represents. This union - /// distinguishes between origins from declarations (variables or parameters) - /// and origins from expressions. - llvm::PointerUnion Ptr; - - Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} - Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} - - const clang::ValueDecl *getDecl() const { - return Ptr.dyn_cast(); - } - const clang::Expr *getExpr() const { - return Ptr.dyn_cast(); - } -}; - -/// Manages the creation, storage and retrieval of loans. -class LoanManager { -public: - LoanManager() = default; - - Loan &addLoan(AccessPath Path, SourceLocation Loc) { - AllLoans.emplace_back(getNextLoanID(), Path, Loc); - return AllLoans.back(); - } - - const Loan &getLoan(LoanID ID) const { - assert(ID.Value < AllLoans.size()); - return AllLoans[ID.Value]; - } - llvm::ArrayRef getLoans() const { return AllLoans; } - -private: - LoanID getNextLoanID() { return NextLoanID++; } - - LoanID NextLoanID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. 
- llvm::SmallVector AllLoans; -}; - -/// Manages the creation, storage, and retrieval of origins for pointer-like -/// variables and expressions. -class OriginManager { -public: - OriginManager() = default; - - Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { - AllOrigins.emplace_back(ID, &D); - return AllOrigins.back(); - } - Origin &addOrigin(OriginID ID, const clang::Expr &E) { - AllOrigins.emplace_back(ID, &E); - return AllOrigins.back(); - } - - OriginID get(const Expr &E) { - // Origin of DeclRefExpr is that of the declaration it refers to. - if (const auto *DRE = dyn_cast(&E)) - return get(*DRE->getDecl()); - auto It = ExprToOriginID.find(&E); - // TODO: This should be an assert(It != ExprToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == ExprToOriginID.end()) - return getOrCreate(E); - - return It->second; - } - - OriginID get(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - // TODO: This should be an assert(It != DeclToOriginID.end()). The current - // implementation falls back to getOrCreate to avoid crashing on - // yet-unhandled pointer expressions, creating an empty origin for them. - if (It == DeclToOriginID.end()) - return getOrCreate(D); - - return It->second; - } - - OriginID getOrCreate(const Expr &E) { - auto It = ExprToOriginID.find(&E); - if (It != ExprToOriginID.end()) - return It->second; - - if (const auto *DRE = dyn_cast(&E)) { - // Origin of DeclRefExpr is that of the declaration it refers to. 
- return getOrCreate(*DRE->getDecl()); - } - OriginID NewID = getNextOriginID(); - addOrigin(NewID, E); - ExprToOriginID[&E] = NewID; - return NewID; - } - - const Origin &getOrigin(OriginID ID) const { - assert(ID.Value < AllOrigins.size()); - return AllOrigins[ID.Value]; - } - - llvm::ArrayRef getOrigins() const { return AllOrigins; } - - OriginID getOrCreate(const ValueDecl &D) { - auto It = DeclToOriginID.find(&D); - if (It != DeclToOriginID.end()) - return It->second; - OriginID NewID = getNextOriginID(); - addOrigin(NewID, D); - DeclToOriginID[&D] = NewID; - return NewID; - } - -private: - OriginID getNextOriginID() { return NextOriginID++; } - - OriginID NextOriginID{0}; - /// TODO(opt): Profile and evaluate the usefullness of small buffer - /// optimisation. - llvm::SmallVector AllOrigins; - llvm::DenseMap DeclToOriginID; - llvm::DenseMap ExprToOriginID; -}; - -/// An abstract base class for a single, atomic lifetime-relevant event. -class Fact { - -public: - enum class Kind : uint8_t { - /// A new loan is issued from a borrow expression (e.g., &x). - Issue, - /// A loan expires as its underlying storage is freed (e.g., variable goes - /// out of scope). - Expire, - /// An origin is propagated from a source to a destination (e.g., p = q). - AssignOrigin, - /// An origin escapes the function by flowing into the return value. 
- ReturnOfOrigin - }; - -private: - Kind K; - -protected: - Fact(Kind K) : K(K) {} - -public: - virtual ~Fact() = default; - Kind getKind() const { return K; } - - template const T *getAs() const { - if (T::classof(this)) - return static_cast(this); - return nullptr; - } - - virtual void dump(llvm::raw_ostream &OS) const { - OS << "Fact (Kind: " << static_cast(K) << ")\n"; - } -}; - -class IssueFact : public Fact { - LoanID LID; - OriginID OID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } - - IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} - LoanID getLoanID() const { return LID; } - OriginID getOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS) const override { - OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() - << ")\n"; - } -}; - -class ExpireFact : public Fact { - LoanID LID; - -public: - static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } - - ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} - LoanID getLoanID() const { return LID; } - void dump(llvm::raw_ostream &OS) const override { - OS << "Expire (LoanID: " << getLoanID() << ")\n"; - } -}; - -class AssignOriginFact : public Fact { - OriginID OIDDest; - OriginID OIDSrc; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::AssignOrigin; - } - - AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) - : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } - void dump(llvm::raw_ostream &OS) const override { - OS << "AssignOrigin (DestID: " << getDestOriginID() - << ", SrcID: " << getSrcOriginID() << ")\n"; - } -}; - -class ReturnOfOriginFact : public Fact { - OriginID OID; - -public: - static bool classof(const Fact *F) { - return F->getKind() == Kind::ReturnOfOrigin; - } - - ReturnOfOriginFact(OriginID OID) : 
Fact(Kind::ReturnOfOrigin), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } - void dump(llvm::raw_ostream &OS) const override { - OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; - } -}; - -class FactManager { -public: - llvm::ArrayRef getFacts(const CFGBlock *B) const { - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) - return It->second; - return {}; - } - - void addBlockFacts(const CFGBlock *B, llvm::ArrayRef NewFacts) { - if (!NewFacts.empty()) - BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } - - template - FactType *createFact(Args &&...args) { - void *Mem = FactAllocator.Allocate(); - return new (Mem) FactType(std::forward(args)...); - } - - void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Lifetime Analysis Facts:\n"; - llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) - if (const auto *ND = dyn_cast(D)) - llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - // Print blocks in the order as they appear in code for a stable ordering. 
- for (const CFGBlock *B : *AC.getAnalysis()) { - llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; - auto It = BlockToFactsMap.find(B); - if (It != BlockToFactsMap.end()) { - for (const Fact *F : It->second) { - llvm::dbgs() << " "; - F->dump(llvm::dbgs()); - } - } - llvm::dbgs() << " End of Block\n"; - } - } - - LoanManager &getLoanMgr() { return LoanMgr; } - OriginManager &getOriginMgr() { return OriginMgr; } - -private: - LoanManager LoanMgr; - OriginManager OriginMgr; - llvm::DenseMap> - BlockToFactsMap; - llvm::BumpPtrAllocator FactAllocator; -}; - -class FactGenerator : public ConstStmtVisitor { - -public: - FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) - : FactMgr(FactMgr), AC(AC) {} - - void run() { - llvm::TimeTraceScope TimeProfile("FactGenerator"); - // Iterate through the CFG blocks in reverse post-order to ensure that - // initializations and destructions are processed in the correct sequence. - for (const CFGBlock *Block : *AC.getAnalysis()) { - CurrentBlockFacts.clear(); - for (unsigned I = 0; I < Block->size(); ++I) { - const CFGElement &Element = Block->Elements[I]; - if (std::optional CS = Element.getAs()) - Visit(CS->getStmt()); - else if (std::optional DtorOpt = - Element.getAs()) - handleDestructor(*DtorOpt); - } - FactMgr.addBlockFacts(Block, CurrentBlockFacts); - } - } - - void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) - if (const auto *VD = dyn_cast(D)) - if (hasOrigin(VD->getType())) - if (const Expr *InitExpr = VD->getInit()) - addAssignOriginFact(*VD, *InitExpr); - } - - void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { - /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized - /// pointers can use the same type of loan. 
- FactMgr.getOriginMgr().getOrCreate(*N); - } - - void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { - if (!hasOrigin(ICE->getType())) - return; - Visit(ICE->getSubExpr()); - // An ImplicitCastExpr node itself gets an origin, which flows from the - // origin of its sub-expression (after stripping its own parens/casts). - // TODO: Consider if this is actually useful in practice. Alternatively, we - // could directly use the sub-expression's OriginID instead of creating a - // new one. - addAssignOriginFact(*ICE, *ICE->getSubExpr()); - } - - void VisitUnaryOperator(const UnaryOperator *UO) { - if (UO->getOpcode() == UO_AddrOf) { - const Expr *SubExpr = UO->getSubExpr(); - if (const auto *DRE = dyn_cast(SubExpr)) { - if (const auto *VD = dyn_cast(DRE->getDecl())) { - // Check if it's a local variable. - if (VD->hasLocalStorage()) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); - AccessPath AddrOfLocalVarPath(VD); - const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, - UO->getOperatorLoc()); - CurrentBlockFacts.push_back( - FactMgr.createFact(L.ID, OID)); - } - } - } - } - } - - void VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (hasOrigin(RetExpr->getType())) { - OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); - CurrentBlockFacts.push_back( - FactMgr.createFact(OID)); - } - } - } - - void VisitBinaryOperator(const BinaryOperator *BO) { - if (BO->isAssignmentOp()) { - const Expr *LHSExpr = BO->getLHS(); - const Expr *RHSExpr = BO->getRHS(); - - // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` - // LHS must be a pointer/reference type that can be an origin. - // RHS must also represent an origin (either another pointer/ref or an - // address-of). 
- if (const auto *DRE_LHS = dyn_cast(LHSExpr)) - if (const auto *VD_LHS = - dyn_cast(DRE_LHS->getDecl()->getCanonicalDecl()); - VD_LHS && hasOrigin(VD_LHS->getType())) - addAssignOriginFact(*VD_LHS, *RHSExpr); - } - } - -private: - // Check if a type has an origin. - bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } - - template - void addAssignOriginFact(const Destination &D, const Source &S) { - OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); - OriginID SrcOID = FactMgr.getOriginMgr().get(S); - CurrentBlockFacts.push_back( - FactMgr.createFact(DestOID, SrcOID)); - } - - void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { - /// TODO: Also handle trivial destructors (e.g., for `int` - /// variables) which will never have a CFGAutomaticObjDtor node. - /// TODO: Handle loans to temporaries. - /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the - /// lifetime ends. - const VarDecl *DestructedVD = DtorOpt.getVarDecl(); - if (!DestructedVD) - return; - // Iterate through all loans to see if any expire. - /// TODO(opt): Do better than a linear search to find loans associated with - /// 'DestructedVD'. - for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { - const AccessPath &LoanPath = L.Path; - // Check if the loan is for a stack variable and if that variable - // is the one being destructed. - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact(L.ID)); - } - } - - FactManager &FactMgr; - AnalysisDeclContext &AC; - llvm::SmallVector CurrentBlockFacts; -}; - -// ========================================================================= // -// The Dataflow Lattice -// ========================================================================= // - -// Using LLVM's immutable collections is efficient for dataflow analysis -// as it avoids deep copies during state transitions. -// TODO(opt): Consider using a bitset to represent the set of loans. 
-using LoanSet = llvm::ImmutableSet; -using OriginLoanMap = llvm::ImmutableMap; - -/// An object to hold the factories for immutable collections, ensuring -/// that all created states share the same underlying memory management. -struct LifetimeFactory { - OriginLoanMap::Factory OriginMapFactory; - LoanSet::Factory LoanSetFact; - - /// Creates a singleton set containing only the given loan ID. - LoanSet createLoanSet(LoanID LID) { - return LoanSetFact.add(LoanSetFact.getEmptySet(), LID); - } -}; - -/// LifetimeLattice represents the state of our analysis at a given program -/// point. It is an immutable object, and all operations produce a new -/// instance rather than modifying the existing one. -struct LifetimeLattice { - /// The map from an origin to the set of loans it contains. - /// The lattice has a finite height: An origin's loan set is bounded by the - /// total number of loans in the function. - /// TODO(opt): To reduce the lattice size, propagate origins of declarations, - /// not expressions, because expressions are not visible across blocks. - OriginLoanMap Origins = OriginLoanMap(nullptr); - - explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} - LifetimeLattice() = default; - - bool operator==(const LifetimeLattice &Other) const { - return Origins == Other.Origins; - } - bool operator!=(const LifetimeLattice &Other) const { - return !(*this == Other); - } - - LoanSet getLoans(OriginID OID) const { - if (auto *Loans = Origins.lookup(OID)) - return *Loans; - return LoanSet(nullptr); - } - - /// Computes the union of two lattices by performing a key-wise join of - /// their OriginLoanMaps. - // TODO(opt): This key-wise join is a performance bottleneck. A more - // efficient merge could be implemented using a Patricia Trie or HAMT - // instead of the current AVL-tree-based ImmutableMap. - // TODO(opt): Keep the state small by removing origins which become dead. 
- LifetimeLattice join(const LifetimeLattice &Other, - LifetimeFactory &Factory) const { - /// Merge the smaller map into the larger one ensuring we iterate over the - /// smaller map. - if (Origins.getHeight() < Other.Origins.getHeight()) - return Other.join(*this, Factory); - - OriginLoanMap JoinedState = Origins; - // For each origin in the other map, union its loan set with ours. - for (const auto &Entry : Other.Origins) { - OriginID OID = Entry.first; - LoanSet OtherLoanSet = Entry.second; - JoinedState = Factory.OriginMapFactory.add( - JoinedState, OID, join(getLoans(OID), OtherLoanSet, Factory)); - } - return LifetimeLattice(JoinedState); - } - - LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { - /// Merge the smaller set into the larger one ensuring we iterate over the - /// smaller set. - if (a.getHeight() < b.getHeight()) - std::swap(a, b); - LoanSet Result = a; - for (LoanID LID : b) { - /// TODO(opt): Profiling shows that this loop is a major performance - /// bottleneck. Investigate using a BitVector to represent the set of - /// loans for improved join performance. 
- Result = Factory.LoanSetFact.add(Result, LID); - } - return Result; - } - - void dump(llvm::raw_ostream &OS) const { - OS << "LifetimeLattice State:\n"; - if (Origins.isEmpty()) - OS << " \n"; - for (const auto &Entry : Origins) { - if (Entry.second.isEmpty()) - OS << " Origin " << Entry.first << " contains no loans\n"; - for (const LoanID &LID : Entry.second) - OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; - } - } -}; - -// ========================================================================= // -// The Transfer Function -// ========================================================================= // -class Transferer { - FactManager &AllFacts; - LifetimeFactory &Factory; - -public: - explicit Transferer(FactManager &F, LifetimeFactory &Factory) - : AllFacts(F), Factory(Factory) {} - - /// Computes the exit state of a block by applying all its facts sequentially - /// to a given entry state. - /// TODO: We might need to store intermediate states per-fact in the block for - /// later analysis. - LifetimeLattice transferBlock(const CFGBlock *Block, - LifetimeLattice EntryState) { - LifetimeLattice BlockState = EntryState; - llvm::ArrayRef Facts = AllFacts.getFacts(Block); - - for (const Fact *F : Facts) { - BlockState = transferFact(BlockState, F); - } - return BlockState; - } - -private: - LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) { - switch (F->getKind()) { - case Fact::Kind::Issue: - return transfer(In, *F->getAs()); - case Fact::Kind::AssignOrigin: - return transfer(In, *F->getAs()); - // Expire and ReturnOfOrigin facts don't modify the Origins and the State. - case Fact::Kind::Expire: - case Fact::Kind::ReturnOfOrigin: - return In; - } - llvm_unreachable("Unknown fact kind"); - } - - /// A new loan is issued to the origin. Old loans are erased. 
- LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) { - OriginID OID = F.getOriginID(); - LoanID LID = F.getLoanID(); - return LifetimeLattice(Factory.OriginMapFactory.add( - In.Origins, OID, Factory.createLoanSet(LID))); - } - - /// The destination origin's loan set is replaced by the source's. - /// This implicitly "resets" the old loans of the destination. - LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) { - OriginID DestOID = F.getDestOriginID(); - OriginID SrcOID = F.getSrcOriginID(); - LoanSet SrcLoans = InState.getLoans(SrcOID); - return LifetimeLattice( - Factory.OriginMapFactory.add(InState.Origins, DestOID, SrcLoans)); - } -}; - -// ========================================================================= // -// Dataflow analysis -// ========================================================================= // - -/// Drives the intra-procedural dataflow analysis. -/// -/// Orchestrates the analysis by iterating over the CFG using a worklist -/// algorithm. It computes a fixed point by propagating the LifetimeLattice -/// state through each block until the state no longer changes. -/// TODO: Maybe use the dataflow framework! The framework might need changes -/// to support the current comparison done at block-entry. -class LifetimeDataflow { - const CFG &Cfg; - AnalysisDeclContext &AC; - LifetimeFactory LifetimeFact; - - Transferer Xfer; - - /// Stores the merged analysis state at the entry of each CFG block. - llvm::DenseMap BlockEntryStates; - /// Stores the analysis state at the exit of each CFG block, after the - /// transfer function has been applied. 
- llvm::DenseMap BlockExitStates; - -public: - LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) - : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {} - - void run() { - llvm::TimeTraceScope TimeProfile("Lifetime Dataflow"); - ForwardDataflowWorklist Worklist(Cfg, AC); - const CFGBlock *Entry = &Cfg.getEntry(); - BlockEntryStates[Entry] = LifetimeLattice{}; - Worklist.enqueueBlock(Entry); - while (const CFGBlock *B = Worklist.dequeue()) { - LifetimeLattice EntryState = getEntryState(B); - LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState); - BlockExitStates[B] = ExitState; - - for (const CFGBlock *Successor : B->succs()) { - auto SuccIt = BlockEntryStates.find(Successor); - LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) - ? SuccIt->second - : LifetimeLattice{}; - LifetimeLattice NewSuccEntryState = - OldSuccEntryState.join(ExitState, LifetimeFact); - // Enqueue the successor if its entry state has changed. - // TODO(opt): Consider changing 'join' to report a change if != - // comparison is found expensive. - if (SuccIt == BlockEntryStates.end() || - NewSuccEntryState != OldSuccEntryState) { - BlockEntryStates[Successor] = NewSuccEntryState; - Worklist.enqueueBlock(Successor); - } - } - } - } - - void dump() const { - llvm::dbgs() << "==========================================\n"; - llvm::dbgs() << " Dataflow results:\n"; - llvm::dbgs() << "==========================================\n"; - const CFGBlock &B = Cfg.getExit(); - getExitState(&B).dump(llvm::dbgs()); - } - - LifetimeLattice getEntryState(const CFGBlock *B) const { - return BlockEntryStates.lookup(B); - } - - LifetimeLattice getExitState(const CFGBlock *B) const { - return BlockExitStates.lookup(B); - } -}; - -// ========================================================================= // -// TODO: Analysing dataflow results and error reporting. 
-// ========================================================================= // -} // anonymous namespace - -void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC) { - llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); - DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), - /*ShowColors=*/true)); - FactManager FactMgr; - FactGenerator FactGen(FactMgr, AC); - FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); - - /// TODO(opt): Consider optimizing individual blocks before running the - /// dataflow analysis. - /// 1. Expression Origins: These are assigned once and read at most once, - /// forming simple chains. These chains can be compressed into a single - /// assignment. - /// 2. Block-Local Loans: Origins of expressions are never read by other - /// blocks; only Decls are visible. Therefore, loans in a block that - /// never reach an Origin associated with a Decl can be safely dropped by - /// the analysis. - LifetimeDataflow Dataflow(Cfg, FactMgr, AC); - Dataflow.run(); - DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); -} -} // namespace clang diff --git a/clang/lib/Analysis/LifetimeSafety.md b/clang/lib/Analysis/LifetimeSafety.md new file mode 100644 index 0000000000000..3f3d03d729b46 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.md @@ -0,0 +1,230 @@ +Excellent! This is a very strong and logical structure for the white paper. It follows a clear narrative, starting from the high-level problem and progressively diving into the specifics of your solution. The sections on why a traditional borrow checker doesn't fit C++ and the open questions are particularly good, as they show a deep engagement with the problem space. + +Here is a draft of the white paper following your new skeleton, with the details filled in based on my analysis of your implementation and the provided reference documents. 
I've also incorporated some of my own suggestions to enhance the flow and clarity. + +*** + + + +# Lifetime Safety: An Intuitive Approach for Temporal Safety in C++ +**Author:** +[Utkarsh Saxena](mailto:usx@google.com) + +**Purpose:** This document serves as a live RFC for a new lifetime safety analysis in C++, with the ultimate goal of publication as a white paper. + +## Intended Audience + +This document is intended for C++ compiler developers (especially those working on Clang), developers of other systems languages with advanced memory safety models (like Rust and Carbon), and all C++ users interested in writing safer code. + +## Goal + +* To describe a new lifetime model for C++ that aims to maximize the compile-time detection of temporal memory safety issues. +* To explore a path toward incremental safety in C++, offering a spectrum of checks that can be adopted without requiring a full plunge into a restrictive ownership model. + +**Out of Scope** + +* **Rigorous Temporal Memory Safety:** This analysis aims to detect a large class of common errors, but it does not formally prove the absence of all temporal safety bugs. +* **Runtime Solutions:** This paper focuses exclusively on static, compile-time analysis and does not cover runtime solutions like MTE or AddressSanitizer. + +# Paper: C++ Lifetime Safety Analysis + +**Subtitle: A Flow-Sensitive, Alias-based Approach to Preventing Dangling Pointers** + +## Abstract + +This paper introduces a new intra-procedural, flow-sensitive lifetime analysis for C++ implemented in Clang. The analysis is designed to detect a significant class of temporal memory safety violations, such as use-after-free and use-after-return, at compile time. It is based on a model of "Loans" and "Origins," inspired by the Polonius borrow checker in Rust, but adapted for the semantics and flexibility of C++. + +The analysis works by translating the Clang CFG into a series of lifetime-relevant "Facts."
These facts are then processed by dataflow analyses to precisely determine the validity of pointers and references at each program point. This fact-based approach, combined with a configurable strictness model, allows for both high-confidence error reporting and the detection of more subtle, potential bugs, without requiring extensive new annotations. The ultimate goal is to provide a powerful, low-overhead tool that makes C++ safer by default. + +## The Anatomy of a Temporal Safety Error + +At its core, a temporal safety error is a bug where an operation is performed on an object at a time when it is no longer valid to do so ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=at+its+core+a+temporal+safety+error+is+a+bug+where+an+operation+is+performed+on+an+object+at+a+time+when+it+is+no+longer+valid+to+do+so)). These bugs are notoriously difficult to debug because they often manifest as unpredictable crashes or silent data corruption far from the root cause. However, we can argue that this wide and varied class of errors—from use-after-free to iterator invalidation—all stem from a single, canonical pattern. + +**Conjecture: Any temporal safety issue is a form of Use-After-Free.** + +All sub-categories of temporal safety issues, such as returning a reference to a stack variable (`return-stack-addr`), using a variable after its scope has ended (`use-after-scope`), using heap memory after it has been deleted (`heap-use-after-free`), or using an iterator after its container has been modified (`use-after-invalidation`), can be described by a single sequence of events. + +In C++, an *object* is a region of storage, and pointers and references are the mechanisms we use to refer to them. A use-after-free occurs when we access an object after its lifetime has ended. But how can an object be accessed after it has been destroyed? 
This is only possible through an **alias**—a pointer or reference—that was created while the object was alive and that survived the object's destruction. + +This insight allows us to define a canonical use-after-free with four distinct events that happen in a specific order: + +1. **`t0`: Creation.** An object `M` is created in some region of storage (on the stack, on the heap, etc.). +2. **`t1`: Alias Creation.** An alias `P` (a pointer or reference) is created that refers to the object `M`. +3. **`t2`: End of Lifetime.** The lifetime of object `M` ends (e.g., it is deallocated, or it goes out of scope). +4. **`t3`: Use of Alias.** The alias `P`, which now dangles, is used to access the memory where `M` once resided. + +Let's examine this with a simple piece of C++ code: + +```cpp +void use_after_scope_example() { + int* p; + { + int s = 10; // t0: Object `s` is created on the stack. + p = &s; // t1: Alias `p` is made to refer to object `s`. + } // t2: The lifetime of `s` ends. `p` now dangles. + *p = 42; // t3: The dangling alias `p` is used. This is a use-after-free. +} +``` + +The fundamental problem is that the alias `p` outlived the object `s` it referred to. The challenge for a static analysis is therefore clear: to prevent temporal safety errors, the compiler must be able to track aliases and understand the lifetime of the objects they refer to. It needs to know the "points-to" set for every alias at every point in the program and verify that, at the moment of use, the alias does not point to an object whose lifetime has ended. + +This alias-based perspective is powerful because it generalizes beautifully. The "end of lifetime" event at `t2` doesn't have to be a variable going out of scope. It could be: + +* A call to `delete`, which ends the lifetime of a heap object. +* A function `return`, which ends the lifetime of all its local variables. 
+* A container modification, like `std::vector::push_back()`, which may reallocate storage, ending the lifetime of the objects in the old buffer and invalidating all existing iterators (aliases). + +By focusing on tracking aliases and their validity, we can build a unified model to detect a wide range of temporal safety errors without imposing the heavy "aliasing XOR mutability" restrictions of a traditional borrow checker ([source](https://gist.github.com/nmsmith/cdaa94aa74e8e0611221e65db8e41f7b?content_ref=the+major+advancement+is+to+eliminate+the+aliasing+xor+mutability+restriction+amongst+references+and+replace+it+with+a+similar+restriction+applied+to+lifetime+parameters)). This provides a more intuitive and C++-idiomatic path to memory safety. + +## Relation with Thread safety + +This analysis does not address Thread Safety. Thread safety is concerned with data races that occur across multiple threads. While it is possible to create temporal safety issues in multi-threaded scenarios, this analysis is focused on the sequential lifetime of objects within a single function. + +## Quest for Safer Aliasing + +Is it possible to achieve memory safety without a restrictive model like Rust's borrow checker? We believe the answer is yes. The key is to shift our focus from *restricting aliases* to *understanding them*. Instead of forbidding programs that have aliased mutable pointers, we can build a model that understands what each pointer can point to at any given time. 
This approach, similar to the one proposed in P1179 for C++ and explored in modern lifetime systems like Mojo's, allows us to directly detect the root cause of the problem: using a pointer after its target has ceased to exist ([source](http://docs.google.com/document/d/19vbfAiV1yQu3xSMRWjyPUdzyB_LDdVUcKat_HWI1l3g?content_ref=this+approach+similar+to+the+one+proposed+in+p1179+for+c+and+explored+in+modern+lifetime+systems+like+mojo+s+allows+us+to+directly+detect+the+root+cause+of+the+problem+using+a+pointer+after+its+target+has+ceased+to+exist)). + +This paper proposes such a model for C++. Let's begin with a simple, yet illustrative, dangling pointer bug: + +```cpp +// Example 1: A simple use-after-free +void definite_simple_case() { + MyObj* p; + { + MyObj s; + p = &s; // 'p' now points to 's' + } // 's' is destroyed, 'p' is now dangling + (void)*p; // Use-after-free +} +``` + +How can a compiler understand that the use of `p` is an error? It needs to answer a series of questions: + +1. What does `p` point to? +2. When does the object `p` points to cease to be valid? +3. Is `p` used after that point? + +Our model is designed to answer precisely these questions. + +## Core Concepts + +Our model is built on a few core concepts that allow us to formally track the relationships between pointers and the data they point to. + +### Access Paths + +An **Access Path** is a symbolic representation of a storage location in the program ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp?content_ref=struct+accesspath+const+clang+valuedecl+d+accesspath+const+clang+valuedecl+d+d+d)). It provides a way to uniquely identify a variable or a sub-object. For now, we will consider simple paths that refer to top-level variables, but the model can be extended to include field accesses (`a.b`), array elements (`a[i]`), and pointer indirections (`p->field`). 
+ +### Loans: The Act of Borrowing + +A **Loan** is created whenever a reference or pointer to an object is created. It represents the act of "borrowing" that object's storage location ([source](https://raw.githubusercontent.com/llvm/llvm-project/0e7c1732a9a7d28549fe5d690083daeb0e5de6b2/clang/lib/Analysis/LifetimeSafety.cpp?content_ref=information+about+a+single+borrow+or+loan+a+loan+is+created+when+a+reference+or+pointer+is+created)). Each loan is associated with a unique ID and the `AccessPath` of the object being borrowed. + +In our `definite_simple_case` example, the expression `&s` creates a loan. The `AccessPath` for this loan is the variable `s`. + +### Origins: The Provenance of a Pointer + +An **Origin** is a symbolic identifier that represents the *set of possible loans* a pointer-like object could hold at any given time ([source](http://docs.google.com/document/d/1JpJ3M9yeXX-BnC4oKXBvRWzxoFrwziN1RzI4DrMrSp8?content_ref=ime+is+a+symbolic+identifier+representing+a+set+of+loans+from+which+a+pointer+or+reference+could+have+originated)). Every pointer-like variable or expression in the program is associated with an origin. + +* A variable declaration like `MyObj* p` introduces an origin for `p`. +* An expression like `&s` also has an origin. +* The complexity of origins can grow with type complexity. For example: + * `int* p;` has a single origin. + * `int** p;` has two origins: one for the outer pointer and one for the inner pointer. This allows us to distinguish between `p` itself being modified and what `*p` points to being modified. + * `struct S { int* p; };` also has an origin associated with the member `p`. + +The central goal of our analysis is to determine, for each origin at each point in the program, which loans it might contain. + +## Subtyping Rules and Subset Constraints + +The relationships between origins are established through the program's semantics, particularly assignments. 
When a pointer is assigned to another, as in `p = q`, the set of loans that `q` holds must be a subset of the loans that `p` can now hold. This is a fundamental subtyping rule: for `T*'a` to be a subtype of `T*'b`, the set of loans represented by `'a` must be a subset of the loans represented by `'b`. + +This leads to the concept of **subset constraints**. An assignment `p = q` generates a constraint `Origin(q) ⊆ Origin(p)`. The analysis doesn't solve these as a global system of equations. Instead, as we will see, it propagates the *consequences* of these constraints—the loans themselves—through the control-flow graph. This is a key departure from the Polonius model, which focuses on propagating the constraints (`'a: 'b`) themselves. + +## Invalidations: When Loans Expire + +A loan expires when the object it refers to is no longer valid. In our model, this is an **invalidation** event. The most common invalidation is deallocation, which in C++ can mean: +* A stack variable going out of scope. +* A `delete` call on a heap-allocated object. +* A container modification that reallocates its internal storage. + +## An Event-Based Representation of the Function + +To analyze a function, we first transform its CFG into a sequence of atomic, lifetime-relevant **Events**, which we call **Facts**. These facts abstract away the complexities of C++ syntax and provide a clean input for our analysis. The main facts are: + +* `Issue(LoanID, OriginID)`: A new loan is created. For example, `&s` generates an `Issue` fact. +* `Expire(LoanID)`: A loan expires. This is generated at the end of a variable's scope. +* `OriginFlow(Dest, Src, Kill)`: Loans from a source origin flow to a destination origin, as in an assignment. `Kill` indicates whether the destination's old loans are cleared. +* `Use(OriginID)`: An origin is used, such as in a pointer dereference. 
+ +Let's trace our `definite_simple_case` example with these facts: + +```cpp +void definite_simple_case() { + MyObj* p; // Origin for p is O_p + { + MyObj s; + // The expression `&s` generates: + // - IssueFact(L1, O_&s) (A new loan L1 on 's' is created) + // The assignment `p = &s` generates: + // - OriginFlowFact(O_p, O_&s, Kill=true) + p = &s; + } // The end of the scope for 's' generates: + // - ExpireFact(L1) + // The dereference `*p` generates: + // - UseFact(O_p) + (void)*p; +} +``` + +## Flow-Sensitive Lifetime Policy + +With the program represented as a stream of facts, we can now define a flow-sensitive policy to answer our three core questions. We do this by maintaining a map from `Origin` to `Set<Loan>` at each program point. This map represents the state of our analysis. + +The analysis proceeds as follows: +1. **Forward Propagation of Loans:** We perform a forward dataflow analysis. + * When we encounter an `Issue` fact, we add the new loan to its origin's loan set. + * When we see an `OriginFlow` fact, we update the destination origin's loan set with the loans from the source. + * At control-flow merge points, we take the *union* of the loan sets from all incoming branches. + +2. **Backward Propagation of Liveness:** We then perform a backward dataflow analysis, starting from `Use` facts. + * A `Use` of an origin marks it as "live." + * This liveness information is propagated backward. If an origin `O_p` is live, and it received its loans from `O_q`, then `O_q` is also considered live at that point. + +3. **Error Detection:** An error is flagged when the analysis determines that a **live** origin contains a loan that has **expired**. + +In our `definite_simple_case` example: +* The forward analysis determines that at the point of use, `Origin(p)` contains `Loan(s)`. +* The backward analysis determines that at the point where `s` is destroyed, `Origin(p)` is live. +* The `ExpireFact` for `Loan(s)` occurs before the `UseFact`.
+* The combination of these three conditions triggers a use-after-free error. + +## Without Functions, Our Work is Done Here! + +The model described so far works perfectly for a single, monolithic function. However, the moment we introduce function calls, the problem becomes more complex. How do we reason about lifetimes across function boundaries, especially when we can't see the implementation of the called function? + +### Effects of a Function Call + +A function call has inputs and outputs. From a lifetime perspective, the key challenge is to understand how the lifetimes of the outputs relate to the lifetimes of the inputs. + +### Outlives Constraints and Placeholder Origins + +When analyzing a function like `const char* get_prefix(const string& s, int len)`, we don't know the specific lifetime of the `s` that will be passed by the caller. To handle this, we introduce **placeholder origins** for the input parameters. These placeholders act as variables in our analysis. + +If a function returns a pointer or reference, its lifetime must be tied to one of its inputs. This is an **outlives constraint**. For example, the return value of `get_prefix` must "outlive" the input `s`. In our model, this means the origin of the return value will contain the placeholder loan associated with `s`. + +### Opaque Functions + +What if a function's implementation is not visible (e.g., it's in a separate translation unit), and it has no lifetime annotations? In this case, we must be conservative. If we pass a pointer to an opaque function, we have to assume it might have been invalidated. Our model handles this by associating a special **OPAQUE loan** with the pointer after the call, signifying that its lifetime is now unknown. + +## Why a Borrow Checker is Not the Right Fit for C++ + +The "aliasing XOR mutability" rule, while powerful, is fundamentally at odds with many idiomatic C++ patterns. 
+* **Observer Patterns:** It's common to have multiple non-owning pointers observing a mutable object. +* **Intrusive Data Structures:** Data structures like intrusive linked lists require objects to hold pointers to one another, creating cycles that are difficult for a traditional borrow checker to handle. +* **Iterator Invalidation:** The core problem in C++ is often not aliasing itself, but the fact that a mutation can invalidate an alias (e.g., resizing a vector). An alias-based analysis, like the one proposed here, directly models this problem, whereas a borrow checker can feel like an indirect and overly restrictive solution. + +By focusing on tracking what pointers can point to, our model avoids rejecting these safe and useful patterns, making it a more natural fit for the existing C++ ecosystem. + +## Open Questions + +* **When and if to introduce the term "lifetime"?** The term "lifetime" is heavily associated with Rust's model. This paper has intentionally focused on "Origins" and "Loans" to avoid confusion. Is there a point where introducing "lifetime" would be helpful, or should we stick to the new terminology? +* **Syntax for Annotations:** While this model is designed to work with minimal annotations, some will be necessary for complex cases. What should the syntax for these annotations look like? Can we build on existing attributes like `[[clang::lifetimebound]]`? 
diff --git a/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0000000000000..5874e8405baf6 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,17 @@ +add_clang_library(clangAnalysisLifetimeSafety + Checker.cpp + Facts.cpp + FactsGenerator.cpp + LifetimeAnnotations.cpp + LifetimeSafety.cpp + LiveOrigins.cpp + Loans.cpp + LoanPropagation.cpp + Origins.cpp + + LINK_LIBS + clangAST + clangAnalysis + clangBasic + ) + diff --git a/clang/lib/Analysis/LifetimeSafety/Checker.cpp b/clang/lib/Analysis/LifetimeSafety/Checker.cpp new file mode 100644 index 0000000000000..74792768e2c57 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Checker.cpp @@ -0,0 +1,174 @@ +//===- Checker.cpp - C++ Lifetime Safety Checker ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the LifetimeChecker, which detects use-after-free +// errors by checking if live origins hold loans that have expired. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h" +#include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h" +#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { + +static Confidence livenessKindToConfidence(LivenessKind K) { + switch (K) { + case LivenessKind::Must: + return Confidence::Definite; + case LivenessKind::Maybe: + return Confidence::Maybe; + case LivenessKind::Dead: + return Confidence::None; + } + llvm_unreachable("unknown liveness kind"); +} + +namespace { + +/// Struct to store the complete context for a potential lifetime violation. +struct PendingWarning { + SourceLocation ExpiryLoc; // Where the loan expired. 
+ llvm::PointerUnion CausingFact; + Confidence ConfidenceLevel; +}; + +class LifetimeChecker { +private: + llvm::DenseMap FinalWarningsMap; + llvm::DenseMap AnnotationWarningsMap; + const LoanPropagationAnalysis &LoanPropagation; + const LiveOriginsAnalysis &LiveOrigins; + const FactManager &FactMgr; + LifetimeSafetyReporter *Reporter; + +public: + LifetimeChecker(const LoanPropagationAnalysis &LoanPropagation, + const LiveOriginsAnalysis &LiveOrigins, const FactManager &FM, + AnalysisDeclContext &ADC, LifetimeSafetyReporter *Reporter) + : LoanPropagation(LoanPropagation), LiveOrigins(LiveOrigins), FactMgr(FM), + Reporter(Reporter) { + for (const CFGBlock *B : *ADC.getAnalysis()) + for (const Fact *F : FactMgr.getFacts(B)) + if (const auto *EF = F->getAs()) + checkExpiry(EF); + else if (const auto *OEF = F->getAs()) + checkAnnotations(OEF); + issuePendingWarnings(); + suggestAnnotations(); + } + + /// Checks if an escaping origin holds a placeholder loan, indicating a + /// missing [[clang::lifetimebound]] annotation. + void checkAnnotations(const OriginEscapesFact *OEF) { + OriginID EscapedOID = OEF->getEscapedOriginID(); + LoanSet EscapedLoans = LoanPropagation.getLoans(EscapedOID, OEF); + for (LoanID LID : EscapedLoans) { + const Loan *L = FactMgr.getLoanMgr().getLoan(LID); + if (const auto *PL = dyn_cast(L)) { + const ParmVarDecl *PVD = PL->getParmVarDecl(); + if (PVD->hasAttr()) + continue; + AnnotationWarningsMap.try_emplace(PVD, OEF->getEscapeExpr()); + } + } + } + + /// Checks for use-after-free & use-after-return errors when a loan expires. + /// + /// This method examines all live origins at the expiry point and determines + /// if any of them hold the expiring loan. If so, it creates a pending + /// warning with the appropriate confidence level based on the liveness + /// information. The confidence reflects whether the origin is definitely + /// or maybe live at this point. 
+ /// + /// Note: This implementation considers only the confidence of origin + /// liveness. Future enhancements could also consider the confidence of loan + /// propagation (e.g., a loan may only be held on some execution paths). + void checkExpiry(const ExpireFact *EF) { + LoanID ExpiredLoan = EF->getLoanID(); + LivenessMap Origins = LiveOrigins.getLiveOriginsAt(EF); + Confidence CurConfidence = Confidence::None; + // The UseFact or OriginEscapesFact most indicative of a lifetime error, + // prioritized by earlier source location. + llvm::PointerUnion + BestCausingFact = nullptr; + + for (auto &[OID, LiveInfo] : Origins) { + LoanSet HeldLoans = LoanPropagation.getLoans(OID, EF); + if (!HeldLoans.contains(ExpiredLoan)) + continue; + // Loan is defaulted. + Confidence NewConfidence = livenessKindToConfidence(LiveInfo.Kind); + if (CurConfidence < NewConfidence) { + CurConfidence = NewConfidence; + BestCausingFact = LiveInfo.CausingFact; + } + } + if (!BestCausingFact) + return; + // We have a use-after-free. 
+ Confidence LastConf = FinalWarningsMap.lookup(ExpiredLoan).ConfidenceLevel; + if (LastConf >= CurConfidence) + return; + FinalWarningsMap[ExpiredLoan] = {/*ExpiryLoc=*/EF->getExpiryLoc(), + /*BestCausingFact=*/BestCausingFact, + /*ConfidenceLevel=*/CurConfidence}; + } + + void issuePendingWarnings() { + if (!Reporter) + return; + for (const auto &[LID, Warning] : FinalWarningsMap) { + const Loan *L = FactMgr.getLoanMgr().getLoan(LID); + const auto *BL = cast(L); + const Expr *IssueExpr = BL->getIssueExpr(); + llvm::PointerUnion + CausingFact = Warning.CausingFact; + Confidence Confidence = Warning.ConfidenceLevel; + SourceLocation ExpiryLoc = Warning.ExpiryLoc; + + if (const auto *UF = CausingFact.dyn_cast()) + Reporter->reportUseAfterFree(IssueExpr, UF->getUseExpr(), ExpiryLoc, + Confidence); + else if (const auto *OEF = + CausingFact.dyn_cast()) + Reporter->reportUseAfterReturn(IssueExpr, OEF->getEscapeExpr(), + ExpiryLoc, Confidence); + else + llvm_unreachable("Unhandled CausingFact type"); + } + } + + void suggestAnnotations() { + if (!Reporter) + return; + for (const auto &[PVD, EscapeExpr] : AnnotationWarningsMap) + Reporter->suggestAnnotation(PVD, EscapeExpr); + } +}; +} // namespace + +void runLifetimeChecker(const LoanPropagationAnalysis &LP, + const LiveOriginsAnalysis &LO, + const FactManager &FactMgr, AnalysisDeclContext &ADC, + LifetimeSafetyReporter *Reporter) { + llvm::TimeTraceScope TimeProfile("LifetimeChecker"); + LifetimeChecker Checker(LP, LO, FactMgr, ADC, Reporter); +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Dataflow.h b/clang/lib/Analysis/LifetimeSafety/Dataflow.h new file mode 100644 index 0000000000000..05c20d6385368 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Dataflow.h @@ -0,0 +1,192 @@ +//===- Dataflow.h - Generic Dataflow Analysis Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a generic, policy-based driver for dataflow analyses. +// It provides a flexible framework that combines the dataflow runner and +// transfer functions, allowing derived classes to implement specific analyses +// by defining their lattice, join, and transfer functions. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TimeProfiler.h" +#include + +namespace clang::lifetimes::internal { + +/// The direction in which a dataflow analysis walks the CFG: `Forward` starts +/// at the entry block and follows successors; `Backward` starts at the exit +/// block and follows predecessors. +enum class Direction { Forward, Backward }; + +/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific +/// lifetime-related event (`Fact`). +/// +/// A `ProgramPoint` has "after" semantics: it represents the location +/// immediately after its corresponding `Fact`. +using ProgramPoint = const Fact *; + +/// A generic, policy-based driver for dataflow analyses. It combines +/// the dataflow runner and the transferer logic into a single class hierarchy. +/// +/// The derived class is expected to provide: +/// - A `Lattice` type. +/// - `StringRef getAnalysisName() const` +/// - `Lattice getInitialState();` The initial state of the analysis. +/// - `Lattice join(Lattice, Lattice);` Merges states from multiple CFG paths. +/// - `Lattice transfer(Lattice, const FactType&);` Defines how a single +/// lifetime-relevant `Fact` transforms the lattice state.
Only overloads +/// for facts relevant to the analysis need to be implemented. +/// +/// \tparam Derived The CRTP derived class that implements the specific +/// analysis. +/// \tparam LatticeType The dataflow lattice used by the analysis. +/// \tparam Dir The direction of the analysis (Forward or Backward). +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +template +class DataflowAnalysis { +public: + using Lattice = LatticeType; + using Base = DataflowAnalysis; + +private: + const CFG &Cfg; + AnalysisDeclContext &AC; + + /// The dataflow state before a basic block is processed. + llvm::DenseMap InStates; + /// The dataflow state after a basic block is processed. + llvm::DenseMap OutStates; + /// Dataflow state at each program point, indexed by Fact ID. + /// In a forward analysis, this is the state after the Fact at that point has + /// been applied, while in a backward analysis, it is the state before. + llvm::SmallVector PointToState; + + static constexpr bool isForward() { return Dir == Direction::Forward; } + +protected: + FactManager &FactMgr; + + explicit DataflowAnalysis(const CFG &Cfg, AnalysisDeclContext &AC, + FactManager &FactMgr) + : Cfg(Cfg), AC(AC), FactMgr(FactMgr) {} + +public: + void run() { + Derived &D = static_cast(*this); + llvm::TimeTraceScope Time(D.getAnalysisName()); + + PointToState.resize(FactMgr.getNumFacts()); + + using Worklist = + std::conditional_t; + Worklist W(Cfg, AC); + + const CFGBlock *Start = isForward() ? &Cfg.getEntry() : &Cfg.getExit(); + InStates[Start] = D.getInitialState(); + W.enqueueBlock(Start); + + while (const CFGBlock *B = W.dequeue()) { + Lattice StateIn = *getInState(B); + Lattice StateOut = transferBlock(B, StateIn); + OutStates[B] = StateOut; + for (const CFGBlock *AdjacentB : isForward() ? 
B->succs() : B->preds()) { + if (!AdjacentB) + continue; + std::optional OldInState = getInState(AdjacentB); + Lattice NewInState = + !OldInState ? StateOut : D.join(*OldInState, StateOut); + // Enqueue the adjacent block if its in-state has changed or if we have + // never seen it. + if (!OldInState || NewInState != *OldInState) { + InStates[AdjacentB] = NewInState; + W.enqueueBlock(AdjacentB); + } + } + } + } + +protected: + Lattice getState(ProgramPoint P) const { + return PointToState[P->getID().Value]; + } + + std::optional getInState(const CFGBlock *B) const { + auto It = InStates.find(B); + if (It == InStates.end()) + return std::nullopt; + return It->second; + } + + Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); } + + void dump() const { + const Derived *D = static_cast(this); + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << D->getAnalysisName() << " results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = isForward() ? Cfg.getExit() : Cfg.getEntry(); + getOutState(&B).dump(llvm::dbgs()); + } + +private: + /// Computes the state at one end of a block by applying all its facts + /// sequentially to a given state from the other end. + Lattice transferBlock(const CFGBlock *Block, Lattice State) { + auto Facts = FactMgr.getFacts(Block); + if constexpr (isForward()) { + for (const Fact *F : Facts) { + State = transferFact(State, F); + PointToState[F->getID().Value] = State; + } + } else { + for (const Fact *F : llvm::reverse(Facts)) { + // In backward analysis, capture the state before applying the fact. 
+ PointToState[F->getID().Value] = State; + State = transferFact(State, F); + } + } + return State; + } + + Lattice transferFact(Lattice In, const Fact *F) { + assert(F); + Derived *D = static_cast(this); + switch (F->getKind()) { + case Fact::Kind::Issue: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Expire: + return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginFlow: + return D->transfer(In, *F->getAs()); + case Fact::Kind::OriginEscapes: + return D->transfer(In, *F->getAs()); + case Fact::Kind::Use: + return D->transfer(In, *F->getAs()); + case Fact::Kind::TestPoint: + return D->transfer(In, *F->getAs()); + } + llvm_unreachable("Unknown fact kind"); + } + +public: + Lattice transfer(Lattice In, const IssueFact &) { return In; } + Lattice transfer(Lattice In, const ExpireFact &) { return In; } + Lattice transfer(Lattice In, const OriginFlowFact &) { return In; } + Lattice transfer(Lattice In, const OriginEscapesFact &) { return In; } + Lattice transfer(Lattice In, const UseFact &) { return In; } + Lattice transfer(Lattice In, const TestPointFact &) { return In; } +}; +} // namespace clang::lifetimes::internal +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_DATAFLOW_H diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp new file mode 100644 index 0000000000000..68317318ff4e2 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -0,0 +1,108 @@ +//===- Facts.cpp - Lifetime Analysis Facts Implementation -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h" +#include "clang/AST/Decl.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" + +namespace clang::lifetimes::internal { + +void Fact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "Fact (Kind: " << static_cast(K) << ")\n"; +} + +void IssueFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &OM) const { + OS << "Issue ("; + LM.getLoan(getLoanID())->dump(OS); + OS << ", ToOrigin: "; + OM.dump(getOriginID(), OS); + OS << ")\n"; +} + +void ExpireFact::dump(llvm::raw_ostream &OS, const LoanManager &LM, + const OriginManager &) const { + OS << "Expire ("; + LM.getLoan(getLoanID())->dump(OS); + OS << ")\n"; +} + +void OriginFlowFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginFlow (Dest: "; + OM.dump(getDestOriginID(), OS); + OS << ", Src: "; + OM.dump(getSrcOriginID(), OS); + OS << (getKillDest() ? "" : ", Merge"); + OS << ")\n"; +} + +void OriginEscapesFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "OriginEscapes ("; + OM.dump(getEscapedOriginID(), OS); + OS << ")\n"; +} + +void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &OM) const { + OS << "Use ("; + OM.dump(getUsedOrigin(), OS); + OS << ", " << (isWritten() ? 
"Write" : "Read") << ")\n"; +} + +void TestPointFact::dump(llvm::raw_ostream &OS, const LoanManager &, + const OriginManager &) const { + OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n"; +} + +llvm::StringMap FactManager::getTestPoints() const { + llvm::StringMap AnnotationToPointMap; + for (const auto &BlockFacts : BlockToFacts) { + for (const Fact *F : BlockFacts) { + if (const auto *TPF = F->getAs()) { + StringRef PointName = TPF->getAnnotation(); + assert(!AnnotationToPointMap.contains(PointName) && + "more than one test points with the same name"); + AnnotationToPointMap[PointName] = F; + } + } + } + return AnnotationToPointMap; +} + +void FactManager::dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) + if (const auto *ND = dyn_cast(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + // Print blocks in the order as they appear in code for a stable ordering. 
+ for (const CFGBlock *B : *AC.getAnalysis()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + for (const Fact *F : getFacts(B)) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs(), LoanMgr, OriginMgr); + } + llvm::dbgs() << " End of Block\n"; + } +} + +llvm::ArrayRef +FactManager::getBlockContaining(ProgramPoint P) const { + for (const auto &BlockToFactsVec : BlockToFacts) { + for (const Fact *F : BlockToFactsVec) + if (F == P) + return BlockToFactsVec; + } + return {}; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp new file mode 100644 index 0000000000000..2f270b03996f2 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -0,0 +1,372 @@ +//===- FactsGenerator.cpp - Lifetime Facts Generation -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/TimeProfiler.h" + +namespace clang::lifetimes::internal { +using llvm::isa_and_present; + +static bool isPointerType(QualType QT) { + return QT->isPointerOrReferenceType() || isGslPointerType(QT); +} +// Check if a type has an origin. +static bool hasOrigin(const Expr *E) { + return E->isGLValue() || isPointerType(E->getType()); +} + +static bool hasOrigin(const VarDecl *VD) { + return isPointerType(VD->getType()); +} + +/// Creates a loan for the storage path of a given declaration reference. 
+/// This function should be called whenever a DeclRefExpr represents a borrow. +/// \param DRE The declaration reference expression that initiates the borrow. +/// \return The new Loan on success, nullptr otherwise. +static const PathLoan *createLoan(FactManager &FactMgr, + const DeclRefExpr *DRE) { + if (const auto *VD = dyn_cast(DRE->getDecl())) { + AccessPath Path(VD); + // The loan is created at the location of the DeclRefExpr. + return FactMgr.getLoanMgr().createLoan(Path, DRE); + } + return nullptr; +} + +void FactsGenerator::run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + const CFG &Cfg = *AC.getCFG(); + llvm::SmallVector PlaceholderLoanFacts = issuePlaceholderLoans(); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis()) { + CurrentBlockFacts.clear(); + EscapesInCurrentBlock.clear(); + if (Block == &Cfg.getEntry()) + CurrentBlockFacts.append(PlaceholderLoanFacts.begin(), + PlaceholderLoanFacts.end()); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional CS = Element.getAs()) + Visit(CS->getStmt()); + else if (std::optional LifetimeEnds = + Element.getAs()) + handleLifetimeEnds(*LifetimeEnds); + } + CurrentBlockFacts.append(EscapesInCurrentBlock.begin(), + EscapesInCurrentBlock.end()); + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } +} + +void FactsGenerator::VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast(D)) + if (hasOrigin(VD)) + if (const Expr *InitExpr = VD->getInit()) + killAndFlowOrigin(*VD, *InitExpr); +} + +void FactsGenerator::VisitDeclRefExpr(const DeclRefExpr *DRE) { + handleUse(DRE); + // For non-pointer/non-view types, a reference to the variable's storage + // is a borrow. We create a loan for it. 
+ // For pointer/view types, we stick to the existing model for now and do + // not create an extra origin for the l-value expression itself. + + // TODO: A single origin for a `DeclRefExpr` for a pointer or view type is + // not sufficient to model the different levels of indirection. The current + // single-origin model cannot distinguish between a loan to the variable's + // storage and a loan to what it points to. A multi-origin model would be + // required for this. + if (!isPointerType(DRE->getType())) { + if (const Loan *L = createLoan(FactMgr, DRE)) { + OriginID ExprOID = FactMgr.getOriginMgr().getOrCreate(*DRE); + CurrentBlockFacts.push_back( + FactMgr.createFact(L->getID(), ExprOID)); + } + } +} + +void FactsGenerator::VisitCXXConstructExpr(const CXXConstructExpr *CCE) { + if (isGslPointerType(CCE->getType())) { + handleGSLPointerConstruction(CCE); + return; + } +} + +void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + // Specifically for conversion operators, + // like `std::string_view p = std::string{};` + if (isGslPointerType(MCE->getType()) && + isa_and_present(MCE->getCalleeDecl())) { + // The argument is the implicit object itself. + handleFunctionCall(MCE, MCE->getMethodDecl(), + {MCE->getImplicitObjectArgument()}, + /*IsGslConstruction=*/true); + } + if (const CXXMethodDecl *Method = MCE->getMethodDecl()) { + // Construct the argument list, with the implicit 'this' object as the + // first argument. + llvm::SmallVector Args; + Args.push_back(MCE->getImplicitObjectArgument()); + Args.append(MCE->getArgs(), MCE->getArgs() + MCE->getNumArgs()); + + handleFunctionCall(MCE, Method, Args, /*IsGslConstruction=*/false); + } +} + +void FactsGenerator::VisitCallExpr(const CallExpr *CE) { + handleFunctionCall(CE, CE->getDirectCallee(), + {CE->getArgs(), CE->getNumArgs()}); +} + +void FactsGenerator::VisitCXXNullPtrLiteralExpr( + const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. 
Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); +} + +void FactsGenerator::VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE)) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + killAndFlowOrigin(*ICE, *ICE->getSubExpr()); +} + +void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + // Taking address of a pointer-type expression is not yet supported and + // will be supported in multi-origin model. + if (isPointerType(SubExpr->getType())) + return; + // The origin of an address-of expression (e.g., &x) is the origin of + // its sub-expression (x). This fact will cause the dataflow analysis + // to propagate any loans held by the sub-expression's origin to the + // origin of this UnaryOperator expression. + killAndFlowOrigin(*UO, *SubExpr); + } +} + +void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr)) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + EscapesInCurrentBlock.push_back( + FactMgr.createFact(OID, RetExpr)); + } + } +} + +void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) + handleAssignment(BO->getLHS(), BO->getRHS()); +} + +void FactsGenerator::VisitConditionalOperator(const ConditionalOperator *CO) { + if (hasOrigin(CO)) { + // Merge origins from both branches of the conditional operator. + // We kill to clear the initial state and merge both origins into it. + killAndFlowOrigin(*CO, *CO->getTrueExpr()); + flowOrigin(*CO, *CO->getFalseExpr()); + } +} + +void FactsGenerator::VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + // Assignment operators have special "kill-then-propagate" semantics + // and are handled separately. 
+ if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2) { + handleAssignment(OCE->getArg(0), OCE->getArg(1)); + return; + } + handleFunctionCall(OCE, OCE->getDirectCallee(), + {OCE->getArgs(), OCE->getNumArgs()}, + /*IsGslConstruction=*/false); +} + +void FactsGenerator::VisitCXXFunctionalCastExpr( + const CXXFunctionalCastExpr *FCE) { + // Check if this is a test point marker. If so, we are done with this + // expression. + if (handleTestPoint(FCE)) + return; + if (isGslPointerType(FCE->getType())) + killAndFlowOrigin(*FCE, *FCE->getSubExpr()); +} + +void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { + if (!hasOrigin(ILE)) + return; + // For list initialization with a single element, like `View{...}`, the + // origin of the list itself is the origin of its single element. + if (ILE->getNumInits() == 1) + killAndFlowOrigin(*ILE, *ILE->getInit(0)); +} + +void FactsGenerator::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *MTE) { + if (!hasOrigin(MTE)) + return; + // A temporary object's origin is the same as the origin of the + // expression that initializes it. + killAndFlowOrigin(*MTE, *MTE->getSubExpr()); +} + +void FactsGenerator::handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds) { + /// TODO: Handle loans to temporaries. + const VarDecl *LifetimeEndsVD = LifetimeEnds.getVarDecl(); + if (!LifetimeEndsVD) + return; + // Iterate through all loans to see if any expire. + for (const auto *Loan : FactMgr.getLoanMgr().getLoans()) { + if (const auto *BL = dyn_cast(Loan)) { + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. 
+ if (BL->getAccessPath().D == LifetimeEndsVD) + CurrentBlockFacts.push_back(FactMgr.createFact( + BL->getID(), LifetimeEnds.getTriggerStmt()->getEndLoc())); + } + } +} + +void FactsGenerator::handleGSLPointerConstruction(const CXXConstructExpr *CCE) { + assert(isGslPointerType(CCE->getType())); + if (CCE->getNumArgs() != 1) + return; + if (hasOrigin(CCE->getArg(0))) + killAndFlowOrigin(*CCE, *CCE->getArg(0)); + else + // This could be a new borrow. + handleFunctionCall(CCE, CCE->getConstructor(), + {CCE->getArgs(), CCE->getNumArgs()}, + /*IsGslConstruction=*/true); +} + +/// Propagates argument origins to the origin of a call-like expression: each +/// argument bound to a lifetimebound parameter (or every argument, for a GSL +/// construction) flows into the origin of `Call`. Does nothing when the callee +/// is unknown, the call has no origin, or there are no arguments. +/// \param IsGslConstruction True if this is a GSL construction where all +/// argument origins should flow to the returned origin. +void FactsGenerator::handleFunctionCall(const Expr *Call, + const FunctionDecl *FD, + ArrayRef Args, + bool IsGslConstruction) { + // Ignore functions returning values with no origin. + if (!FD || !hasOrigin(Call)) + return; + auto IsArgLifetimeBound = [FD](unsigned I) -> bool { + const ParmVarDecl *PVD = nullptr; + if (const auto *Method = dyn_cast(FD); + Method && Method->isInstance()) { + if (I == 0) + // For the 'this' argument, the attribute is on the method itself. + return implicitObjectParamIsLifetimeBound(Method); + if ((I - 1) < Method->getNumParams()) + // For explicit arguments, find the corresponding parameter + // declaration. + PVD = Method->getParamDecl(I - 1); + } else if (I < FD->getNumParams()) + // For free functions or static methods. + PVD = FD->getParamDecl(I); + return PVD ?
PVD->hasAttr() : false; + }; + if (Args.empty()) + return; + bool killedSrc = false; + for (unsigned I = 0; I < Args.size(); ++I) + if (IsGslConstruction || IsArgLifetimeBound(I)) { + if (!killedSrc) { + killedSrc = true; + killAndFlowOrigin(*Call, *Args[I]); + } else + flowOrigin(*Call, *Args[I]); + } +} + +/// Checks if the expression is a `void("__lifetime_test_point_...")` cast. +/// If so, creates a `TestPointFact` and returns true. +bool FactsGenerator::handleTestPoint(const CXXFunctionalCastExpr *FCE) { + if (!FCE->getType()->isVoidType()) + return false; + + const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts(); + if (const auto *SL = dyn_cast(SubExpr)) { + llvm::StringRef LiteralValue = SL->getString(); + const std::string Prefix = "__lifetime_test_point_"; + + if (LiteralValue.starts_with(Prefix)) { + StringRef Annotation = LiteralValue.drop_front(Prefix.length()); + CurrentBlockFacts.push_back( + FactMgr.createFact(Annotation)); + return true; + } + } + return false; +} + +void FactsGenerator::handleAssignment(const Expr *LHSExpr, + const Expr *RHSExpr) { + if (!hasOrigin(LHSExpr)) + return; + // Find the underlying variable declaration for the left-hand side. + if (const auto *DRE_LHS = + dyn_cast(LHSExpr->IgnoreParenImpCasts())) { + markUseAsWrite(DRE_LHS); + if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl())) { + // Kill the old loans of the destination origin and flow the new loans + // from the source origin. + killAndFlowOrigin(*VD_LHS, *RHSExpr); + } + } +} + +// A DeclRefExpr will be treated as a use of the referenced decl. It will be +// checked for use-after-free unless it is later marked as being written to +// (e.g. on the left-hand side of an assignment). 
+void FactsGenerator::handleUse(const DeclRefExpr *DRE) { + if (isPointerType(DRE->getType())) { + UseFact *UF = FactMgr.createFact(DRE, FactMgr.getOriginMgr()); + CurrentBlockFacts.push_back(UF); + assert(!UseFacts.contains(DRE)); + UseFacts[DRE] = UF; + } +} + +void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { + if (!isPointerType(DRE->getType())) + return; + assert(UseFacts.contains(DRE)); + UseFacts[DRE]->markAsWritten(); +} + +// Creates an IssueFact for a new placeholder loan for each parameter that has +// an origin (pointer, reference, or GSL pointer type) at the function's entry. +// Returns an empty list when `AC.getDecl()` is not of the expected kind. +llvm::SmallVector FactsGenerator::issuePlaceholderLoans() { + const auto *FD = dyn_cast(AC.getDecl()); + if (!FD) + return {}; + + llvm::SmallVector PlaceholderLoanFacts; + for (const ParmVarDecl *PVD : FD->parameters()) { + if (hasOrigin(PVD)) { + const PlaceholderLoan *L = + FactMgr.getLoanMgr().createLoan(PVD); + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*PVD); + PlaceholderLoanFacts.push_back( + FactMgr.createFact(L->getID(), OID)); + } + } + return PlaceholderLoanFacts; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp new file mode 100644 index 0000000000000..54e343fc2ee5e --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp @@ -0,0 +1,104 @@ +//===- LifetimeAnnotations.cpp - -*--------------- C++------------------*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeLoc.h" + +namespace clang::lifetimes { + +const FunctionDecl * +getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { + return FD != nullptr ? FD->getMostRecentDecl() : nullptr; +} + +const CXXMethodDecl * +getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD) { + const FunctionDecl *FD = CMD; + return cast_if_present( + getDeclWithMergedLifetimeBoundAttrs(FD)); +} + +bool isNormalAssignmentOperator(const FunctionDecl *FD) { + OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator(); + bool IsAssignment = OO == OO_Equal || isCompoundAssignmentOperator(OO); + if (!IsAssignment) + return false; + QualType RetT = FD->getReturnType(); + if (!RetT->isLValueReferenceType()) + return false; + ASTContext &Ctx = FD->getASTContext(); + QualType LHST; + auto *MD = dyn_cast(FD); + if (MD && MD->isCXXInstanceMember()) + LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType()); + else + LHST = FD->getParamDecl(0)->getType(); + return Ctx.hasSameType(RetT, LHST); +} + +bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD) { + CMD = getDeclWithMergedLifetimeBoundAttrs(CMD); + return CMD && isNormalAssignmentOperator(CMD) && CMD->param_size() == 1 && + CMD->getParamDecl(0)->hasAttr(); +} + +bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { + FD = getDeclWithMergedLifetimeBoundAttrs(FD); + const TypeSourceInfo *TSI = FD->getTypeSourceInfo(); + if (!TSI) + return false; + // Don't declare this variable in the second operand of the for-statement; + // GCC miscompiles that by ending its 
lifetime before evaluating the + // third operand. See gcc.gnu.org/PR86769. + AttributedTypeLoc ATL; + for (TypeLoc TL = TSI->getTypeLoc(); + (ATL = TL.getAsAdjusted()); + TL = ATL.getModifiedLoc()) { + if (ATL.getAttrAs()) + return true; + } + + return isNormalAssignmentOperator(FD); +} + +template static bool isRecordWithAttr(QualType Type) { + auto *RD = Type->getAsCXXRecordDecl(); + if (!RD) + return false; + // Generally, if a primary template class declaration is annotated with an + // attribute, all its specializations generated from template instantiations + // should inherit the attribute. + // + // However, since lifetime analysis occurs during parsing, we may encounter + // cases where a full definition of the specialization is not required. In + // such cases, the specialization declaration remains incomplete and lacks the + // attribute. Therefore, we fall back to checking the primary template class. + // + // Note: it is possible for a specialization declaration to have an attribute + // even if the primary template does not. + // + // FIXME: What if the primary template and explicit specialization + // declarations have conflicting attributes? We should consider diagnosing + // this scenario. + bool Result = RD->hasAttr(); + + if (auto *CTSD = dyn_cast(RD)) + Result |= CTSD->getSpecializedTemplate()->getTemplatedDecl()->hasAttr(); + + return Result; +} + +bool isGslPointerType(QualType QT) { return isRecordWithAttr(QT); } +bool isGslOwnerType(QualType QT) { return isRecordWithAttr(QT); } + +} // namespace clang::lifetimes diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp new file mode 100644 index 0000000000000..a51ba4280f284 --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety/LifetimeSafety.cpp @@ -0,0 +1,78 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the main LifetimeSafetyAnalysis class, which coordinates
+// the various components (fact generation, loan propagation, live origins
+// analysis, and checking) to detect lifetime safety violations in C++ code.
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Checker.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <memory>
+
+namespace clang::lifetimes {
+namespace internal {
+
+LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+                                               LifetimeSafetyReporter *Reporter)
+    : AC(AC), Reporter(Reporter) {}
+
+void LifetimeSafetyAnalysis::run() {
+  llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis");
+
+  const CFG &Cfg = *AC.getCFG();
+  DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
+                                       /*ShowColors=*/true));
+  FactMgr.init(Cfg);
+
+  FactsGenerator FactGen(FactMgr, AC);
+  FactGen.run();
+  DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC));
+
+  /// TODO(opt): Consider optimizing individual blocks before running the
+  /// dataflow analysis.
+  /// 1. Expression Origins: These are assigned once and read at most once,
+  ///    forming simple chains. These chains can be compressed into a single
+  ///    assignment.
+  /// 2. Block-Local Loans: Origins of expressions are never read by other
+  ///    blocks; only Decls are visible. Therefore, loans in a block that
+  ///    never reach an Origin associated with a Decl can be safely dropped by
+  ///    the analysis.
+  /// 3. Collapse ExpireFacts belonging to same source location into a single
+  ///    Fact.
+  LoanPropagation = std::make_unique<LoanPropagationAnalysis>(
+      Cfg, AC, FactMgr, Factory.OriginMapFactory, Factory.LoanSetFactory);
+
+  LiveOrigins = std::make_unique<LiveOriginsAnalysis>(
+      Cfg, AC, FactMgr, Factory.LivenessMapFactory);
+  DEBUG_WITH_TYPE("LiveOrigins",
+                  LiveOrigins->dump(llvm::dbgs(), FactMgr.getTestPoints()));
+
+  runLifetimeChecker(*LoanPropagation, *LiveOrigins, FactMgr, AC, Reporter);
+}
+} // namespace internal
+
+void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC,
+                               LifetimeSafetyReporter *Reporter) {
+  internal::LifetimeSafetyAnalysis Analysis(AC, Reporter);
+  Analysis.run();
+}
+} // namespace clang::lifetimes
diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
new file mode 100644
index 0000000000000..57338122b4440
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
@@ -0,0 +1,198 @@
+//===- LiveOrigins.cpp - Live Origins Analysis -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/LifetimeSafety/LiveOrigins.h"
+#include "Dataflow.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang::lifetimes::internal {
+namespace {
+
+/// The dataflow lattice for origin liveness analysis.
+/// It tracks which origins are live, why they're live (the causing use or
+/// escape fact), and the confidence level of that liveness.
+struct Lattice {
+  LivenessMap LiveOrigins;
+
+  Lattice() : LiveOrigins(nullptr) {}
+
+  explicit Lattice(LivenessMap L) : LiveOrigins(L) {}
+
+  bool operator==(const Lattice &Other) const {
+    return LiveOrigins == Other.LiveOrigins;
+  }
+
+  bool operator!=(const Lattice &Other) const { return !(*this == Other); }
+
+  void dump(llvm::raw_ostream &OS, const OriginManager &OM) const {
+    if (LiveOrigins.isEmpty())
+      OS << " \n";
+    for (const auto &Entry : LiveOrigins) {
+      OriginID OID = Entry.first;
+      const LivenessInfo &Info = Entry.second;
+      OS << " ";
+      OM.dump(OID, OS);
+      OS << " is ";
+      switch (Info.Kind) {
+      case LivenessKind::Must:
+        OS << "definitely";
+        break;
+      case LivenessKind::Maybe:
+        OS << "maybe";
+        break;
+      case LivenessKind::Dead:
+        llvm_unreachable("liveness kind of live origins should not be dead.");
+      }
+      OS << " live at this point\n";
+    }
+  }
+};
+
+static SourceLocation GetFactLoc(CausingFactType F) {
+  if (const auto *UF = F.dyn_cast<const UseFact *>())
+    return UF->getUseExpr()->getExprLoc();
+  if (const auto *OEF = F.dyn_cast<const OriginEscapesFact *>())
+    return OEF->getEscapeExpr()->getExprLoc();
+  llvm_unreachable("unhandled causing fact in PointerUnion");
+}
+
+/// The analysis that tracks which origins are live, with granular information
+/// about the causing use fact and confidence level. This is a backward
+/// analysis.
+class AnalysisImpl
+    : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Backward> {
+
+public:
+  AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+               LivenessMap::Factory &SF)
+      : DataflowAnalysis(C, AC, F), FactMgr(F), Factory(SF) {}
+  using DataflowAnalysis::transfer;
+
+  StringRef getAnalysisName() const { return "LiveOrigins"; }
+
+  Lattice getInitialState() { return Lattice(Factory.getEmptyMap()); }
+
+  /// Merges two lattices by combining liveness information.
+  /// When the same origin has different confidence levels, we take the lower
+  /// one.
+  Lattice join(Lattice L1, Lattice L2) const {
+    // When both sides carry a causing fact, keep the one with the earlier
+    // source location so that the join is hermetic and commutative.
+    auto CombineCausingFact = [](CausingFactType A,
+                                 CausingFactType B) -> CausingFactType {
+      if (!A)
+        return B;
+      if (!B)
+        return A;
+      return GetFactLoc(A) < GetFactLoc(B) ? A : B;
+    };
+    auto CombineLivenessKind = [](LivenessKind K1,
+                                  LivenessKind K2) -> LivenessKind {
+      assert(K1 != LivenessKind::Dead && "LivenessKind should not be dead.");
+      assert(K2 != LivenessKind::Dead && "LivenessKind should not be dead.");
+      // Only return "Must" if both paths are "Must", otherwise Maybe.
+      if (K1 == LivenessKind::Must && K2 == LivenessKind::Must)
+        return LivenessKind::Must;
+      return LivenessKind::Maybe;
+    };
+    auto CombineLivenessInfo = [&](const LivenessInfo *L1,
+                                   const LivenessInfo *L2) -> LivenessInfo {
+      assert((L1 || L2) && "unexpectedly merging 2 empty sets");
+      if (!L1)
+        return LivenessInfo(L2->CausingFact, LivenessKind::Maybe);
+      if (!L2)
+        return LivenessInfo(L1->CausingFact, LivenessKind::Maybe);
+      return LivenessInfo(CombineCausingFact(L1->CausingFact, L2->CausingFact),
+                          CombineLivenessKind(L1->Kind, L2->Kind));
+    };
+    return Lattice(utils::join(
+        L1.LiveOrigins, L2.LiveOrigins, Factory, CombineLivenessInfo,
+        // A symmetric join is required here. If an origin is live on one
+        // branch but not the other, its confidence must be demoted to `Maybe`.
+        utils::JoinKind::Symmetric));
+  }
+
+  /// A read operation makes the origin live with definite confidence, as it
+  /// dominates this program point. A write operation kills the liveness of
+  /// the origin since it overwrites the value.
+  Lattice transfer(Lattice In, const UseFact &UF) {
+    OriginID OID = UF.getUsedOrigin();
+    // Write kills liveness.
+    if (UF.isWritten())
+      return Lattice(Factory.remove(In.LiveOrigins, OID));
+    // Read makes origin live with definite confidence (dominates this point).
+    return Lattice(Factory.add(In.LiveOrigins, OID,
+                               LivenessInfo(&UF, LivenessKind::Must)));
+  }
+
+  /// An escaping origin (e.g., via return) makes the origin live with definite
+  /// confidence, as it dominates this program point.
+  Lattice transfer(Lattice In, const OriginEscapesFact &OEF) {
+    OriginID OID = OEF.getEscapedOriginID();
+    return Lattice(Factory.add(In.LiveOrigins, OID,
+                               LivenessInfo(&OEF, LivenessKind::Must)));
+  }
+
+  /// Issuing a new loan to an origin kills its liveness.
+  Lattice transfer(Lattice In, const IssueFact &IF) {
+    return Lattice(Factory.remove(In.LiveOrigins, IF.getOriginID()));
+  }
+
+  /// An OriginFlow kills the liveness of the destination origin if `KillDest`
+  /// is true. Otherwise, the incoming liveness state is returned unchanged.
+  Lattice transfer(Lattice In, const OriginFlowFact &OF) {
+    if (!OF.getKillDest())
+      return In;
+    return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID()));
+  }
+
+  LivenessMap getLiveOriginsAt(ProgramPoint P) const {
+    return getState(P).LiveOrigins;
+  }
+
+  // Dump liveness values on all test points in the program.
+  void dump(llvm::raw_ostream &OS,
+            llvm::StringMap<ProgramPoint> TestPoints) const {
+    OS << "==========================================\n";
+    OS << getAnalysisName() << " results:\n";
+    OS << "==========================================\n";
+    for (const auto &Entry : TestPoints) {
+      OS << "TestPoint: " << Entry.getKey() << "\n";
+      getState(Entry.getValue()).dump(OS, FactMgr.getOriginMgr());
+    }
+  }
+
+private:
+  FactManager &FactMgr;
+  LivenessMap::Factory &Factory;
+};
+} // namespace
+
+// PImpl wrapper implementation
+class LiveOriginsAnalysis::Impl : public AnalysisImpl {
+  using AnalysisImpl::AnalysisImpl;
+};
+
+LiveOriginsAnalysis::LiveOriginsAnalysis(const CFG &C, AnalysisDeclContext &AC,
+                                         FactManager &F,
+                                         LivenessMap::Factory &SF)
+    : PImpl(std::make_unique<Impl>(C, AC, F, SF)) {
+  PImpl->run();
+}
+
+LiveOriginsAnalysis::~LiveOriginsAnalysis() = default;
+
+LivenessMap LiveOriginsAnalysis::getLiveOriginsAt(ProgramPoint P) const {
+  return PImpl->getLiveOriginsAt(P);
+}
+
+void LiveOriginsAnalysis::dump(llvm::raw_ostream &OS,
+                               llvm::StringMap<ProgramPoint> TestPoints) const {
+  PImpl->dump(OS, TestPoints);
+}
+} // namespace clang::lifetimes::internal
diff --git a/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp
new file mode 100644
index 0000000000000..23ce1b78dfde2
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety/LoanPropagation.cpp
@@ -0,0 +1,233 @@
+//===- LoanPropagation.cpp - Loan Propagation Analysis ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include <cassert>
+#include <memory>
+
+#include "Dataflow.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Facts.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/LoanPropagation.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
+#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang::lifetimes::internal {
+
+// Prepass to find persistent origins. An origin is persistent if it is
+// referenced in more than one basic block.
+static llvm::BitVector computePersistentOrigins(const FactManager &FactMgr,
+                                                const CFG &C) {
+  llvm::TimeTraceScope TimeProfile("ComputePersistentOrigins");
+  unsigned NumOrigins = FactMgr.getOriginMgr().getNumOrigins();
+  llvm::BitVector PersistentOrigins(NumOrigins);
+
+  llvm::SmallVector<const CFGBlock *> OriginToFirstSeenBlock(NumOrigins,
+                                                             nullptr);
+  for (const CFGBlock *B : C) {
+    for (const Fact *F : FactMgr.getFacts(B)) {
+      auto CheckOrigin = [&](OriginID OID) {
+        if (PersistentOrigins.test(OID.Value))
+          return;
+        auto &FirstSeenBlock = OriginToFirstSeenBlock[OID.Value];
+        if (FirstSeenBlock == nullptr)
+          FirstSeenBlock = B;
+        if (FirstSeenBlock != B) {
+          // We saw this origin in more than one block.
+          PersistentOrigins.set(OID.Value);
+        }
+      };
+
+      switch (F->getKind()) {
+      case Fact::Kind::Issue:
+        CheckOrigin(F->getAs<IssueFact>()->getOriginID());
+        break;
+      case Fact::Kind::OriginFlow: {
+        const auto *OF = F->getAs<OriginFlowFact>();
+        CheckOrigin(OF->getDestOriginID());
+        CheckOrigin(OF->getSrcOriginID());
+        break;
+      }
+      case Fact::Kind::Use:
+        CheckOrigin(F->getAs<UseFact>()->getUsedOrigin());
+        break;
+      case Fact::Kind::OriginEscapes:
+      case Fact::Kind::Expire:
+      case Fact::Kind::TestPoint:
+        break;
+      }
+    }
+  }
+  return PersistentOrigins;
+}
+
+namespace {
+
+/// Represents the dataflow lattice for loan propagation.
+///
+/// This lattice tracks which loans each origin may hold at a given program
+/// point. The lattice has a finite height: An origin's loan set is bounded by
+/// the total number of loans in the function.
+struct Lattice {
+  /// Maps each origin that appears in multiple blocks to the set of loans it
+  /// contains. Participates in join operations.
+  OriginLoanMap PersistentOrigins = OriginLoanMap(nullptr);
+  /// Origins confined to a single block. Discarded at block boundaries.
+  OriginLoanMap BlockLocalOrigins = OriginLoanMap(nullptr);
+
+  explicit Lattice(const OriginLoanMap &Persistent,
+                   const OriginLoanMap &BlockLocal)
+      : PersistentOrigins(Persistent), BlockLocalOrigins(BlockLocal) {}
+  Lattice() = default;
+
+  bool operator==(const Lattice &Other) const {
+    return PersistentOrigins == Other.PersistentOrigins &&
+           BlockLocalOrigins == Other.BlockLocalOrigins;
+  }
+  bool operator!=(const Lattice &Other) const { return !(*this == Other); }
+
+  void dump(llvm::raw_ostream &OS) const {
+    OS << "LoanPropagationLattice State:\n";
+    OS << " Persistent Origins:\n";
+    if (PersistentOrigins.isEmpty())
+      OS << " \n";
+    for (const auto &Entry : PersistentOrigins) {
+      if (Entry.second.isEmpty())
+        OS << " Origin " << Entry.first << " contains no loans\n";
+      for (const LoanID &LID : Entry.second)
+        OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
+    }
+    OS << " Block-Local Origins:\n";
+    if (BlockLocalOrigins.isEmpty())
+      OS << " \n";
+    for (const auto &Entry : BlockLocalOrigins) {
+      if (Entry.second.isEmpty())
+        OS << " Origin " << Entry.first << " contains no loans\n";
+      for (const LoanID &LID : Entry.second)
+        OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
+    }
+  }
+};
+
+class AnalysisImpl
+    : public DataflowAnalysis<AnalysisImpl, Lattice, Direction::Forward> {
+public:
+  AnalysisImpl(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+               OriginLoanMap::Factory &OriginLoanMapFactory,
+               LoanSet::Factory &LoanSetFactory)
+      : DataflowAnalysis(C, AC, F), OriginLoanMapFactory(OriginLoanMapFactory),
+        LoanSetFactory(LoanSetFactory),
+        PersistentOrigins(computePersistentOrigins(F, C)) {}
+
+  using Base::transfer;
+
+  StringRef getAnalysisName() const { return "LoanPropagation"; }
+
+  Lattice getInitialState() { return Lattice{}; }
+
+  /// Merges two lattices by taking the union of loans for each origin.
+  /// Only persistent origins are joined; block-local origins are discarded.
+  Lattice join(Lattice A, Lattice B) {
+    OriginLoanMap JoinedOrigins = utils::join(
+        A.PersistentOrigins, B.PersistentOrigins, OriginLoanMapFactory,
+        [&](const LoanSet *S1, const LoanSet *S2) {
+          assert((S1 || S2) && "unexpectedly merging 2 empty sets");
+          if (!S1)
+            return *S2;
+          if (!S2)
+            return *S1;
+          return utils::join(*S1, *S2, LoanSetFactory);
+        },
+        // Asymmetric join is a performance win. For origins present only on one
+        // branch, the loan set can be carried over as-is.
+        utils::JoinKind::Asymmetric);
+    return Lattice(JoinedOrigins, OriginLoanMapFactory.getEmptyMap());
+  }
+
+  /// A new loan is issued to the origin. Old loans are erased.
+  Lattice transfer(Lattice In, const IssueFact &F) {
+    OriginID OID = F.getOriginID();
+    LoanID LID = F.getLoanID();
+    LoanSet NewLoans = LoanSetFactory.add(LoanSetFactory.getEmptySet(), LID);
+    return setLoans(In, OID, NewLoans);
+  }
+
+  /// A flow from source to destination. If `KillDest` is true, this replaces
+  /// the destination's loans with the source's. Otherwise, the source's loans
+  /// are merged into the destination's.
+  Lattice transfer(Lattice In, const OriginFlowFact &F) {
+    OriginID DestOID = F.getDestOriginID();
+    OriginID SrcOID = F.getSrcOriginID();
+
+    LoanSet DestLoans =
+        F.getKillDest() ? LoanSetFactory.getEmptySet() : getLoans(In, DestOID);
+    LoanSet SrcLoans = getLoans(In, SrcOID);
+    LoanSet MergedLoans = utils::join(DestLoans, SrcLoans, LoanSetFactory);
+
+    return setLoans(In, DestOID, MergedLoans);
+  }
+
+  LoanSet getLoans(OriginID OID, ProgramPoint P) const {
+    return getLoans(getState(P), OID);
+  }
+
+private:
+  /// Returns true if the origin is persistent (referenced in multiple blocks).
+  bool isPersistent(OriginID OID) const {
+    return PersistentOrigins.test(OID.Value);
+  }
+
+  Lattice setLoans(Lattice L, OriginID OID, LoanSet Loans) {
+    if (isPersistent(OID))
+      return Lattice(OriginLoanMapFactory.add(L.PersistentOrigins, OID, Loans),
+                     L.BlockLocalOrigins);
+    return Lattice(L.PersistentOrigins,
+                   OriginLoanMapFactory.add(L.BlockLocalOrigins, OID, Loans));
+  }
+
+  LoanSet getLoans(Lattice L, OriginID OID) const {
+    const OriginLoanMap *Map =
+        isPersistent(OID) ? &L.PersistentOrigins : &L.BlockLocalOrigins;
+    if (auto *Loans = Map->lookup(OID))
+      return *Loans;
+    return LoanSetFactory.getEmptySet();
+  }
+
+  OriginLoanMap::Factory &OriginLoanMapFactory;
+  LoanSet::Factory &LoanSetFactory;
+  /// Boolean vector indexed by origin ID. If true, the origin appears in
+  /// multiple basic blocks and must participate in join operations. If false,
+  /// the origin is block-local and can be discarded at block boundaries.
+  llvm::BitVector PersistentOrigins;
+};
+} // namespace
+
+class LoanPropagationAnalysis::Impl final : public AnalysisImpl {
+  using AnalysisImpl::AnalysisImpl;
+};
+
+LoanPropagationAnalysis::LoanPropagationAnalysis(
+    const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+    OriginLoanMap::Factory &OriginLoanMapFactory,
+    LoanSet::Factory &LoanSetFactory)
+    : PImpl(std::make_unique<Impl>(C, AC, F, OriginLoanMapFactory,
+                                   LoanSetFactory)) {
+  PImpl->run();
+}
+
+LoanPropagationAnalysis::~LoanPropagationAnalysis() = default;
+
+LoanSet LoanPropagationAnalysis::getLoans(OriginID OID, ProgramPoint P) const {
+  return PImpl->getLoans(OID, P);
+}
+} // namespace clang::lifetimes::internal
diff --git a/clang/lib/Analysis/LifetimeSafety/Loans.cpp b/clang/lib/Analysis/LifetimeSafety/Loans.cpp
new file mode 100644
index 0000000000000..fdfdbb40a2a46
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety/Loans.cpp
@@ -0,0 +1,22 @@
+//===- Loans.cpp - Loan Implementation --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the
Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/LifetimeSafety/Loans.h"
+
+namespace clang::lifetimes::internal {
+
+void PathLoan::dump(llvm::raw_ostream &OS) const {
+  OS << getID();
+  OS << " (Path: " << Path.D->getNameAsString() << ")";
+}
+
+void PlaceholderLoan::dump(llvm::raw_ostream &OS) const {
+  OS << getID() << " (Placeholder loan)";
+}
+
+} // namespace clang::lifetimes::internal
diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
new file mode 100644
index 0000000000000..0f2eaa94a5987
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
@@ -0,0 +1,91 @@
+//===- Origins.cpp - Origin Implementation -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h" + +namespace clang::lifetimes::internal { + +void OriginManager::dump(OriginID OID, llvm::raw_ostream &OS) const { + OS << OID << " ("; + Origin O = getOrigin(OID); + if (const ValueDecl *VD = O.getDecl()) + OS << "Decl: " << VD->getNameAsString(); + else if (const Expr *E = O.getExpr()) + OS << "Expr: " << E->getStmtClassName(); + else + OS << "Unknown"; + OS << ")"; +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); +} + +Origin &OriginManager::addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); +} + +// TODO: Mark this method as const once we remove the call to getOrCreate. +OriginID OriginManager::get(const Expr &E) { + if (auto *ParenIgnored = E.IgnoreParens(); ParenIgnored != &E) + return get(*ParenIgnored); + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + // If the expression itself has no specific origin, and it's a reference + // to a declaration, its origin is that of the declaration it refers to. + // For pointer types, where we don't pre-emptively create an origin for the + // DeclRefExpr itself. + if (const auto *DRE = dyn_cast(&E)) + return get(*DRE->getDecl()); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + return getOrCreate(E); +} + +OriginID OriginManager::get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + // TODO: This should be an assert(It != DeclToOriginID.end()). 
The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == DeclToOriginID.end()) + return getOrCreate(D); + + return It->second; +} + +OriginID OriginManager::getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; +} + +const Origin &OriginManager::getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; +} + +OriginID OriginManager::getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; +} + +} // namespace clang::lifetimes::internal diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 942b5b1e08929..e769b23ee0bcf 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -31,7 +31,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" -#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -3498,6 +3498,56 @@ class CallableVisitor : public DynamicRecursiveASTVisitor { } }; +namespace clang::lifetimes { +namespace { +class LifetimeSafetyReporterImpl : public LifetimeSafetyReporter { + +public: + LifetimeSafetyReporterImpl(Sema &S) : S(S) {} + + void reportUseAfterFree(const Expr *IssueExpr, const Expr *UseExpr, + SourceLocation FreeLoc, 
Confidence C) override { + S.Diag(IssueExpr->getExprLoc(), + C == Confidence::Definite + ? diag::warn_lifetime_safety_loan_expires_permissive + : diag::warn_lifetime_safety_loan_expires_strict) + << IssueExpr->getEndLoc(); + S.Diag(FreeLoc, diag::note_lifetime_safety_destroyed_here); + S.Diag(UseExpr->getExprLoc(), diag::note_lifetime_safety_used_here) + << UseExpr->getEndLoc(); + } + + void reportUseAfterReturn(const Expr *IssueExpr, const Expr *EscapeExpr, + SourceLocation ExpiryLoc, Confidence C) override { + S.Diag(IssueExpr->getExprLoc(), + C == Confidence::Definite + ? diag::warn_lifetime_safety_return_stack_addr_permissive + : diag::warn_lifetime_safety_return_stack_addr_strict) + << IssueExpr->getEndLoc(); + + S.Diag(EscapeExpr->getExprLoc(), diag::note_lifetime_safety_returned_here) + << EscapeExpr->getEndLoc(); + } + + void suggestAnnotation(const ParmVarDecl *PVD, + const Expr *EscapeExpr) override { + SourceLocation InsertionPoint = Lexer::getLocForEndOfToken( + PVD->getEndLoc(), 0, S.getSourceManager(), S.getLangOpts()); + S.Diag(PVD->getBeginLoc(), diag::warn_lifetime_safety_suggest_lifetimebound) + << PVD->getSourceRange() + << FixItHint::CreateInsertion(InsertionPoint, + " [[clang::lifetimebound]]"); + S.Diag(EscapeExpr->getBeginLoc(), + diag::note_lifetime_safety_suggestion_returned_here) + << EscapeExpr->getSourceRange(); + } + +private: + Sema &S; +}; +} // namespace +} // namespace clang::lifetimes + void clang::sema::AnalysisBasedWarnings::IssueWarnings( TranslationUnitDecl *TU) { if (!TU) @@ -3600,6 +3650,11 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( AC.getCFGBuildOptions().AddCXXNewAllocator = false; AC.getCFGBuildOptions().AddCXXDefaultInitExprInCtors = true; + bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety; + + if (EnableLifetimeSafetyAnalysis) + AC.getCFGBuildOptions().AddLifetime = true; + // Force that certain expressions appear as CFGElements in the CFG. 
This // is used to speed up various analyses. // FIXME: This isn't the right factoring. This is here for initial @@ -3607,11 +3662,10 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // expect to always be CFGElements and then fill in the BuildOptions // appropriately. This is essentially a layering violation. if (P.enableCheckUnreachable || P.enableThreadSafetyAnalysis || - P.enableConsumedAnalysis) { + P.enableConsumedAnalysis || EnableLifetimeSafetyAnalysis) { // Unreachable code analysis and thread safety require a linearized CFG. AC.getCFGBuildOptions().setAllAlwaysAdd(); - } - else { + } else { AC.getCFGBuildOptions() .setAlwaysAdd(Stmt::BinaryOperatorClass) .setAlwaysAdd(Stmt::CompoundAssignOperatorClass) @@ -3622,7 +3676,6 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( .setAlwaysAdd(Stmt::UnaryOperatorClass); } - bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety; // Install the logical handler. std::optional LEH; if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { @@ -3749,8 +3802,10 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // TODO: Enable lifetime safety analysis for other languages once it is // stable. if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { - if (CFG *cfg = AC.getCFG()) - runLifetimeSafetyAnalysis(*cast(D), *cfg, AC); + if (AC.getCFG()) { + lifetimes::LifetimeSafetyReporterImpl LifetimeSafetyReporter(S); + lifetimes::runLifetimeSafetyAnalysis(AC, &LifetimeSafetyReporter); + } } // Check for violations of "called once" parameter properties. 
if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 20155d94900ee..9da50ecf1823c 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -119,6 +119,7 @@ add_clang_library(clangSema clangAPINotes clangAST clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangEdit clangLex diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp index e02e00231e58e..17bd89ba972f4 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -10,12 +10,16 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Sema.h" #include "llvm/ADT/PointerIntPair.h" namespace clang::sema { +using lifetimes::isGslOwnerType; +using lifetimes::isGslPointerType; + namespace { enum LifetimeKind { /// The lifetime of a temporary bound to this entity ends at the end of the @@ -255,38 +259,8 @@ static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path, Expr *Init, ReferenceKind RK, LocalVisitor Visit); -template static bool isRecordWithAttr(QualType Type) { - auto *RD = Type->getAsCXXRecordDecl(); - if (!RD) - return false; - // Generally, if a primary template class declaration is annotated with an - // attribute, all its specializations generated from template instantiations - // should inherit the attribute. - // - // However, since lifetime analysis occurs during parsing, we may encounter - // cases where a full definition of the specialization is not required. In - // such cases, the specialization declaration remains incomplete and lacks the - // attribute. Therefore, we fall back to checking the primary template class. 
- // - // Note: it is possible for a specialization declaration to have an attribute - // even if the primary template does not. - // - // FIXME: What if the primary template and explicit specialization - // declarations have conflicting attributes? We should consider diagnosing - // this scenario. - bool Result = RD->hasAttr(); - - if (auto *CTSD = dyn_cast(RD)) - Result |= CTSD->getSpecializedTemplate()->getTemplatedDecl()->hasAttr(); - - return Result; -} - -// Tells whether the type is annotated with [[gsl::Pointer]]. -bool isGLSPointerType(QualType QT) { return isRecordWithAttr(QT); } - static bool isPointerLikeType(QualType QT) { - return isGLSPointerType(QT) || QT->isPointerType() || QT->isNullPtrType(); + return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType(); } // Decl::isInStdNamespace will return false for iterators in some STL @@ -329,7 +303,7 @@ static bool isContainerOfOwner(const RecordDecl *Container) { return false; const auto &TAs = CTSD->getTemplateArgs(); return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type && - isRecordWithAttr(TAs[0].getAsType()); + isGslOwnerType(TAs[0].getAsType()); } // Returns true if the given Record is `std::initializer_list`. 
@@ -347,14 +321,13 @@ static bool isStdInitializerListOfPointer(const RecordDecl *RD) { static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) { if (auto *Conv = dyn_cast_or_null(Callee)) - if (isRecordWithAttr(Conv->getConversionType()) && + if (isGslPointerType(Conv->getConversionType()) && Callee->getParent()->hasAttr()) return true; if (!isInStlNamespace(Callee->getParent())) return false; - if (!isRecordWithAttr( - Callee->getFunctionObjectParameterType()) && - !isRecordWithAttr(Callee->getFunctionObjectParameterType())) + if (!isGslPointerType(Callee->getFunctionObjectParameterType()) && + !isGslOwnerType(Callee->getFunctionObjectParameterType())) return false; if (isPointerLikeType(Callee->getReturnType())) { if (!Callee->getIdentifier()) @@ -391,7 +364,7 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { if (!RD->hasAttr() && !RD->hasAttr()) return false; if (FD->getReturnType()->isPointerType() || - isRecordWithAttr(FD->getReturnType())) { + isGslPointerType(FD->getReturnType())) { return llvm::StringSwitch(FD->getName()) .Cases("begin", "rbegin", "cbegin", "crbegin", true) .Cases("end", "rend", "cend", "crend", true) @@ -463,7 +436,7 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { return true; // RHS must be an owner. - if (!isRecordWithAttr(RHSArgType)) + if (!isGslOwnerType(RHSArgType)) return false; // Bail out if the RHS is Owner. @@ -503,60 +476,6 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { return true; } -// Return true if this is an "normal" assignment operator. -// We assume that a normal assignment operator always returns *this, that is, -// an lvalue reference that is the same type as the implicit object parameter -// (or the LHS for a non-member operator$=). 
-static bool isNormalAssignmentOperator(const FunctionDecl *FD) { - OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator(); - if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) { - QualType RetT = FD->getReturnType(); - if (RetT->isLValueReferenceType()) { - ASTContext &Ctx = FD->getASTContext(); - QualType LHST; - auto *MD = dyn_cast(FD); - if (MD && MD->isCXXInstanceMember()) - LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType()); - else - LHST = FD->getParamDecl(0)->getType(); - if (Ctx.hasSameType(RetT, LHST)) - return true; - } - } - return false; -} - -static const FunctionDecl * -getDeclWithMergedLifetimeBoundAttrs(const FunctionDecl *FD) { - return FD != nullptr ? FD->getMostRecentDecl() : nullptr; -} - -static const CXXMethodDecl * -getDeclWithMergedLifetimeBoundAttrs(const CXXMethodDecl *CMD) { - const FunctionDecl *FD = CMD; - return cast_if_present( - getDeclWithMergedLifetimeBoundAttrs(FD)); -} - -bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) { - FD = getDeclWithMergedLifetimeBoundAttrs(FD); - const TypeSourceInfo *TSI = FD->getTypeSourceInfo(); - if (!TSI) - return false; - // Don't declare this variable in the second operand of the for-statement; - // GCC miscompiles that by ending its lifetime before evaluating the - // third operand. See gcc.gnu.org/PR86769. - AttributedTypeLoc ATL; - for (TypeLoc TL = TSI->getTypeLoc(); - (ATL = TL.getAsAdjusted()); - TL = ATL.getModifiedLoc()) { - if (ATL.getAttrAs()) - return true; - } - - return isNormalAssignmentOperator(FD); -} - // Visit lifetimebound or gsl-pointer arguments. static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { @@ -599,7 +518,7 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, // Once we initialized a value with a non gsl-owner reference, it can no // longer dangle. 
if (ReturnType->isReferenceType() && - !isRecordWithAttr(ReturnType->getPointeeType())) { + !isGslOwnerType(ReturnType->getPointeeType())) { for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) { if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit || PE.Kind == IndirectLocalPathEntry::LifetimeBoundCall) @@ -639,7 +558,8 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, // lifetimebound. if (Sema::CanBeGetReturnObject(Callee)) CheckCoroObjArg = false; - if (implicitObjectParamIsLifetimeBound(Callee) || CheckCoroObjArg) + if (lifetimes::implicitObjectParamIsLifetimeBound(Callee) || + CheckCoroObjArg) VisitLifetimeBoundArg(Callee, ObjectArg); else if (EnableGSLAnalysis) { if (auto *CME = dyn_cast(Callee); @@ -648,7 +568,8 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call, } } - const FunctionDecl *CanonCallee = getDeclWithMergedLifetimeBoundAttrs(Callee); + const FunctionDecl *CanonCallee = + lifetimes::getDeclWithMergedLifetimeBoundAttrs(Callee); unsigned NP = std::min(Callee->getNumParams(), CanonCallee->getNumParams()); for (unsigned I = 0, N = std::min(NP, Args.size()); I != N; ++I) { Expr *Arg = Args[I]; @@ -1208,8 +1129,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // auto p2 = Temp().owner; // Here p2 is dangling. 
if (const auto *FD = llvm::dyn_cast_or_null(E.D); FD && !FD->getType()->isReferenceType() && - isRecordWithAttr(FD->getType()) && - LK != LK_MemInitializer) { + isGslOwnerType(FD->getType()) && LK != LK_MemInitializer) { return Report; } return Abandon; @@ -1241,10 +1161,9 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // const GSLOwner& func(const Foo& foo [[clang::lifetimebound]]) // GSLOwner* func(cosnt Foo& foo [[clang::lifetimebound]]) // GSLPointer func(const Foo& foo [[clang::lifetimebound]]) - if (FD && - ((FD->getReturnType()->isPointerOrReferenceType() && - isRecordWithAttr(FD->getReturnType()->getPointeeType())) || - isGLSPointerType(FD->getReturnType()))) + if (FD && ((FD->getReturnType()->isPointerOrReferenceType() && + isGslOwnerType(FD->getReturnType()->getPointeeType())) || + isGslPointerType(FD->getReturnType()))) return Report; return Abandon; @@ -1256,7 +1175,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, // int &p = *localUniquePtr; // someContainer.add(std::move(localUniquePtr)); // return p; - if (!pathContainsInit(Path) && isRecordWithAttr(L->getType())) + if (!pathContainsInit(Path) && isGslOwnerType(L->getType())) return Report; return Abandon; } @@ -1265,8 +1184,7 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, auto *MTE = dyn_cast(L); bool IsGslPtrValueFromGslTempOwner = - MTE && !MTE->getExtendingDecl() && - isRecordWithAttr(MTE->getType()); + MTE && !MTE->getExtendingDecl() && isGslOwnerType(MTE->getType()); // Skipping a chain of initializing gsl::Pointer annotated objects. 
// We are looking only for the final source to find out if it was // a local or temporary owner or the address of a local @@ -1276,19 +1194,14 @@ static AnalysisResult analyzePathForGSLPointer(const IndirectLocalPath &Path, return Report; } -static bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD) { - CMD = getDeclWithMergedLifetimeBoundAttrs(CMD); - return CMD && isNormalAssignmentOperator(CMD) && CMD->param_size() == 1 && - CMD->getParamDecl(0)->hasAttr(); -} - static bool shouldRunGSLAssignmentAnalysis(const Sema &SemaRef, const AssignedEntity &Entity) { bool EnableGSLAssignmentWarnings = !SemaRef.getDiagnostics().isIgnored( diag::warn_dangling_lifetime_pointer_assignment, SourceLocation()); return (EnableGSLAssignmentWarnings && - (isRecordWithAttr(Entity.LHS->getType()) || - isAssignmentOperatorLifetimeBound(Entity.AssignmentOperator))); + (isGslPointerType(Entity.LHS->getType()) || + lifetimes::isAssignmentOperatorLifetimeBound( + Entity.AssignmentOperator))); } static void @@ -1455,7 +1368,7 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity, // Suppress false positives for code like the one below: // Ctor(unique_ptr up) : pointer(up.get()), owner(move(up)) {} // FIXME: move this logic to analyzePathForGSLPointer. - if (DRE && isRecordWithAttr(DRE->getType())) + if (DRE && isGslOwnerType(DRE->getType())) return false; auto *VD = DRE ? dyn_cast(DRE->getDecl()) : nullptr; @@ -1610,11 +1523,11 @@ checkExprLifetimeImpl(Sema &SemaRef, const InitializedEntity *InitEntity, switch (LK) { case LK_Assignment: { if (shouldRunGSLAssignmentAnalysis(SemaRef, *AEntity)) - Path.push_back( - {isAssignmentOperatorLifetimeBound(AEntity->AssignmentOperator) - ? IndirectLocalPathEntry::LifetimeBoundCall - : IndirectLocalPathEntry::GslPointerAssignment, - Init}); + Path.push_back({lifetimes::isAssignmentOperatorLifetimeBound( + AEntity->AssignmentOperator) + ? 
IndirectLocalPathEntry::LifetimeBoundCall + : IndirectLocalPathEntry::GslPointerAssignment, + Init}); break; } case LK_LifetimeCapture: { diff --git a/clang/lib/Sema/CheckExprLifetime.h b/clang/lib/Sema/CheckExprLifetime.h index 6351e52a362f1..38b7061988dc7 100644 --- a/clang/lib/Sema/CheckExprLifetime.h +++ b/clang/lib/Sema/CheckExprLifetime.h @@ -18,9 +18,6 @@ namespace clang::sema { -// Tells whether the type is annotated with [[gsl::Pointer]]. -bool isGLSPointerType(QualType QT); - /// Describes an entity that is being assigned. struct AssignedEntity { // The left-hand side expression of the assignment. @@ -60,8 +57,6 @@ void checkCaptureByLifetime(Sema &SemaRef, const CapturingEntity &Entity, void checkExprLifetimeMustTailArg(Sema &SemaRef, const InitializedEntity &Entity, Expr *Init); -bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD); - } // namespace clang::sema #endif // LLVM_CLANG_SEMA_CHECK_EXPR_LIFETIME_H diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index 04aa8d224c7c0..57a95fc39dca4 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "CheckExprLifetime.h" #include "TypeLocBuilder.h" #include "clang/APINotes/APINotesReader.h" #include "clang/APINotes/Types.h" @@ -18,6 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/TypeLoc.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" #include "clang/Sema/SemaObjC.h" @@ -733,7 +733,7 @@ static void ProcessAPINotes(Sema &S, CXXMethodDecl *Method, const api_notes::CXXMethodInfo &Info, VersionedInfoMetadata Metadata) { if (Info.This && Info.This->isLifetimebound() && - !sema::implicitObjectParamIsLifetimeBound(Method)) { + !lifetimes::implicitObjectParamIsLifetimeBound(Method)) { auto MethodType 
= Method->getType(); auto *attr = ::new (S.Context) LifetimeBoundAttr(S.Context, getPlaceholderAttrInfo()); diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 3eed6ad7fe6b3..f7175d264ce23 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "CheckExprLifetime.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" @@ -289,7 +289,7 @@ void Sema::inferLifetimeCaptureByAttribute(FunctionDecl *FD) { // We only apply the lifetime_capture_by attribute to parameters of // pointer-like reference types (`const T&`, `T&&`). if (PVD->getType()->isReferenceType() && - sema::isGLSPointerType(PVD->getType().getNonReferenceType())) { + lifetimes::isGslPointerType(PVD->getType().getNonReferenceType())) { int CaptureByThis[] = {LifetimeCaptureByAttr::This}; PVD->addAttr( LifetimeCaptureByAttr::CreateImplicit(Context, CaptureByThis, 1)); diff --git a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt index ce37a29655668..2f9c2ac247497 100644 --- a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt +++ b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt @@ -15,6 +15,13 @@ set(LIFETIME_BENCHMARK_REQUIREMENTS set(LIFETIME_BENCHMARK_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark_results") +if(WIN32) + set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE + "${LIFETIME_BENCHMARK_VENV_DIR}/Scripts/python") +else() + set(LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE + "${LIFETIME_BENCHMARK_VENV_DIR}/bin/python") +endif() if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS}) @@ -22,7 +29,7 @@ if(EXISTS 
${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREME add_custom_command( OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR} - COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} + COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS} COMMENT "Creating Python virtual environment and installing dependencies for benchmark..." ) @@ -32,7 +39,7 @@ if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREME # Main benchmark target add_custom_target(benchmark_lifetime_safety_analysis - COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT} + COMMAND ${LIFETIME_BENCHMARK_VENV_PYTHON_EXECUTABLE} ${LIFETIME_BENCHMARK_SCRIPT} --clang-binary ${LLVM_BINARY_DIR}/bin/clang --output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR} diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py index 9d5f36c51b9ee..cd5b30818a4a8 100644 --- a/clang/test/Analysis/LifetimeSafety/benchmark.py +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -99,28 +99,138 @@ def generate_cpp_merge_test(n: int) -> str: return cpp_code -def analyze_trace_file(trace_path: str) -> tuple[float, float]: +def generate_cpp_nested_loop_test(n: int) -> str: """ - Parses the -ftime-trace JSON output to find durations. + Generates C++ code with N levels of nested loops. + This pattern tests how analysis performance scales with loop nesting depth, + which is a key factor in the complexity of dataflow analyses on structured + control flow. - Returns: - A tuple of (lifetime_analysis_duration_us, total_clang_duration_us). 
+ Example (n=3): + struct MyObj { int id; ~MyObj() {} }; + void nested_loops_3() { + MyObj* p = nullptr; + for(int i0=0; i0<2; ++i0) { + MyObj s0; + p = &s0; + for(int i1=0; i1<2; ++i1) { + MyObj s1; + p = &s1; + for(int i2=0; i2<2; ++i2) { + MyObj s2; + p = &s2; + } + } + } + } + """ + if n <= 0: + return "// Nesting depth must be positive." + + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void nested_loops_{n}() {{\n" + cpp_code += " MyObj* p = nullptr;\n" + + for i in range(n): + indent = " " * (i + 1) + cpp_code += f"{indent}for(int i{i}=0; i{i}<2; ++i{i}) {{\n" + cpp_code += f"{indent} MyObj s{i}; p = &s{i};\n" + + for i in range(n - 1, -1, -1): + indent = " " * (i + 1) + cpp_code += f"{indent}}}\n" + + cpp_code += "}\n" + cpp_code += f"\nint main() {{ nested_loops_{n}(); return 0; }}\n" + return cpp_code + + +def generate_cpp_switch_fan_out_test(n: int) -> str: + """ + Generates C++ code with a switch statement with N branches. + Each branch 'i' 'uses' (reads) a single, unique pointer 'pi'. + This pattern creates a "fan-in" join point for the backward + liveness analysis, stressing the LivenessMap::join operation + by forcing it to merge N disjoint, single-element sets of live origins. + The resulting complexity for LiveOrigins should be O(n log n) or higher. + + Example (n=3): + struct MyObj { int id; ~MyObj() {} }; + + void switch_fan_out_3(int condition) { + MyObj v1{1}; MyObj v2{1}; MyObj v3{1}; + MyObj* p1 = &v1; MyObj* p2 = &v2; MyObj* p3 = &v3; + + switch (condition % 3) { + case 0: + p1->id = 1; + break; + case 1: + p2->id = 1; + break; + case 2: + p3->id = 1; + break; + } + } + """ + if n <= 0: + return "// Number of variables must be positive." 
+ + cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" + cpp_code += f"void switch_fan_out{n}(int condition) {{\n" + # Generate N distinct objects + for i in range(1, n + 1): + cpp_code += f" MyObj v{i}{{1}};\n" + cpp_code += "\n" + # Generate N distinct pointers, each as a separate variable + for i in range(1, n + 1): + cpp_code += f" MyObj* p{i} = &v{i};\n" + cpp_code += "\n" + + cpp_code += f" switch (condition % {n}) {{\n" + for case_num in range(n): + cpp_code += f" case {case_num}:\n" + cpp_code += f" p{case_num + 1}->id = 1;\n" + cpp_code += " break;\n" + + cpp_code += " }\n}\n" + cpp_code += f"\nint main() {{ switch_fan_out{n}(0); return 0; }}\n" + return cpp_code + + +def analyze_trace_file(trace_path: str) -> dict: """ - lifetime_duration = 0.0 - total_duration = 0.0 + Parses the -ftime-trace JSON output to find durations for the lifetime + analysis and its sub-phases. + Returns a dictionary of durations in microseconds. + """ + durations = { + "lifetime_us": 0.0, + "total_us": 0.0, + "fact_gen_us": 0.0, + "loan_prop_us": 0.0, + "live_origins_us": 0.0, + } + event_name_map = { + "LifetimeSafetyAnalysis": "lifetime_us", + "ExecuteCompiler": "total_us", + "FactGenerator": "fact_gen_us", + "LoanPropagation": "loan_prop_us", + "LiveOrigins": "live_origins_us", + } try: with open(trace_path, "r") as f: trace_data = json.load(f) for event in trace_data.get("traceEvents", []): - if event.get("name") == "LifetimeSafetyAnalysis": - lifetime_duration += float(event.get("dur", 0)) - if event.get("name") == "ExecuteCompiler": - total_duration += float(event.get("dur", 0)) - + event_name = event.get("name") + if event_name in event_name_map: + key = event_name_map[event_name] + durations[key] += float(event.get("dur", 0)) except (IOError, json.JSONDecodeError) as e: print(f"Error reading or parsing trace file {trace_path}: {e}", file=sys.stderr) - return 0.0, 0.0 - return lifetime_duration, total_duration + return {key: 0.0 for key in durations} + return durations 
def power_law(n, c, k): @@ -135,8 +245,29 @@ def human_readable_time(ms: float) -> str: return f"{ms:.2f} ms" +def calculate_complexity(n_data, y_data) -> tuple[float | None, float | None]: + """ + Calculates the exponent 'k' for the power law fit y = c * n^k. + Returns a tuple of (k, k_standard_error). + """ + try: + if len(n_data) < 3 or np.all(y_data < 1e-6) or np.var(y_data) < 1e-6: + return None, None + + non_zero_indices = y_data > 0 + if np.sum(non_zero_indices) < 3: + return None, None + + n_fit, y_fit = n_data[non_zero_indices], y_data[non_zero_indices] + popt, pcov = curve_fit(power_law, n_fit, y_fit, p0=[0, 1], maxfev=5000) + k_stderr = np.sqrt(np.diag(pcov))[1] + return popt[1], k_stderr + except (RuntimeError, ValueError): + return None, None + + def generate_markdown_report(results: dict) -> str: - """Generates a Markdown-formatted report from the benchmark results.""" + """Generates a concise, Markdown-formatted report from the benchmark results.""" report = [] timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z") report.append(f"# Lifetime Analysis Performance Report") @@ -146,54 +277,52 @@ def generate_markdown_report(results: dict) -> str: for test_name, data in results.items(): title = data["title"] report.append(f"## Test Case: {title}") - report.append("") + report.append("\n**Timing Results:**\n") # Table header - report.append("| N | Analysis Time | Total Clang Time |") - report.append("|:----|--------------:|-----------------:|") + report.append( + "| N (Input Size) | Total Time | Analysis Time (%) | Fact Generator (%) | Loan Propagation (%) | Live Origins (%) |" + ) + report.append( + "|:---------------|-----------:|------------------:|-------------------:|---------------------:|------------------:|" + ) # Table rows n_data = np.array(data["n"]) - analysis_data = np.array(data["lifetime_ms"]) - total_data = np.array(data["total_ms"]) + total_ms_data = np.array(data["total_ms"]) for i in range(len(n_data)): - analysis_str = 
human_readable_time(analysis_data[i]) - total_str = human_readable_time(total_data[i]) - report.append(f"| {n_data[i]:<3} | {analysis_str:>13} | {total_str:>16} |") - - report.append("") - - # Complexity analysis - report.append(f"**Complexity Analysis:**") - try: - # Curve fitting requires at least 3 points - if len(n_data) < 3: - raise ValueError("Not enough data points to perform curve fitting.") - - popt, pcov = curve_fit( - power_law, n_data, analysis_data, p0=[0, 2], maxfev=5000 - ) - _, k = popt - - # Confidence Interval for k - alpha = 0.05 # 95% confidence - dof = max(0, len(n_data) - len(popt)) # degrees of freedom - t_val = t.ppf(1.0 - alpha / 2.0, dof) - # Standard error of the parameters - perr = np.sqrt(np.diag(pcov)) - k_stderr = perr[1] - k_ci_lower = k - t_val * k_stderr - k_ci_upper = k + t_val * k_stderr - - report.append( - f"- The performance for this case scales approx. as **O(n{k:.2f})**." - ) - report.append( - f"- **95% Confidence interval for exponent:** `[{k_ci_lower:.2f}, {k_ci_upper:.2f}]`." 
- ) + total_t = total_ms_data[i] + if total_t < 1e-6: + total_t = 1.0 # Avoid division by zero + + row = [ + f"| {n_data[i]:<14} |", + f"{human_readable_time(total_t):>10} |", + f"{data['lifetime_ms'][i] / total_t * 100:>17.2f}% |", + f"{data['fact_gen_ms'][i] / total_t * 100:>18.2f}% |", + f"{data['loan_prop_ms'][i] / total_t * 100:>20.2f}% |", + f"{data['live_origins_ms'][i] / total_t * 100:>17.2f}% |", + ] + report.append(" ".join(row)) + + report.append("\n**Complexity Analysis:**\n") + report.append("| Analysis Phase | Complexity O(nk) | ") + report.append("|:------------------|:--------------------------|") + + analysis_phases = { + "Total Analysis": data["lifetime_ms"], + "FactGenerator": data["fact_gen_ms"], + "LoanPropagation": data["loan_prop_ms"], + "LiveOrigins": data["live_origins_ms"], + } - except (RuntimeError, ValueError) as e: - report.append(f"- Could not determine a best-fit curve for the data: {e}") + for phase_name, y_data in analysis_phases.items(): + k, delta = calculate_complexity(n_data, np.array(y_data)) + if k is not None and delta is not None: + complexity_str = f"O(n{k:.2f} ± {delta:.2f})" + else: + complexity_str = "(Negligible)" + report.append(f"| {phase_name:<17} | {complexity_str:<25} |") report.append("\n---\n") @@ -202,7 +331,7 @@ def generate_markdown_report(results: dict) -> str: def run_single_test( clang_binary: str, output_dir: str, test_name: str, generator_func, n: int -) -> tuple[float, float]: +) -> dict: """Generates, compiles, and benchmarks a single test case.""" print(f"--- Running Test: {test_name.capitalize()} with N={n} ---") @@ -221,21 +350,29 @@ def run_single_test( "-o", "/dev/null", "-ftime-trace=" + trace_file, - "-Wexperimental-lifetime-safety", + "-Xclang", + "-fexperimental-lifetime-safety", "-std=c++17", source_file, ] - result = subprocess.run(clang_command, capture_output=True, text=True) + try: + result = subprocess.run( + clang_command, capture_output=True, text=True, timeout=60 + ) + except 
subprocess.TimeoutExpired: + print(f"Compilation timed out for N={n}!", file=sys.stderr) + return {} if result.returncode != 0: print(f"Compilation failed for N={n}!", file=sys.stderr) print(result.stderr, file=sys.stderr) - return 0.0, 0.0 + return {} - lifetime_us, total_us = analyze_trace_file(trace_file) - - return lifetime_us / 1000.0, total_us / 1000.0 + durations_us = analyze_trace_file(trace_file) + return { + key.replace("_us", "_ms"): value / 1000.0 for key, value in durations_us.items() + } if __name__ == "__main__": @@ -257,18 +394,31 @@ def run_single_test( os.makedirs(args.output_dir, exist_ok=True) print(f"Benchmark files will be saved in: {os.path.abspath(args.output_dir)}\n") + # Maximize 'n' values while keeping execution time under 10s. test_configurations = [ { "name": "cycle", "title": "Pointer Cycle in Loop", "generator_func": generate_cpp_cycle_test, - "n_values": [10, 25, 50, 75, 100, 150], + "n_values": [50, 75, 100, 200, 300], }, { "name": "merge", "title": "CFG Merges", "generator_func": generate_cpp_merge_test, - "n_values": [10, 50, 100, 200, 400, 800], + "n_values": [400, 1000, 2000, 5000], + }, + { + "name": "nested_loops", + "title": "Deeply Nested Loops", + "generator_func": generate_cpp_nested_loop_test, + "n_values": [50, 100, 150, 200], + }, + { + "name": "switch_fan_out", + "title": "Switch Fan-out", + "generator_func": generate_cpp_switch_fan_out_test, + "n_values": [500, 1000, 2000, 4000], }, ] @@ -282,21 +432,28 @@ def run_single_test( "n": [], "lifetime_ms": [], "total_ms": [], + "fact_gen_ms": [], + "loan_prop_ms": [], + "live_origins_ms": [], } for n in config["n_values"]: - lifetime_ms, total_ms = run_single_test( + durations_ms = run_single_test( args.clang_binary, args.output_dir, test_name, config["generator_func"], n, ) - if total_ms > 0: + if durations_ms: results[test_name]["n"].append(n) - results[test_name]["lifetime_ms"].append(lifetime_ms) - results[test_name]["total_ms"].append(total_ms) + for key, value in 
durations_ms.items(): + results[test_name][key].append(value) + print( - f" Total: {human_readable_time(total_ms)} | Analysis: {human_readable_time(lifetime_ms)}" + f" Total Analysis: {human_readable_time(durations_ms['lifetime_ms'])} | " + f"FactGen: {human_readable_time(durations_ms['fact_gen_ms'])} | " + f"LoanProp: {human_readable_time(durations_ms['loan_prop_ms'])} | " + f"LiveOrigins: {human_readable_time(durations_ms['live_origins_ms'])}" ) print("\n\n" + "=" * 80) @@ -305,3 +462,8 @@ def run_single_test( markdown_report = generate_markdown_report(results) print(markdown_report) + + report_filename = os.path.join(args.output_dir, "performance_report.md") + with open(report_filename, "w") as f: + f.write(markdown_report) + print(f"Report saved to: {report_filename}") diff --git a/clang/test/Analysis/lifetime-cfg-output.cpp b/clang/test/Analysis/lifetime-cfg-output.cpp index 0a75c5bcc0bcc..36b36eddc440c 100644 --- a/clang/test/Analysis/lifetime-cfg-output.cpp +++ b/clang/test/Analysis/lifetime-cfg-output.cpp @@ -935,3 +935,31 @@ int backpatched_goto() { goto label; i++; } + +// CHECK: [B2 (ENTRY)] +// CHECK-NEXT: Succs (1): B1 +// CHECK: [B1] +// CHECK-NEXT: 1: a +// CHECK-NEXT: 2: [B1.1] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 3: b +// CHECK-NEXT: 4: [B1.3] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 5: [B1.2] + [B1.4] +// CHECK-NEXT: 6: c +// CHECK-NEXT: 7: [B1.6] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 8: [B1.5] + [B1.7] +// CHECK-NEXT: 9: int res = a + b + c; +// CHECK-NEXT: 10: res +// CHECK-NEXT: 11: [B1.10] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 12: return [B1.11]; +// CHECK-NEXT: 13: [B1.9] (Lifetime ends) +// CHECK-NEXT: 14: [Parm: c] (Lifetime ends) +// CHECK-NEXT: 15: [Parm: b] (Lifetime ends) +// CHECK-NEXT: 16: [Parm: a] (Lifetime ends) +// CHECK-NEXT: Preds (1): B2 +// CHECK-NEXT: Succs (1): B0 +// CHECK: [B0 (EXIT)] +// CHECK-NEXT: Preds (1): B1 +int test_param_scope_end_order(int a, 
int b, int c) { + int res = a + b + c; + return res; +} diff --git a/clang/test/Analysis/scopes-cfg-output.cpp b/clang/test/Analysis/scopes-cfg-output.cpp index 6ed6f3638f75b..9c75492c33a42 100644 --- a/clang/test/Analysis/scopes-cfg-output.cpp +++ b/clang/test/Analysis/scopes-cfg-output.cpp @@ -1437,12 +1437,14 @@ void test_cleanup_functions() { // CHECK-NEXT: 4: return; // CHECK-NEXT: 5: CleanupFunction (cleanup_int) // CHECK-NEXT: 6: CFGScopeEnd(i) +// CHECK-NEXT: 7: CFGScopeEnd(m) // CHECK-NEXT: Preds (1): B3 // CHECK-NEXT: Succs (1): B0 // CHECK: [B2] // CHECK-NEXT: 1: return; // CHECK-NEXT: 2: CleanupFunction (cleanup_int) // CHECK-NEXT: 3: CFGScopeEnd(i) +// CHECK-NEXT: 4: CFGScopeEnd(m) // CHECK-NEXT: Preds (1): B3 // CHECK-NEXT: Succs (1): B0 // CHECK: [B3] diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index a956386ae9332..b5ab9f91c9b84 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fexperimental-lifetime-safety -mllvm -debug-only=LifetimeFacts,LifetimeDataflow -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -mllvm -debug-only=LifetimeFacts -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s // REQUIRES: asserts struct MyObj { @@ -10,96 +10,104 @@ struct MyObj { // CHECK-LABEL: Function: return_local_addr MyObj* return_local_addr() { MyObj x {10}; - MyObj* p = &x; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]]) +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) + MyObj* p = &x; +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) return p; 
-// CHECK: AssignOrigin (DestID: [[O_RET_VAL:[0-9]+]], SrcID: [[O_P]]) -// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) -// CHECK: Expire (LoanID: [[L_X]]) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: Expire ([[L_X]] (Path: x)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] -// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] // Pointer Assignment and Return // CHECK-LABEL: Function: assign_and_return_local_addr -// CHECK-NEXT: Block B{{[0-9]+}}: MyObj* assign_and_return_local_addr() { MyObj y{20}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) MyObj* ptr1 = &y; -// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]]) +// CHECK: OriginFlow (Dest: [[O_PTR1:[0-9]+]] (Decl: ptr1), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) MyObj* ptr2 = ptr1; -// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL:[0-9]+]], SrcID: [[O_PTR1]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL]]) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: OriginFlow (Dest: [[O_PTR1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: OriginFlow (Dest: [[O_PTR2:[0-9]+]] (Decl: ptr2), Src: [[O_PTR1_RVAL]] (Expr: ImplicitCastExpr)) ptr2 = ptr1; -// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]]) +// CHECK: Use ([[O_PTR1]] (Decl: ptr1), Read) +// CHECK: OriginFlow (Dest: [[O_PTR1_RVAL_2:[0-9]+]] (Expr: 
ImplicitCastExpr), Src: [[O_PTR1]] (Decl: ptr1)) +// CHECK: Use ({{[0-9]+}} (Decl: ptr2), Write) +// CHECK: OriginFlow (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR1_RVAL_2]] (Expr: ImplicitCastExpr)) ptr2 = ptr2; // Self assignment. -// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL:[0-9]+]], SrcID: [[O_PTR2]]) -// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL]]) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: OriginFlow (Dest: [[O_PTR2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Write) +// CHECK: OriginFlow (Dest: [[O_PTR2]] (Decl: ptr2), Src: [[O_PTR2_RVAL]] (Expr: ImplicitCastExpr)) return ptr2; -// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]]) -// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) -// CHECK: Expire (LoanID: [[L_Y]]) +// CHECK: Use ([[O_PTR2]] (Decl: ptr2), Read) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR2]] (Decl: ptr2)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]] -// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]] - // Return of Non-Pointer Type // CHECK-LABEL: Function: return_int_val -// CHECK-NEXT: Block B{{[0-9]+}}: int return_int_val() { int x = 10; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: {{[0-9]+}} (Expr: DeclRefExpr)) +// CHECK: Expire ([[L_X]] (Path: x)) return x; } // CHECK-NEXT: End of Block -// CHECK: Dataflow results: -// CHECK: // Loan Expiration (Automatic Variable, C++) //
CHECK-LABEL: Function: loan_expires_cpp -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_cpp() { MyObj obj{1}; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_OBJ:[0-9]+]] (Path: obj), ToOrigin: [[O_DRE_OBJ:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OBJ]] (Expr: DeclRefExpr)) MyObj* pObj = &obj; -// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) -// CHECK: Expire (LoanID: [[L_OBJ]]) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: pObj), Src: [[O_ADDR_OBJ]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_OBJ]] (Path: obj)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] -// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] -// FIXME: No expire for Trivial Destructors // CHECK-LABEL: Function: loan_expires_trivial -// CHECK-NEXT: Block B{{[0-9]+}}: void loan_expires_trivial() { int trivial_obj = 1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_TRIVIAL_OBJ:[0-9]+]] (Path: trivial_obj), ToOrigin: [[O_DRE_TRIVIAL:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_TRIVIAL]] (Expr: DeclRefExpr)) int* pTrivialObj = &trivial_obj; -// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]]) -// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: pTrivialObj), Src: [[O_ADDR_TRIVIAL_OBJ]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_TRIVIAL_OBJ]] (Path: trivial_obj)) // CHECK-NEXT: End of Block - // FIXME: Add check for Expire once trivial destructors are handled for expiration.
} -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] -// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] +// Trivial Destructors +// CHECK-LABEL: Function: return_int_pointer +int* return_int_pointer() { + int* ptr; +// CHECK: Block B{{[0-9]+}}: + int x = 1; +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) + ptr = &x; +// CHECK: Use ([[O_PTR:[0-9]+]] (Decl: ptr), Write) +// CHECK: OriginFlow (Dest: [[O_PTR]] (Decl: ptr), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: Use ([[O_PTR]] (Decl: ptr), Read) +// CHECK: OriginFlow (Dest: [[O_RET_VAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_PTR]] (Decl: ptr)) +// CHECK: Expire ([[L_X]] (Path: x)) +// CHECK: OriginEscapes ([[O_RET_VAL]] (Expr: ImplicitCastExpr)) + return ptr; +} +// CHECK-NEXT: End of Block // CHECK-LABEL: Function: conditional void conditional(bool condition) { @@ -108,24 +116,23 @@ void conditional(bool condition) { int* p = nullptr; if (condition) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) p = &a; - // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) else +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_B:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_B]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_B]] (Expr: UnaryOperator)) p = &b; - // CHECK: Issue 
(LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) +// CHECK: Block B{{[0-9]+}}: int *q = p; - // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) - // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: OriginFlow (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: OriginFlow (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]] -// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]] // CHECK-LABEL: Function: pointers_in_a_cycle @@ -138,83 +145,74 @@ void pointers_in_a_cycle(bool condition) { MyObj* p2 = &v2; MyObj* p3 = &v3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]]) -// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]]) -// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]]) +// CHECK: Issue ([[L_V1:[0-9]+]] (Path: v1), ToOrigin: [[O_DRE_V1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P1:[0-9]+]] (Decl: p1), Src: [[O_ADDR_V1]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V2:[0-9]+]] (Path: v2), ToOrigin: [[O_DRE_V2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V2:[0-9]+]] (Expr: UnaryOperator), Src: 
[[O_DRE_V2]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P2:[0-9]+]] (Decl: p2), Src: [[O_ADDR_V2]] (Expr: UnaryOperator)) +// CHECK: Issue ([[L_V3:[0-9]+]] (Path: v3), ToOrigin: [[O_DRE_V3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_V3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_V3]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P3:[0-9]+]] (Decl: p3), Src: [[O_ADDR_V3]] (Expr: UnaryOperator)) while (condition) { +// CHECK: Block B{{[0-9]+}}: MyObj* temp = p1; +// CHECK: Use ([[O_P1]] (Decl: p1), Read) +// CHECK: OriginFlow (Dest: [[O_P1_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P1]] (Decl: p1)) +// CHECK: OriginFlow (Dest: [[O_TEMP:[0-9]+]] (Decl: temp), Src: [[O_P1_RVAL]] (Expr: ImplicitCastExpr)) p1 = p2; +// CHECK: Use ([[O_P2:[0-9]+]] (Decl: p2), Read) +// CHECK: OriginFlow (Dest: [[O_P2_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P2]] (Decl: p2)) +// CHECK: Use ({{[0-9]+}} (Decl: p1), Write) +// CHECK: OriginFlow (Dest: [[O_P1]] (Decl: p1), Src: [[O_P2_RVAL]] (Expr: ImplicitCastExpr)) p2 = p3; +// CHECK: Use ([[O_P3:[0-9]+]] (Decl: p3), Read) +// CHECK: OriginFlow (Dest: [[O_P3_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P3]] (Decl: p3)) +// CHECK: Use ({{[0-9]+}} (Decl: p2), Write) +// CHECK: OriginFlow (Dest: [[O_P2]] (Decl: p2), Src: [[O_P3_RVAL]] (Expr: ImplicitCastExpr)) p3 = temp; -// CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]]) -// CHECK: AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]]) -// CHECK: AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]]) -// CHECK: AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]]) -// CHECK: AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]]) -// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]]) +// CHECK: Use ([[O_TEMP]]
(Decl: temp), Read) +// CHECK: OriginFlow (Dest: [[O_TEMP_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_TEMP]] (Decl: temp)) +// CHECK: Use ({{[0-9]+}} (Decl: p3), Write) +// CHECK: OriginFlow (Dest: [[O_P3]] (Decl: p3), Src: [[O_TEMP_RVAL]] (Expr: ImplicitCastExpr)) } } -// At the end of the analysis, the origins for the pointers involved in the cycle -// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]] -// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]] -// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]] -// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]] - // CHECK-LABEL: Function: overwrite_origin void overwrite_origin() { MyObj s1; MyObj s2; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = &s2; -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: 
[[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } -// CHECK: Dataflow results: -// CHECK: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] - // CHECK-LABEL: Function: reassign_to_null void reassign_to_null() { MyObj s1; - MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + MyObj* p = &s1; +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) p = nullptr; -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_NULLPTR:[0-9]+]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: {{[0-9]+}} (Expr: CXXNullPtrLiteralExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. 
-// CHECK: Dataflow results: -// CHECK: Origin [[O_P]] contains no loans // CHECK-LABEL: Function: reassign_in_if @@ -223,23 +221,21 @@ void reassign_in_if(bool condition) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) if (condition) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] // CHECK-LABEL: Function: assign_in_switch @@ -249,60 +245,57 @@ void assign_in_switch(int mode) { MyObj s3; MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) +// 
CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) switch (mode) { case 1: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s1; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]]) +// CHECK-DAG: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) break; case 2: +// CHECK-DAG: Block B{{[0-9]+}}: p = &s2; -// CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK-DAG: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK-DAG: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK-DAG: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK-DAG: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; default: - p = &s3; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]]) + p = &s3; +// CHECK: Issue ([[L_S3:[0-9]+]] (Path: s3), ToOrigin: [[O_DRE_S3:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S3:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S3]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S3]] (Expr: UnaryOperator)) break; } // CHECK: Block 
B{{[0-9]+}}: -// CHECK-DAG: Expire (LoanID: [[L_S3]]) -// CHECK-DAG: Expire (LoanID: [[L_S2]]) -// CHECK-DAG: Expire (LoanID: [[L_S1]]) +// CHECK-DAG: Expire ([[L_S3]] (Path: s3)) +// CHECK-DAG: Expire ([[L_S2]] (Path: s2)) +// CHECK-DAG: Expire ([[L_S1]] (Path: s1)) } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]] - // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { MyObj* p = nullptr; - // CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) - // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) +// CHECK: Block B{{[0-9]+}}: +// CHECK: OriginFlow (Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) while (condition) { MyObj inner; - p = &inner; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) -// CHECK: Expire (LoanID: [[L_INNER]]) + p = &inner; +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } } -// CHECK: Dataflow results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan 
[[L_INNER]] - // CHECK-LABEL: Function: loop_with_break void loop_with_break(int count) { @@ -310,70 +303,149 @@ void loop_with_break(int count) { MyObj s2; MyObj* p = &s1; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) +// CHECK: Issue ([[L_S1:[0-9]+]] (Path: s1), ToOrigin: [[O_DRE_S1:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S1:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S1]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_S1]] (Expr: UnaryOperator)) for (int i = 0; i < count; ++i) { if (i == 5) { - p = &s2; // CHECK: Block B{{[0-9]+}}: -// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + p = &s2; +// CHECK: Issue ([[L_S2:[0-9]+]] (Path: s2), ToOrigin: [[O_DRE_S2:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_S2:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_S2]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_S2]] (Expr: UnaryOperator)) break; } } // CHECK: Block B{{[0-9]+}}: -// CHECK: Expire (LoanID: [[L_S2]]) -// CHECK: Expire (LoanID: [[L_S1]]) +// CHECK: Expire ([[L_S2]] (Path: s2)) +// CHECK: Expire ([[L_S1]] (Path: s1)) } -// CHECK-LABEL: Dataflow results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] -// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] -// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] - - // CHECK-LABEL: Function: nested_scopes void nested_scopes() { MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) +// CHECK: OriginFlow 
(Dest: [[O_NULLPTR_CAST:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_NULLPTR:[0-9]+]] (Expr: CXXNullPtrLiteralExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_NULLPTR_CAST]] (Expr: ImplicitCastExpr)) { MyObj outer; p = &outer; -// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], OriginID: [[O_ADDR_OUTER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_OUTER]]) +// CHECK: Issue ([[L_OUTER:[0-9]+]] (Path: outer), ToOrigin: [[O_DRE_OUTER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_OUTER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_OUTER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_OUTER]] (Expr: UnaryOperator)) { MyObj inner; p = &inner; -// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) +// CHECK: Issue ([[L_INNER:[0-9]+]] (Path: inner), ToOrigin: [[O_DRE_INNER:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_INNER:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_INNER]] (Expr: DeclRefExpr)) +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_INNER]] (Expr: UnaryOperator)) } -// CHECK: Expire (LoanID: [[L_INNER]]) +// CHECK: Expire ([[L_INNER]] (Path: inner)) } -// CHECK: Expire (LoanID: [[L_OUTER]]) +// CHECK: Expire ([[L_OUTER]] (Path: outer)) } -// CHECK-LABEL: Dataflow results: -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] -// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]] -// CHECK-NOT: Origin [[O_P]] contains Loan [[L_OUTER]] - - // CHECK-LABEL: Function: pointer_indirection void pointer_indirection() { int a; int *p = &a; -// CHECK: Block B1: -// CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: 
[[O_ADDR_A]]) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_A:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_A]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_A]] (Expr: UnaryOperator)) int **pp = &p; -// CHECK: Issue (LoanID: [[L_P:[0-9]+]], OriginID: [[O_ADDR_P:[0-9]+]]) -// CHECK: AssignOrigin (DestID: [[O_PP:[0-9]+]], SrcID: [[O_ADDR_P]]) - -// FIXME: The Origin for the RHS is broken +// Note: No facts are generated for &p because the subexpression is a pointer type, +// which is not yet supported by the origin model. This is expected. int *q = *pp; -// CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: {{[0-9]+}}) +// CHECK: Use ([[O_PP:[0-9]+]] (Decl: pp), Read) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: q), Src: {{[0-9]+}} (Expr: ImplicitCastExpr)) +} + +// CHECK-LABEL: Function: ternary_operator +// FIXME: Propagate origins across ConditionalOperator. +void ternary_operator() { + int a, b; + int *p; + p = (a > b) ? 
&a : &b; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_A:[0-9]+]] (Path: a), ToOrigin: [[O_DRE_A:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_B:[0-9]+]] (Path: b), ToOrigin: [[O_DRE_B:[0-9]+]] (Expr: DeclRefExpr)) + +// CHECK: Block B{{[0-9]+}}: +// CHECK: Use ({{[0-9]+}} (Decl: p), Write) +// CHECK: OriginFlow (Dest: {{[0-9]+}} (Decl: p), Src: {{[0-9]+}} (Expr: ConditionalOperator)) +} + +// CHECK-LABEL: Function: test_use_facts +void usePointer(MyObj*); +void test_use_facts() { + MyObj x; + MyObj *p; +// CHECK: Block B{{[0-9]+}}: + p = &x; +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Use ([[O_P:[0-9]+]] (Decl: p), Write) +// CHECK: OriginFlow (Dest: [[O_P]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) + (void)*p; +// CHECK: Use ([[O_P]] (Decl: p), Read) + usePointer(p); +// CHECK: Use ([[O_P]] (Decl: p), Read) + p->id = 1; +// CHECK: Use ([[O_P]] (Decl: p), Read) + MyObj* q; + q = p; +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: Use ([[O_Q:[0-9]+]] (Decl: q), Write) + usePointer(q); +// CHECK: Use ([[O_Q]] (Decl: q), Read) + q->id = 2; +// CHECK: Use ([[O_Q]] (Decl: q), Read) +// CHECK: Expire ([[L_X]] (Path: x)) +} + +// CHECK-LABEL: Function: test_use_lifetimebound_call +MyObj* LifetimeBoundCall(MyObj* x [[clang::lifetimebound]], MyObj* y [[clang::lifetimebound]]); +void test_use_lifetimebound_call() { + MyObj x, y; + MyObj *p = &x; +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) + MyObj *q = &y; +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) 
+// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_Q:[0-9]+]] (Decl: q), Src: [[O_ADDR_Y]] (Expr: UnaryOperator)) + MyObj* r = LifetimeBoundCall(p, q); +// CHECK: Use ([[O_P]] (Decl: p), Read) +// CHECK: OriginFlow (Dest: [[O_P_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_P]] (Decl: p)) +// CHECK: Use ([[O_Q]] (Decl: q), Read) +// CHECK: OriginFlow (Dest: [[O_Q_RVAL:[0-9]+]] (Expr: ImplicitCastExpr), Src: [[O_Q]] (Decl: q)) +// CHECK: OriginFlow (Dest: [[O_CALL_EXPR:[0-9]+]] (Expr: CallExpr), Src: [[O_P_RVAL]] (Expr: ImplicitCastExpr)) +// CHECK: OriginFlow (Dest: [[O_CALL_EXPR]] (Expr: CallExpr), Src: [[O_Q_RVAL]] (Expr: ImplicitCastExpr), Merge) +// CHECK: OriginFlow (Dest: [[O_R:[0-9]+]] (Decl: r), Src: [[O_CALL_EXPR]] (Expr: CallExpr)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: Expire ([[L_X]] (Path: x)) +} +// CHECK-LABEL: Function: test_conditional_operator +void test_conditional_operator(bool cond) { + MyObj x, y; + MyObj *p = cond ? 
&x : &y; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_X:[0-9]+]] (Path: x), ToOrigin: [[O_DRE_X:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_X:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_X]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue ([[L_Y:[0-9]+]] (Path: y), ToOrigin: [[O_DRE_Y:[0-9]+]] (Expr: DeclRefExpr)) +// CHECK: OriginFlow (Dest: [[O_ADDR_Y:[0-9]+]] (Expr: UnaryOperator), Src: [[O_DRE_Y]] (Expr: DeclRefExpr)) +// CHECK: Block B{{[0-9]+}}: +// CHECK: OriginFlow (Dest: [[O_COND_OP:[0-9]+]] (Expr: ConditionalOperator), Src: [[O_ADDR_X]] (Expr: UnaryOperator)) +// CHECK: OriginFlow (Dest: [[O_COND_OP]] (Expr: ConditionalOperator), Src: [[O_ADDR_Y]] (Expr: UnaryOperator), Merge) +// CHECK: OriginFlow (Dest: [[O_P:[0-9]+]] (Decl: p), Src: [[O_COND_OP]] (Expr: ConditionalOperator)) +// CHECK: Expire ([[L_Y]] (Path: y)) +// CHECK: Expire ([[L_X]] (Path: x)) } diff --git a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp new file mode 100644 index 0000000000000..c0f675a301d14 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety-suggestions -verify %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor + MyObj operator+(MyObj); +}; + +struct [[gsl::Pointer()]] View { + View(const MyObj&); // Borrows from MyObj + View(); + void use() const; +}; + +View return_view_directly (View a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +View conditional_return_view ( + View a, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + View b, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. 
+ bool c) { + View res; + if (c) + res = a; + else + res = b; + return res; // expected-note 2 {{param returned here}} +} + +// FIXME: Fails to generate lifetime suggestion for reference types as these are not handled currently. +MyObj& return_reference (MyObj& a, MyObj& b, bool c) { + if(c) { + return a; + } + return b; +} + +// FIXME: Fails to generate lifetime suggestion for reference types as these are not handled currently. +View return_view_from_reference (MyObj& p) { + return p; +} + +int* return_pointer_directly (int* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +MyObj* return_pointer_object (MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + return a; // expected-note {{param returned here}} +} + +View only_one_paramter_annotated (View a [[clang::lifetimebound]], + View b, // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + bool c) { + if(c) + return a; + return b; // expected-note {{param returned here}} +} + +View reassigned_to_another_parameter ( + View a, + View b) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}. + a = b; + return a; // expected-note {{param returned here}} +} + +struct ReturnsSelf { + const ReturnsSelf& get() const { + return *this; + } +}; + +struct ViewProvider { + MyObj data; + View getView() const { + return data; + } +}; + +// FIXME: Fails to generate lifetime suggestions for the implicit 'this' parameter, as this feature is not yet implemented. +void test_get_on_temporary() { + const ReturnsSelf& s_ref = ReturnsSelf().get(); + (void)s_ref; +} + +// FIXME: Fails to generate lifetime suggestions for the implicit 'this' parameter, as this feature is not yet implemented. 
+void test_getView_on_temporary() { + View sv = ViewProvider{1}.getView(); + (void)sv; +} + +//===----------------------------------------------------------------------===// +// Negative Test Cases +//===----------------------------------------------------------------------===// + +View already_annotated(View a [[clang::lifetimebound]]) { + return a; +} + +MyObj return_obj_by_value(MyObj& p) { + return p; +} + +MyObj GlobalMyObj; +View Global = GlobalMyObj; +View Reassigned(View a) { + a = Global; + return a; +} diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp new file mode 100644 index 0000000000000..1191469e23df1 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -0,0 +1,945 @@ +// RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -Wno-dangling -verify %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor + MyObj operator+(MyObj); +}; + +struct [[gsl::Pointer()]] View { + View(const MyObj&); // Borrows from MyObj + View(); + void use() const; +}; + +class TriviallyDestructedClass { + View a, b; +}; + +//===----------------------------------------------------------------------===// +// Basic Definite Use-After-Free (-W...permissive) +// These are cases where the pointer is guaranteed to be dangling at the use site. 
+//===----------------------------------------------------------------------===// + +void definite_simple_case() { + MyObj* p; + { + MyObj s; + p = &s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void definite_simple_case_gsl() { + View v; + { + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void no_use_no_error() { + MyObj* p; + { + MyObj s; + p = &s; + } + // 'p' is dangling here, but since it is never used, no warning is issued. +} + +void no_use_no_error_gsl() { + View v; + { + MyObj s; + v = s; + } + // 'v' is dangling here, but since it is never used, no warning is issued. +} + +void definite_pointer_chain() { + MyObj* p; + MyObj* q; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + q = p; + } // expected-note {{destroyed here}} + (void)*q; // expected-note {{later used here}} +} + +void definite_propagation_gsl() { + View v1, v2; + { + MyObj s; + v1 = s; // expected-warning {{object whose reference is captured does not live long enough}} + v2 = v1; + } // expected-note {{destroyed here}} + v2.use(); // expected-note {{later used here}} +} + +void definite_multiple_uses_one_warning() { + MyObj* p; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + // No second warning for the same loan. 
+ p->id = 1; + MyObj* q = p; + (void)*q; +} + +void definite_multiple_pointers() { + MyObj *p, *q, *r; + { + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + q = &s; // expected-warning {{does not live long enough}} + r = &s; // expected-warning {{does not live long enough}} + } // expected-note 3 {{destroyed here}} + (void)*p; // expected-note {{later used here}} + (void)*q; // expected-note {{later used here}} + (void)*r; // expected-note {{later used here}} +} + +void definite_single_pointer_multiple_loans(bool cond) { + MyObj *p; + if (cond){ + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj t; + p = &t; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void definite_single_pointer_multiple_loans_gsl(bool cond) { + View v; + if (cond){ + MyObj s; + v = s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + else { + MyObj t; + v = t; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + +void definite_if_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void potential_if_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + if (!cond) + (void)*p; // expected-note {{later used here}} + else + p = &safe; +} + +void definite_if_branch_gsl(bool cond) { + MyObj safe; + View v = safe; + if 
(cond) { + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void definite_potential_together(bool cond) { + MyObj safe; + MyObj* p_maybe = &safe; + MyObj* p_definite = nullptr; + + { + MyObj s; + if (cond) + p_definite = &s; // expected-warning {{does not live long enough}} + if (cond) + p_maybe = &s; // expected-warning {{may not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p_definite; // expected-note {{later used here}} + if (!cond) + (void)*p_maybe; // expected-note {{later used here}} +} + +void definite_overrides_potential(bool cond) { + MyObj safe; + MyObj* p; + MyObj* q; + { + MyObj s; + q = &s; // expected-warning {{does not live long enough}} + p = q; + } // expected-note {{destroyed here}} + + if (cond) { + // 'q' is conditionally "rescued". 'p' is not. + q = &safe; + } + + // The use of 'p' is a definite error because it was never rescued. + (void)*q; + (void)*p; // expected-note {{later used here}} + (void)*q; +} + +void potential_due_to_conditional_killing(bool cond) { + MyObj safe; + MyObj* q; + { + MyObj s; + q = &s; // expected-warning {{may not live long enough}} + } // expected-note {{destroyed here}} + if (cond) { + // 'q' is only conditionally "rescued", so the later use may still dangle.
+ q = &safe; + } + (void)*q; // expected-note {{later used here}} +} + +void potential_for_loop_use_after_loop_body(MyObj safe) { + MyObj* p = &safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + p = &s; // expected-warning {{may not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void potential_for_loop_gsl() { + MyObj safe; + View v = safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + v = s; // expected-warning {{object whose reference is captured may not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void potential_for_loop_use_before_loop_body(MyObj safe) { + MyObj* p = &safe; + // Prefer the earlier use for diagnostics. + for (int i = 0; i < 1; ++i) { + (void)*p; // expected-note {{later used here}} + MyObj s; + p = &s; // expected-warning {{does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; +} + +void definite_loop_with_break(bool cond) { + MyObj safe; + MyObj* p = &safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj temp; + p = &temp; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note {{later used here}} +} + +void definite_loop_with_break_gsl(bool cond) { + MyObj safe; + View v = safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj temp; + v = temp; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note {{later used here}} +} + +void potential_multiple_expiry_of_same_loan(bool cond) { + // Choose the last expiry location for the loan (e.g., through scope-ends and break statements).
+ MyObj safe; + MyObj* p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) { + p = &unsafe; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) { + p = &unsafe; // expected-warning {{does not live long enough}} + if (cond) + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + if (cond) { + MyObj unsafe2; + p = &unsafe2; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + + // TODO: This can be argued to be a "maybe" warning. This is because + // we only check for confidence of liveness and not the confidence of + // the loan contained in an origin. To deal with this, we can introduce + // a confidence in loan propagation analysis as well like liveness. + (void)*p; // expected-note {{later used here}} + + p = &safe; + for (int i = 0; i < 10; ++i) { + MyObj unsafe; + if (cond) + p = &unsafe; // expected-warning {{does not live long enough}} + if (cond) + break; // expected-note {{destroyed here}} + } + (void)*p; // expected-note {{later used here}} +} + +void potential_switch(int mode) { + MyObj safe; + MyObj* p = &safe; + switch (mode) { + case 1: { + MyObj temp; + p = &temp; // expected-warning {{object whose reference is captured may not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + p = &safe; // This path is okay. + break; + } + } + if (mode == 2) + (void)*p; // expected-note {{later used here}} +} + +void definite_switch(int mode) { + MyObj safe; + MyObj* p = &safe; + // A use dominates all the loan expiries --> all definite errors.
+ switch (mode) { + case 1: { + MyObj temp1; + p = &temp1; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + MyObj temp2; + p = &temp2; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + default: { + MyObj temp2; + p = &temp2; // expected-warning {{does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + (void)*p; // expected-note 3 {{later used here}} +} + +void definite_switch_gsl(int mode) { + View v; + switch (mode) { + case 1: { + MyObj temp1; + v = temp1; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + case 2: { + MyObj temp2; + v = temp2; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + default: { + MyObj temp3; + v = temp3; // expected-warning {{object whose reference is captured does not live long enough}} + break; // expected-note {{destroyed here}} + } + } + v.use(); // expected-note 3 {{later used here}} +} + +void loan_from_previous_iteration(MyObj safe, bool condition) { + MyObj* p = &safe; + MyObj* q = &safe; + + while (condition) { + MyObj x; + p = &x; // expected-warning {{may not live long enough}} + + if (condition) + q = p; + (void)*p; + (void)*q; // expected-note {{later used here}} + } // expected-note {{destroyed here}} +} + +void trivial_int_uaf() { + int * a; + { + int b = 1; + a = &b; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*a; // expected-note {{later used here}} +} + +void trivial_class_uaf() { + TriviallyDestructedClass* ptr; + { + TriviallyDestructedClass s; + ptr = &s; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)ptr; // expected-note {{later used 
here}} +} + +//===----------------------------------------------------------------------===// +// Basic Definite Use-After-Return (Return-Stack-Address) (-W...permissive) +// These are cases where the pointer is guaranteed to be dangling at the use site. +//===----------------------------------------------------------------------===// + +MyObj* simple_return_stack_address() { + MyObj s; + MyObj* p = &s; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} +} + +MyObj* direct_return() { + MyObj s; + return &s; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const MyObj* conditional_assign_unconditional_return(const MyObj& safe, bool c) { + MyObj s; + const MyObj* p = &safe; + if (c) { + p = &s; // expected-warning {{address of stack memory is returned later}} + } + return p; // expected-note {{returned here}} +} + +View conditional_assign_both_branches(const MyObj& safe, bool c) { + MyObj s; + View p; + if (c) { + p = s; // expected-warning {{address of stack memory is returned later}} + } + else { + p = safe; + } + return p; // expected-note {{returned here}} + +} + +View reassign_safe_to_local(const MyObj& safe) { + MyObj local; + View p = safe; + p = local; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} +} + +View pointer_chain_to_local() { + MyObj local; + View p1 = local; // expected-warning {{address of stack memory is returned later}} + View p2 = p1; + return p2; // expected-note {{returned here}} +} + +View multiple_assign_multiple_return(const MyObj& safe, bool c1, bool c2) { + MyObj local1; + MyObj local2; + View p; + if (c1) { + p = local1; // expected-warning {{address of stack memory is returned later}} + return p; // expected-note {{returned here}} + } + else if (c2) { + p = local2; // expected-warning {{address of stack memory is returned later}} + return p; // 
expected-note {{returned here}} + } + p = safe; + return p; +} + +View multiple_assign_single_return(const MyObj& safe, bool c1, bool c2) { + MyObj local1; + MyObj local2; + View p; + if (c1) { + p = local1; // expected-warning {{address of stack memory is returned later}} + } + else if (c2) { + p = local2; // expected-warning {{address of stack memory is returned later}} + } + else { + p = safe; + } + return p; // expected-note 2 {{returned here}} +} + +View direct_return_of_local() { + MyObj stack; + return stack; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +MyObj& reference_return_of_local() { + MyObj stack; + return stack; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +int* trivial_int_uar() { + int *a; + int b = 1; + a = &b; // expected-warning {{address of stack memory is returned later}} + return a; // expected-note {{returned here}} +} + +TriviallyDestructedClass* trivial_class_uar () { + TriviallyDestructedClass *ptr; + TriviallyDestructedClass s; + ptr = &s; // expected-warning {{address of stack memory is returned later}} + return ptr; // expected-note {{returned here}} +} + +const int& return_parameter(int a) { + return a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +int* return_pointer_to_parameter(int a) { + return &a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const int& return_reference_to_parameter(int a) +{ + const int &b = a; + return b; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const int& get_ref_to_local() { + int a = 42; + return a; // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + 
+//===----------------------------------------------------------------------===// +// Use-After-Scope & Use-After-Return (Return-Stack-Address) Combined +// These are cases where the diagnostic kind is determined by location +//===----------------------------------------------------------------------===// + +MyObj* uaf_before_uar() { + MyObj* p; + { + MyObj local_obj; + p = &local_obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + return p; // expected-note {{later used here}} +} + +View uar_before_uaf(const MyObj& safe, bool c) { + View p; + { + MyObj local_obj; + p = local_obj; // expected-warning {{address of stack memory is returned later}} + if (c) { + return p; // expected-note {{returned here}} + } + } + p.use(); + p = safe; + return p; +} + +//===----------------------------------------------------------------------===// +// No-Error Cases +//===----------------------------------------------------------------------===// +void no_error_if_dangle_then_rescue() { + MyObj safe; + MyObj* p; + { + MyObj temp; + p = &temp; // p is temporarily dangling. + } + p = &safe; // p is "rescued" before use. + (void)*p; // This is safe. +} + +void no_error_if_dangle_then_rescue_gsl() { + MyObj safe; + View v; + { + MyObj temp; + v = temp; // 'v' is temporarily dangling. + } + v = safe; // 'v' is "rescued" before use by reassigning to a valid object. + v.use(); // This is safe. +} + +void no_error_loan_from_current_iteration(bool cond) { + // See https://github.com/llvm/llvm-project/issues/156959. 
+ MyObj b; + while (cond) { + MyObj a; + View p = b; + if (cond) { + p = a; + } + (void)p; + } +} + +View safe_return(const MyObj& safe) { + MyObj local; + View p = local; + p = safe; // p has been reassigned + return p; // This is safe +} + +//===----------------------------------------------------------------------===// +// Lifetimebound Attribute Tests +//===----------------------------------------------------------------------===// + +View Identity(View v [[clang::lifetimebound]]); +const MyObj& IdentityRef(const MyObj& obj [[clang::lifetimebound]]); +MyObj* Identity(MyObj* v [[clang::lifetimebound]]); +View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); +MyObj* GetPointer(const MyObj& obj [[clang::lifetimebound]]); + +struct [[gsl::Pointer()]] LifetimeBoundView { + LifetimeBoundView(); + LifetimeBoundView(const MyObj& obj [[clang::lifetimebound]]); + LifetimeBoundView pass() [[clang::lifetimebound]] { return *this; } + operator View() const [[clang::lifetimebound]]; +}; + +void lifetimebound_simple_function() { + View v; + { + MyObj obj; + v = Identity(obj); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_multiple_args_definite() { + View v; + { + MyObj obj1, obj2; + v = Choose(true, + obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + +void lifetimebound_multiple_args_potential(bool cond) { + MyObj safe; + View v = safe; + { + MyObj obj1; + if (cond) { + MyObj obj2; + v = Choose(true, + obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); // expected-warning {{object whose reference is captured does 
not live long enough}} + } // expected-note {{destroyed here}} + } // expected-note {{destroyed here}} + v.use(); // expected-note 2 {{later used here}} +} + +View SelectFirst(View a [[clang::lifetimebound]], View b); +void lifetimebound_mixed_args() { + View v; + { + MyObj obj1, obj2; + v = SelectFirst(obj1, // expected-warning {{object whose reference is captured does not live long enough}} + obj2); + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_member_function() { + LifetimeBoundView lbv, lbv2; + { + MyObj obj; + lbv = obj; // expected-warning {{object whose reference is captured does not live long enough}} + lbv2 = lbv.pass(); + } // expected-note {{destroyed here}} + View v = lbv2; // expected-note {{later used here}} + v.use(); +} + +void lifetimebound_conversion_operator() { + View v; + { + MyObj obj; + LifetimeBoundView lbv = obj; // expected-warning {{object whose reference is captured does not live long enough}} + v = lbv; // Conversion operator is lifetimebound + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_chained_calls() { + View v; + { + MyObj obj; + v = Identity(Identity(Identity(obj))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +void lifetimebound_with_pointers() { + MyObj* ptr; + { + MyObj obj; + ptr = GetPointer(obj); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*ptr; // expected-note {{later used here}} +} + +void lifetimebound_no_error_safe_usage() { + MyObj obj; + View v1 = Identity(obj); // No warning - obj lives long enough + View v2 = Choose(true, v1, Identity(obj)); // No warning - all args are safe + v2.use(); // Safe usage +} + +void lifetimebound_partial_safety(bool cond) { + MyObj 
safe_obj; + View v = safe_obj; + + if (cond) { + MyObj temp_obj; + v = Choose(true, + safe_obj, + temp_obj); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + v.use(); // expected-note {{later used here}} +} + +// FIXME: Warning should be on the 'GetObject' call, not the assignment to 'ptr'. +// The loan from the lifetimebound argument is not propagated to the call expression itself. +const MyObj& GetObject(View v [[clang::lifetimebound]]); +void lifetimebound_return_reference() { + View v; + const MyObj* ptr; + { + MyObj obj; + View temp_v = obj; + const MyObj& ref = GetObject(temp_v); + ptr = &ref; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*ptr; // expected-note {{later used here}} +} + +// FIXME: No warning for non gsl::Pointer types. Origin tracking is only supported for pointer types. +struct LifetimeBoundCtor { + LifetimeBoundCtor(); + LifetimeBoundCtor(const MyObj& obj [[clang::lifetimebound]]); +}; +void lifetimebound_ctor() { + LifetimeBoundCtor v; + { + MyObj obj; + v = obj; + } + (void)v; +} + +View lifetimebound_return_of_local() { + MyObj stack; + return Identity(stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +const MyObj& lifetimebound_return_ref_to_local() { + MyObj stack; + return IdentityRef(stack); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +View lifetimebound_return_by_value_param(MyObj stack_param) { + return Identity(stack_param); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +View lifetimebound_return_by_value_multiple_param(int cond, MyObj a, MyObj b, MyObj c) { + if (cond == 1) + return Identity(a); // expected-warning {{address of stack memory is returned later}} + // 
expected-note@-1 {{returned here}} + if (cond == 2) + return Identity(b); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} + return Identity(c); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} + +template +View lifetimebound_return_by_value_param_template(T t) { + return Identity(t); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} +} +void use_lifetimebound_return_by_value_param_template() { + lifetimebound_return_by_value_param_template(MyObj{}); // expected-note {{in instantiation of}} +} + +void lambda_uar_param() { + auto lambda = [](MyObj stack_param) { + return Identity(stack_param); // expected-warning {{address of stack memory is returned later}} + // expected-note@-1 {{returned here}} + }; + lambda(MyObj{}); +} + +// FIXME: This should be detected. We see correct destructors but origin flow breaks somewhere. +namespace VariadicTemplatedParamsUAR{ + +template +View Max(Args... args [[clang::lifetimebound]]); + +template +View lifetimebound_return_of_variadic_param(Args... args) { + return Max(args...); +} +void test_variadic() { + lifetimebound_return_of_variadic_param(MyObj{1}, MyObj{2}, MyObj{3}); +} +} + +// FIXME: Fails to diagnose UAF when a reference to a by-value param escapes via an out-param. +void uaf_from_by_value_param_failing(MyObj param, View* out_p) { + *out_p = Identity(param); +} + +// Conditional operator. +void conditional_operator_one_unsafe_branch(bool cond) { + MyObj safe; + MyObj* p = &safe; + { + MyObj temp; + p = cond ? &temp // expected-warning {{object whose reference is captured may not live long enough}} + : &safe; + } // expected-note {{destroyed here}} + + // This is not a use-after-free for any value of `cond` but the analysis + // cannot reason this and marks the above as a false positive. This + // ensures safety regardless of cond's value. 
+ if (cond) + p = &safe; + (void)*p; // expected-note {{later used here}} +} + +void conditional_operator_two_unsafe_branches(bool cond) { + MyObj* p; + { + MyObj a, b; + p = cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_nested(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = cond ? cond ? &a // expected-warning {{object whose reference is captured does not live long enough}}. + : &b // expected-warning {{object whose reference is captured does not live long enough}}. + : cond ? &c // expected-warning {{object whose reference is captured does not live long enough}}. + : &d; // expected-warning {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} + +void conditional_operator_lifetimebound(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? &a // expected-warning {{object whose reference is captured does not live long enough}} + : &b); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested(bool cond) { + MyObj* p; + { + MyObj a, b; + p = Identity(cond ? Identity(&a) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(&b)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 2 {{destroyed here}} + (void)*p; // expected-note 2 {{later used here}} +} + +void conditional_operator_lifetimebound_nested_deep(bool cond) { + MyObj* p; + { + MyObj a, b, c, d; + p = Identity(cond ? Identity(cond ? 
&a // expected-warning {{object whose reference is captured does not live long enough}} + : &b) // expected-warning {{object whose reference is captured does not live long enough}} + : Identity(cond ? &c // expected-warning {{object whose reference is captured does not live long enough}} + : &d)); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} + +void parentheses(bool cond) { + MyObj* p; + { + MyObj a; + p = &((((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a; + p = ((GetPointer((a)))); // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} + + { + MyObj a, b, c, d; + p = &(cond ? (cond ? a // expected-warning {{object whose reference is captured does not live long enough}}. + : b) // expected-warning {{object whose reference is captured does not live long enough}}. + : (cond ? c // expected-warning {{object whose reference is captured does not live long enough}}. + : d)); // expected-warning {{object whose reference is captured does not live long enough}}. + } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} + + { + MyObj a, b, c, d; + p = ((cond ? (((cond ? &a : &b))) // expected-warning 2 {{object whose reference is captured does not live long enough}}. + : &(((cond ? c : d))))); // expected-warning 2 {{object whose reference is captured does not live long enough}}. 
+ } // expected-note 4 {{destroyed here}} + (void)*p; // expected-note 4 {{later used here}} +} diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt index 059a74843155c..e0acf436b37c7 100644 --- a/clang/unittests/Analysis/CMakeLists.txt +++ b/clang/unittests/Analysis/CMakeLists.txt @@ -4,12 +4,14 @@ add_clang_unittest(ClangAnalysisTests CloneDetectionTest.cpp ExprMutationAnalyzerTest.cpp IntervalPartitionTest.cpp + LifetimeSafetyTest.cpp MacroExpansionContextTest.cpp UnsafeBufferUsageTest.cpp CLANG_LIBS clangAST clangASTMatchers clangAnalysis + clangAnalysisLifetimeSafety clangBasic clangFrontend clangLex diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp index 88630119ba8a1..609255437fe82 100644 --- a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp @@ -149,9 +149,18 @@ recordState(Elements=8, Branches=2, Joins=1) enterElement(return b ? p : q;) transfer() recordState(Elements=9, Branches=2, Joins=1) +enterElement([Parm: q] (Lifetime ends)) +transfer() +recordState(Elements=10, Branches=2, Joins=1) +enterElement([Parm: p] (Lifetime ends)) +transfer() +recordState(Elements=11, Branches=2, Joins=1) +enterElement([Parm: b] (Lifetime ends)) +transfer() +recordState(Elements=12, Branches=2, Joins=1) enterBlock(0, false) -recordState(Elements=9, Branches=2, Joins=1) +recordState(Elements=12, Branches=2, Joins=1) endAnalysis() )"); diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp new file mode 100644 index 0000000000000..fee4e79e27d03 --- /dev/null +++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp @@ -0,0 +1,1577 @@ +//===- LifetimeSafetyTest.cpp - Lifetime Safety Tests -*---------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Testing/TestAST.h" +#include "llvm/ADT/StringMap.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#include + +namespace clang::lifetimes::internal { +namespace { + +using namespace ast_matchers; +using ::testing::Not; +using ::testing::SizeIs; +using ::testing::UnorderedElementsAreArray; + +// A helper class to run the full lifetime analysis on a piece of code +// and provide an interface for querying the results. +class LifetimeTestRunner { +public: + LifetimeTestRunner(llvm::StringRef Code) { + std::string FullCode = R"( + #define POINT(name) void("__lifetime_test_point_" #name) + + struct MyObj { ~MyObj() {} int i; }; + + struct [[gsl::Pointer()]] View { + View(const MyObj&); + View(); + }; + )"; + FullCode += Code.str(); + + Inputs = TestInputs(FullCode); + Inputs.Language = TestLanguage::Lang_CXX20; + AST = std::make_unique(Inputs); + ASTCtx = &AST->context(); + + // Find the target function using AST matchers. + auto MatchResult = + match(functionDecl(hasName("target")).bind("target"), *ASTCtx); + auto *FD = selectFirst("target", MatchResult); + if (!FD) { + ADD_FAILURE() << "Test case must have a function named 'target'"; + return; + } + AnalysisCtx = std::make_unique(nullptr, FD); + CFG::BuildOptions &BuildOptions = AnalysisCtx->getCFGBuildOptions(); + BuildOptions.setAllAlwaysAdd(); + BuildOptions.AddImplicitDtors = true; + BuildOptions.AddTemporaryDtors = true; + BuildOptions.AddLifetime = true; + + // Run the main analysis. 
+ Analysis = std::make_unique(*AnalysisCtx, nullptr); + Analysis->run(); + + AnnotationToPointMap = Analysis->getFactManager().getTestPoints(); + } + + LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; } + ASTContext &getASTContext() { return *ASTCtx; } + AnalysisDeclContext &getAnalysisContext() { return *AnalysisCtx; } + + ProgramPoint getProgramPoint(llvm::StringRef Annotation) { + auto It = AnnotationToPointMap.find(Annotation); + if (It == AnnotationToPointMap.end()) { + ADD_FAILURE() << "Annotation '" << Annotation << "' not found."; + return nullptr; + } + return It->second; + } + +private: + TestInputs Inputs; + std::unique_ptr AST; + ASTContext *ASTCtx = nullptr; + std::unique_ptr AnalysisCtx; + std::unique_ptr Analysis; + llvm::StringMap AnnotationToPointMap; +}; + +// A convenience wrapper that uses the LifetimeSafetyAnalysis public API. +class LifetimeTestHelper { +public: + LifetimeTestHelper(LifetimeTestRunner &Runner) + : Runner(Runner), Analysis(Runner.getAnalysis()) {} + + std::optional getOriginForDecl(llvm::StringRef VarName) { + auto *VD = findDecl(VarName); + if (!VD) + return std::nullopt; + // This assumes the OriginManager's `get` can find an existing origin. + // We might need a `find` method on OriginManager to avoid `getOrCreate` + // logic in a const-query context if that becomes an issue. + return const_cast(Analysis.getFactManager().getOriginMgr()) + .get(*VD); + } + + std::vector getLoansForVar(llvm::StringRef VarName) { + auto *VD = findDecl(VarName); + if (!VD) { + ADD_FAILURE() << "Failed to find VarDecl for '" << VarName << "'"; + return {}; + } + std::vector LID; + for (const Loan *L : Analysis.getFactManager().getLoanMgr().getLoans()) + if (const auto *BL = dyn_cast(L)) + if (BL->getAccessPath().D == VD) + LID.push_back(L->getID()); + if (LID.empty()) { + ADD_FAILURE() << "Loan for '" << VarName << "' not found."; + return {}; + } + return LID; + } + + // Gets the set of loans that are live at the given program point. 
A loan is + // considered live at point P if there is a live origin which contains this + // loan. + std::optional getLiveLoansAtPoint(ProgramPoint P) const { + const auto &LiveOriginsAnalysis = Runner.getAnalysis().getLiveOrigins(); + const auto &LoanPropagation = Runner.getAnalysis().getLoanPropagation(); + + LivenessMap LiveOriginsMap = LiveOriginsAnalysis.getLiveOriginsAt(P); + + LoanSet::Factory F; + LoanSet Result = F.getEmptySet(); + + for (const auto &[OID, LI] : LiveOriginsMap) { + LoanSet Loans = LoanPropagation.getLoans(OID, P); + Result = clang::lifetimes::internal::utils::join(Result, Loans, F); + } + + if (Result.isEmpty()) + return std::nullopt; + + return Result; + } + + const ExpireFact * + getExpireFactFromAllFacts(const llvm::ArrayRef &FactsInBlock, + const LoanID &loanID) { + for (const Fact *F : FactsInBlock) { + if (auto const *CurrentEF = F->getAs()) + if (CurrentEF->getLoanID() == loanID) + return CurrentEF; + } + return nullptr; + } + + std::optional getLoansAtPoint(OriginID OID, + llvm::StringRef Annotation) { + ProgramPoint PP = Runner.getProgramPoint(Annotation); + if (!PP) + return std::nullopt; + return Analysis.getLoanPropagation().getLoans(OID, PP); + } + + std::optional>> + getLiveOriginsAtPoint(llvm::StringRef Annotation) { + ProgramPoint PP = Runner.getProgramPoint(Annotation); + if (!PP) + return std::nullopt; + std::vector> Result; + for (auto &[OID, Info] : Analysis.getLiveOrigins().getLiveOriginsAt(PP)) + Result.push_back({OID, Info.Kind}); + return Result; + } + + ProgramPoint getProgramPoint(llvm::StringRef Annotation) { + return Runner.getProgramPoint(Annotation); + } + + llvm::ArrayRef getBlockContaining(ProgramPoint P) { + return Runner.getAnalysis().getFactManager().getBlockContaining(P); + } + +private: + template DeclT *findDecl(llvm::StringRef Name) { + auto &Ctx = Runner.getASTContext(); + const auto *TargetFunc = Runner.getAnalysisContext().getDecl(); + auto Results = + match(valueDecl(hasName(Name), + 
hasAncestor(functionDecl(equalsNode(TargetFunc)))) + .bind("v"), + Ctx); + if (Results.empty()) { + ADD_FAILURE() << "Declaration '" << Name << "' not found in AST."; + return nullptr; + } + if (Results.size() > 1) { + ADD_FAILURE() << "Multiple declarations found for '" << Name << "'"; + return nullptr; + } + return const_cast(selectFirst("v", Results)); + } + + LifetimeTestRunner &Runner; + LifetimeSafetyAnalysis &Analysis; +}; + +// ========================================================================= // +// GTest Matchers & Fixture +// ========================================================================= // + +// A helper class to represent a set of loans, identified by variable names. +class LoanSetInfo { +public: + LoanSetInfo(const std::vector &Vars, LifetimeTestHelper &H) + : LoanVars(Vars), Helper(H) {} + std::vector LoanVars; + LifetimeTestHelper &Helper; +}; + +// It holds the name of the origin variable and a reference to the helper. +class OriginInfo { +public: + OriginInfo(llvm::StringRef OriginVar, LifetimeTestHelper &Helper) + : OriginVar(OriginVar), Helper(Helper) {} + llvm::StringRef OriginVar; + LifetimeTestHelper &Helper; +}; + +// A helper class to represent a set of origins, identified by variable names. +class OriginsInfo { +public: + OriginsInfo(const std::vector &Vars, LifetimeTestHelper &H) + : OriginVars(Vars), Helper(H) {} + std::vector OriginVars; + LifetimeTestHelper &Helper; +}; + +/// Matcher to verify the set of loans held by an origin at a specific +/// program point. +/// +/// This matcher is intended to be used with an \c OriginInfo object. +/// +/// \param LoanVars A vector of strings, where each string is the name of a +/// variable expected to be the source of a loan. +/// \param Annotation A string identifying the program point (created with +/// POINT()) where the check should be performed. 
+MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") { + const OriginInfo &Info = arg; + std::optional OIDOpt = Info.Helper.getOriginForDecl(Info.OriginVar); + if (!OIDOpt) { + *result_listener << "could not find origin for '" << Info.OriginVar.str() + << "'"; + return false; + } + + std::optional ActualLoansSetOpt = + Info.Helper.getLoansAtPoint(*OIDOpt, Annotation); + if (!ActualLoansSetOpt) { + *result_listener << "could not get a valid loan set at point '" + << Annotation << "'"; + return false; + } + std::vector ActualLoans(ActualLoansSetOpt->begin(), + ActualLoansSetOpt->end()); + + std::vector ExpectedLoans; + for (const auto &LoanVar : LoanVars) { + std::vector ExpectedLIDs = Info.Helper.getLoansForVar(LoanVar); + if (ExpectedLIDs.empty()) { + *result_listener << "could not find loan for var '" << LoanVar << "'"; + return false; + } + ExpectedLoans.insert(ExpectedLoans.end(), ExpectedLIDs.begin(), + ExpectedLIDs.end()); + } + std::sort(ExpectedLoans.begin(), ExpectedLoans.end()); + std::sort(ActualLoans.begin(), ActualLoans.end()); + if (ExpectedLoans != ActualLoans) { + *result_listener << "Expected: {"; + for (const auto &LoanID : ExpectedLoans) { + *result_listener << LoanID.Value << ", "; + } + *result_listener << "} Actual: {"; + for (const auto &LoanID : ActualLoans) { + *result_listener << LoanID.Value << ", "; + } + *result_listener << "}"; + return false; + } + + return ExplainMatchResult(UnorderedElementsAreArray(ExpectedLoans), + ActualLoans, result_listener); +} + +enum class LivenessKindFilter { Maybe, Must, All }; + +/// Matcher to verify the complete set of live origins at a program point. 
+MATCHER_P2(AreLiveAtImpl, Annotation, ConfFilter, "") { + const OriginsInfo &Info = arg; + auto &Helper = Info.Helper; + auto ActualLiveSetOpt = Helper.getLiveOriginsAtPoint(Annotation); + if (!ActualLiveSetOpt) { + *result_listener << "could not get a valid live origin set at point '" + << Annotation << "'"; + return false; + } + std::vector ActualLiveOrigins; + for (const auto [OID, ActualConfidence] : ActualLiveSetOpt.value()) { + if (ConfFilter == LivenessKindFilter::All) + ActualLiveOrigins.push_back(OID); + if (ActualConfidence == LivenessKind::Maybe && + ConfFilter == LivenessKindFilter::Maybe) + ActualLiveOrigins.push_back(OID); + if (ActualConfidence == LivenessKind::Must && + ConfFilter == LivenessKindFilter::Must) + ActualLiveOrigins.push_back(OID); + } + + std::vector ExpectedLiveOrigins; + for (const auto &VarName : Info.OriginVars) { + auto OriginIDOpt = Helper.getOriginForDecl(VarName); + if (!OriginIDOpt) { + *result_listener << "could not find an origin for variable '" << VarName + << "'"; + return false; + } + ExpectedLiveOrigins.push_back(*OriginIDOpt); + } + std::sort(ExpectedLiveOrigins.begin(), ExpectedLiveOrigins.end()); + std::sort(ActualLiveOrigins.begin(), ActualLiveOrigins.end()); + if (ExpectedLiveOrigins != ActualLiveOrigins) { + *result_listener << "Expected: {"; + for (const auto &OriginID : ExpectedLiveOrigins) { + *result_listener << OriginID.Value << ", "; + } + *result_listener << "} Actual: {"; + for (const auto &OriginID : ActualLiveOrigins) { + *result_listener << OriginID.Value << ", "; + } + *result_listener << "}"; + return false; + } + return true; +} + +MATCHER_P2(HasLiveLoanAtExpiryImpl, HelperPtr, Annotation, "") { + llvm::StringRef VarName = arg; + LifetimeTestHelper &Helper = *HelperPtr; + + std::vector Loans = Helper.getLoansForVar(VarName); + if (Loans.empty()) { + *result_listener << "No loans found for variable" << VarName.str(); + return false; + } + + ProgramPoint PP = Helper.getProgramPoint(Annotation); + 
llvm::ArrayRef AllFactsInBlock = Helper.getBlockContaining(PP); + + bool NoExpireFactLive = false; + for (const LoanID CurrentLoanID : Loans) { + const ExpireFact *EF = + Helper.getExpireFactFromAllFacts(AllFactsInBlock, CurrentLoanID); + if (!EF) { + NoExpireFactLive = true; + continue; + } + std::optional LiveLoans = Helper.getLiveLoansAtPoint(EF); + if (!LiveLoans.has_value()) { + *result_listener << "No Live Loans At Expiry Location."; + continue; + } + if (LiveLoans->contains({CurrentLoanID})) + return true; + } + if (NoExpireFactLive) { + *result_listener << "No Expire Fact for loan of " << VarName.str(); + return false; + } + *result_listener << "No loans of " << VarName.str() << " are live"; + return false; +} + +MATCHER_P(MustBeLiveAt, Annotation, "") { + return ExplainMatchResult(AreLiveAtImpl(Annotation, LivenessKindFilter::Must), + arg, result_listener); +} + +MATCHER_P(MaybeLiveAt, Annotation, "") { + return ExplainMatchResult( + AreLiveAtImpl(Annotation, LivenessKindFilter::Maybe), arg, + result_listener); +} + +MATCHER_P(AreLiveAt, Annotation, "") { + return ExplainMatchResult(AreLiveAtImpl(Annotation, LivenessKindFilter::All), + arg, result_listener); +} + +// Base test fixture to manage the runner and helper. +class LifetimeAnalysisTest : public ::testing::Test { +protected: + void SetupTest(llvm::StringRef Code) { + Runner = std::make_unique(Code); + Helper = std::make_unique(*Runner); + } + + OriginInfo Origin(llvm::StringRef OriginVar) { + return OriginInfo(OriginVar, *Helper); + } + + /// Factory function that hides the std::vector creation. + OriginsInfo Origins(std::initializer_list OriginVars) { + return OriginsInfo({OriginVars}, *Helper); + } + + OriginsInfo NoOrigins() { return Origins({}); } + + /// Factory function that hides the std::vector creation. + LoanSetInfo LoansTo(std::initializer_list LoanVars) { + return LoanSetInfo({LoanVars}, *Helper); + } + + /// A convenience helper for asserting that no loans are expired. 
+ LoanSetInfo NoLoans() { return LoansTo({}); } + + // Factory function that hides the std::vector creation. + auto HasLoansTo(std::initializer_list LoanVars, + const char *Annotation) { + return HasLoansToImpl(std::vector(LoanVars), Annotation); + } + + auto HasLiveLoanAtExpiry(const char *Annotation) { + return HasLiveLoanAtExpiryImpl(Helper.get(), Annotation); + } + + std::unique_ptr Runner; + std::unique_ptr Helper; +}; + +// ========================================================================= // +// TESTS +// ========================================================================= // + +TEST_F(LifetimeAnalysisTest, SimpleLoanAndOrigin) { + SetupTest(R"( + void target() { + int x; + int* p = &x; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"x"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, OverwriteOrigin) { + SetupTest(R"( + void target() { + MyObj s1, s2; + + MyObj* p = &s1; + POINT(after_s1); + + p = &s2; + POINT(after_s2); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_s1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_s2")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalLoan) { + SetupTest(R"( + void target(bool cond) { + int a, b; + int *p = nullptr; + if (cond) { + p = &a; + POINT(after_then); + } else { + p = &b; + POINT(after_else); + } + POINT(after_if); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "after_then")); + EXPECT_THAT(Origin("p"), HasLoansTo({"b"}, "after_else")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a", "b"}, "after_if")); +} + +TEST_F(LifetimeAnalysisTest, PointerChain) { + SetupTest(R"( + void target() { + MyObj y; + MyObj* ptr1 = &y; + POINT(p1); + + MyObj* ptr2 = ptr1; + POINT(p2); + + ptr2 = ptr1; + POINT(p3); + + ptr2 = ptr2; // Self assignment + POINT(p4); + } + )"); + EXPECT_THAT(Origin("ptr1"), HasLoansTo({"y"}, "p1")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p2")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p3")); + EXPECT_THAT(Origin("ptr2"), 
HasLoansTo({"y"}, "p4")); +} + +TEST_F(LifetimeAnalysisTest, ReassignToNull) { + SetupTest(R"( + void target() { + MyObj s1; + MyObj* p = &s1; + POINT(before_null); + p = nullptr; + POINT(after_null); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_null")); + EXPECT_THAT(Origin("p"), HasLoansTo({}, "after_null")); +} + +TEST_F(LifetimeAnalysisTest, ReassignInIf) { + SetupTest(R"( + void target(bool condition) { + MyObj s1, s2; + MyObj* p = &s1; + POINT(before_if); + if (condition) { + p = &s2; + POINT(after_reassign); + } + POINT(after_if); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_if")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_reassign")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_if")); +} + +TEST_F(LifetimeAnalysisTest, AssignInSwitch) { + SetupTest(R"( + void target(int mode) { + MyObj s1, s2, s3; + MyObj* p = nullptr; + switch (mode) { + case 1: + p = &s1; + POINT(case1); + break; + case 2: + p = &s2; + POINT(case2); + break; + default: + p = &s3; + POINT(case3); + break; + } + POINT(after_switch); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "case1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "case2")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s3"}, "case3")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2", "s3"}, "after_switch")); +} + +TEST_F(LifetimeAnalysisTest, LoopWithBreak) { + SetupTest(R"( + void target(int count) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; + POINT(before_loop); + for (int i = 0; i < count; ++i) { + if (i == 5) { + p = &s2; + POINT(inside_if); + break; + } + POINT(after_if); + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "inside_if")); + // At the join point after if, s2 cannot make it to p without the if. 
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_if")); + // At the join point after the loop, p could hold a loan to s1 (if the loop + // completed normally) or to s2 (if the loop was broken). + EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, PointersInACycle) { + SetupTest(R"( + void target(bool condition) { + MyObj v1, v2, v3; + MyObj *p1 = &v1, *p2 = &v2, *p3 = &v3; + + POINT(before_while); + while (condition) { + MyObj* temp = p1; + p1 = p2; + p2 = p3; + p3 = temp; + POINT(in_loop); + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while")); + EXPECT_THAT(Origin("p3"), HasLoansTo({"v3"}, "before_while")); + + // At the fixed point after the loop, all pointers could point to any of + // the three variables. + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + EXPECT_THAT(Origin("p3"), HasLoansTo({"v1", "v2", "v3"}, "after_loop")); + + EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "in_loop")); + // 'temp' is a block-local origin and its loans are not tracked outside the + // block.
+ EXPECT_THAT(Origin("temp"), HasLoansTo({}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) { + SetupTest(R"( + void target(bool condition) { + MyObj v1, v2; + MyObj *p1 = &v1, *p2 = &v2; + + POINT(before_while); + while (condition) { + POINT(in_loop_before_temp); + MyObj temp; + p1 = &temp; + POINT(in_loop_after_temp); + + MyObj* q = p1; + p1 = p2; + p2 = q; + } + POINT(after_loop); + } + )"); + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while")); + + EXPECT_THAT(Origin("p1"), + HasLoansTo({"v1", "v2", "temp"}, "in_loop_before_temp")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_before_temp")); + + EXPECT_THAT(Origin("p1"), HasLoansTo({"temp"}, "in_loop_after_temp")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_after_temp")); + + EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "temp"}, "after_loop")); + EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "after_loop")); +} + +TEST_F(LifetimeAnalysisTest, InfiniteLoopPrunesEdges) { + SetupTest(R"( + void target(MyObj out) { + MyObj *p = &out; + POINT(before_loop); + + for (;;) { + POINT(begin); + MyObj in; + p = &in; + POINT(end); + } + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"out"}, "before_loop")); + EXPECT_THAT(Origin("p"), HasLoansTo({"in", "out"}, "begin")); + EXPECT_THAT(Origin("p"), HasLoansTo({"in"}, "end")); +} + +TEST_F(LifetimeAnalysisTest, NestedScopes) { + SetupTest(R"( + void target() { + MyObj* p = nullptr; + { + MyObj outer; + p = &outer; + POINT(before_inner_scope); + { + MyObj inner; + p = &inner; + POINT(inside_inner_scope); + } // inner expires + POINT(after_inner_scope); + } // outer expires + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"outer"}, "before_inner_scope")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "inside_inner_scope")); + EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_inner_scope")); +} +
+TEST_F(LifetimeAnalysisTest, NoDuplicateLoansForImplicitCastToConst) { + SetupTest(R"( + void target() { + MyObj a; + const MyObj* p = &a; + const MyObj* q = &a; + POINT(at_end); + } + )"); + EXPECT_THAT(Helper->getLoansForVar("a"), SizeIs(2)); +} + +TEST_F(LifetimeAnalysisTest, GslPointerSimpleLoan) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromOwner) { + SetupTest(R"( + void target() { + MyObj al, bl, cl, dl, el, fl; + View a = View(al); + View b = View{bl}; + View c = View(View(View(cl))); + View d = View{View(View(dl))}; + View e = View{View{View{el}}}; + View f = {fl}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("a"), HasLoansTo({"al"}, "p1")); + EXPECT_THAT(Origin("b"), HasLoansTo({"bl"}, "p1")); + EXPECT_THAT(Origin("c"), HasLoansTo({"cl"}, "p1")); + EXPECT_THAT(Origin("d"), HasLoansTo({"dl"}, "p1")); + EXPECT_THAT(Origin("e"), HasLoansTo({"el"}, "p1")); + EXPECT_THAT(Origin("f"), HasLoansTo({"fl"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConstructFromView) { + SetupTest(R"( + void target() { + MyObj a; + View x = View(a); + View y = View{x}; + View z = View(View(View(y))); + View p = View{View(View(x))}; + View q = {x}; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("q"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerInConditionalOperator) { + SetupTest(R"( + void target(bool cond) { + MyObj a, b; + View v = cond ? 
a : b; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({"a", "b"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, ExtraParenthesis) { + SetupTest(R"( + void target() { + MyObj a; + View x = ((View((((a)))))); + View y = ((View{(((x)))})); + View z = ((View(((y))))); + View p = ((View{((x))})); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "p1")); +} + +// FIXME: Handle temporaries. +TEST_F(LifetimeAnalysisTest, ViewFromTemporary) { + SetupTest(R"( + MyObj temporary(); + void target() { + View v = temporary(); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerWithConstAndAuto) { + SetupTest(R"( + void target() { + MyObj a; + const View v1 = a; + auto v2 = v1; + const auto& v3 = v2; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerPropagation) { + SetupTest(R"( + void target() { + MyObj a; + View x = a; + POINT(p1); + + View y = x; // Propagation via copy-construction + POINT(p2); + + View z; + z = x; // Propagation via copy-assignment + POINT(p3); + } + )"); + + EXPECT_THAT(Origin("x"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("z"), HasLoansTo({"a"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerReassignment) { + SetupTest(R"( + void target() { + MyObj safe; + View v; + v = safe; + POINT(p1); + { + MyObj unsafe; + v = unsafe; + POINT(p2); + } // `unsafe` expires here. 
+ POINT(p3); + } + )"); + + EXPECT_THAT(Origin("v"), HasLoansTo({"safe"}, "p1")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p2")); + EXPECT_THAT(Origin("v"), HasLoansTo({"unsafe"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, GslPointerConversionOperator) { + SetupTest(R"( + struct String; + + struct [[gsl::Pointer()]] StringView { + StringView() = default; + }; + + struct String { + ~String() {} + operator StringView() const; + }; + + void target() { + String xl, yl; + StringView x = xl; + StringView y; + y = yl; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("x"), HasLoansTo({"xl"}, "p1")); + EXPECT_THAT(Origin("y"), HasLoansTo({"yl"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundSimple) { + SetupTest(R"( + View Identity(View v [[clang::lifetimebound]]); + void target() { + MyObj a, b; + View v1 = a; + POINT(p1); + + View v2 = Identity(v1); + View v3 = Identity(b); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + // The origin of v2 should now contain the loan to 'a' from v1. + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"b"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMemberFunction) { + SetupTest(R"( + struct [[gsl::Pointer()]] MyView { + MyView(const MyObj& o) {} + MyView pass() [[clang::lifetimebound]] { return *this; } + }; + void target() { + MyObj o; + MyView v1 = o; + POINT(p1); + MyView v2 = v1.pass(); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o"}, "p1")); + // The call v1.pass() is bound to 'v1'. The origin of v2 should get the loans + // from v1.
+ EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMultipleArgs) { + SetupTest(R"( + View Choose(bool cond, View a [[clang::lifetimebound]], View b [[clang::lifetimebound]]); + void target() { + MyObj o1, o2; + View v1 = o1; + View v2 = o2; + POINT(p1); + + View v3 = Choose(true, v1, v2); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o1"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o2"}, "p2")); + // v3 should have loans from both v1 and v2, demonstrating the union of + // loans. + EXPECT_THAT(Origin("v3"), HasLoansTo({"o1", "o2"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundMixedArgs) { + SetupTest(R"( + View Choose(bool cond, View a [[clang::lifetimebound]], View b); + void target() { + MyObj o1, o2; + View v1 = o1; + View v2 = o2; + POINT(p1); + + View v3 = Choose(true, v1, v2); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o1"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o2"}, "p1")); + // v3 should only have loans from v1, as v2 is not lifetimebound. + EXPECT_THAT(Origin("v3"), HasLoansTo({"o1"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundChainOfViews) { + SetupTest(R"( + View Identity(View v [[clang::lifetimebound]]); + View DoubleIdentity(View v [[clang::lifetimebound]]); + + void target() { + MyObj obj; + View v1 = obj; + POINT(p1); + View v2 = DoubleIdentity(Identity(v1)); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"obj"}, "p1")); + // v2 should inherit the loan from v1 through the chain of calls. 
+ EXPECT_THAT(Origin("v2"), HasLoansTo({"obj"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundRawPointerParameter) { + SetupTest(R"( + View ViewFromPtr(const MyObj* p [[clang::lifetimebound]]); + MyObj* PtrFromPtr(const MyObj* p [[clang::lifetimebound]]); + MyObj* PtrFromView(View v [[clang::lifetimebound]]); + + void target() { + MyObj a; + View v = ViewFromPtr(&a); + POINT(p1); + + MyObj b; + MyObj* ptr1 = PtrFromPtr(&b); + MyObj* ptr2 = PtrFromPtr(PtrFromPtr(PtrFromPtr(ptr1))); + POINT(p2); + + MyObj c; + View v2 = ViewFromPtr(PtrFromView(c)); + POINT(p3); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("ptr1"), HasLoansTo({"b"}, "p2")); + EXPECT_THAT(Origin("ptr2"), HasLoansTo({"b"}, "p2")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"c"}, "p3")); +} + +// FIXME: This can be controversial and may be revisited in the future. +TEST_F(LifetimeAnalysisTest, LifetimeboundConstRefViewParameter) { + SetupTest(R"( + View Identity(const View& v [[clang::lifetimebound]]); + void target() { + MyObj o; + View v1 = o; + View v2 = Identity(v1); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundConstRefObjParam) { + SetupTest(R"( + View Identity(const MyObj& o [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = Identity(a); + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundReturnReference) { + SetupTest(R"( + const MyObj& Identity(View v [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = a; + POINT(p1); + + View v2 = Identity(v1); + + const MyObj& b = Identity(v1); + View v3 = Identity(b); + POINT(p2); + + MyObj c; + View v4 = Identity(c); + POINT(p3); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + + // FIXME: Handle reference types. 'v3' should have loan to 'a' instead of 'b'. 
+ EXPECT_THAT(Origin("v3"), HasLoansTo({"b"}, "p2")); + + EXPECT_THAT(Origin("v4"), HasLoansTo({"c"}, "p3")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundTemplateFunction) { + SetupTest(R"( + template + const T& Identity(T&& v [[clang::lifetimebound]]); + void target() { + MyObj a; + View v1 = Identity(a); + POINT(p1); + + View v2 = Identity(v1); + const View& v3 = Identity(v1); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"a"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"a"}, "p2")); + EXPECT_THAT(Origin("v3"), HasLoansTo({"a"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundTemplateClass) { + SetupTest(R"( + template + struct [[gsl::Pointer()]] MyTemplateView { + MyTemplateView(const T& o) {} + MyTemplateView pass() [[clang::lifetimebound]] { return *this; } + }; + void target() { + MyObj o; + MyTemplateView v1 = o; + POINT(p1); + MyTemplateView v2 = v1.pass(); + POINT(p2); + } + )"); + EXPECT_THAT(Origin("v1"), HasLoansTo({"o"}, "p1")); + EXPECT_THAT(Origin("v2"), HasLoansTo({"o"}, "p2")); +} + +TEST_F(LifetimeAnalysisTest, LifetimeboundConversionOperator) { + SetupTest(R"( + struct MyOwner { + MyObj o; + operator View() const [[clang::lifetimebound]]; + }; + + void target() { + MyOwner owner; + View v = owner; + POINT(p1); + } + )"); + EXPECT_THAT(Origin("v"), HasLoansTo({"owner"}, "p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessDeadPointer) { + SetupTest(R"( + void target() { + POINT(p1); + MyObj s; + MyObj* p = &s; + POINT(p2); + } + )"); + EXPECT_THAT(NoOrigins(), AreLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessSimpleReturn) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessKilledByReassignment) { + SetupTest(R"( + MyObj* target() { + MyObj s1, s2; + MyObj* p = &s1; + POINT(p1); + p = &s2; + POINT(p2); + return p; + } 
+ )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessAcrossBranches) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj x, y; + MyObj* p = nullptr; + POINT(p1); + if (c) { + p = &x; + POINT(p2); + } else { + p = &y; + POINT(p3); + } + return p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p3")); + // Before the `if`, the value of `p` (`nullptr`) is always overwritten before. + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoop) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1, s2; + MyObj* p = &s1; + MyObj* q = &s2; + POINT(p1); + while(c) { + POINT(p2); + + p = q; + POINT(p3); + } + POINT(p4); + return p; + } + )"); + + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p4")); + EXPECT_THAT(NoOrigins(), MaybeLiveAt("p4")); + + EXPECT_THAT(Origins({"p", "q"}), MaybeLiveAt("p3")); + + EXPECT_THAT(Origins({"q"}), MustBeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), MaybeLiveAt("p2")); + + EXPECT_THAT(Origins({"p", "q"}), MaybeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoopAndIf) { + // See https://github.com/llvm/llvm-project/issues/156959. 
+ SetupTest(R"( + void target(bool cond) { + MyObj b; + while (cond) { + POINT(p1); + + MyObj a; + View p = b; + + POINT(p2); + + if (cond) { + POINT(p3); + p = a; + } + POINT(p4); + (void)p; + POINT(p5); + } + } + )"); + EXPECT_THAT(NoOrigins(), AreLiveAt("p5")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p4")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p3")); + EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), AreLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessInLoopAndIf2) { + SetupTest(R"( + void target(MyObj safe, bool condition) { + MyObj* p = &safe; + MyObj* q = &safe; + POINT(p1); + + while (condition) { + POINT(p2); + MyObj x; + p = &x; + + POINT(p3); + + if (condition) { + q = p; + POINT(p4); + } + + POINT(p5); + (void)*p; + (void)*q; + POINT(p6); + } + } + )"); + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p6")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p6")); + + EXPECT_THAT(Origins({"p", "q"}), MustBeLiveAt("p5")); + + EXPECT_THAT(Origins({"p", "q"}), MustBeLiveAt("p4")); + + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p3")); + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p3")); + + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p2")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p2")); + + EXPECT_THAT(Origins({"q"}), MaybeLiveAt("p1")); + EXPECT_THAT(NoOrigins(), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, LivenessOutsideLoop) { + SetupTest(R"( + void target(MyObj safe) { + MyObj* p = &safe; + for (int i = 0; i < 1; ++i) { + MyObj s; + p = &s; + POINT(p1); + } + POINT(p2); + (void)*p; + } + )"); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + EXPECT_THAT(Origins({"p"}), MaybeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, TrivialDestructorsUAF) { + SetupTest(R"( + void target() { + int *ptr; + { + int s = 1; + ptr = &s; + } + POINT(p1); + (void)*ptr; + } + )"); + EXPECT_THAT(Origin("ptr"), HasLoansTo({"s"}, "p1")); + EXPECT_THAT(Origins({"ptr"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, 
TrivialClassDestructorsUAF) { + SetupTest(R"( + class S { + View a, b; + }; + + void target() { + S* ptr; + { + S s; + ptr = &s; + } + POINT(p1); + (void)ptr; + } + )"); + EXPECT_THAT(Origin("ptr"), HasLoansTo({"s"}, "p1")); + EXPECT_THAT(Origins({"ptr"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, SimpleReturnStackAddress) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + +TEST_F(LifetimeAnalysisTest, DirectReturn) { + SetupTest(R"( + MyObj* target() { + MyObj s; + POINT(P); + return &s; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalAssignUnconditionalReturn) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1; + MyObj* p = nullptr; + if (c) { + p = &s1; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("s1", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignments) { + SetupTest(R"( + MyObj* target() { + MyObj s; + MyObj* p1 = &s; + MyObj* p2 = &s; + POINT(P); + return p2; + } + )"); + // Test if at least one loan to "s" is live. + EXPECT_THAT("s", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ConditionalAssignBothBranches) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj s1; + static MyObj s2; + MyObj* p = nullptr; + if (c) { + p = &s1; + } else { + p = &s2; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("s1", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, ReassignFromSafeToLocalThenReturn) { + SetupTest(R"( + MyObj* target() { + static MyObj safe_obj; + MyObj local_obj; + MyObj* p = &safe_obj; + + p = &local_obj; + POINT(P); + return p; + } + )"); + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, PointerChainToLocal) { + SetupTest(R"( + MyObj* target() { + MyObj local_obj; + MyObj* p1 = &local_obj; + MyObj* p2 = p1; + POINT(P); + return p2; + } + )"); + EXPECT_THAT("local_obj", 
HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignmentMultipleReturn) { + SetupTest(R"( + MyObj* target(bool c1, bool c2) { + static MyObj global_obj; + MyObj local_obj1; + MyObj local_obj2; + MyObj* p = nullptr; + if(c1){ + p = &local_obj1; + POINT(C1); + return p; + } + else if(c2){ + p = &local_obj2; + POINT(C2); + return p; + } + p = &global_obj; + POINT(C3); + return p; + } + )"); + + EXPECT_THAT("local_obj1", HasLiveLoanAtExpiry("C1")); + EXPECT_THAT("local_obj2", HasLiveLoanAtExpiry("C2")); + + EXPECT_THAT("local_obj1", Not(HasLiveLoanAtExpiry("C3"))); + EXPECT_THAT("local_obj2", Not(HasLiveLoanAtExpiry("C3"))); +} + +TEST_F(LifetimeAnalysisTest, MultipleAssignmentsSingleReturn) { + SetupTest(R"( + MyObj* target(bool c1, bool c2) { + static MyObj global_obj; + MyObj local_obj1; + MyObj local_obj2; + MyObj* p = nullptr; + if(c1){ + p = &local_obj1; + } + else if(c2){ + p = &local_obj2; + } + else{ + p = &global_obj; + } + POINT(P); + return p; + } + )"); + EXPECT_THAT("local_obj1", HasLiveLoanAtExpiry("P")); + EXPECT_THAT("local_obj2", HasLiveLoanAtExpiry("P")); +} + +TEST_F(LifetimeAnalysisTest, UseAfterScopeThenReturn) { + SetupTest(R"( + MyObj* target() { + MyObj* p; + { + MyObj local_obj; + p = &local_obj; + POINT(p1); + } + POINT(p2); + return p; + } + )"); + EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p2")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p2")); + + EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p1")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); + + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("p2")); +} + +TEST_F(LifetimeAnalysisTest, ReturnBeforeUseAfterScope) { + SetupTest(R"( + MyObj* target(bool c) { + MyObj* p; + static MyObj global_obj; + { + MyObj local_obj; + p = &local_obj; + if(c){ + POINT(p1); + return p; + } + } + POINT(p2); + return &global_obj; + } + )"); + EXPECT_THAT("local_obj", HasLiveLoanAtExpiry("p1")); + + EXPECT_THAT(NoOrigins(), AreLiveAt("p2")); + + 
EXPECT_THAT(Origin("p"), HasLoansTo({"local_obj"}, "p1")); + EXPECT_THAT(Origins({"p"}), MustBeLiveAt("p1")); +} + +TEST_F(LifetimeAnalysisTest, TrivialDestructorsUAR) { + SetupTest(R"( + int* target() { + int s = 10; + int* p = &s; + POINT(p1); + return p; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + +TEST_F(LifetimeAnalysisTest, TrivialClassDestructorsUAR) { + SetupTest(R"( + class S { + View a, b; + }; + + S* target() { + S *ptr; + S s; + ptr = &s; + POINT(p1); + return ptr; + } + )"); + EXPECT_THAT("s", HasLiveLoanAtExpiry("p1")); +} + +} // anonymous namespace +} // namespace clang::lifetimes::internal diff --git a/llvm/include/llvm/ADT/ImmutableSet.h b/llvm/include/llvm/ADT/ImmutableSet.h index ac86f43b2048e..b737d5b227340 100644 --- a/llvm/include/llvm/ADT/ImmutableSet.h +++ b/llvm/include/llvm/ADT/ImmutableSet.h @@ -21,7 +21,9 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Signals.h" #include #include #include