diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b86a9c437ffb1..f239fd7be01fa 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -159,6 +159,14 @@ Removed Compiler Flags
 Attribute Changes in Clang
 --------------------------
 
+- Introduced a new attribute ``[[clang::coro_await_suspend_destroy]]``.  When
+  applied to an ``await_suspend(std::coroutine_handle<Promise>)`` member of a
+  coroutine awaiter, it causes suspensions into this awaiter to use a new
+  ``await_suspend_destroy(Promise&)`` method.  The coroutine is then immediately
+  destroyed.  This flow bypasses the original ``await_suspend()`` (though it
+  should contain a compatibility stub), and omits suspend intrinsics.  The net
+  effect is improved code speed and size for "short-circuiting" coroutines.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 - Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 9fedb230ce397..e9419b9560ebf 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -5266,6 +5266,7 @@ class CoroutineSuspendExpr : public Expr {
       : Expr(SC, Resume->getType(), Resume->getValueKind(),
              Resume->getObjectKind()),
         KeywordLoc(KeywordLoc), OpaqueValue(OpaqueValue) {
+    CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
     SubExprs[SubExpr::Operand] = Operand;
     SubExprs[SubExpr::Common] = Common;
     SubExprs[SubExpr::Ready] = Ready;
@@ -5279,6 +5280,7 @@ class CoroutineSuspendExpr : public Expr {
       : Expr(SC, Ty, VK_PRValue, OK_Ordinary), KeywordLoc(KeywordLoc) {
     assert(Common->isTypeDependent() && Ty->isDependentType() &&
            "wrong constructor for non-dependent co_await/co_yield expression");
+    CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
     SubExprs[SubExpr::Operand] = Operand;
     SubExprs[SubExpr::Common] = Common;
     SubExprs[SubExpr::Ready] = nullptr;
@@ -5288,6 +5290,7 @@ class CoroutineSuspendExpr : public Expr {
   }
 
   CoroutineSuspendExpr(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) {
+    CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
     SubExprs[SubExpr::Operand] = nullptr;
     SubExprs[SubExpr::Common] = nullptr;
     SubExprs[SubExpr::Ready] = nullptr;
@@ -5295,6 +5298,14 @@ class CoroutineSuspendExpr : public Expr {
     SubExprs[SubExpr::Resume] = nullptr;
   }
 
+  bool useAwaitSuspendDestroy() const {
+    return CoroutineSuspendExprBits.UseAwaitSuspendDestroy;
+  }
+
+  void setUseAwaitSuspendDestroy(bool Use = true) {
+    CoroutineSuspendExprBits.UseAwaitSuspendDestroy = Use;
+  }
+
   Expr *getCommonExpr() const {
     return static_cast<Expr*>(SubExprs[SubExpr::Common]);
   }
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index a5b0d5053003f..27cbfbfa3d319 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -1258,12 +1258,23 @@ class alignas(void *) Stmt {
 
   //===--- C++ Coroutines bitfields classes ---===//
 
-  class CoawaitExprBitfields {
-    friend class CoawaitExpr;
+  class CoroutineSuspendExprBitfields {
+    friend class CoroutineSuspendExpr;
 
     LLVM_PREFERRED_TYPE(ExprBitfields)
     unsigned : NumExprBits;
 
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned UseAwaitSuspendDestroy : 1;
+  };
+  enum { NumCoroutineSuspendExprBits = NumExprBits + 1 };
+
+  class CoawaitExprBitfields {
+    friend class CoawaitExpr;
+
+    LLVM_PREFERRED_TYPE(CoroutineSuspendExprBitfields)
+    unsigned : NumCoroutineSuspendExprBits;
+
     LLVM_PREFERRED_TYPE(bool)
     unsigned IsImplicit : 1;
   };
@@ -1388,6 +1399,7 @@ class alignas(void *) Stmt {
     PackIndexingExprBitfields PackIndexingExprBits;
 
     // C++ Coroutines expressions
+    CoroutineSuspendExprBitfields CoroutineSuspendExprBits;
     CoawaitExprBitfields CoawaitBits;
 
     // Obj-C Expressions
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 8c8e0b3bca46c..e4cedd5e55784 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def CoroAwaitSuspendDestroy: InheritableAttr {
+  let Spellings = [Clang<"coro_await_suspend_destroy">];
+  let Subjects = SubjectList<[CXXMethod]>;
+  let LangOpts = [CPlusPlus];
+  let Documentation = [CoroAwaitSuspendDestroyDoc];
+  let SimpleHandler = 1;
+}
+
 // OSObject-based attributes.
 def OSConsumed : InheritableParamAttr {
   let Spellings = [Clang<"os_consumed">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 00e8fc0787884..8372fa98fc294 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9363,6 +9363,126 @@ Example:
 }];
 }
 
+def CoroAwaitSuspendDestroyDoc : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+
+The ``[[clang::coro_await_suspend_destroy]]`` attribute applies to an
+``await_suspend(std::coroutine_handle<Promise>)`` member function of a
+coroutine awaiter.  When applied, suspensions into the awaiter use an optimized
+call path that bypasses standard suspend intrinsics, and immediately destroys
+the suspending coro.
+
+Instead of calling the annotated ``await_suspend()``, the coroutine calls
+``await_suspend_destroy(Promise&)`` and immediately destroys the coroutine.
+
+Although it is not called, it is strongly recommended that ``await_suspend()``
+contain the following portability stub.  The stub ensures the awaiter behaves
+equivalently without ``coro_await_suspend_destroy`` support, and makes the
+control flow clear to readers unfamiliar with the attribute:
+
+.. code-block:: c++
+
+  void await_suspend_destroy(Promise&) { /* actual implementation */ }
+  [[clang::coro_await_suspend_destroy]]
+  void await_suspend(std::coroutine_handle<Promise> handle) {
+    // Stub to preserve behavior when the attribute is not supported
+    await_suspend_destroy(handle.promise());
+    handle.destroy();
+  }
+
+An awaiter type may provide both annotated and non-annotated overloads of
+``await_suspend()``, as long as each invocation of an annotated overload has a
+corresponding ``await_suspend_destroy(Promise&)`` overload.
+
+The return type of ``await_suspend()`` must match that of
+``await_suspend_destroy()``, and the latter must return ``void``.  (Note: if
+desired, it would be straightforward to also support the "symmetric transfer"
+``std::coroutine_handle`` return type.)
+
+This optimization improves code speed and size for "short-circuiting"
+coroutines — those that use coroutine syntax **exclusively** for early returns
+and control flow rather than true asynchronous operations.
+
+Specifically, a short-circuiting awaiter is one that either proceeds
+immediately (``await_ready()`` returns ``true``, skipping to
+``await_resume()``) or terminates the coroutine execution.
+
+Then, a short-circuiting coroutine is one where **all** the awaiters (including
+``co_await``, ``co_yield``, initial, and final suspend) are short-circuiting.
+
+The short-circuiting coroutine concept introduced above has close analogs in
+other languages:
+
+- Rust has ``Result<T>`` and a ``?`` operator to unpack it, while
+  ``folly::result<T>`` is a C++ short-circuiting coroutine, within which
+  ``co_await or_unwind(someResult())`` acts just like ``someResult()?``.
+
+- Haskell has ``Maybe`` & ``Error`` monads.  A short-circuiting ``co_await``
+  loosely corresponds to the monadic ``>>=``, whereas a short-circuiting
+  ``std::optional`` coro would be an exact analog of ``Maybe``.
+
+Returning to C++, even non-short-circuiting coroutines, including asynchronous
+ones that suspend, may contain short-circuiting awaiters, and those might still
+see some performance benefit if annotated.
+
+Marking your ``await_suspend_destroy`` as ``noexcept`` can sometimes further
+improve optimization.
+
+However, if **all** awaiters within a coroutine are short-circuiting, then the
+coro frame **can reliably be allocated on-stack**, making short-circuiting
+coros behave qualitatively more like plain functions -- with better
+optimization & more predictable behavior under memory pressure.
+
+Technical aside: Heap elision becomes reliable because LLVM is allowed to elide
+heap allocations whenever it can prove that the handle doesn't "escape" from
+the coroutine.  User code can only access the handle via suspend intrinsics,
+and annotated short-circuiting awaiters simply don't use any.
+
+Note that a short-circuiting coroutine differs in one important way from a
+function that replaced each ``co_await awaiter`` with explicit control flow:
+
+.. code-block:: c++
+
+  T value;
+  if (awaiter.await_ready()) {
+    value = awaiter.await_resume();
+  } else {
+    // ... content of `await_suspend_destroy` ...
+    return /* early-termination return object */;
+  }
+
+The key difference is that ``unhandled_exception()`` lets the promise type
+wrap the function body in an implicit try-catch.  This automatic exception
+boundary behavior can be desirable in robust, return-value-oriented programs
+that benefit from short-circuiting coroutines.  If not, the promise can
+re-throw.
+
+Here is an example of a short-circuiting awaiter for a hypothetical
+``std::optional`` coroutine:
+
+.. code-block:: c++
+
+  template <typename T>
+  struct optional_awaiter {
+    std::optional<T> opt_;
+    bool await_ready() const noexcept { return opt_.has_value(); }
+    T await_resume() { return std::move(opt_).value(); }
+    void await_suspend_destroy(auto& promise) {
+      // The return object of `promise`'s coro should default to "empty".
+      assert(!promise.returned_optional_ptr_->has_value());
+    }
+    [[clang::coro_await_suspend_destroy]]
+    void await_suspend(auto handle) {
+      // Fallback for when `coro_await_suspend_destroy` is unavailable.
+      await_suspend_destroy(handle.promise());
+      handle.destroy();
+    }
+  };
+
+}];
+}
+
 def CountedByDocs : Documentation {
   let Category = DocCatField;
   let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c733e8823cea6..a151de47d2657 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12513,6 +12513,12 @@ def note_coroutine_promise_call_implicitly_required : Note<
 def err_await_suspend_invalid_return_type : Error<
   "return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
 >;
+def err_await_suspend_destroy_invalid_return_type : Error<
+  "return type of 'await_suspend_destroy' is required to be 'void' (have %0)"
+>;
+def err_await_suspend_suspend_destroy_return_type_mismatch : Error<
+  "return type of 'await_suspend' (%1) must match return type of 'await_suspend_destroy' (%0)"
+>;
 def note_await_ready_no_bool_conversion : Note<
   "return type of 'await_ready' is required to be contextually convertible to 'bool'"
 >;
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 827385f9c1a1f..ab972dda1c7c4 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -220,51 +220,54 @@ namespace {
     RValue RV;
   };
 }
-static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
-                                    CoroutineSuspendExpr const &S,
-                                    AwaitKind Kind, AggValueSlot aggSlot,
-                                    bool ignoreResult, bool forLValue) {
-  auto *E = S.getCommonExpr();
-
-  auto CommonBinder =
-      CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
-  auto UnbindCommonOnExit =
-      llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });
 
-  auto Prefix = buildSuspendPrefixStr(Coro, Kind);
-  BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
-  BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
-  BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
-
-  // If expression is ready, no need to suspend.
-  CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+// Emits the simplified `await_suspend_destroy` path, which avoids suspend
+// intrinsics: calls `SuspendWrapper(Awaiter, Frame)` directly (call-or-invoke
+// if `AwaitSuspendCanThrow`, else nounwind), then branches to `Coro.CleanupJD`
+// through cleanups.  If a coro has only `await_suspend_destroy` and trivial
+// (`suspend_never`) awaiters, later passes can allocate its frame on-stack.
+//
+// As of 2025, there is still an optimization gap between a realistic
+// short-circuiting coro and the equivalent plain function (guesstimate:
+// 4-5ns per call on x86).  One idea: also elide trivial suspends like
+// `std::suspend_never`, to hit the `HasCoroSuspend` path in `CoroEarly.cpp`.
+static void emitAwaitSuspendDestroy(CodeGenFunction &CGF, CGCoroData &Coro,
+                                    llvm::Function *SuspendWrapper,
+                                    llvm::Value *Awaiter, llvm::Value *Frame,
+                                    bool AwaitSuspendCanThrow) {
+  SmallVector<llvm::Value *, 2> DirectCallArgs;
+  DirectCallArgs.push_back(Awaiter);
+  DirectCallArgs.push_back(Frame);
+
+  if (AwaitSuspendCanThrow) {
+    CGF.EmitCallOrInvoke(SuspendWrapper, DirectCallArgs);
+  } else {
+    CGF.EmitNounwindRuntimeCall(SuspendWrapper, DirectCallArgs);
+  }
 
-  // Otherwise, emit suspend logic.
-  CGF.EmitBlock(SuspendBlock);
+  CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+}
 
+static void emitStandardAwaitSuspend(
+    CodeGenFunction &CGF, CGCoroData &Coro, CoroutineSuspendExpr const &S,
+    llvm::Function *SuspendWrapper, llvm::Value *Awaiter, llvm::Value *Frame,
+    bool AwaitSuspendCanThrow, SmallString<32> Prefix, BasicBlock *ReadyBlock,
+    AwaitKind Kind, CoroutineSuspendExpr::SuspendReturnType SuspendReturnType) {
   auto &Builder = CGF.Builder;
-  llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
-  auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
-  auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
-
-  auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
-      CGF.CurFn->getName(), Prefix, S);
 
   CGF.CurCoro.InSuspendBlock = true;
 
-  assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
-         "expected to be called in coroutine context");
-
   SmallVector<llvm::Value *, 3> SuspendIntrinsicCallArgs;
-  SuspendIntrinsicCallArgs.push_back(
-      CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF));
-
-  SuspendIntrinsicCallArgs.push_back(CGF.CurCoro.Data->CoroBegin);
+  SuspendIntrinsicCallArgs.push_back(Awaiter);
+  SuspendIntrinsicCallArgs.push_back(Frame);
   SuspendIntrinsicCallArgs.push_back(SuspendWrapper);
+  BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
 
-  const auto SuspendReturnType = S.getSuspendReturnType();
-  llvm::Intrinsic::ID AwaitSuspendIID;
+  llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
+  auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
+  auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
 
+  llvm::Intrinsic::ID AwaitSuspendIID;
   switch (SuspendReturnType) {
   case CoroutineSuspendExpr::SuspendReturnType::SuspendVoid:
     AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_void;
@@ -279,12 +282,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
 
   llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);
 
-  // SuspendHandle might throw since it also resumes the returned handle.
-  const bool AwaitSuspendCanThrow =
-      SuspendReturnType ==
-          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
-      StmtCanThrow(S.getSuspendExpr());
-
   llvm::CallBase *SuspendRet = nullptr;
   // FIXME: add call attributes?
   if (AwaitSuspendCanThrow)
@@ -332,6 +329,54 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   // Emit cleanup for this suspend point.
   CGF.EmitBlock(CleanupBlock);
   CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+}
+
+static LValueOrRValue
+emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
+                      CoroutineSuspendExpr const &S, AwaitKind Kind,
+                      AggValueSlot aggSlot, bool ignoreResult, bool forLValue) {
+  auto *E = S.getCommonExpr();
+
+  auto CommonBinder =
+      CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
+  auto UnbindCommonOnExit =
+      llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });
+
+  auto Prefix = buildSuspendPrefixStr(Coro, Kind);
+  BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
+  BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
+
+  // If expression is ready, no need to suspend.
+  CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+
+  // Otherwise, emit suspend logic.
+  CGF.EmitBlock(SuspendBlock);
+
+  auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
+      CGF.CurFn->getName(), Prefix, S);
+
+  assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
+         "expected to be called in coroutine context");
+
+  // SuspendHandle might throw since it also resumes the returned handle.
+  const auto SuspendReturnType = S.getSuspendReturnType();
+  const bool AwaitSuspendCanThrow =
+      SuspendReturnType ==
+          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
+      StmtCanThrow(S.getSuspendExpr());
+
+  llvm::Value *Awaiter =
+      CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF);
+  llvm::Value *Frame = CGF.CurCoro.Data->CoroBegin;
+
+  if (S.useAwaitSuspendDestroy()) { // Call `await_suspend_destroy` & cleanup
+    emitAwaitSuspendDestroy(CGF, Coro, SuspendWrapper, Awaiter, Frame,
+                            AwaitSuspendCanThrow);
+  } else { // Normal suspend path -- can actually suspend, uses intrinsics
+    emitStandardAwaitSuspend(CGF, Coro, S, SuspendWrapper, Awaiter, Frame,
+                             AwaitSuspendCanThrow, Prefix, ReadyBlock, Kind,
+                             SuspendReturnType);
+  }
 
   // Emit await_resume expression.
   CGF.EmitBlock(ReadyBlock);
@@ -341,6 +386,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   CXXTryStmt *TryStmt = nullptr;
   if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
       StmtCanThrow(S.getResumeExpr())) {
+    auto &Builder = CGF.Builder;
     Coro.ResumeEHVar =
         CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
     Builder.CreateFlagStore(true, Coro.ResumeEHVar);
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index cc03616e0dfe1..21dcadcee6f06 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -284,11 +284,41 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType,
   return S.BuildCallExpr(nullptr, FromAddr.get(), Loc, FramePtr, Loc);
 }
 
+// For [[clang::coro_await_suspend_destroy]]: builds the `Promise&` expression
+//   *(Promise *)__builtin_coro_promise(__builtin_coro_frame(),
+//                                      alignof(Promise), false)
+static ExprResult buildPromiseRef(Sema &S, QualType PromiseType,
+                                  SourceLocation Loc) {
+  uint64_t Align =
+      S.Context.getTypeAlign(PromiseType) / S.Context.getCharWidth();
+
+  // Build __builtin_coro_promise(__builtin_coro_frame(), Align, false)
+  SmallVector<Expr *, 3> Args = {
+      S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}),
+      S.ActOnIntegerConstant(Loc, Align).get(),         // alignof(Promise)
+      S.ActOnCXXBoolLiteral(Loc, tok::kw_false).get()}; // false
+  ExprResult CoroPromiseCall =
+      S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_promise, Args);
+
+  if (CoroPromiseCall.isInvalid())
+    return ExprError();
+
+  // Implicit bitcast of the builtin's result to Promise*
+  ExprResult CastExpr = S.ImpCastExprToType(
+      CoroPromiseCall.get(), S.Context.getPointerType(PromiseType), CK_BitCast);
+  if (CastExpr.isInvalid())
+    return ExprError();
+
+  // Dereference to get Promise&
+  return S.CreateBuiltinUnaryOp(Loc, UO_Deref, CastExpr.get());
+}
+
 struct ReadySuspendResumeResult {
   enum AwaitCallType { ACT_Ready, ACT_Suspend, ACT_Resume };
   Expr *Results[3];
   OpaqueValueExpr *OpaqueValue;
   bool IsInvalid;
+  bool UseAwaitSuspendDestroy;
 };
 
 static ExprResult buildMemberCall(Sema &S, Expr *Base, SourceLocation Loc,
@@ -360,7 +390,8 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise,
 
   // Assume valid until we see otherwise.
   // Further operations are responsible for setting IsInalid to true.
-  ReadySuspendResumeResult Calls = {{}, Operand, /*IsInvalid=*/false};
+  ReadySuspendResumeResult Calls = {
+      {}, Operand, /*IsInvalid=*/false, /*UseAwaitSuspendDestroy=*/false};
 
   using ACT = ReadySuspendResumeResult::AwaitCallType;
 
@@ -401,10 +432,39 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise,
     return Calls;
   }
   Expr *CoroHandle = CoroHandleRes.get();
+  Calls.UseAwaitSuspendDestroy = false;
   CallExpr *AwaitSuspend = cast_or_null<CallExpr>(
       BuildSubExpr(ACT::ACT_Suspend, "await_suspend", CoroHandle));
   if (!AwaitSuspend)
     return Calls;
+
+  // When this `await_suspend()` overload is annotated with
+  // `[[clang::coro_await_suspend_destroy]]`, do NOT call `await_suspend()` --
+  // instead call `await_suspend_destroy(Promise&)`.  This assumes that the
+  // `await_suspend()` is just a compatibility stub consisting of:
+  //     await_suspend_destroy(handle.promise());
+  //     handle.destroy();
+  // Users of the attribute must follow this contract.  Then, diagnostics from
+  // both `await_suspend` and `await_suspend_destroy` will get exposed.
+  CallExpr *PlainAwaitSuspend = nullptr;
+  if (FunctionDecl *AwaitSuspendCallee = AwaitSuspend->getDirectCallee()) {
+    if (AwaitSuspendCallee->hasAttr<CoroAwaitSuspendDestroyAttr>()) {
+      Calls.UseAwaitSuspendDestroy = true;
+      ExprResult PromiseRefRes =
+          buildPromiseRef(S, CoroPromise->getType(), Loc);
+      if (PromiseRefRes.isInvalid()) {
+        Calls.IsInvalid = true;
+        return Calls;
+      }
+      Expr *PromiseRef = PromiseRefRes.get();
+      PlainAwaitSuspend = AwaitSuspend;
+      AwaitSuspend = cast_or_null<CallExpr>(
+          BuildSubExpr(ACT::ACT_Suspend, "await_suspend_destroy", PromiseRef));
+      if (!AwaitSuspend)
+        return Calls;
+    }
+  }
+
   if (!AwaitSuspend->getType()->isDependentType()) {
     // [expr.await]p3 [...]
     //   - await-suspend is the expression e.await_suspend(h), which shall be
@@ -412,25 +472,45 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise,
     //     type Z.
     QualType RetType = AwaitSuspend->getCallReturnType(S.Context);
 
-    // Support for coroutine_handle returning await_suspend.
-    if (Expr *TailCallSuspend =
-            maybeTailCall(S, RetType, AwaitSuspend, Loc))
+    auto EmitAwaitSuspendDiag = [&](unsigned int DiagCode, auto... args) {
+      ((S.Diag(AwaitSuspend->getCalleeDecl()->getLocation(), DiagCode)
+        << RetType)
+       << ... << args);
+      S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required)
+          << AwaitSuspend->getDirectCallee();
+      Calls.IsInvalid = true;
+    };
+
+    if (Calls.UseAwaitSuspendDestroy) {
+      // The return types of `await_suspend` and `await_suspend_destroy` must
+      // match. For now, the latter must return `void` -- though this could be
+      // extended to support returning handles.
+      QualType PlainRetType = PlainAwaitSuspend->getCallReturnType(S.Context);
+      if (!S.Context.hasSameType(PlainRetType, RetType)) {
+        EmitAwaitSuspendDiag(
+            diag::err_await_suspend_suspend_destroy_return_type_mismatch,
+            PlainRetType);
+      } else if (RetType->isVoidType()) {
+        Calls.Results[ACT::ACT_Suspend] =
+            S.MaybeCreateExprWithCleanups(AwaitSuspend);
+      } else {
+        EmitAwaitSuspendDiag(
+            diag::err_await_suspend_destroy_invalid_return_type);
+      }
+      // Support for coroutine_handle returning await_suspend.
+    } else if (Expr *TailCallSuspend =
+                   maybeTailCall(S, RetType, AwaitSuspend, Loc)) {
       // Note that we don't wrap the expression with ExprWithCleanups here
       // because that might interfere with tailcall contract (e.g. inserting
       // clean up instructions in-between tailcall and return). Instead
       // ExprWithCleanups is wrapped within maybeTailCall() prior to the resume
       // call.
       Calls.Results[ACT::ACT_Suspend] = TailCallSuspend;
-    else {
+    } else {
       // non-class prvalues always have cv-unqualified types
       if (RetType->isReferenceType() ||
           (!RetType->isBooleanType() && !RetType->isVoidType())) {
-        S.Diag(AwaitSuspend->getCalleeDecl()->getLocation(),
-               diag::err_await_suspend_invalid_return_type)
-            << RetType;
-        S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required)
-            << AwaitSuspend->getDirectCallee();
-        Calls.IsInvalid = true;
+        EmitAwaitSuspendDiag(diag::err_await_suspend_invalid_return_type);
       } else
         Calls.Results[ACT::ACT_Suspend] =
             S.MaybeCreateExprWithCleanups(AwaitSuspend);
@@ -949,6 +1029,8 @@ ExprResult Sema::BuildResolvedCoawaitExpr(SourceLocation Loc, Expr *Operand,
   Expr *Res = new (Context)
       CoawaitExpr(Loc, Operand, Awaiter, RSS.Results[0], RSS.Results[1],
                   RSS.Results[2], RSS.OpaqueValue, IsImplicit);
+  static_cast<CoroutineSuspendExpr *>(Res)->setUseAwaitSuspendDestroy(
+      RSS.UseAwaitSuspendDestroy);
 
   return Res;
 }
@@ -1006,6 +1088,8 @@ ExprResult Sema::BuildCoyieldExpr(SourceLocation Loc, Expr *E) {
   Expr *Res =
       new (Context) CoyieldExpr(Loc, Operand, E, RSS.Results[0], RSS.Results[1],
                                 RSS.Results[2], RSS.OpaqueValue);
+  static_cast<CoroutineSuspendExpr *>(Res)->setUseAwaitSuspendDestroy(
+      RSS.UseAwaitSuspendDestroy);
 
   return Res;
 }
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 3f37dfbc3dea9..c83a2601d19e4 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -480,6 +480,7 @@ void ASTStmtReader::VisitCoawaitExpr(CoawaitExpr *E) {
   for (auto &SubExpr: E->SubExprs)
     SubExpr = Record.readSubStmt();
   E->OpaqueValue = cast_or_null<OpaqueValueExpr>(Record.readSubStmt());
+  E->setUseAwaitSuspendDestroy(Record.readInt() != 0);
   E->setIsImplicit(Record.readInt() != 0);
 }
 
@@ -489,6 +490,7 @@ void ASTStmtReader::VisitCoyieldExpr(CoyieldExpr *E) {
   for (auto &SubExpr: E->SubExprs)
     SubExpr = Record.readSubStmt();
   E->OpaqueValue = cast_or_null<OpaqueValueExpr>(Record.readSubStmt());
+  E->setUseAwaitSuspendDestroy(Record.readInt() != 0);
 }
 
 void ASTStmtReader::VisitDependentCoawaitExpr(DependentCoawaitExpr *E) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index be9bad9e96cc1..25c7ab165edf0 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -445,6 +445,7 @@ void ASTStmtWriter::VisitCoroutineSuspendExpr(CoroutineSuspendExpr *E) {
   for (Stmt *S : E->children())
     Record.AddStmt(S);
   Record.AddStmt(E->getOpaqueValue());
+  Record.push_back(E->useAwaitSuspendDestroy());
 }
 
 void ASTStmtWriter::VisitCoawaitExpr(CoawaitExpr *E) {
diff --git a/clang/test/CodeGenCoroutines/coro-await-suspend-destroy.cpp b/clang/test/CodeGenCoroutines/coro-await-suspend-destroy.cpp
new file mode 100644
index 0000000000000..0169778993ae4
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-await-suspend-destroy.cpp
@@ -0,0 +1,301 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN:   -disable-llvm-passes | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN:   -O2 | FileCheck %s --check-prefix=CHECK-OPT
+
+// See `SemaCXX/coro-await-suspend-destroy-errors.cpp` for error checks.
+
+#include "Inputs/coroutine.h"
+
+// This is used to implement a few `await_suspend()`s annotated with the
+// [[clang::coro_await_suspend_destroy]] attribute. As a consequence, it is only
+// test-called, never emitted.
+//
+// The `operator new()` is meant to fail subsequent "no allocation" checks if
+// this does get emitted.
+//
+// It is followed by the recommended `await_suspend` stub, to check it compiles.
+#define STUB_AWAIT_SUSPEND(handle) \
+    operator new(1); \
+    await_suspend_destroy(handle.promise()); \
+    handle.destroy()
+
+// Use a dynamic `await_ready()` to ensure the suspend branch cannot be
+// optimized away. Implements everything but `await_suspend()`.
+struct BaseAwaiter {
+  bool ready_; // Runtime value returned by `await_ready()`.
+  bool await_ready() { return ready_; }
+  void await_resume() {}
+  BaseAwaiter(bool ready) : ready_{ready} {} // Tests build `Awaiter{ready}`.
+};
+
+// Promise base shared by both task types. For a coroutine to short-circuit, its
+// promise needs `std::suspend_never` for both initial and final suspend.
+template <typename TaskT>
+struct BasePromiseType {
+  TaskT get_return_object() { return {}; }
+  std::suspend_never initial_suspend() { return {}; }
+  std::suspend_never final_suspend() noexcept { return {}; }
+  void return_void() {}
+  void unhandled_exception() {}
+};
+
+// The coros look the same, but `MaybeSuspendingAwaiter` handles them differently.
+struct NonSuspendingTask { // `MaybeSuspendingAwaiter` uses the attribute here.
+  struct promise_type : BasePromiseType<NonSuspendingTask> {};
+};
+struct MaybeSuspendingTask { // `MaybeSuspendingAwaiter` has no attribute here.
+  struct promise_type : BasePromiseType<MaybeSuspendingTask> {};
+};
+
+// When a coro only uses short-circuiting awaiters, it should elide allocations.
+//   - `DestroyingAwaiter` is always short-circuiting
+//   - `MaybeSuspendingAwaiter` short-circuits only in `NonSuspendingTask`
+
+struct DestroyingAwaiter : BaseAwaiter {
+  void await_suspend_destroy(auto& promise) {} // Used instead of the stub.
+  [[clang::coro_await_suspend_destroy]]
+  void await_suspend(auto handle) { STUB_AWAIT_SUSPEND(handle); }
+};
+
+struct MaybeSuspendingAwaiter : BaseAwaiter {
+  // This overload lacks the attribute, so the coro uses `await.suspend`
+  // intrinsics, which currently trigger heap allocations for coro frames. Since
+  // the body isn't visible, escape analysis should prevent heap elision.
+  void await_suspend(std::coroutine_handle<MaybeSuspendingTask::promise_type>);
+
+  void await_suspend_destroy(NonSuspendingTask::promise_type&) {}
+  [[clang::coro_await_suspend_destroy]]
+  void await_suspend(std::coroutine_handle<NonSuspendingTask::promise_type> h) {
+    STUB_AWAIT_SUSPEND(h);
+  }
+};
+
+// Baseline case: should result in no allocation after optimization.
+NonSuspendingTask test_single_destroying_await(bool ready) {
+  co_await DestroyingAwaiter{ready};
+}
+
+// The reason this first `CHECK` test is so long is that it shows most of the
+// unoptimized IR before coroutine lowering. The granular detail is provided per
+// PR152623 code review, with the aim of helping future authors understand the
+// intended control flow.
+//
+// This mostly shows the standard coroutine flow. Find **ATTRIBUTE-SPECIFIC** in
+// the comments below to understand where the behavior diverges.
+
+// Basic coro setup
+
+// CHECK-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitb
+// CHECK: entry:
+// CHECK: %__promise = alloca %"struct.NonSuspendingTask::promise_type", align 1
+// CHECK: %[[PROMISE:.+]] = bitcast ptr %__promise to ptr
+// CHECK-NEXT: %[[CORO_ID:.+]] = call token @llvm.coro.id(i32 {{[0-9]+}}, ptr %[[PROMISE]],
+// CHECK-NEXT: %[[USE_DYNAMIC_ALLOC:.+]] = call i1 @llvm.coro.alloc(token %[[CORO_ID]])
+// CHECK-NEXT: br i1 %[[USE_DYNAMIC_ALLOC]], label %coro.alloc, label %coro.init
+
+// Conditional heap alloc -- must be elided after lowering
+
+// CHECK: coro.alloc: ; preds = %entry
+// CHECK: call{{.*}} @_Znwm
+
+// Init coro frame & handle initial suspend
+
+// CHECK: coro.init: ; preds = %coro.alloc, %entry
+// CHECK: %[[FRAME:.+]] = call ptr @llvm.coro.begin(token %[[CORO_ID]]
+//
+// CHECK: call{{.*}} @_ZN15BasePromiseTypeI17NonSuspendingTaskE15initial_suspendEv
+// CHECK-NEXT: %[[INIT_SUSPEND_READY:.+]] = call{{.*}} i1 @_ZNSt13suspend_never11await_readyEv
+// CHECK-NEXT: br i1 %[[INIT_SUSPEND_READY]], label %init.ready, label %init.suspend
+//
+// CHECK: init.suspend: ; preds = %coro.init
+// ... implementation omitted, not reached ...
+//
+// CHECK: init.ready: ; preds = %init.suspend, %coro.init
+
+// Handle the user-visible `co_await` suspend point:
+
+// CHECK: %[[CO_AWAIT_READY:.+]] = call{{.*}} i1 @_ZN11BaseAwaiter11await_readyEv(
+// CHECK-NEXT: br i1 %[[CO_AWAIT_READY]], label %await.ready, label %await.suspend
+
+// **ATTRIBUTE-SPECIFIC**
+//
+// This `co_await`'s suspend is trivial & lacks suspend intrinsics. For cleanup
+// we branch to the same location as `await_resume`, but diverge later.
+
+// CHECK: await.suspend:
+// CHECK-NEXT: call void @_Z28test_single_destroying_awaitb.__await_suspend_wrapper__await(ptr %{{.+}}, ptr %[[FRAME]])
+// CHECK-NEXT: br label %[[CO_AWAIT_CLEANUP:.+]]
+
+// When ready, call `await_resume`:
+
+// CHECK: await.ready:
+// CHECK-NEXT: call{{.*}} @_ZN11BaseAwaiter12await_resumeEv(ptr{{.*}} %{{.+}})
+// CHECK-NEXT: br label %[[CO_AWAIT_CLEANUP]]
+
+// Further cleanup is conditional on whether we did "ready" or "suspend":
+
+// CHECK: [[CO_AWAIT_CLEANUP]]: ; preds = %await.ready, %await.suspend
+// CHECK-NEXT: %[[CLEANUP_PHI:.+]] = phi i32 [ 0, %await.ready ], [ 2, %await.suspend ]
+// CHECK: switch i32 %[[CLEANUP_PHI]], label %[[ON_AWAIT_SUSPEND:.+]] [
+// CHECK: i32 0, label %[[ON_AWAIT_READY:.+]]
+// CHECK: ]
+
+// On "ready", we `co_return` and do final suspend (not shown).
+
+// CHECK: [[ON_AWAIT_READY]]: ; preds = %[[CO_AWAIT_CLEANUP]]
+// CHECK-NEXT: call void @_ZN15BasePromiseTypeI17NonSuspendingTaskE11return_voidEv(
+// CHECK-NEXT: br label %coro.final
+//
+// CHECK: coro.final: ; preds = %[[ON_AWAIT_READY]]
+//
+// ... here, we handle final suspend, and eventually ...
+//
+// CHECK: br label %[[ON_AWAIT_SUSPEND]]
+
+// This [[ON_AWAIT_SUSPEND]] is actually the "destroy scope" code path,
+// including conditional `operator delete`, which will be elided.
+
+// CHECK: [[ON_AWAIT_SUSPEND]]:
+// CHECK: %[[HEAP_OR_NULL:.+]] = call ptr @llvm.coro.free(token %[[CORO_ID]], ptr %[[FRAME]])
+// CHECK-NEXT: %[[NON_NULL:.+]] = icmp ne ptr %[[HEAP_OR_NULL]], null
+// CHECK-NEXT: br i1 %[[NON_NULL]], label %coro.free, label %after.coro.free
+
+// The `operator delete()` call will be removed by optimizations.
+
+// CHECK: coro.free:
+// CHECK-NEXT: %[[CORO_SIZE:.+]] = call i64 @llvm.coro.size.i64()
+// CHECK-NEXT: call void @_ZdlPvm(ptr noundef %[[HEAP_OR_NULL]], i64 noundef %[[CORO_SIZE]])
+// CHECK-NEXT: br label %after.coro.free
+
+// CHECK: after.coro.free:
+//
+// ... Not shown: the rest of coro teardown, and the handling of normal return
+// vs exception.
+
+// Don't let the matchers skip past the end of `test_single_destroying_await()`
+
+// CHECK: }
+
+// The optimized IR is thankfully brief.
+
+// CHECK-OPT: define{{.*}} void @_Z28test_single_destroying_awaitb({{.*}} {
+// CHECK-OPT-NEXT: entry:
+// CHECK-OPT-NEXT: ret void
+// CHECK-OPT-NEXT: }
+
+///////////////////////////////////////////////////////////////////////////////
+// The subsequent tests are variations on the above theme. For brevity, they do
+// not repeat the above coroutine skeleton, but only check for heap allocations.
+///////////////////////////////////////////////////////////////////////////////
+
+// Multiple `co_await`s, all with `coro_await_suspend_destroy`.
+NonSuspendingTask test_multiple_destroying_awaits(bool ready, bool condition) {
+  co_await DestroyingAwaiter{ready};
+  co_await MaybeSuspendingAwaiter{ready}; // Destroys `NonSuspendingTask`
+  if (condition) {
+    co_await DestroyingAwaiter{ready};
+  }
+}
+
+// The unlowered IR has heap allocs, but the optimized IR does not.
+
+// CHECK-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
+// CHECK: call{{.*}} @_Znwm
+// CHECK: call{{.*}} @_ZdlPvm
+// CHECK: }
+
+// CHECK-OPT-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
+// CHECK-OPT-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPT-NOT: call{{.*}} malloc
+// CHECK-OPT-NOT: call{{.*}} @_Znwm
+// CHECK-OPT: }
+
+// Same behavior as `test_multiple_destroying_awaits`, but with a
+// `MaybeSuspendingTask`, and without a `MaybeSuspendingAwaiter`.
+MaybeSuspendingTask test_multiple_destroying_awaits_too(bool ready, bool condition) {
+  co_await DestroyingAwaiter{ready};
+  co_await DestroyingAwaiter{ready}; // Still short-circuiting in this task type
+  if (condition) {
+    co_await DestroyingAwaiter{ready};
+  }
+}
+
+// The unlowered IR has heap allocs, but the optimized IR does not.
+
+// CHECK-LABEL: define{{.*}} void @_Z35test_multiple_destroying_awaits_toob
+// CHECK: call{{.*}} @_Znwm
+// CHECK: call{{.*}} @_ZdlPvm
+// CHECK: }
+
+// CHECK-OPT-LABEL: define{{.*}} void @_Z35test_multiple_destroying_awaits_toob
+// CHECK-OPT-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPT-NOT: call{{.*}} malloc
+// CHECK-OPT-NOT: call{{.*}} @_Znwm
+// CHECK-OPT: }
+
+// Mixed awaits - some with `coro_await_suspend_destroy`, some without.
+MaybeSuspendingTask test_mixed_awaits(bool ready) {
+  co_await MaybeSuspendingAwaiter{ready}; // Suspends `MaybeSuspendingTask`
+  co_await DestroyingAwaiter{ready};
+}
+
+// Both the unlowered & optimized IR have a heap allocation because not all
+// awaits destroy the coroutine.
+
+// CHECK-LABEL: define{{.*}} void @_Z17test_mixed_awaitsb
+// CHECK: call{{.*}} @_Znwm
+// CHECK: call{{.*}} @_ZdlPvm
+// CHECK: }
+
+// CHECK-OPT-LABEL: define{{.*}} void @_Z17test_mixed_awaitsb
+// CHECK-OPT: call{{.*}} @_Znwm
+// CHECK-OPT: call{{.*}} @_ZdlPvm
+// CHECK-OPT: }
+
+MaybeSuspendingTask test_unreachable_normal_suspend(bool ready) {
+  co_await DestroyingAwaiter{false};
+  // Unreachable in OPTIMIZED, so those builds don't see an allocation.
+  co_await MaybeSuspendingAwaiter{ready}; // Would suspend `MaybeSuspendingTask`
+}
+
+// The unlowered IR has heap allocs, but the optimized IR does not, since
+// `co_await DestroyingAwaiter{false}` is effectively a `co_return`.
+
+// CHECK-LABEL: define{{.*}} void @_Z31test_unreachable_normal_suspendb
+// CHECK: call{{.*}} @_Znwm
+// CHECK: call{{.*}} @_ZdlPvm
+// CHECK: }
+
+// CHECK-OPT-LABEL: define{{.*}} void @_Z31test_unreachable_normal_suspendb
+// CHECK-OPT-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPT-NOT: call{{.*}} malloc
+// CHECK-OPT-NOT: call{{.*}} @_Znwm
+// CHECK-OPT: }
+
+// Template awaitable with `coro_await_suspend_destroy` attribute. Checks for
+// bugs where we don't handle dependent types appropriately.
+template<typename T> // `T` is not referenced by any member.
+struct TemplateDestroyingAwaiter : BaseAwaiter {
+  void await_suspend_destroy(auto& promise) {}
+  [[clang::coro_await_suspend_destroy]]
+  void await_suspend(auto handle) { STUB_AWAIT_SUSPEND(handle); }
+};
+
+template <typename T>
+NonSuspendingTask test_template_destroying_await(bool ready) {
+  co_await TemplateDestroyingAwaiter<T>{ready}; // Awaiter type depends on `T`.
+}
+
+template NonSuspendingTask test_template_destroying_await<int>(bool ready);
+
+// CHECK-LABEL: define{{.*}} void @_Z30test_template_destroying_awaitIiE17NonSuspendingTaskb
+// CHECK: call{{.*}} @_Znwm
+// CHECK: call{{.*}} @_ZdlPvm
+// CHECK: }
+
+// CHECK-OPT-LABEL: define{{.*}} void @_Z30test_template_destroying_awaitIiE17NonSuspendingTaskb
+// CHECK-OPT-NOT: call{{.*}} @llvm.coro.alloc
+// CHECK-OPT-NOT: call{{.*}} malloc
+// CHECK-OPT-NOT: call{{.*}} @_Znwm
+// CHECK-OPT: }
diff --git a/clang/test/CodeGenCoroutines/issue148380.cpp b/clang/test/CodeGenCoroutines/issue148380.cpp
new file mode 100644
index 0000000000000..a3dc429f20b64
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/issue148380.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
+// RUN:   -O2 | FileCheck %s --check-prefix=CHECK-OPT
+
+// This test just confirms that `[[clang::coro_await_suspend_destroy]]` works
+// around the optimization problem from PR148380.
+//
+// See `coro-await-suspend-destroy.cpp` for a test showing the detailed control
+// flow in un-lowered, un-optimized IR.
+
+#include "Inputs/coroutine.h"
+
+struct coro { // Serves as both the coroutine return type and its own awaiter.
+  struct promise_type {
+    auto get_return_object() { return coro{}; }
+    auto initial_suspend() noexcept { return std::suspend_never{}; }
+    auto final_suspend() noexcept { return std::suspend_never{}; }
+    auto unhandled_exception() {}
+    auto return_void() {}
+  };
+
+  auto await_ready() { return false; } // Always takes the suspend branch.
+  void await_suspend_destroy(auto& promise) {}
+  [[clang::coro_await_suspend_destroy]] auto await_suspend(auto handle) {
+    // The attribute causes this stub not to be called.  Instead, we call
+    // `await_suspend_destroy()`, as on the next line.
+    await_suspend_destroy(handle.promise());
+    handle.destroy();
+  }
+  auto await_resume() {}
+};
+
+coro f1() noexcept; // Declaration only; the optimized IR keeps the call to it.
+coro f2() noexcept
+{
+    co_await f1();
+}
+
+// CHECK-OPT: define{{.+}} void @_Z2f2v({{.+}} {
+// CHECK-OPT-NEXT: entry:
+// CHECK-OPT-NEXT: tail call void @_Z2f1v()
+// CHECK-OPT-NEXT: ret void
+// CHECK-OPT-NEXT: }
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index b9cf7cf9462fe..830c681303824 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -63,6 +63,7 @@
 // CHECK-NEXT: Convergent (SubjectMatchRule_function)
 // CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
 // CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter)
+// CHECK-NEXT: CoroAwaitSuspendDestroy (SubjectMatchRule_function_is_member)
 // CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
 // CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
 // CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)
diff --git a/clang/test/SemaCXX/coro-await-suspend-destroy-errors.cpp b/clang/test/SemaCXX/coro-await-suspend-destroy-errors.cpp
new file mode 100644
index 0000000000000..3666fa0e28d20
--- /dev/null
+++ b/clang/test/SemaCXX/coro-await-suspend-destroy-errors.cpp
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -std=c++20 -verify %s
+
+#include "Inputs/std-coroutine.h"
+
+// Shared coroutine type with `std::suspend_never` for initial/final suspend.
+struct Task {
+  struct promise_type {
+    Task get_return_object() { return {}; }
+    std::suspend_never initial_suspend() { return {}; }
+    std::suspend_never final_suspend() noexcept { return {}; }
+    void return_void() {}
+    void unhandled_exception() {}
+  };
+};
+
+struct WrongReturnTypeAwaitable { // `await_suspend_destroy` returns non-`void`.
+  bool await_ready() { return false; }
+  bool await_suspend_destroy(auto& promise) { return true; } // expected-error {{return type of 'await_suspend_destroy' is required to be 'void' (have 'bool')}}
+  [[clang::coro_await_suspend_destroy]] 
+  bool await_suspend(auto handle) {}
+  void await_resume() {}
+};
+
+Task test_wrong_return_type() {
+  co_await WrongReturnTypeAwaitable{}; // expected-note {{call to 'await_suspend_destroy<Task::promise_type>' implicitly required by coroutine function here}}
+}
+
+struct NoSuchMemberAwaitable { // Attribute, but no `await_suspend_destroy`.
+  bool await_ready() { return false; }
+  [[clang::coro_await_suspend_destroy]] 
+  void await_suspend(auto handle) {}
+  void await_resume() {}
+};
+
+Task test_no_method() {
+  co_await NoSuchMemberAwaitable{}; // expected-error {{no member named 'await_suspend_destroy' in 'NoSuchMemberAwaitable'}}
+}
+
+struct WrongOverloadAwaitable { // No overload accepts the promise.
+  bool await_ready() { return false; }
+  void await_suspend_destroy(int x) {} // expected-note {{passing argument to parameter 'x' here}}
+  [[clang::coro_await_suspend_destroy]] 
+  void await_suspend(auto handle) {}
+  void await_resume() {}
+};
+
+Task test_wrong_overload() {
+  co_await WrongOverloadAwaitable{}; // expected-error {{no viable conversion from 'std::coroutine_traits<Task>::promise_type' (aka 'typename Task::promise_type') to 'int'}}
+}
+
+struct ReturnTypeMismatchAwaiter { // Return types of the two methods differ.
+  bool await_ready() { return false; }
+  void await_suspend_destroy(auto& promise) {} // expected-error {{return type of 'await_suspend' ('bool') must match return type of 'await_suspend_destroy' ('void')}}
+  [[clang::coro_await_suspend_destroy]] 
+  bool await_suspend(auto handle) { return true; }
+  void await_resume() {}
+};
+
+Task test_return_type_mismatch() {
+  co_await ReturnTypeMismatchAwaiter{}; // expected-note {{call to 'await_suspend_destroy<Task::promise_type>' implicitly required by coroutine function here}}
+}