-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[flang][OpenMP] Reassociate floating-point ATOMIC update expressions #155840
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][OpenMP] Reassociate floating-point ATOMIC update expressions #155840
Conversation
This is a follow-up to PR153488; this time the reassociation is enabled for floating-point expressions, but only when associative-math is enabled in the language options. This can be done via -ffast-math on the command line.
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-flang-semantics Author: Krzysztof Parzyszek (kparzysz) Changes: This is a follow-up to PR153488; this time the reassociation is enabled for floating-point expressions, but only when associative-math is enabled in the language options. This can be done via -ffast-math on the command line. Full diff: https://github.com/llvm/llvm-project/pull/155840.diff 3 Files Affected:
diff --git a/flang/include/flang/Evaluate/match.h b/flang/include/flang/Evaluate/match.h
index 79da40f7c1338..01932226fa500 100644
--- a/flang/include/flang/Evaluate/match.h
+++ b/flang/include/flang/Evaluate/match.h
@@ -8,6 +8,7 @@
#ifndef FORTRAN_EVALUATE_MATCH_H_
#define FORTRAN_EVALUATE_MATCH_H_
+#include "flang/Common/Fortran-consts.h"
#include "flang/Common/visit.h"
#include "flang/Evaluate/expression.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,15 +35,29 @@ struct IsOperation<T, std::void_t<decltype(T::operands)>> {
template <typename T>
constexpr bool is_operation_v{detail::IsOperation<T>::value};
-template <typename T>
-const evaluate::Expr<T> &deparen(const evaluate::Expr<T> &x) {
- if (auto *parens{std::get_if<evaluate::Parentheses<T>>(&x.u)}) {
+template <common::TypeCategory C, int K>
+const evaluate::Expr<Type<C, K>> &deparen(const evaluate::Expr<Type<C, K>> &x) {
+ if (auto *parens{std::get_if<Parentheses<Type<C, K>>>(&x.u)}) {
return deparen(parens->template operand<0>());
} else {
return x;
}
}
+template <common::TypeCategory C>
+const evaluate::Expr<SomeKind<C>> &deparen(
+ const evaluate::Expr<SomeKind<C>> &x) {
+ return x;
+}
+
+// Some expressions (e.g. TypelessExpression) don't allow parentheses, while
+// those that do have Expr<Type> as the argument to the parentheses. This means
+// that there is no consistent return type that works for all expressions.
+// Delete this overload explicitly so an attempt to use it creates a clearer
+// error message.
+const evaluate::Expr<SomeType> &deparen(
+ const evaluate::Expr<SomeType> &) = delete;
+
// Expr<T> matchers (patterns)
//
// Each pattern should implement
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index 50e63d356be02..f25497ece61c4 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -67,6 +67,22 @@ struct IsIntegral<evaluate::Type<C, K>> {
template <typename T> constexpr bool is_integral_v{IsIntegral<T>::value};
+template <typename...> struct IsFloatingPoint {
+ static constexpr bool value{false};
+};
+
+template <common::TypeCategory C, int K>
+struct IsFloatingPoint<evaluate::Type<C, K>> {
+ static constexpr bool value{//
+ C == common::TypeCategory::Real || C == common::TypeCategory::Complex};
+};
+
+template <typename T>
+constexpr bool is_floating_point_v{IsFloatingPoint<T>::value};
+
+template <typename T>
+constexpr bool is_numeric_v{is_integral_v<T> || is_floating_point_v<T>};
+
template <typename T, typename Op0, typename Op1>
using ReassocOpBase = evaluate::match::AnyOfPattern< //
evaluate::match::Add<T, Op0, Op1>, //
@@ -88,7 +104,8 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
using Id = evaluate::rewrite::Identity;
struct NonIntegralTag {};
- ReassocRewriter(const SomeExpr &atom) : atom_(atom) {}
+ ReassocRewriter(const SomeExpr &atom, const SemanticsContext &context)
+ : atom_(atom), context_(context) {}
// Try to find cases where the input expression is of the form
// (1) (a . b) . c, or
@@ -102,8 +119,13 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
// For example, assuming x is the atomic variable:
// (a + x) + b -> (a + b) + x, i.e. (conceptually) swap x and b.
template <typename T, typename U,
- typename = std::enable_if_t<is_integral_v<T>>>
+ typename = std::enable_if_t<is_numeric_v<T>>>
evaluate::Expr<T> operator()(evaluate::Expr<T> &&x, const U &u) {
+ if constexpr (is_floating_point_v<T>) {
+ if (!context_.langOptions().AssociativeMath) {
+ return Id::operator()(std::move(x), u);
+ }
+ }
// As per the above comment, there are 3 subexpressions involved in this
// transformation. A match::Expr<T> will match evaluate::Expr<U> when T is
// same as U, plus it will store a pointer (ref) to the matched expression.
@@ -169,7 +191,7 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
}
template <typename T, typename U,
- typename = std::enable_if_t<!is_integral_v<T>>>
+ typename = std::enable_if_t<!is_numeric_v<T>>>
evaluate::Expr<T> operator()(
evaluate::Expr<T> &&x, const U &u, NonIntegralTag = {}) {
return Id::operator()(std::move(x), u);
@@ -181,6 +203,7 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
}
const SomeExpr &atom_;
+ const SemanticsContext &context_;
};
struct AnalyzedCondStmt {
@@ -809,7 +832,7 @@ OmpStructureChecker::CheckAtomicUpdateAssignment(
CheckStorageOverlap(atom, GetNonAtomArguments(atom, update.rhs), source);
return std::nullopt;
} else if (tryReassoc) {
- ReassocRewriter ra(atom);
+ ReassocRewriter ra(atom, context_);
SomeExpr raRhs{evaluate::rewrite::Mutator(ra)(update.rhs)};
std::tie(hasErrors, tryReassoc) = CheckAtomicUpdateAssignmentRhs(
diff --git a/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90 b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
new file mode 100644
index 0000000000000..c86589cacd679
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
@@ -0,0 +1,100 @@
+!RUN: %flang_fc1 -emit-hlfir -ffast-math -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+subroutine f00(x, y)
+ implicit none
+ real :: x, y
+
+ !$omp atomic update
+ x = ((x + 1) + y) + 2
+end
+
+!CHECK-LABEL: func.func @_QPf00
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %cst, %[[LOAD_Y]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 2.000000e+00 : f32
+!CHECK: %[[Y_1_2:[0-9]+]] = arith.addf %[[Y_1]], %cst_0 fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG]], %[[Y_1_2]] fastmath<fast> : f32
+!CHECK: omp.yield(%[[ARG_P]] : f32)
+!CHECK: }
+
+
+subroutine f01(x, y, z)
+ implicit none
+ complex :: x, y, z
+
+ !$omp atomic update
+ x = (x + y) + z
+end
+
+!CHECK-LABEL: func.func @_QPf01
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[Z:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[LOAD_Z:[0-9]+]] = fir.load %[[Z]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[Y_Z:[0-9]+]] = fir.addc %[[LOAD_Y]], %[[LOAD_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK: %[[ARG_P:[0-9]+]] = fir.addc %[[ARG]], %[[Y_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK: omp.yield(%[[ARG_P]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f02(x, y)
+ implicit none
+ complex :: x
+ real :: y
+
+ !$omp atomic update
+ x = (real(x) + y) + 1
+end
+
+!CHECK-LABEL: func.func @_QPf02
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %[[LOAD_Y]], %cst fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK: %[[ARG_X:[0-9]+]] = fir.extract_value %[[ARG]], [0 : index] : (complex<f32>) -> f32
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_X]], %[[Y_1]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 0.000000e+00 : f32
+!CHECK: %[[CPLX:[0-9]+]] = fir.undefined complex<f32>
+!CHECK: %[[CPLX_I:[0-9]+]] = fir.insert_value %[[CPLX]], %[[ARG_P]], [0 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK: %[[CPLX_R:[0-9]+]] = fir.insert_value %[[CPLX_I]], %cst_0, [1 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK: omp.yield(%[[CPLX_R]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f03(x, a, b, c)
+ implicit none
+ real(kind=4) :: x
+ real(kind=8) :: a, b, c
+
+ !$omp atomic update
+ x = ((b + a) + x) + c
+end
+
+!CHECK-LABEL: func.func @_QPf03
+!CHECK: %[[A:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[B:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[C:[0-9]+]]:2 = hlfir.declare %arg3
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[LOAD_B:[0-9]+]] = fir.load %[[B]]#0 : !fir.ref<f64>
+!CHECK: %[[LOAD_A:[0-9]+]] = fir.load %[[A]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B:[0-9]+]] = arith.addf %[[LOAD_B]], %[[LOAD_A]] fastmath<fast> : f64
+!CHECK: %[[LOAD_C:[0-9]+]] = fir.load %[[C]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B_C:[0-9]+]] = arith.addf %[[A_B]], %[[LOAD_C]] fastmath<fast> : f64
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK: %[[ARG_8:[0-9]+]] = fir.convert %[[ARG]] : (f32) -> f64
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_8]], %[[A_B_C]] fastmath<fast> : f64
+!CHECK: %[[ARG_4:[0-9]+]] = fir.convert %[[ARG_P]] : (f64) -> f32
+!CHECK: omp.yield(%[[ARG_4]] : f32)
+!CHECK: }
|
@llvm/pr-subscribers-flang-fir-hlfir Author: Krzysztof Parzyszek (kparzysz) Changes: This is a follow-up to PR153488; this time the reassociation is enabled for floating-point expressions, but only when associative-math is enabled in the language options. This can be done via -ffast-math on the command line. Full diff: https://github.com/llvm/llvm-project/pull/155840.diff 3 Files Affected:
diff --git a/flang/include/flang/Evaluate/match.h b/flang/include/flang/Evaluate/match.h
index 79da40f7c1338..01932226fa500 100644
--- a/flang/include/flang/Evaluate/match.h
+++ b/flang/include/flang/Evaluate/match.h
@@ -8,6 +8,7 @@
#ifndef FORTRAN_EVALUATE_MATCH_H_
#define FORTRAN_EVALUATE_MATCH_H_
+#include "flang/Common/Fortran-consts.h"
#include "flang/Common/visit.h"
#include "flang/Evaluate/expression.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,15 +35,29 @@ struct IsOperation<T, std::void_t<decltype(T::operands)>> {
template <typename T>
constexpr bool is_operation_v{detail::IsOperation<T>::value};
-template <typename T>
-const evaluate::Expr<T> &deparen(const evaluate::Expr<T> &x) {
- if (auto *parens{std::get_if<evaluate::Parentheses<T>>(&x.u)}) {
+template <common::TypeCategory C, int K>
+const evaluate::Expr<Type<C, K>> &deparen(const evaluate::Expr<Type<C, K>> &x) {
+ if (auto *parens{std::get_if<Parentheses<Type<C, K>>>(&x.u)}) {
return deparen(parens->template operand<0>());
} else {
return x;
}
}
+template <common::TypeCategory C>
+const evaluate::Expr<SomeKind<C>> &deparen(
+ const evaluate::Expr<SomeKind<C>> &x) {
+ return x;
+}
+
+// Some expressions (e.g. TypelessExpression) don't allow parentheses, while
+// those that do have Expr<Type> as the argument to the parentheses. This means
+// that there is no consistent return type that works for all expressions.
+// Delete this overload explicitly so an attempt to use it creates a clearer
+// error message.
+const evaluate::Expr<SomeType> &deparen(
+ const evaluate::Expr<SomeType> &) = delete;
+
// Expr<T> matchers (patterns)
//
// Each pattern should implement
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index 50e63d356be02..f25497ece61c4 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -67,6 +67,22 @@ struct IsIntegral<evaluate::Type<C, K>> {
template <typename T> constexpr bool is_integral_v{IsIntegral<T>::value};
+template <typename...> struct IsFloatingPoint {
+ static constexpr bool value{false};
+};
+
+template <common::TypeCategory C, int K>
+struct IsFloatingPoint<evaluate::Type<C, K>> {
+ static constexpr bool value{//
+ C == common::TypeCategory::Real || C == common::TypeCategory::Complex};
+};
+
+template <typename T>
+constexpr bool is_floating_point_v{IsFloatingPoint<T>::value};
+
+template <typename T>
+constexpr bool is_numeric_v{is_integral_v<T> || is_floating_point_v<T>};
+
template <typename T, typename Op0, typename Op1>
using ReassocOpBase = evaluate::match::AnyOfPattern< //
evaluate::match::Add<T, Op0, Op1>, //
@@ -88,7 +104,8 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
using Id = evaluate::rewrite::Identity;
struct NonIntegralTag {};
- ReassocRewriter(const SomeExpr &atom) : atom_(atom) {}
+ ReassocRewriter(const SomeExpr &atom, const SemanticsContext &context)
+ : atom_(atom), context_(context) {}
// Try to find cases where the input expression is of the form
// (1) (a . b) . c, or
@@ -102,8 +119,13 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
// For example, assuming x is the atomic variable:
// (a + x) + b -> (a + b) + x, i.e. (conceptually) swap x and b.
template <typename T, typename U,
- typename = std::enable_if_t<is_integral_v<T>>>
+ typename = std::enable_if_t<is_numeric_v<T>>>
evaluate::Expr<T> operator()(evaluate::Expr<T> &&x, const U &u) {
+ if constexpr (is_floating_point_v<T>) {
+ if (!context_.langOptions().AssociativeMath) {
+ return Id::operator()(std::move(x), u);
+ }
+ }
// As per the above comment, there are 3 subexpressions involved in this
// transformation. A match::Expr<T> will match evaluate::Expr<U> when T is
// same as U, plus it will store a pointer (ref) to the matched expression.
@@ -169,7 +191,7 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
}
template <typename T, typename U,
- typename = std::enable_if_t<!is_integral_v<T>>>
+ typename = std::enable_if_t<!is_numeric_v<T>>>
evaluate::Expr<T> operator()(
evaluate::Expr<T> &&x, const U &u, NonIntegralTag = {}) {
return Id::operator()(std::move(x), u);
@@ -181,6 +203,7 @@ struct ReassocRewriter : public evaluate::rewrite::Identity {
}
const SomeExpr &atom_;
+ const SemanticsContext &context_;
};
struct AnalyzedCondStmt {
@@ -809,7 +832,7 @@ OmpStructureChecker::CheckAtomicUpdateAssignment(
CheckStorageOverlap(atom, GetNonAtomArguments(atom, update.rhs), source);
return std::nullopt;
} else if (tryReassoc) {
- ReassocRewriter ra(atom);
+ ReassocRewriter ra(atom, context_);
SomeExpr raRhs{evaluate::rewrite::Mutator(ra)(update.rhs)};
std::tie(hasErrors, tryReassoc) = CheckAtomicUpdateAssignmentRhs(
diff --git a/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90 b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
new file mode 100644
index 0000000000000..c86589cacd679
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-update-reassoc-fp.f90
@@ -0,0 +1,100 @@
+!RUN: %flang_fc1 -emit-hlfir -ffast-math -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+subroutine f00(x, y)
+ implicit none
+ real :: x, y
+
+ !$omp atomic update
+ x = ((x + 1) + y) + 2
+end
+
+!CHECK-LABEL: func.func @_QPf00
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %cst, %[[LOAD_Y]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 2.000000e+00 : f32
+!CHECK: %[[Y_1_2:[0-9]+]] = arith.addf %[[Y_1]], %cst_0 fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG]], %[[Y_1_2]] fastmath<fast> : f32
+!CHECK: omp.yield(%[[ARG_P]] : f32)
+!CHECK: }
+
+
+subroutine f01(x, y, z)
+ implicit none
+ complex :: x, y, z
+
+ !$omp atomic update
+ x = (x + y) + z
+end
+
+!CHECK-LABEL: func.func @_QPf01
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[Z:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[LOAD_Z:[0-9]+]] = fir.load %[[Z]]#0 : !fir.ref<complex<f32>>
+!CHECK: %[[Y_Z:[0-9]+]] = fir.addc %[[LOAD_Y]], %[[LOAD_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK: %[[ARG_P:[0-9]+]] = fir.addc %[[ARG]], %[[Y_Z]] {fastmath = #arith.fastmath<fast>} : complex<f32>
+!CHECK: omp.yield(%[[ARG_P]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f02(x, y)
+ implicit none
+ complex :: x
+ real :: y
+
+ !$omp atomic update
+ x = (real(x) + y) + 1
+end
+
+!CHECK-LABEL: func.func @_QPf02
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[Y:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[LOAD_Y:[0-9]+]] = fir.load %[[Y]]#0 : !fir.ref<f32>
+!CHECK: %cst = arith.constant 1.000000e+00 : f32
+!CHECK: %[[Y_1:[0-9]+]] = arith.addf %[[LOAD_Y]], %cst fastmath<fast> : f32
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<complex<f32>> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: complex<f32>):
+!CHECK: %[[ARG_X:[0-9]+]] = fir.extract_value %[[ARG]], [0 : index] : (complex<f32>) -> f32
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_X]], %[[Y_1]] fastmath<fast> : f32
+!CHECK: %cst_0 = arith.constant 0.000000e+00 : f32
+!CHECK: %[[CPLX:[0-9]+]] = fir.undefined complex<f32>
+!CHECK: %[[CPLX_I:[0-9]+]] = fir.insert_value %[[CPLX]], %[[ARG_P]], [0 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK: %[[CPLX_R:[0-9]+]] = fir.insert_value %[[CPLX_I]], %cst_0, [1 : index] : (complex<f32>, f32) -> complex<f32>
+!CHECK: omp.yield(%[[CPLX_R]] : complex<f32>)
+!CHECK: }
+
+
+subroutine f03(x, a, b, c)
+ implicit none
+ real(kind=4) :: x
+ real(kind=8) :: a, b, c
+
+ !$omp atomic update
+ x = ((b + a) + x) + c
+end
+
+!CHECK-LABEL: func.func @_QPf03
+!CHECK: %[[A:[0-9]+]]:2 = hlfir.declare %arg1
+!CHECK: %[[B:[0-9]+]]:2 = hlfir.declare %arg2
+!CHECK: %[[C:[0-9]+]]:2 = hlfir.declare %arg3
+!CHECK: %[[X:[0-9]+]]:2 = hlfir.declare %arg0
+!CHECK: %[[LOAD_B:[0-9]+]] = fir.load %[[B]]#0 : !fir.ref<f64>
+!CHECK: %[[LOAD_A:[0-9]+]] = fir.load %[[A]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B:[0-9]+]] = arith.addf %[[LOAD_B]], %[[LOAD_A]] fastmath<fast> : f64
+!CHECK: %[[LOAD_C:[0-9]+]] = fir.load %[[C]]#0 : !fir.ref<f64>
+!CHECK: %[[A_B_C:[0-9]+]] = arith.addf %[[A_B]], %[[LOAD_C]] fastmath<fast> : f64
+!CHECK: omp.atomic.update memory_order(relaxed) %[[X]]#0 : !fir.ref<f32> {
+!CHECK: ^bb0(%[[ARG:arg[0-9]+]]: f32):
+!CHECK: %[[ARG_8:[0-9]+]] = fir.convert %[[ARG]] : (f32) -> f64
+!CHECK: %[[ARG_P:[0-9]+]] = arith.addf %[[ARG_8]], %[[A_B_C]] fastmath<fast> : f64
+!CHECK: %[[ARG_4:[0-9]+]] = fir.convert %[[ARG_P]] : (f64) -> f32
+!CHECK: omp.yield(%[[ARG_4]] : f32)
+!CHECK: }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is a follow-up to PR153488; this time the reassociation is enabled for floating-point expressions, but only when associative-math is enabled in the language options. This can be done via -ffast-math on the command line.