-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang] Rely on global initialization for simpler derived types #114002
base: main
Are you sure you want to change the base?
Conversation
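@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-flang-fir-hlfir Author: None (NimishMishra) Changes: Currently, all derived types are initialized through `_FortranAInitialize`, which is functionally correct, but bears poor runtime performance. This patch falls back on global initialization for "simpler" derived types to speed up the initialization. Full diff: https://github.com/llvm/llvm-project/pull/114002.diff 6 Files Affected: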
diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h
index de394a39e112ed..ac285c846fc7d1 100644
--- a/flang/include/flang/Lower/ConvertVariable.h
+++ b/flang/include/flang/Lower/ConvertVariable.h
@@ -67,7 +67,7 @@ bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym);
/// Call default initialization runtime routine to initialize \p var.
void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym,
+ const Fortran::lower::pft::Variable &var,
Fortran::lower::SymMap &symMap);
/// Create a fir::GlobalOp given a module variable definition. This is intended
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index cc51d5a9bb8daf..c261d3b6c10fb8 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -776,9 +776,10 @@ mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) {
/// Call default initialization runtime routine to initialize \p var.
void Fortran::lower::defaultInitializeAtRuntime(
Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, Fortran::lower::SymMap &symMap) {
+ const Fortran::lower::pft::Variable &var, Fortran::lower::SymMap &symMap) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
+ const Fortran::semantics::Symbol &sym = var.getSymbol();
fir::ExtendedValue exv = converter.getSymbolExtendedValue(sym, &symMap);
if (Fortran::semantics::IsOptional(sym)) {
// 15.5.2.12 point 3, absent optional dummies are not initialized.
@@ -793,11 +794,35 @@ void Fortran::lower::defaultInitializeAtRuntime(
})
.end();
} else {
- mlir::Value box = builder.createBox(loc, exv);
- fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ /// For "simpler" types, relying on "_FortranAInitialize"
+ /// leads to poor runtime performance. Hence optimize
+ /// the same.
+ const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
+ mlir::Type symTy = converter.genType(var);
+ if (!var.isAlias() && !hasAllocatableDirectComponent(sym) &&
+ declTy->category() ==
+ Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
+ !mlir::isa<fir::SequenceType>(symTy) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
+ !sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+ std::string globalName = converter.mangleName(sym) + "_globalinit";
+ mlir::Location loc = genLocation(converter, sym);
+ mlir::StringAttr linkage = getLinkageAttribute(builder, var);
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
+ sym);
+ fir::GlobalOp global =
+ defineGlobal(converter, var, globalName, linkage, dataAttr);
+ auto addrOf = builder.create<fir::AddrOfOp>(loc, global.resultType(),
+ global.getSymbol());
+ fir::LoadOp load = builder.create<fir::LoadOp>(loc, addrOf.getResult());
+ builder.create<fir::StoreOp>(loc, load, fir::getBase(exv));
+ } else {
+ mlir::Value box = builder.createBox(loc, exv);
+ fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+ }
}
}
-
enum class VariableCleanUp { Finalize, Deallocate };
/// Check whether a local variable needs to be finalized according to clause
/// 7.5.6.3 point 3 or if it is an allocatable that must be deallocated. Note
@@ -943,8 +968,7 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
if (needDummyIntentoutFinalization(var))
finalizeAtRuntime(converter, var, symMap);
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
auto *builder = &converter.getFirOpBuilder();
mlir::Location loc = converter.getCurrentLocation();
@@ -1185,8 +1209,7 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter,
// do not try optimizing this to single default initializations of
// the equivalenced storages. Keep lowering simple.
if (mustBeDefaultInitializedAtRuntime(var))
- Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
- symMap);
+ Fortran::lower::defaultInitializeAtRuntime(converter, var, symMap);
}
//===--------------------------------------------------------------===//
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index 709ac402cc702d..ba8b7177953bb5 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -118,7 +118,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) {
bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
if (!isFirstPrivate &&
Fortran::lower::hasDefaultInitialization(sym->GetUltimate()))
- Fortran::lower::defaultInitializeAtRuntime(converter, *sym, *symTable);
+ Fortran::lower::defaultInitializeAtRuntime(converter, pft::Variable{*sym},
+ *symTable);
}
void DataSharingProcessor::copyFirstPrivateSymbol(
diff --git a/flang/test/Lower/HLFIR/structure-constructor.f90 b/flang/test/Lower/HLFIR/structure-constructor.f90
index 41d08c14f5fa98..68a29015f60177 100644
--- a/flang/test/Lower/HLFIR/structure-constructor.f90
+++ b/flang/test/Lower/HLFIR/structure-constructor.f90
@@ -98,12 +98,9 @@ end subroutine test3
! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>
! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}> {bindc_name = "res", uniq_name = "_QFtest3Eres"}
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtest3Eres"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
-! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
-! CHECK: %[[VAL_5:.*]] = fir.address_of(@_QQclX{{.*}}) : !fir.ref<!fir.char<1,{{[0-9]*}}>>
-! CHECK: %[[VAL_6:.*]] = arith.constant {{[0-9]*}} : i32
-! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_4]] : (!fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<none>
-! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_5]] : (!fir.ref<!fir.char<1,{{[0-9]*}}>>) -> !fir.ref<i8>
-! CHECK: %[[VAL_9:.*]] = fir.call @_FortranAInitialize(%[[VAL_7]], %[[VAL_8]], %[[VAL_6]]) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> none
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest3Eres_globalinit) : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOADED_VAL]] to %[[VAL_3]]#1 : !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest3Ex"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "ctor.temp"} : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
! CHECK: %[[VAL_12:.*]] = fir.embox %[[VAL_11]]#0 : (!fir.ref<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.box<!fir.type<_QMtypesTt3{r:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
diff --git a/flang/test/Lower/default-initialization.f90 b/flang/test/Lower/default-initialization.f90
index 7a6133452b3a25..f6e37d57f19eb4 100644
--- a/flang/test/Lower/default-initialization.f90
+++ b/flang/test/Lower/default-initialization.f90
@@ -22,9 +22,9 @@ module test_dinit
! CHECK-LABEL: func @_QMtest_dinitPlocal()
subroutine local
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFlocalEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: x
print *, x%i
end subroutine
@@ -56,9 +56,9 @@ subroutine local_alloc_comp
! CHECK-LABEL: func @_QMtest_dinitPresult() -> !fir.type<_QMtest_dinitTt{i:i32}>
function result()
! CHECK: %[[x:.*]] = fir.alloca !fir.type<_QMtest_dinitTt{i:i32}>
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFresultEresult_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t) :: result
end function
@@ -66,9 +66,9 @@ function result()
! CHECK-LABEL: func @_QMtest_dinitPintent_out(
! CHECK-SAME: %[[x:.*]]: !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
subroutine intent_out(x)
- ! CHECK: %[[xbox:.*]] = fir.embox %[[x]] : (!fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>) -> !fir.box<!fir.type<_QMtest_dinitTt{i:i32}>>
- ! CHECK: %[[xboxNone:.*]] = fir.convert %[[xbox]]
- ! CHECK: fir.call @_FortranAInitialize(%[[xboxNone]], %{{.*}}, %{{.*}}) {{.*}}: (!fir.box<none>, !fir.ref<i8>, i32) -> none
+ ! CHECK: %[[ADDR:.*]] = fir.address_of(@_QMtest_dinitFintent_outEx_globalinit) : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: %[[LOADED_VAL:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
+ ! CHECK: fir.store %[[LOADED_VAL]] to %[[x]] : !fir.ref<!fir.type<_QMtest_dinitTt{i:i32}>>
type(t), intent(out) :: x
end subroutine
diff --git a/flang/test/Lower/pointer-default-init.f90 b/flang/test/Lower/pointer-default-init.f90
index 0fb42683a3486b..0e97f3bea90024 100644
--- a/flang/test/Lower/pointer-default-init.f90
+++ b/flang/test/Lower/pointer-default-init.f90
@@ -38,7 +38,9 @@ subroutine test_local()
type(t) :: x
end subroutine
! CHECK-LABEL: func.func @_QPtest_local() {
-! CHECK: fir.call @_FortranAInitialize(
+! CHECK: %[[ADDR:.*]] = fir.address_of(@_QFtest_localEx_globalinit) : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
+! CHECK: fir.store %[[LOAD]] to {{.*}} : !fir.ref<!fir.type<_QMtestTt{i:i32,x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>
subroutine test_saved()
use test, only : t
|
For the following test case, here are the runtimes with/without this patch: Without patch: With patch: About 9 seconds of improvement in runtime.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall approach looks good to me, some comments inlined.
Can you also give some performance improvement numbers to back up/document this PR? [edit: just saw your comment above, thanks]
7e1f567
to
1b3ab16
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the update. A few comments below; the direction looks good to me.
@@ -67,7 +67,7 @@ bool hasDefaultInitialization(const Fortran::semantics::Symbol &sym); | |||
|
|||
/// Call default initialization runtime routine to initialize \p var. | |||
void defaultInitializeAtRuntime(Fortran::lower::AbstractConverter &converter, | |||
const Fortran::semantics::Symbol &sym, | |||
const Fortran::lower::pft::Variable &var, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can switch to `Symbol` here now and have the caller do the `var.getSymbol()`. `converter.genType()` accepts `Symbol` too, and `var.isTarget` can be replaced by `symbol.getUltimate().attrs().test(Fortran::semantics::Attr::TARGET)`.
Using `Symbol` for helpers is better when that is sufficient, because that enables using them in places where a `pft::Variable` is not accessible (in general, one should not create a `pft::Variable` by simply wrapping a `Symbol`; some analysis/invariants are needed to rule out that it is an alias, for instance).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation; yeah, we would ideally try to avoid `pft::Variable{sym}`.
mlir::StringAttr linkage = builder.createInternalLinkage(); | ||
cuf::DataAttributeAttr dataAttr = | ||
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), | ||
sym); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@clementval. I am not sure copying the CUDA data attributes from the local/dummy symbol that must be default initialized to the global makes sense. Are some attributes needed for the global if the dynamic initialization happens on the device, or will the global with the initial image be automatically cloned/mapped to the device if needed?
flang/lib/Lower/ConvertVariable.cpp
Outdated
}); | ||
} else if (!global) { | ||
global = builder.createGlobal(loc, symTy, globalName, linkage, | ||
mlir::Attribute{}, isConstant(sym), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`isConstant(sym)` is what should be swapped with `true` to get read-only memory.
flang/lib/Lower/ConvertVariable.cpp
Outdated
!mlir::isa<fir::SequenceType>(symTy) && | ||
!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) && | ||
!sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { | ||
std::string globalName = converter.mangleName(*declTy->AsDerived()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using the derived type makes sense to me, but some prefix/suffix would be nice (fir.TypeInfoOp ops already use that as their MLIR symbol name, so it is better to avoid conflicts here), as well as using the compiler-generated name prefix (`doGenerated()`).
I would go for `fir::NameUniquer::doGenerated(mangledName + kNameSeparator + kDerivedTypeInitSuffix)`, where `kDerivedTypeInitSuffix` can be defined as `init` in Optimizer/Support/InternalNames.h.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. I've pushed a new commit with these changes.
1b3ab16
to
d0f986a
Compare
Currently, all derived types are initialized through `_FortranAInitialize`, which is functionally correct, but bears poor runtime performance. This patch falls back on global initialization for "simpler" derived types to speed up the initialization.