From 2c498ee497837a2a8e8a0f285306d37528077ef4 Mon Sep 17 00:00:00 2001 From: Lixun Zhang Date: Sat, 5 Oct 2024 09:40:39 -0500 Subject: [PATCH] [AMD] Add back "Hint compiler to preload kernel args" (#4830) This reverts ad9afc8 since the issue was fixed by https://github.com/llvm/llvm-project/pull/104743 --- third_party/amd/backend/compiler.py | 5 +++++ third_party/amd/python/triton_amd.cc | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/third_party/amd/backend/compiler.py b/third_party/amd/backend/compiler.py index 61a782f334d0..6450d582457e 100644 --- a/third_party/amd/backend/compiler.py +++ b/third_party/amd/backend/compiler.py @@ -265,6 +265,11 @@ def make_llir(src, metadata, options): denormal_mode = "preserve-sign" if options.allow_flush_denorm else "ieee" fns[0].add_fn_attr("denormal-fp-math-f32", denormal_mode) + # Hint the compiler that we'd like the firmware to set the kernel arguments + # to user SGPRs so that the kernel does not need to s_load its arguments + # from memory. + amd.set_all_fn_arg_inreg(fns[0]) + if options.extern_libs: paths = [path for (name, path) in options.extern_libs if amd.need_extern_lib(llvm_mod, name)] llvm.link_extern_libs(llvm_mod, paths) diff --git a/third_party/amd/python/triton_amd.cc b/third_party/amd/python/triton_amd.cc index 84558ea12e3b..5b5cca5b053e 100644 --- a/third_party/amd/python/triton_amd.cc +++ b/third_party/amd/python/triton_amd.cc @@ -257,4 +257,13 @@ void init_triton_amd(py::module &&m) { return false; } }); + + m.def("set_all_fn_arg_inreg", [](llvm::Function *fn) { + for (llvm::Argument &arg : fn->args()) { + // Check for incompatible attributes. + if (arg.hasByRefAttr() || arg.hasNestAttr()) + continue; + arg.addAttr(llvm::Attribute::InReg); + } + }); }