at master 2.4 kB view raw
1diff --git a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h 2index 7f05c2ad166..1632b595c4c 100644 3--- a/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h 4+++ b/aten/src/ATen/cpu/vec/sve/vec_bfloat16.h 5@@ -220,8 +220,12 @@ class Vectorized<BFloat16> { 6 Vectorized<BFloat16> le(const Vectorized<BFloat16>& other) const; 7 }; 8 9-inline std::tuple<Vectorized<float>, Vectorized<float>> convert_bfloat16_float( 10- const Vectorized<c10::BFloat16>& a) { 11+#if defined(__GNUC__) && __GNUC__ == 14 12+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE 13+__attribute__((optimize("no-tree-vectorize"))) 14+#endif 15+inline std::tuple<Vectorized<float>, Vectorized<float>> 16+convert_bfloat16_float(const Vectorized<c10::BFloat16>& a) { 17 static_assert( 18 Vectorized<c10::BFloat16>::size() == 2 * Vectorized<float>::size()); 19 auto zero = svreinterpret_bf16_f32(svdup_n_f32(0.0f)); 20diff --git a/aten/src/ATen/native/cpu/Activation.cpp b/aten/src/ATen/native/cpu/Activation.cpp 21index 52d5383e60f..00c9f4eb253 100644 22--- a/aten/src/ATen/native/cpu/Activation.cpp 23+++ b/aten/src/ATen/native/cpu/Activation.cpp 24@@ -26,6 +26,10 @@ namespace at::native { 25 26 namespace { 27 28+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__aarch64__) && !defined(__ARM_FEATURE_SVE) 29+// Workaround for gcc-14.2.0 ICE during RTL pass: expand when compiling for NEON 30+__attribute__((optimize("no-tree-vectorize"))) 31+#endif 32 static void log_sigmoid_cpu_kernel(TensorBase &output, TensorBase &buffer, const TensorBase &input) { 33 if (at::isReducedFloatingType(input.scalar_type())) { 34 AT_DISPATCH_REDUCED_FLOATING_TYPES(input.scalar_type(), "log_sigmoid_cpu", [&]() { 35diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp 36index 8ef0741e77a..8c94decfff0 100644 37--- a/aten/src/ATen/native/cpu/Unfold2d.cpp 38+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp 39@@ -169,6 +169,10 @@ static void unfolded2d_acc_channels_last( 40 41 /* note: due to write issues, this one cannot be parallelized as well as 42 * unfolded2d_copy */ 43+#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_FEATURE_SVE) && !defined(__ARM_FEATURE_BF16) 44+// Workaround for gcc-14.2.0 ICE during RTL pass: vregs when compiling for SVE without BF16 45+__attribute__((optimize("no-tree-vectorize"))) 46+#endif 47 void unfolded2d_acc_kernel( 48 ScalarType dtype, 49 void *finput_data,