From cb888971d233a53282c31f498b79b9d788a402d6 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet
Date: Thu, 24 Nov 2022 19:24:21 +0000
Subject: [PATCH] [libc] Improve bcmp performance for aarch64

---
 libc/src/string/memory_utils/bcmp_implementations.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index 2e18ee81aaf6..7a7054b3376f 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -116,7 +116,7 @@ inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
 inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
   if (likely(count <= 32)) {
     if (unlikely(count >= 16)) {
-      return generic::Bcmp<16>::head_tail(p1, p2, count);
+      return aarch64::Bcmp<16>::head_tail(p1, p2, count);
     }
     switch (count) {
     case 0:
@@ -147,15 +147,15 @@ inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
   }
 
   if (count <= 64)
-    return generic::Bcmp<32>::head_tail(p1, p2, count);
+    return aarch64::Bcmp<32>::head_tail(p1, p2, count);
 
   // Aligned loop if > 256, otherwise normal loop
-  if (count > 256) {
-    if (auto value = generic::Bcmp<32>::block(p1, p2))
+  if (unlikely(count > 256)) {
+    if (auto value = aarch64::Bcmp<32>::block(p1, p2))
       return value;
     align_to_next_boundary<16, Arg::P1>(p1, p2, count);
   }
-  return generic::Bcmp<32>::loop_and_tail(p1, p2, count);
+  return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
 }
 #endif // defined(LLVM_LIBC_ARCH_AARCH64)
 