[libc] Improve bcmp performance for aarch64

This commit is contained in:
Guillaume Chatelet 2022-11-24 19:24:21 +00:00
parent ae852750b3
commit cb888971d2
1 changed file with 5 additions and 5 deletions

View File

@@ -116,7 +116,7 @@ inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) { inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
if (likely(count <= 32)) { if (likely(count <= 32)) {
if (unlikely(count >= 16)) { if (unlikely(count >= 16)) {
return generic::Bcmp<16>::head_tail(p1, p2, count); return aarch64::Bcmp<16>::head_tail(p1, p2, count);
} }
switch (count) { switch (count) {
case 0: case 0:
@@ -147,15 +147,15 @@ inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
} }
if (count <= 64) if (count <= 64)
return generic::Bcmp<32>::head_tail(p1, p2, count); return aarch64::Bcmp<32>::head_tail(p1, p2, count);
// Aligned loop if > 256, otherwise normal loop // Aligned loop if > 256, otherwise normal loop
if (count > 256) { if (unlikely(count > 256)) {
if (auto value = generic::Bcmp<32>::block(p1, p2)) if (auto value = aarch64::Bcmp<32>::block(p1, p2))
return value; return value;
align_to_next_boundary<16, Arg::P1>(p1, p2, count); align_to_next_boundary<16, Arg::P1>(p1, p2, count);
} }
return generic::Bcmp<32>::loop_and_tail(p1, p2, count); return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
} }
#endif // defined(LLVM_LIBC_ARCH_AARCH64) #endif // defined(LLVM_LIBC_ARCH_AARCH64)