[libc] Improve bcmp performance for aarch64
This commit is contained in:
parent
ae852750b3
commit
cb888971d2
|
@ -116,7 +116,7 @@ inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
|
||||||
inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
|
inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
|
||||||
if (likely(count <= 32)) {
|
if (likely(count <= 32)) {
|
||||||
if (unlikely(count >= 16)) {
|
if (unlikely(count >= 16)) {
|
||||||
return generic::Bcmp<16>::head_tail(p1, p2, count);
|
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
|
||||||
}
|
}
|
||||||
switch (count) {
|
switch (count) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -147,15 +147,15 @@ inline_bcmp_aarch64(CPtr p1, CPtr p2, size_t count) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count <= 64)
|
if (count <= 64)
|
||||||
return generic::Bcmp<32>::head_tail(p1, p2, count);
|
return aarch64::Bcmp<32>::head_tail(p1, p2, count);
|
||||||
|
|
||||||
// Aligned loop if > 256, otherwise normal loop
|
// Aligned loop if > 256, otherwise normal loop
|
||||||
if (count > 256) {
|
if (unlikely(count > 256)) {
|
||||||
if (auto value = generic::Bcmp<32>::block(p1, p2))
|
if (auto value = aarch64::Bcmp<32>::block(p1, p2))
|
||||||
return value;
|
return value;
|
||||||
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
|
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
|
||||||
}
|
}
|
||||||
return generic::Bcmp<32>::loop_and_tail(p1, p2, count);
|
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
|
||||||
}
|
}
|
||||||
#endif // defined(LLVM_LIBC_ARCH_AARCH64)
|
#endif // defined(LLVM_LIBC_ARCH_AARCH64)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue