forked from OSchip/llvm-project
OpenMP RTL cleanup: two PAUSEs per spin-loop iteration replaced with a single one
Differential Revision: https://reviews.llvm.org/D35490
llvm-svn: 308423
This commit is contained in:
parent
b05a55787a
commit
c7476ed0be
|
|
@ -1040,7 +1040,11 @@ extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
|
|||
#if KMP_ARCH_X86
|
||||
extern void __kmp_x86_pause(void);
|
||||
#elif KMP_MIC
|
||||
static void __kmp_x86_pause(void) { _mm_delay_32(100); }
|
||||
// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
|
||||
// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
|
||||
// the delay from 100 to 300 showed even better performance than double PAUSE
|
||||
// on SPEC OMP2001 and LCPC tasking tests, with no regressions on EPCC.
|
||||
static void __kmp_x86_pause(void) { _mm_delay_32(300); }
|
||||
#else
|
||||
static void __kmp_x86_pause(void) { _mm_pause(); }
|
||||
#endif
|
||||
|
|
@ -1076,7 +1080,7 @@ static void __kmp_x86_pause(void) { _mm_pause(); }
|
|||
KMP_CPU_PAUSE(); \
|
||||
(count) -= 2; \
|
||||
if (!(count)) { \
|
||||
KMP_YIELD(cond); \
|
||||
__kmp_yield(cond); \
|
||||
(count) = __kmp_yield_next; \
|
||||
} \
|
||||
}
|
||||
|
|
@ -1085,7 +1089,7 @@ static void __kmp_x86_pause(void) { _mm_pause(); }
|
|||
KMP_CPU_PAUSE(); \
|
||||
(count) -= 2; \
|
||||
if (!(count)) { \
|
||||
KMP_YIELD(1); \
|
||||
__kmp_yield(1); \
|
||||
(count) = __kmp_yield_next; \
|
||||
} \
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ enum flag_type {
|
|||
*/
|
||||
template <typename P> class kmp_flag {
|
||||
volatile P
|
||||
*loc; /**< Pointer to the flag storage that is modified by another thread
|
||||
*loc; /**< Pointer to the flag storage that is modified by another thread
|
||||
*/
|
||||
flag_type t; /**< "Type" of the flag in loc */
|
||||
public:
|
||||
|
|
@ -225,11 +225,14 @@ __kmp_wait_template(kmp_info_t *this_thr, C *flag,
|
|||
|
||||
// If we are oversubscribed, or have waited a bit (and
|
||||
// KMP_LIBRARY=throughput), then yield
|
||||
KMP_YIELD(oversubscribed);
|
||||
// TODO: Should it be number of cores instead of thread contexts? Like:
|
||||
// KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
|
||||
// Need performance improvement data to make the change...
|
||||
KMP_YIELD_SPIN(spins);
|
||||
if (oversubscribed) {
|
||||
KMP_YIELD(1);
|
||||
} else {
|
||||
KMP_YIELD_SPIN(spins);
|
||||
}
|
||||
// Check if this thread was transferred from a team
|
||||
// to the thread pool (or vice-versa) while spinning.
|
||||
in_pool = !!TCR_4(this_thr->th.th_in_pool);
|
||||
|
|
|
|||
Loading…
Reference in New Issue