mirror of https://github.com/Jittor/Jittor
Add __launch_bounds__ to CUDA kernels
This commit is contained in:
parent
8bb698c225
commit
fb873bc50e
|
@ -7,7 +7,7 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
__version__ = '1.1.6.4'
|
||||
__version__ = '1.1.6.5'
|
||||
from . import lock
|
||||
with lock.lock_scope():
|
||||
from . import compiler
|
||||
|
|
|
@ -292,7 +292,7 @@ void ParallelPass::run() {
|
|||
&new_func_call->before
|
||||
);
|
||||
} else {
|
||||
new_func_def->get_attr("dtype") = "__global__ void";
|
||||
new_func_def->get_attr("dtype") = "__launch_bounds__("+S(cuda_thread_num)+") __global__ void";
|
||||
new_tid_def.push_front("int thread_id = blockIdx.x * blockDim.x + threadIdx.x;");
|
||||
// cuda kernel launch
|
||||
auto& code = func_call_code;
|
||||
|
|
Loading…
Reference in New Issue