mirror of https://github.com/Jittor/Jittor
Merge branch 'master' of https://github.com/Jittor/jittor
This commit is contained in:
commit
398746044a
|
@ -26,3 +26,4 @@ venv/
|
|||
python/jittor.egg-info
|
||||
dist/
|
||||
!doc/source/*
|
||||
__data__
|
|
@ -25,3 +25,4 @@ python/jittor.egg-info
|
|||
dist/
|
||||
!doc/source/*
|
||||
core
|
||||
__data__
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
exclude __data__
|
||||
exclude __pycache__
|
27
README.cn.md
27
README.cn.md
|
@ -217,13 +217,13 @@ jt.flags.use_cuda = 1
|
|||
```
|
||||
|
||||
|
||||
### 可选步骤五:进行完整测试
|
||||
### 可选步骤五:测试训练Resnet18
|
||||
|
||||
|
||||
要检查Jittor的完整性,您可以运行完整的测试。
|
||||
要检查Jittor的完整性,您可以运行Resnet18训练测试。
|
||||
|
||||
```bash
|
||||
python3.7 -m jittor.test -v
|
||||
python3.7 -m jittor.test.test_resnet
|
||||
```
|
||||
|
||||
如果这些测试失败,请为我们报告错误,我们十分欢迎您为Jittor做出贡献^ _ ^
|
||||
|
@ -360,10 +360,29 @@ Jittor还很年轻。 它可能存在错误和问题。 请在我们的错误跟
|
|||
|
||||
|
||||
|
||||
QQ 群:761222083
|
||||
|
||||
|
||||
|
||||
## 团队
|
||||
|
||||
|
||||
Jittor目前由来自[清华大学计算机图形学组](https://cg.cs.tsinghua.edu.cn/)的梁盾,杨国烨,杨国炜,周文洋和国孟昊等博士生维护。 如果您也对Jittor感兴趣并希望对其进行改进,请加入我们!
|
||||
Jittor目前由[清华大学计算机图形学组](https://cg.cs.tsinghua.edu.cn/)维护。 如果您也对Jittor感兴趣并希望对其进行改进,请加入我们!
|
||||
|
||||
|
||||
## 引用
|
||||
|
||||
```
|
||||
@article{hu2020jittor,
|
||||
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
|
||||
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
|
||||
journal={Information Sciences},
|
||||
volume={63},
|
||||
number={222103},
|
||||
pages={1--222103},
|
||||
year={2020}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## 版权声明
|
||||
|
|
28
README.md
28
README.md
|
@ -210,14 +210,14 @@ import jittor as jt
|
|||
jt.flags.use_cuda = 1
|
||||
```
|
||||
|
||||
### Optional Step 5: Run full tests
|
||||
### Optional Step 5: Test Resnet18 training
|
||||
|
||||
|
||||
To check the integrity of Jittor, you can run full tests.
|
||||
To check the integrity of Jittor, you can run Resnet18 training test.
|
||||
|
||||
|
||||
```bash
|
||||
python3.7 -m jittor.test -v
|
||||
python3.7 -m jittor.test.test_resnet
|
||||
```
|
||||
if those tests are failed, please report bugs for us, and feel free to contribute ^_^
|
||||
|
||||
|
@ -353,12 +353,32 @@ Email: jittor@qq.com
|
|||
|
||||
File an issue: https://github.com/Jittor/jittor/issues
|
||||
|
||||
QQ Group: 761222083
|
||||
|
||||
|
||||
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/news/2020-12-8-21-19-1_2_2/fig4.png" width="200"/>
|
||||
|
||||
## The Team
|
||||
|
||||
|
||||
Jittor is currently maintained by Dun Liang, Guo-Ye Yang, Guo-Wei Yang, Wen-Yang Zhou and Meng-Hao Guo etc. from the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, Please join us!
|
||||
Jittor is currently maintained by the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, Please join us!
|
||||
|
||||
|
||||
## Citation
|
||||
|
||||
|
||||
```
|
||||
@article{hu2020jittor,
|
||||
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
|
||||
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
|
||||
journal={Information Sciences},
|
||||
volume={63},
|
||||
number={222103},
|
||||
pages={1--222103},
|
||||
year={2020}
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
|
||||
|
|
|
@ -223,7 +223,7 @@ sudo apt install python3.7 python3.7-dev
|
|||
|
||||
The whole framework is compiled Just-in-time. Let's install jittor via pip
|
||||
|
||||
整个框架是及时编译的。 让我们通过pip安装jittor
|
||||
整个框架是即时编译的。 让我们通过pip安装jittor
|
||||
|
||||
```bash
|
||||
git clone https://github.com/Jittor/jittor.git
|
||||
|
@ -263,16 +263,16 @@ import jittor as jt
|
|||
jt.flags.use_cuda = 1
|
||||
```
|
||||
|
||||
### Optional Step 5: Run full tests
|
||||
### Optional Step 5: Test Resnet18 training
|
||||
|
||||
### 可选步骤五:进行完整测试
|
||||
### 可选步骤五:测试训练Resnet18
|
||||
|
||||
To check the integrity of Jittor, you can run full tests.
|
||||
To check the integrity of Jittor, you can run Resnet18 training test.
|
||||
|
||||
要检查Jittor的完整性,您可以运行完整的测试。
|
||||
要检查Jittor的完整性,您可以运行Resnet18训练测试。
|
||||
|
||||
```bash
|
||||
python3.7 -m jittor.test -v
|
||||
python3.7 -m jittor.test.test_resnet
|
||||
```
|
||||
if those tests are failed, please report bugs for us, and feel free to contribute ^_^
|
||||
|
||||
|
@ -453,13 +453,35 @@ Email: jittor@qq.com
|
|||
|
||||
File an issue: https://github.com/Jittor/jittor/issues
|
||||
|
||||
QQ Group: 761222083
|
||||
|
||||
QQ 群:761222083
|
||||
|
||||
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/news/2020-12-8-21-19-1_2_2/fig4.png" width="200"/>
|
||||
|
||||
## The Team
|
||||
|
||||
## 团队
|
||||
|
||||
Jittor is currently maintained by Dun Liang, Guo-Ye Yang, Guo-Wei Yang, Wen-Yang Zhou and Meng-Hao Guo etc. from the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, Please join us!
|
||||
Jittor is currently maintained by the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, Please join us!
|
||||
|
||||
Jittor目前由来自[清华大学计算机图形学组](https://cg.cs.tsinghua.edu.cn/)的梁盾,杨国烨,杨国炜,周文洋和国孟昊等博士生维护。 如果您也对Jittor感兴趣并希望对其进行改进,请加入我们!
|
||||
Jittor目前由[清华大学计算机图形学组](https://cg.cs.tsinghua.edu.cn/)维护。 如果您也对Jittor感兴趣并希望对其进行改进,请加入我们!
|
||||
|
||||
## Citation
|
||||
|
||||
## 引用
|
||||
|
||||
```
|
||||
@article{hu2020jittor,
|
||||
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
|
||||
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
|
||||
journal={Information Sciences},
|
||||
volume={63},
|
||||
number={222103},
|
||||
pages={1--222103},
|
||||
year={2020}
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include "cub_where_op.h"
|
||||
#ifdef JIT_cuda
|
||||
#include "executor.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#include <assert.h>
|
||||
#include <executor.h>
|
||||
#include <cub/cub.cuh>
|
||||
|
@ -56,17 +58,6 @@ struct NonZeroOp
|
|||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct ConvertOp
|
||||
{
|
||||
const T div;
|
||||
const T dim_size;
|
||||
ConvertOp(T _div,T dim_size): div(_div),dim_size(dim_size){}
|
||||
__host__ __device__ __forceinline__ T operator()(const T& val) const {
|
||||
return (val/div) % dim_size;
|
||||
}
|
||||
};
|
||||
|
||||
__global__ static void where_kernel(
|
||||
int n,
|
||||
To* input
|
||||
|
@ -90,30 +81,25 @@ void CubWhereOp::jit_run(){
|
|||
int N = cond->num;
|
||||
size_t temp_storage_bytes=0;
|
||||
size_t num_nonzeros_allocation;
|
||||
auto num_nonzeros = exe.allocator->alloc(sizeof(int), num_nonzeros_allocation);
|
||||
cub::TransformInputIterator<bool, NonZeroOp<Ti>, Ti*> itr(cond->ptr<Ti>(), NonZeroOp<Ti>());
|
||||
cub::DeviceReduce::Sum(nullptr, temp_storage_bytes, itr, (int *)num_nonzeros, N);
|
||||
|
||||
auto num_nonzeros = exe.allocator->alloc(sizeof(To), num_nonzeros_allocation);
|
||||
|
||||
size_t temp_storage_allocation;
|
||||
auto temp_storage = exe.allocator->alloc(temp_storage_bytes, temp_storage_allocation);
|
||||
|
||||
cub::DeviceReduce::Sum(temp_storage, temp_storage_bytes, itr, (int *)num_nonzeros, N);
|
||||
exe.allocator->free(temp_storage, temp_storage_bytes, temp_storage_allocation);
|
||||
|
||||
int num_nonzeros_h;
|
||||
checkCudaErrors(cudaMemcpy(&num_nonzeros_h, num_nonzeros, sizeof(int), cudaMemcpyDeviceToHost));
|
||||
void* temp_storage;
|
||||
|
||||
To* out_temp = outs[0]->ptr<To>();
|
||||
|
||||
@for(i, 0, NDIM, outs[@i]->set_shape({num_nonzeros_h});)
|
||||
|
||||
cub::CountingInputIterator<To> counting_itr(0);
|
||||
cub::TransformInputIterator<bool, NonZeroOp<Ti>, Ti*> itr(cond->ptr<Ti>(), NonZeroOp<Ti>());
|
||||
temp_storage_bytes = 0;
|
||||
cub::DeviceSelect::Flagged(nullptr, temp_storage_bytes, counting_itr, itr,out_temp, (int*)num_nonzeros, N);
|
||||
checkCudaErrors(cub::DeviceSelect::Flagged(nullptr, temp_storage_bytes, counting_itr, itr, out_temp, (To*)num_nonzeros, N));
|
||||
temp_storage = exe.allocator->alloc(temp_storage_bytes, temp_storage_allocation);
|
||||
cub::DeviceSelect::Flagged(temp_storage, temp_storage_bytes, counting_itr, itr,out_temp, (int*)num_nonzeros, N);
|
||||
checkCudaErrors(cub::DeviceSelect::Flagged(temp_storage, temp_storage_bytes, counting_itr, itr,out_temp, (To*)num_nonzeros, N));
|
||||
exe.allocator->free(temp_storage, temp_storage_bytes, temp_storage_allocation);
|
||||
|
||||
To num_nonzeros_h;
|
||||
cudaMemcpy(&num_nonzeros_h, num_nonzeros, sizeof(To), cudaMemcpyDeviceToHost);
|
||||
@for(i, 0, NDIM, outs[@i]->set_shape({num_nonzeros_h});)
|
||||
|
||||
if (num_nonzeros_h > 0 && NDIM > 1) {
|
||||
int thread_num = std::min(1024, num_nonzeros_h);
|
||||
int block_num = std::max(1, num_nonzeros_h/1024);
|
||||
|
|
|
@ -39,11 +39,17 @@ VarPtr CublasBatchedMatmulOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
|||
// a [b,n,m] b [b,m,k], c[b,n,k]
|
||||
// c = a*b
|
||||
if (v_index == 0) {
|
||||
// da = dc*b^T
|
||||
return make_cublas_batched_matmul(dout, b, trans_a^0, trans_b^1);
|
||||
if (trans_a)
|
||||
return make_cublas_batched_matmul(b, dout, trans_b, 1);
|
||||
else
|
||||
// da = dc*b^T
|
||||
return make_cublas_batched_matmul(dout, b, 0, trans_b^1);
|
||||
} else {
|
||||
// db = a^T*dc
|
||||
return make_cublas_batched_matmul(a, dout, trans_a^1, trans_b^0);
|
||||
if (trans_b)
|
||||
return make_cublas_batched_matmul(dout, a, 1, trans_a);
|
||||
else
|
||||
// db = a^T*dc
|
||||
return make_cublas_batched_matmul(a, dout, trans_a^1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@
|
|||
|
||||
// CUDA and CUBLAS functions
|
||||
#include <helper_functions.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
|
||||
#ifndef min
|
||||
#define min(a,b) ((a < b) ? a : b)
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cublas.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
|
||||
#ifdef CUBLAS_API_H_
|
||||
// cuBLAS API errors
|
||||
|
|
|
@ -15,8 +15,12 @@ namespace jittor {
|
|||
|
||||
extern cudnnHandle_t cudnn_handle;
|
||||
extern int max_cache_size;
|
||||
extern float max_workspace_ratio;
|
||||
|
||||
// @pyjt(set_algorithm_cache_size)
|
||||
void set_algorithm_cache_size(int size);
|
||||
|
||||
// @pyjt(set_max_workspace_ratio)
|
||||
void set_max_workspace_ratio(float64 ratio);
|
||||
|
||||
} // jittor
|
||||
|
|
|
@ -198,7 +198,7 @@ void CudnnConvBackwardWOp::jit_run() {
|
|||
size_t sz;
|
||||
cudnnStatus_t ret = cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_, cudnnIdesc, cudnnOdesc, cudnnConvDesc, cudnnFdesc, algos[i], &sz);
|
||||
// continue if use too much workspace
|
||||
if (sz*4 > mem_info.total_cuda_ram) continue;
|
||||
if (sz > mem_info.total_cuda_ram * max_workspace_ratio) continue;
|
||||
if (CUDNN_STATUS_SUCCESS == ret && sz > max_ws_size) max_ws_size = sz;
|
||||
}
|
||||
size_t allocation;
|
||||
|
|
|
@ -178,7 +178,7 @@ void CudnnConvBackwardXOp::jit_run() {
|
|||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD,
|
||||
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED
|
||||
};
|
||||
int num_algos = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT;
|
||||
int num_algos = CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT;
|
||||
int perf_count;
|
||||
cudnnConvolutionBwdDataAlgoPerf_t perf_results[num_algos];
|
||||
cudnnConvolutionBwdDataAlgo_t algo;
|
||||
|
@ -199,7 +199,7 @@ void CudnnConvBackwardXOp::jit_run() {
|
|||
size_t sz;
|
||||
cudnnStatus_t ret = cudnnGetConvolutionBackwardDataWorkspaceSize(handle_, cudnnFdesc, cudnnOdesc, cudnnConvDesc, cudnnIdesc, algos[i], &sz);
|
||||
// continue if use too much workspace
|
||||
if (sz*4 > mem_info.total_cuda_ram) continue;
|
||||
if (sz > mem_info.total_cuda_ram * max_workspace_ratio) continue;
|
||||
if (CUDNN_STATUS_SUCCESS == ret && sz > max_ws_size) max_ws_size = sz;
|
||||
}
|
||||
size_t allocation;
|
||||
|
|
|
@ -203,7 +203,7 @@ void CudnnConvOp::jit_run() {
|
|||
handle_, cudnnIdesc, cudnnFdesc, cudnnConvDesc,
|
||||
cudnnOdesc, algos[i], &sz);
|
||||
// continue if use too much workspace
|
||||
if (sz*4 > mem_info.total_cuda_ram) continue;
|
||||
if (sz > mem_info.total_cuda_ram * max_workspace_ratio) continue;
|
||||
if (CUDNN_STATUS_SUCCESS == ret && sz > max_ws_size) max_ws_size = sz;
|
||||
}
|
||||
size_t allocation;
|
||||
|
|
|
@ -65,7 +65,7 @@
|
|||
#include <assert.h>
|
||||
|
||||
#include <cudnn.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "fp16_dev.h"
|
||||
#include "fp16_emu.h"
|
||||
|
||||
|
|
|
@ -10,11 +10,16 @@ namespace jittor {
|
|||
|
||||
cudnnHandle_t cudnn_handle;
|
||||
int max_cache_size = 100;
|
||||
float max_workspace_ratio = 0.25;
|
||||
|
||||
void set_algorithm_cache_size(int size) {
|
||||
max_cache_size = size;
|
||||
}
|
||||
|
||||
void set_max_workspace_ratio(float64 ratio) {
|
||||
max_workspace_ratio = ratio;
|
||||
}
|
||||
|
||||
struct cudnn_initer {
|
||||
|
||||
inline cudnn_initer() {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <cudnn.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
|
||||
const char *_cudaGetErrorEnum(cudnnStatus_t error) {
|
||||
return cudnnGetErrorString(error);
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "init.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "curand_random_op.h"
|
||||
#include "curand_warper.h"
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cublas.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include <curand.h>
|
||||
|
||||
// cuRAND API errors
|
||||
|
|
|
@ -6,16 +6,13 @@
|
|||
#include "var.h"
|
||||
#include "cutt_transpose_op.h"
|
||||
#include "ops/op_register.h"
|
||||
#include <iostream>
|
||||
|
||||
#ifdef JIT
|
||||
#include "cutt.h"
|
||||
#endif
|
||||
#include "cutt_warper.h"
|
||||
#include "misc/stack_vector.h"
|
||||
#include "helper_cuda.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
#ifndef JIT
|
||||
static auto make_transpose = get_op_info("cutt_transpose")
|
||||
.get_constructor<VarPtr, Var*, NanoVector>();
|
||||
|
||||
|
@ -58,52 +55,49 @@ VarPtr CuttTransposeOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
|||
return make_transpose(dout, reverse);
|
||||
}
|
||||
|
||||
void CuttTransposeOp::jit_prepare(JK& jk) {
|
||||
jk << _CS("[Tx:") << x->dtype();
|
||||
jk << _CS("][DIM=") << JK::hex1(axes.size());
|
||||
for (uint i=0; i<axes.size(); i++)
|
||||
jk << _CS("][AXES") << JK::hex1(axes[i]) << '=' << JK::hex1(i);
|
||||
jk << ']';
|
||||
}
|
||||
unordered_map<string, unsigned int> cutt_plan_cache;
|
||||
|
||||
#else // JIT
|
||||
#ifdef JIT_cuda
|
||||
|
||||
extern unordered_map<string, unsigned int> cutt_plan_cache;
|
||||
|
||||
void CuttTransposeOp::jit_run() {
|
||||
auto* __restrict__ xp = x->ptr<Tx>();
|
||||
auto* __restrict__ yp = y->ptr<Tx>();
|
||||
vector<int> permutation, permutation2;
|
||||
vector<int> y_shape;
|
||||
vector<int> x_shape;
|
||||
@for(i, 0, DIM, permutation.push_back(DIM-1-AXES@i);)
|
||||
@for(i, 0, DIM, permutation2.push_back(permutation[DIM-1-@i@@]);)
|
||||
std::vector<int> reverse;
|
||||
reverse.reserve(permutation2.size());
|
||||
for (uint i=0; i<permutation2.size(); i++)
|
||||
reverse[permutation2[i]] = i;
|
||||
|
||||
@for(i, 0, DIM, x_shape.push_back(x->shape[DIM-1-@i@@]);)
|
||||
|
||||
void CuttTransposeOp::run() {
|
||||
auto* __restrict__ xp = x->mem_ptr;
|
||||
auto* __restrict__ yp = y->mem_ptr;
|
||||
StackVector<int> x_shape;
|
||||
StackVector<int> new_shape, new_axes, trans, reverse;
|
||||
int dim = x->shape.size();
|
||||
for (int i=0; i<dim; i++) {
|
||||
trans[i] = new_shape.size();
|
||||
if (x->shape[i] != 1)
|
||||
new_shape.push_back(x->shape[i]);
|
||||
}
|
||||
for (int i = 0; i < dim; ++i) {
|
||||
if (x->shape[axes[i]] != 1) {
|
||||
new_axes.push_back(trans[axes[i]]);
|
||||
}
|
||||
}
|
||||
dim = new_shape.size();
|
||||
for (int i=0; i<dim; i++)
|
||||
reverse[i] = dim-1-new_axes[dim-1-i];
|
||||
for (int i=0; i<dim; i++)
|
||||
x_shape[i] = new_shape[dim-1-i];
|
||||
if (dim == 1) {
|
||||
checkCudaErrors(cudaMemcpyAsync(yp, xp, x->size, cudaMemcpyDefault, 0));
|
||||
return;
|
||||
}
|
||||
jk.clear();
|
||||
jk << @DIM << ",";
|
||||
for (uint i=0; i<@DIM; i++) jk << x_shape[i] << ",";
|
||||
for (uint i=0; i<@DIM; i++) jk << reverse[i] << ",";
|
||||
jk << sizeof(Tx) << ".";
|
||||
jk << dim << ',';
|
||||
for (int i=0; i<dim; i++) jk << x_shape[i] << ',';
|
||||
for (int i=0; i<dim; i++) jk << reverse[i] << ',';
|
||||
jk << x->dtype().dsize() << '.';
|
||||
auto iter = cutt_plan_cache.find(jk.to_string());
|
||||
LOGvvv << "Run cutt_transpose with key:" << jk.to_string();
|
||||
|
||||
if (iter!=cutt_plan_cache.end()){
|
||||
cuttExecute(iter->second, xp, yp);
|
||||
} else {
|
||||
cuttHandle plan;
|
||||
cuttPlan(&plan, @DIM, x_shape.data(), reverse.data(), sizeof(Tx), 0);
|
||||
cuttPlan(&plan, dim, x_shape.data(), reverse.data(), x->dtype().dsize(), 0);
|
||||
cutt_plan_cache[jk.to_string()] = plan;
|
||||
cuttExecute(plan, xp, yp);
|
||||
}
|
||||
}
|
||||
#endif // JIT_cuda
|
||||
#endif // JIT
|
||||
|
||||
} // jittor
|
|
@ -19,7 +19,7 @@ struct CuttTransposeOp : Op {
|
|||
const char* name() const override { return "cutt_transpose"; }
|
||||
VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
|
||||
void infer_shape() override;
|
||||
DECLARE_jit_run;
|
||||
void run() override;
|
||||
};
|
||||
|
||||
} // jittor
|
|
@ -101,11 +101,17 @@ const char *_cudaGetErrorEnum(NppStatus error);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
namespace jittor {
|
||||
extern bool peek_logged;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void peek(T result, char const *const func, const char *const file,
|
||||
int const line) {
|
||||
if (result) {
|
||||
// DEVICE_RESET
|
||||
if (jittor::peek_logged) return;
|
||||
jittor::peek_logged = 1;
|
||||
LOGe << "Peek CUDA error at" << file >> ":" >> line << " code="
|
||||
>> static_cast<unsigned int>(result) >> "(" << _cudaGetErrorEnum(result) << ")"
|
||||
<< func;
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
#include <cuda_runtime.h>
|
||||
#include <nccl.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <nccl.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "nccl_warper.h"
|
||||
#include "ops/op_register.h"
|
||||
namespace jittor {
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <nccl.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "nccl_warper.h"
|
||||
#include "ops/op_register.h"
|
||||
namespace jittor {
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <nccl.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "nccl_warper.h"
|
||||
#include "ops/op_register.h"
|
||||
namespace jittor {
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
// These are CUDA Helper functions for initialization and error checking
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
|
||||
|
||||
#ifdef _CUFFT_H_
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
__version__ = '1.2.1.2'
|
||||
__version__ = '1.2.2.5'
|
||||
from . import lock
|
||||
with lock.lock_scope():
|
||||
ori_int = int
|
||||
|
@ -33,9 +33,38 @@ from collections import OrderedDict
|
|||
from collections.abc import Sequence, Mapping
|
||||
import types
|
||||
import pickle
|
||||
import sys
|
||||
import hashlib
|
||||
import sys, os
|
||||
import traceback
|
||||
|
||||
|
||||
def safepickle(obj, path):
|
||||
s = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
|
||||
checksum = hashlib.sha1(s).digest()
|
||||
s += bytes(checksum)
|
||||
s += b"HCAJSLHD"
|
||||
with open(path, 'wb') as f:
|
||||
f.write(s)
|
||||
|
||||
def safeunpickle(path):
|
||||
if path.startswith("jittorhub://"):
|
||||
path = path.replace("jittorhub://", "https://cg.cs.tsinghua.edu.cn/jittor/assets/build/checkpoints/")
|
||||
if path.startswith("https:") or path.startswith("http:"):
|
||||
base = path.split("/")[-1]
|
||||
fname = os.path.join(compiler.ck_path, base)
|
||||
from jittor.utils.misc import download_url_to_local
|
||||
download_url_to_local(path, base, compiler.ck_path, None)
|
||||
path = fname
|
||||
with open(path, "rb") as f:
|
||||
s = f.read()
|
||||
if not s.endswith(b"HCAJSLHD"):
|
||||
return pickle.loads(s)
|
||||
checksum = s[-28:-8]
|
||||
s = s[:-28]
|
||||
if hashlib.sha1(s).digest() != checksum:
|
||||
raise ValueError("Pickle checksum does not match! path: "+path)
|
||||
return pickle.loads(s)
|
||||
|
||||
class _call_no_record_scope:
|
||||
def __enter__(self): pass
|
||||
def __exit__(self, *exc): pass
|
||||
|
@ -92,6 +121,7 @@ class log_capture_scope(_call_no_record_scope):
|
|||
print(logs)
|
||||
"""
|
||||
def __init__(self, **jt_flags):
|
||||
jt_flags["use_parallel_op_compiler"] = 0
|
||||
self.fs = flag_scope(**jt_flags)
|
||||
|
||||
def __enter__(self):
|
||||
|
@ -435,8 +465,15 @@ def display_memory_info():
|
|||
core.display_memory_info(fileline)
|
||||
|
||||
def load(path):
|
||||
pkl_file = open(path, 'rb')
|
||||
model_dict = pickle.load(pkl_file)
|
||||
if path.endswith(".pth"):
|
||||
try:
|
||||
dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
except:
|
||||
raise RuntimeError("pytorch need to be installed when load pth format.")
|
||||
model_dict = torch.load(path, map_location=torch.device('cpu'))
|
||||
else:
|
||||
model_dict = safeunpickle(path)
|
||||
return model_dict
|
||||
|
||||
def _uniq(x):
|
||||
|
@ -559,6 +596,21 @@ class Module:
|
|||
return ret
|
||||
self.__class__.__call__ = new_call
|
||||
|
||||
def register_pre_forward_hook(self, func):
|
||||
cls = self.__class__
|
||||
self.__fhook2__ = func
|
||||
if hasattr(cls, "__hooked2__"):
|
||||
return
|
||||
cls.__hooked2__ = True
|
||||
origin_call = cls.__call__
|
||||
def new_call(self, *args, **kw):
|
||||
if hasattr(self, "__fhook2__"):
|
||||
if len(kw):
|
||||
self.__fhook2__(self, args, kw)
|
||||
else:
|
||||
self.__fhook2__(self, args)
|
||||
return origin_call(self, *args, **kw)
|
||||
self.__class__.__call__ = new_call
|
||||
|
||||
def children(self):
|
||||
cd = []
|
||||
|
@ -631,20 +683,10 @@ class Module:
|
|||
params_dict = {}
|
||||
for p in params:
|
||||
params_dict[p.name()] = p.data
|
||||
with open(path, 'wb') as f:
|
||||
pickle.dump(params_dict, f, pickle.HIGHEST_PROTOCOL)
|
||||
safepickle(params_dict, path)
|
||||
|
||||
def load(self, path):
|
||||
if path.endswith(".pth"):
|
||||
try:
|
||||
dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
except:
|
||||
raise RuntimeError("pytorch need to be installed when load pth format.")
|
||||
self.load_parameters(torch.load(path, map_location=torch.device('cpu')))
|
||||
return
|
||||
with open(path, 'rb') as f:
|
||||
self.load_parameters(pickle.load(f))
|
||||
self.load_parameters(load(path))
|
||||
|
||||
def eval(self):
|
||||
def callback(parents, k, v, n):
|
||||
|
@ -789,6 +831,11 @@ can also be None)::
|
|||
def dfs(self, parents, k, callback, callback_leave=None):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def apply(cls, *args, **kw):
|
||||
func = cls()
|
||||
return func(*args, **kw)
|
||||
|
||||
|
||||
def make_module(func, exec_n_args=1):
|
||||
class MakeModule(Module):
|
||||
|
@ -864,8 +911,6 @@ def size(v, dim=None):
|
|||
return v.shape[dim]
|
||||
Var.size = size
|
||||
|
||||
def item(v):
|
||||
return v.data.item()
|
||||
|
||||
def to_int(v):
|
||||
dtype = str(v.dtype)
|
||||
|
@ -882,11 +927,15 @@ def to_bool(v):
|
|||
assert dtype.startswith("int") or dtype=="bool"
|
||||
return ori_bool(v.item())
|
||||
|
||||
Var.item = item
|
||||
Var.__int__ = to_int
|
||||
Var.__float__ = to_float
|
||||
Var.__bool__ = to_bool
|
||||
|
||||
def format(v, spec):
|
||||
return v.item().__format__(spec)
|
||||
Var.__format__ = format
|
||||
|
||||
|
||||
int = int32
|
||||
Var.int = Var.int32
|
||||
float = float32
|
||||
|
|
|
@ -78,29 +78,37 @@ def setup_mkl():
|
|||
|
||||
|
||||
def install_cub(root_folder):
|
||||
url = "https://github.com/NVlabs/cub/archive/v1.8.0.tar.gz"
|
||||
filename = "cub-1.8.0.tgz"
|
||||
url = "https://github.com/NVIDIA/cub/archive/1.11.0.tar.gz"
|
||||
filename = "cub-1.11.0.tgz"
|
||||
md5 = "97196a885598e40592100e1caaf3d5ea"
|
||||
fullname = os.path.join(root_folder, filename)
|
||||
dirname = os.path.join(root_folder, filename.replace(".tgz",""))
|
||||
|
||||
if not os.path.isfile(os.path.join(dirname, "examples", "test")):
|
||||
LOG.i("Downloading cub...")
|
||||
download_url_to_local(url, filename, root_folder, "9203ea2499b56782601fddf8a12e9b08")
|
||||
download_url_to_local(url, filename, root_folder, md5)
|
||||
import tarfile
|
||||
|
||||
with tarfile.open(fullname, "r") as tar:
|
||||
tar.extractall(root_folder)
|
||||
assert 0 == os.system(f"cd {dirname}/examples && "
|
||||
f"{nvcc_path} device/example_device_radix_sort.cu -O2 -I.. -o test")
|
||||
f"{nvcc_path} device/example_device_radix_sort.cu -O2 -I.. -std=c++14 -o test")
|
||||
if core.get_device_count():
|
||||
assert 0 == os.system(f"cd {dirname}/examples && ./test")
|
||||
return dirname
|
||||
|
||||
def setup_cub():
|
||||
global cub_home
|
||||
cub_home = ""
|
||||
from pathlib import Path
|
||||
cub_path = os.path.join(str(Path.home()), ".cache", "jittor", "cub")
|
||||
cub_home = install_cub(cub_path)
|
||||
setup_cuda_lib("cub", link=False, extra_flags=f"-I{cub_home}")
|
||||
cuda_version = int(get_version(nvcc_path)[1:-1].split('.')[0])
|
||||
extra_flags = ""
|
||||
if cuda_version < 11:
|
||||
cub_home = install_cub(cub_path)
|
||||
extra_flags = f"-I{cub_home}"
|
||||
cub_home += "/"
|
||||
setup_cuda_lib("cub", link=False, extra_flags=extra_flags)
|
||||
|
||||
def setup_cuda_extern():
|
||||
if not has_cuda: return
|
||||
|
|
|
@ -894,10 +894,12 @@ make_cache_dir(cache_path)
|
|||
make_cache_dir(os.path.join(cache_path, "jit"))
|
||||
make_cache_dir(os.path.join(cache_path, "obj_files"))
|
||||
make_cache_dir(os.path.join(cache_path, "gen"))
|
||||
ck_path = os.path.join(cache_path, "checkpoints")
|
||||
make_cache_dir(ck_path)
|
||||
|
||||
# build cache_compile
|
||||
cc_flags += pybind_include
|
||||
cc_flags += f" -I{jittor_path}/src "
|
||||
cc_flags += pybind_include
|
||||
check_cache_compile()
|
||||
LOG.v(f"Get cache_compile: {jit_utils.cc}")
|
||||
|
||||
|
@ -981,10 +983,11 @@ assert libname is not None, "openmp library not found"
|
|||
ctypes.CDLL(libname, os.RTLD_NOW | os.RTLD_GLOBAL)
|
||||
|
||||
version_file = os.path.join(jittor_path, "version")
|
||||
if os.path.isfile(version_file):
|
||||
if os.path.isfile(version_file) and not os.path.isdir(os.path.join(jittor_path, "src", "__data__")):
|
||||
with open(version_file, 'r') as f:
|
||||
version = f.read().strip()
|
||||
key = f"{version}-{cc_type}-{'cuda' if has_cuda else 'cpu'}.o"
|
||||
# key = f"{version}-{cc_type}-{'cuda' if has_cuda else 'cpu'}.o"
|
||||
key = f"{version}-g++-cpu.o"
|
||||
# TODO: open the website
|
||||
extra_obj = os.path.join(cache_path, key)
|
||||
url = os.path.join("https://cg.cs.tsinghua.edu.cn/jittor/assets/build/"+key)
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
import jittor as jt
|
||||
import numpy as np
|
||||
from jittor import pool
|
||||
from collections.abc import Sequence
|
||||
|
||||
def argmax_pool(x, size, stride, padding=0):
|
||||
return pool.pool(x, size, 'maximum', padding, stride)
|
||||
|
@ -180,8 +181,14 @@ jt.Var.__setitem__ = setitem
|
|||
def getitem(x, slices):
|
||||
if isinstance(slices, jt.Var) and slices.dtype == "bool":
|
||||
return getitem(x, slices.where())
|
||||
if isinstance(slices, list):
|
||||
slices = tuple(slices)
|
||||
if isinstance(slices, Sequence):
|
||||
ss = []
|
||||
for s in slices:
|
||||
if isinstance(s, jt.Var) and s.dtype == "bool":
|
||||
ss.extend(s.where())
|
||||
else:
|
||||
ss.append(s)
|
||||
slices = tuple(ss)
|
||||
return x.getitem(slices)
|
||||
|
||||
def setitem(x, slices, value):
|
||||
|
|
|
@ -101,7 +101,7 @@ class Dataset(object):
|
|||
Attrs:
|
||||
|
||||
* batch_size(int): batch size, default 16.
|
||||
* totol_len(int): totol lenght.
|
||||
* total_len(int): total lenght.
|
||||
* shuffle(bool): shuffle at each epoch, default False.
|
||||
* drop_last(bool): if true, the last batch of dataset might smaller than batch_size, default True.
|
||||
* num_workers: number of workers for loading data
|
||||
|
@ -267,6 +267,9 @@ Example::
|
|||
LOG.i('\n'.join(msg))
|
||||
|
||||
def _stop_all_workers(self):
|
||||
# stop workers
|
||||
for w in self.workers:
|
||||
w.buffer.stop()
|
||||
# wait until all workers idle
|
||||
if self.num_idle.value < self.num_workers:
|
||||
with self.gid.get_lock():
|
||||
|
|
|
@ -0,0 +1,321 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guoye Yang <498731903@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
#
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import jittor as jt
|
||||
from jittor import init
|
||||
from jittor import nn
|
||||
from jittor import Function
|
||||
|
||||
class DepthwiseConv(Function):
|
||||
def __init__(self, stride=1, padding=0, dilation=1):
|
||||
self.stride = stride if isinstance(stride, tuple) else (stride, stride)
|
||||
self.padding = padding if isinstance(padding, tuple) else (padding, padding)
|
||||
self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
|
||||
|
||||
def execute(self, x, weight):
|
||||
self.save_vars = x, weight
|
||||
N,C,H,W = x.shape
|
||||
o,i,Kh,Kw = weight.shape
|
||||
assert(o == C)
|
||||
oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1
|
||||
ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1
|
||||
filter_height, filter_width = Kh, Kw
|
||||
self.Khw = Kh, Kw
|
||||
output = jt.code(
|
||||
[N, C, oh, ow],
|
||||
x.dtype,
|
||||
[x, weight],
|
||||
cuda_header = """
|
||||
template <typename T,
|
||||
int filter_height,
|
||||
int filter_width,
|
||||
int stride_height,
|
||||
int stride_width>
|
||||
__global__ void KernelDepthwiseConv(
|
||||
const T *const input_data, const T *const filter_data, const int batch_size,
|
||||
const int output_channels, const int output_height,
|
||||
const int output_width, const int input_channels,
|
||||
const int input_height, const int input_width,
|
||||
const int padding_height, const int padding_width,
|
||||
const int dilate_height, const int dilate_width, T *const output_data) {
|
||||
const int kWeghtSize = filter_height * filter_width;
|
||||
T r_weight[kWeghtSize];
|
||||
const int batch = blockIdx.y;
|
||||
const int c_out = blockIdx.x;
|
||||
const T* weight = filter_data + c_out * filter_height * filter_width;
|
||||
for (int i = 0; i < filter_height * filter_width; i++) r_weight[i] = weight[i];
|
||||
|
||||
for (int w_out = threadIdx.x; w_out < output_width; w_out += blockDim.x) {
|
||||
for (int h_out = threadIdx.y; h_out < output_height; h_out += blockDim.y) {
|
||||
const int batch = blockIdx.y;
|
||||
const int c_out = blockIdx.x;
|
||||
|
||||
const int c_in = c_out;
|
||||
T value = 0;
|
||||
const int h_in_start = -padding_height + h_out * stride_height;
|
||||
const int w_in_start = -padding_width + w_out * stride_width;
|
||||
const int h_in_end = h_in_start + filter_height * dilate_height;
|
||||
const int w_in_end = w_in_start + filter_width * dilate_width;
|
||||
|
||||
const int in_offset =
|
||||
((batch * input_channels + c_in) * input_height) * input_width;
|
||||
|
||||
const int h_end = h_in_end < input_height ? h_in_end : input_height;
|
||||
const int w_end = w_in_end < input_width ? w_in_end : input_width;
|
||||
const int h_start = h_in_start > 0 ? h_in_start : 0;
|
||||
const int w_start = w_in_start > 0 ? w_in_start : 0;
|
||||
|
||||
for (int h_in = h_in_start, h_f = 0; h_f < filter_height;
|
||||
h_in += dilate_height, h_f++) {
|
||||
for (int w_in = w_in_start, w_f = 0; w_f < filter_width;
|
||||
w_in += dilate_width, w_f++) {
|
||||
if (h_in >= 0 && h_in < input_height && w_in >= 0 &&
|
||||
w_in < input_width) {
|
||||
const int offset = in_offset + h_in * input_width + w_in;
|
||||
value += r_weight[h_f * filter_width + w_f] * input_data[offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
int index =
|
||||
((batch * gridDim.x + c_out) * output_height + h_out) * output_width +
|
||||
w_out;
|
||||
output_data[index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
""",
|
||||
cuda_src=f"""
|
||||
@alias(input, in0)
|
||||
@alias(filter, in1)
|
||||
@alias(output, out)
|
||||
|
||||
const int batch_size = input_shape0;
|
||||
const int input_channels = input_shape1;
|
||||
const int input_height = input_shape2;
|
||||
const int input_width = input_shape3;
|
||||
const int output_channels = output_shape1;
|
||||
const int output_height = output_shape2;
|
||||
const int output_width = output_shape3;
|
||||
const int ksize_height = {Kh};
|
||||
const int ksize_width = {Kw};
|
||||
const int stride_height = {self.stride[0]};
|
||||
const int stride_width = {self.stride[1]};
|
||||
const int padding_height = {self.padding[0]};
|
||||
const int padding_width = {self.padding[1]};
|
||||
const int dilate_height = {self.dilation[0]};
|
||||
const int dilate_width = {self.dilation[1]};
|
||||
|
||||
int thread = 512;
|
||||
if (output_width > 1024 && output_width <= 2048)
|
||||
thread = (output_width - 1) / 2 + 1;
|
||||
else if (output_width > 512 && output_width <= 1024)
|
||||
thread = output_width;
|
||||
int blocks = std::min(std::max(thread / output_width, 1), output_height);
|
||||
dim3 threads(std::min(output_width, thread), blocks, 1);
|
||||
dim3 grid(output_channels, batch_size, 1);
|
||||
KernelDepthwiseConv<
|
||||
input_type, ksize_height, ksize_width,
|
||||
stride_height, stride_width>
|
||||
<<<grid, threads>>>(
|
||||
input_p, filter_p, batch_size, output_channels, output_height,
|
||||
output_width, input_channels, input_height, input_width,
|
||||
padding_height, padding_width, dilate_height,
|
||||
dilate_width, output_p);
|
||||
"""
|
||||
)
|
||||
return output
|
||||
|
||||
def grad(self, grad):
|
||||
x, weight = self.save_vars
|
||||
Kh, Kw = self.Khw
|
||||
return jt.code([x.shape, weight.shape], [x.dtype, weight.dtype], [x, weight, grad],
|
||||
cuda_header = f"#include <{jt.compile_extern.cub_home}cub/cub.cuh>"+"""
|
||||
template <typename T>
|
||||
__device__ __inline__ void CudaAtomicAddWithWarp(T* sum, T value) {
|
||||
typedef cub::WarpReduce<T> WarpReduce;
|
||||
typename WarpReduce::TempStorage temp_storage;
|
||||
value = WarpReduce(temp_storage).Sum(value);
|
||||
if (cub::LaneId() == 0)
|
||||
atomicAdd(sum, value);
|
||||
}
|
||||
|
||||
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
|
||||
template <typename T,
|
||||
int filter_height,
|
||||
int filter_width,
|
||||
int stride_height,
|
||||
int stride_width>
|
||||
__global__ void KernelDepthwiseConvInputGradCFilter(
|
||||
const T *const input_data, const T *const output_grad_data,
|
||||
const T *const filter_data, const int batch_size,
|
||||
const int output_channels, const int output_height,
|
||||
const int output_width, const int input_channels,
|
||||
const int input_height, const int input_width,
|
||||
const int padding_height, const int padding_width,
|
||||
const int dilate_height, const int dilate_width,
|
||||
T *const input_grad_data) {
|
||||
const int kWeghtSize = filter_height * filter_width + 1;
|
||||
T r_weight[kWeghtSize];
|
||||
const int batch = blockIdx.y;
|
||||
const int c_in = blockIdx.x;
|
||||
|
||||
const T* weight = filter_data + c_in * filter_height * filter_width;
|
||||
for (int i = 0; i < filter_height * filter_width; i++)
|
||||
r_weight[i] =
|
||||
weight[filter_height * filter_width - i - 1];
|
||||
|
||||
for (int w_in = threadIdx.x; w_in < input_width; w_in += blockDim.x) {
|
||||
for (int h_in = threadIdx.y; h_in < input_height; h_in += blockDim.y) {
|
||||
const int batch = blockIdx.y;
|
||||
const int c_in = blockIdx.x;
|
||||
|
||||
int h_out_start = h_in - (filter_height - 1) * dilate_height + padding_height;
|
||||
|
||||
int w_out_start = w_in - (filter_width - 1) * dilate_width + padding_width;
|
||||
|
||||
T value = 0;
|
||||
int index =
|
||||
((batch * gridDim.x + c_in) * input_height + h_in) * input_width +
|
||||
w_in;
|
||||
|
||||
for (int h_out = h_out_start, h_f = 0; h_f < filter_height;
|
||||
h_out += dilate_height, h_f++) {
|
||||
for (int w_out = w_out_start, w_f = 0; w_f < filter_width;
|
||||
w_out += dilate_width, w_f++) {
|
||||
int s_h_out = h_out / stride_height;
|
||||
int s_w_out = w_out / stride_width;
|
||||
if (h_out % stride_height == 0 && w_out % stride_width == 0 &&
|
||||
s_h_out >= 0 && s_h_out < output_height && s_w_out >= 0 &&
|
||||
s_w_out < output_width) {
|
||||
const int output_grad_offset =
|
||||
((batch * output_channels + c_in) * output_height +
|
||||
s_h_out) *
|
||||
output_width +
|
||||
s_w_out;
|
||||
value +=
|
||||
output_grad_data[output_grad_offset] *
|
||||
r_weight[h_f * filter_width + w_f];
|
||||
}
|
||||
}
|
||||
}
|
||||
input_grad_data[index] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Cuda kernel to compute the depthwise convolution backprop w.r.t. filter.
|
||||
template <typename T>
|
||||
__global__ void KernelDepthwiseConvFilterGrad(
|
||||
const T* output_grad_data, const T* input_data, const int num,
|
||||
const int output_channels, const int output_height, const int output_width,
|
||||
const int input_channels, const int input_height, const int input_width,
|
||||
const int filter_height,
|
||||
const int filter_width, const int stride_height, const int stride_width,
|
||||
const int padding_height, const int padding_width, const int dilate_height,
|
||||
const int dilate_width, T* filter_grad_data) {
|
||||
T s = 0;
|
||||
|
||||
int gbid = (((blockIdx.z * blockDim.z + threadIdx.z) * gridDim.y) + blockIdx.y) * gridDim.x + blockIdx.x;
|
||||
|
||||
for (int image_w = threadIdx.x; image_w < output_width;
|
||||
image_w += blockDim.x) {
|
||||
for (int bid = 0; bid < num; bid++) {
|
||||
//for (int bid = threadIdx.z; bid < num; bid+=blockDim.z) {
|
||||
for (int image_h = threadIdx.y; image_h < output_height;
|
||||
image_h += blockDim.y) {
|
||||
int kernel_id = blockIdx.z;
|
||||
int kernel_h = blockIdx.y * dilate_height - padding_height;
|
||||
int kernel_w = blockIdx.x * dilate_width - padding_width;
|
||||
|
||||
int image_hk = image_h * stride_height + kernel_h;
|
||||
int image_wk = image_w * stride_width + kernel_w;
|
||||
if (image_hk < 0 || image_hk >= input_height) continue;
|
||||
if (image_wk < 0 || image_wk >= input_width) continue;
|
||||
#define gaid(N, C, H, W) \
|
||||
((((N)*gridDim.z + (C)) * output_height + (H)) * output_width + (W))
|
||||
int input_id = ((bid * gridDim.z +
|
||||
kernel_id) *
|
||||
input_height +
|
||||
image_hk) *
|
||||
input_width +
|
||||
image_wk;
|
||||
s += output_grad_data[gaid(bid, kernel_id, image_h, image_w)] *
|
||||
input_data[input_id];
|
||||
|
||||
#undef gaid
|
||||
}
|
||||
}
|
||||
}
|
||||
CudaAtomicAddWithWarp(&filter_grad_data[gbid], s);
|
||||
}
|
||||
""",
|
||||
cuda_src=f"""
|
||||
// source for backward to data
|
||||
@alias(input, in0)
|
||||
@alias(filter, in1)
|
||||
@alias(output_grad, in2)
|
||||
@alias(input_grad, out0)
|
||||
@alias(filter_grad, out1)
|
||||
|
||||
const int batch_size = input_shape0;
|
||||
const int input_channels = input_shape1;
|
||||
const int input_height = input_shape2;
|
||||
const int input_width = input_shape3;
|
||||
const int output_channels = output_grad_shape1;
|
||||
const int output_height = output_grad_shape2;
|
||||
const int output_width = output_grad_shape3;
|
||||
const int ksize_height = {Kh};
|
||||
const int ksize_width = {Kw};
|
||||
const int stride_height = {self.stride[0]};
|
||||
const int stride_width = {self.stride[1]};
|
||||
const int padding_height = {self.padding[0]};
|
||||
const int padding_width = {self.padding[1]};
|
||||
const int dilate_height = {self.dilation[0]};
|
||||
const int dilate_width = {self.dilation[1]};
|
||||
|
||||
int thread = 512;
|
||||
if (input_width > 1024 && input_width <= 2048)
|
||||
thread = (input_width - 1) / 2 + 1;
|
||||
else if (input_width > 512 && input_width <= 1024)
|
||||
thread = input_width;
|
||||
int blocks = std::min(std::max(thread / input_width, 1), input_height);
|
||||
dim3 threads(std::min(input_width, thread), blocks, 1);
|
||||
dim3 grid(input_channels, batch_size, 1);
|
||||
KernelDepthwiseConvInputGradCFilter<
|
||||
input_type, ksize_height, ksize_width
|
||||
, stride_height, stride_width>
|
||||
<<<grid, threads, 0>>>(
|
||||
input_p, output_grad_p, filter_p, batch_size,
|
||||
output_channels, output_height, output_width, input_channels,
|
||||
input_height, input_width, padding_height,
|
||||
padding_width, dilate_height, dilate_width, input_grad_p);
|
||||
|
||||
// source for backward to filter
|
||||
|
||||
int block_size = 512;
|
||||
if (output_width > 1024 && output_width <= 2048)
|
||||
block_size = (output_width - 1) / 2 + 1;
|
||||
else if (output_width > 512 && output_width <= 1024)
|
||||
block_size = output_width;
|
||||
int crop_output_height =
|
||||
std::min(std::max(block_size / output_width, 1), output_height);
|
||||
|
||||
grid = dim3(ksize_width, ksize_height, output_channels);
|
||||
threads = dim3(std::min(output_width, block_size), crop_output_height, 1);
|
||||
cudaMemsetAsync(filter_grad_p, 0, filter_grad->size);
|
||||
|
||||
KernelDepthwiseConvFilterGrad<
|
||||
input_type><<<grid, threads, 0>>>(
|
||||
output_grad_p, input_p, batch_size, output_channels,
|
||||
output_height, output_width, input_channels, input_height,
|
||||
input_width, ksize_height, ksize_width,
|
||||
stride_height, stride_width, padding_height, padding_width,
|
||||
dilate_height, dilate_width, filter_grad_p);
|
||||
"""
|
||||
)
|
|
@ -12,6 +12,35 @@ import numpy as np
|
|||
import math
|
||||
from collections.abc import Sequence,Iterable
|
||||
|
||||
def __copy__(x):
|
||||
return x.copy().detach()
|
||||
jt.Var.__copy__ = __copy__
|
||||
|
||||
def __deepcopy__(x,memo):
|
||||
result = x.copy().detach()
|
||||
memo[id(x)]=result
|
||||
return result
|
||||
jt.Var.__deepcopy__ = __deepcopy__
|
||||
|
||||
def __len__(x):
|
||||
return x.shape[0]
|
||||
jt.Var.__len__ = __len__
|
||||
|
||||
def __iter__(x):
|
||||
result = []
|
||||
for i in range(x.shape[0]):
|
||||
result.append(x[i])
|
||||
return result.__iter__()
|
||||
jt.Var.__iter__ = __iter__
|
||||
|
||||
def all(x, dim=[]):
|
||||
return x.all_(dim).bool()
|
||||
jt.Var.all = all
|
||||
|
||||
def any(x,dim):
|
||||
return x.any_(dim).bool()
|
||||
jt.Var.any = any
|
||||
|
||||
|
||||
def repeat(x, *shape):
|
||||
r'''
|
||||
|
@ -47,10 +76,24 @@ def repeat(x, *shape):
|
|||
x = x.broadcast(x_shape)
|
||||
elif len_x_shape > len_shape:
|
||||
rep_shape = (len_x_shape - len_shape) * [1] + shape
|
||||
|
||||
reshape_shape = []
|
||||
broadcast_shape = []
|
||||
for x_s,r_s in zip(x_shape,rep_shape):
|
||||
reshape_shape.append(1)
|
||||
reshape_shape.append(x_s)
|
||||
|
||||
broadcast_shape.append(r_s)
|
||||
broadcast_shape.append(1)
|
||||
|
||||
x = x.reshape(reshape_shape)
|
||||
x = x.broadcast(broadcast_shape)
|
||||
|
||||
tar_shape = (np.array(x_shape) * np.array(rep_shape)).tolist()
|
||||
dims = []
|
||||
for i in range(len(tar_shape)): dims.append(f"i{i}%{x_shape[i]}")
|
||||
return x.reindex(tar_shape, dims)
|
||||
|
||||
x = x.reshape(tar_shape)
|
||||
return x
|
||||
|
||||
jt.Var.repeat = repeat
|
||||
|
||||
def chunk(x, chunks, dim=0):
|
||||
|
@ -166,15 +209,18 @@ def flip(x, dim=0):
|
|||
>>> x.flip(1)
|
||||
[[4 3 2 1]]
|
||||
'''
|
||||
assert isinstance(dim, int)
|
||||
if dim<0:
|
||||
dim+=x.ndim
|
||||
assert dim>=0 and dim<len(x.shape)
|
||||
if isinstance(dim, int):
|
||||
dim = [dim]
|
||||
for i in range(len(dim)):
|
||||
if dim[i]<0:
|
||||
dim[i] += x.ndim
|
||||
assert dim[i]>=0 and dim[i]<x.ndim
|
||||
dim = set(dim)
|
||||
|
||||
tar_dims = []
|
||||
for i in range(len(x.shape)):
|
||||
if i == dim:
|
||||
tar_dims.append(f"{x.shape[dim]-1}-i{i}")
|
||||
if i in dim:
|
||||
tar_dims.append(f"xshape{i}-1-i{i}")
|
||||
else:
|
||||
tar_dims.append(f"i{i}")
|
||||
return x.reindex(x.shape, tar_dims)
|
||||
|
@ -302,6 +348,25 @@ def make_grid(x, nrow=8, padding=2, normalize=False, range=None, scale_each=Fals
|
|||
[f"i1/{padding+h}*{nrow}+i2/{padding+w}", "i0",
|
||||
f"i1-i1/{padding+h}*{padding+h}-{padding}", f"i2-i2/{padding+w}*{padding+w}-{padding}"], overflow_value=pad_value)
|
||||
|
||||
def save_image(
|
||||
x,
|
||||
filepath,
|
||||
nrow: int = 8,
|
||||
padding: int = 2,
|
||||
normalize: bool = False,
|
||||
range = None,
|
||||
scale_each = False,
|
||||
pad_value = 0,
|
||||
format = None
|
||||
):
|
||||
from PIL import Image
|
||||
grid = make_grid(x, nrow=nrow, padding=padding, pad_value=pad_value,
|
||||
normalize=normalize, range=range, scale_each=scale_each)
|
||||
|
||||
ndarr = (grid*255+0.5).clamp(0, 255).permute(1, 2, 0).uint8().numpy()
|
||||
im = Image.fromarray(ndarr)
|
||||
im.save(filepath, format=format)
|
||||
|
||||
|
||||
def _ntuple(n):
|
||||
def parse(x):
|
||||
|
@ -326,9 +391,8 @@ def unique(x):
|
|||
'''
|
||||
x = x.reshape(-1)
|
||||
_,x = jt.argsort(x)
|
||||
index2 = [i for i in range(1,x.shape[0])]
|
||||
index1 = [i for i in range(x.shape[0]-1)]
|
||||
y = x[1:][x[index2] != x[index1]]
|
||||
index,= jt.index((x.shape[0],))
|
||||
y = x[1:][x[index[1:]] != x[index[:-1]]]
|
||||
x = jt.contrib.concat([x[:1],y],dim=0)
|
||||
return x
|
||||
|
||||
|
@ -401,12 +465,6 @@ def log2(x):
|
|||
|
||||
jt.Var.log2 = log2
|
||||
|
||||
def item(x):
|
||||
assert x.ndim==1 and x.shape[0]==1
|
||||
return x.numpy().item()
|
||||
|
||||
jt.Var.item = item
|
||||
|
||||
def meshgrid(*tensors):
|
||||
r'''
|
||||
Take N tensors, each of which can be 1-dimensional vector, and create N n-dimensional grids,
|
||||
|
|
|
@ -61,6 +61,7 @@ class AlexNet(nn.Module):
|
|||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
def alexnet(**kwargs):
|
||||
def alexnet(pretrained=False, **kwargs):
|
||||
model = AlexNet(**kwargs)
|
||||
if pretrained: model.load("jittorhub://alexnet.pkl")
|
||||
return model
|
||||
|
|
|
@ -21,7 +21,7 @@ def densenet121(pretrained=False, **kwargs):
|
|||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), **kwargs)
|
||||
assert not pretrained, "pretrained doesn't support now"
|
||||
if pretrained: model.load("jittorhub://densenet121.pkl")
|
||||
return model
|
||||
|
||||
def densenet161(pretrained=False, **kwargs):
|
||||
|
@ -32,7 +32,7 @@ def densenet161(pretrained=False, **kwargs):
|
|||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), **kwargs)
|
||||
assert not pretrained, "pretrained doesn't support now"
|
||||
if pretrained: model.load("jittorhub://densenet161.pkl")
|
||||
return model
|
||||
|
||||
def densenet169(pretrained=False, **kwargs):
|
||||
|
@ -43,7 +43,7 @@ def densenet169(pretrained=False, **kwargs):
|
|||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32), **kwargs)
|
||||
assert not pretrained, "pretrained doesn't support now"
|
||||
if pretrained: model.load("jittorhub://densenet169.pkl")
|
||||
return model
|
||||
|
||||
def densenet201(pretrained=False, **kwargs):
|
||||
|
@ -54,7 +54,7 @@ def densenet201(pretrained=False, **kwargs):
|
|||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32), **kwargs)
|
||||
assert not pretrained, "pretrained doesn't support now"
|
||||
if pretrained: model.load("jittorhub://densenet201.pkl")
|
||||
return model
|
||||
|
||||
|
||||
|
|
|
@ -12,8 +12,10 @@ from jittor import nn
|
|||
|
||||
__all__ = ['GoogLeNet', 'googlenet']
|
||||
|
||||
def googlenet(**kwargs):
|
||||
return GoogLeNet(**kwargs)
|
||||
def googlenet(pretrained=False, **kwargs):
|
||||
model = GoogLeNet(**kwargs)
|
||||
if pretrained: model.load("jittorhub://googlenet.pkl")
|
||||
return model
|
||||
|
||||
class GoogLeNet(nn.Module):
|
||||
""" GoogLeNet model architecture.
|
||||
|
|
|
@ -4,7 +4,9 @@ from jittor import nn
|
|||
__all__ = ['Inception3', 'inception_v3']
|
||||
|
||||
def inception_v3(pretrained=False, progress=True, **kwargs):
|
||||
return Inception3(**kwargs)
|
||||
model = Inception3(**kwargs)
|
||||
if pretrained: model.load("jittorhub://inception_v3.pkl")
|
||||
return model
|
||||
|
||||
class Inception3(nn.Module):
|
||||
""" Inceptionv3 model architecture.
|
||||
|
|
|
@ -90,18 +90,22 @@ class MNASNet(nn.Module):
|
|||
x = x.mean([2, 3])
|
||||
return self.classifier(x)
|
||||
|
||||
def mnasnet0_5(**kwargs):
|
||||
def mnasnet0_5(pretrained=False, **kwargs):
|
||||
model = MNASNet(0.5, **kwargs)
|
||||
if pretrained: model.load("jittorhub://mnasnet0_5.pkl")
|
||||
return model
|
||||
|
||||
def mnasnet0_75(**kwargs):
|
||||
def mnasnet0_75(pretrained=False, **kwargs):
|
||||
model = MNASNet(0.75, **kwargs)
|
||||
if pretrained: model.load("jittorhub://mnasnet0_75.pkl")
|
||||
return model
|
||||
|
||||
def mnasnet1_0(**kwargs):
|
||||
def mnasnet1_0(pretrained=False, **kwargs):
|
||||
model = MNASNet(1.0, **kwargs)
|
||||
if pretrained: model.load("jittorhub://mnasnet1_0.pkl")
|
||||
return model
|
||||
|
||||
def mnasnet1_3(**kwargs):
|
||||
def mnasnet1_3(pretrained=False, **kwargs):
|
||||
model = MNASNet(1.3, **kwargs)
|
||||
if pretrained: model.load("jittorhub://mnasnet1_3.pkl")
|
||||
return model
|
||||
|
|
|
@ -93,7 +93,8 @@ class MobileNetV2(nn.Module):
|
|||
def execute(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
def mobilenet_v2():
|
||||
def mobilenet_v2(pretrained=False):
|
||||
model = MobileNetV2()
|
||||
if pretrained: model.load("jittorhub://mobilenet_v2.pkl")
|
||||
return model
|
||||
|
||||
|
|
|
@ -175,10 +175,10 @@ class Res2Net(Module):
|
|||
x = self.layer4(x)
|
||||
return x, low_level_feat
|
||||
|
||||
def res2net50(output_stride):
|
||||
def res2net50(output_stride=16):
|
||||
model = Res2Net(Bottle2neck, [3,4,6,3], output_stride)
|
||||
return model
|
||||
|
||||
def res2net101(output_stride):
|
||||
def res2net101(output_stride=16):
|
||||
model = Res2Net(Bottle2neck, [3,4,23,3], output_stride)
|
||||
return model
|
||||
|
|
|
@ -143,7 +143,7 @@ class ResNet(nn.Module):
|
|||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
x = self.avgpool(x)
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = jt.reshape(x, (x.shape[0], -1))
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
|
@ -154,19 +154,26 @@ def _resnet(block, layers, **kwargs):
|
|||
model = ResNet(block, layers, **kwargs)
|
||||
return model
|
||||
|
||||
def Resnet18(**kwargs):
|
||||
return _resnet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
||||
def Resnet18(pretrained=False, **kwargs):
|
||||
model = _resnet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnet18.pkl")
|
||||
return model
|
||||
resnet18 = Resnet18
|
||||
|
||||
def Resnet34(**kwargs):
|
||||
return _resnet( BasicBlock, [3, 4, 6, 3], **kwargs)
|
||||
def Resnet34(pretrained=False, **kwargs):
|
||||
model = _resnet(BasicBlock, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnet34.pkl")
|
||||
return model
|
||||
resnet34 = Resnet34
|
||||
|
||||
def Resnet50(**kwargs):
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
def Resnet50(pretrained=False, **kwargs):
|
||||
model = _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnet50.pkl")
|
||||
return model
|
||||
|
||||
resnet50 = Resnet50
|
||||
|
||||
def Resnet101(**kwargs):
|
||||
def Resnet101(pretrained=False, **kwargs):
|
||||
"""
|
||||
ResNet-101 model architecture.
|
||||
|
||||
|
@ -180,28 +187,38 @@ def Resnet101(**kwargs):
|
|||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
resnet101 = Resnet101
|
||||
|
||||
def Resnet152(**kwargs):
|
||||
return _resnet(Bottleneck, [3, 8, 36, 3], **kwargs)
|
||||
def Resnet152(pretrained=False, **kwargs):
|
||||
model = _resnet(Bottleneck, [3, 8, 36, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnet152.pkl")
|
||||
return model
|
||||
resnet152 = Resnet152
|
||||
|
||||
def Resnext50_32x4d(**kwargs):
|
||||
def Resnext50_32x4d(pretrained=False, **kwargs):
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 4
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
model = _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnext50_32x4d.pkl")
|
||||
return model
|
||||
resnext50_32x4d = Resnext50_32x4d
|
||||
|
||||
def Resnext101_32x8d(**kwargs):
|
||||
def Resnext101_32x8d(pretrained=False, **kwargs):
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 8
|
||||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
model = _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://resnext101_32x8d.pkl")
|
||||
return model
|
||||
resnext101_32x8d = Resnext101_32x8d
|
||||
|
||||
def Wide_resnet50_2(**kwargs):
|
||||
def Wide_resnet50_2(pretrained=False, **kwargs):
|
||||
kwargs['width_per_group'] = (64 * 2)
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
model = _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://wide_resnet50_2.pkl")
|
||||
return model
|
||||
wide_resnet50_2 = Wide_resnet50_2
|
||||
|
||||
def Wide_resnet101_2(**kwargs):
|
||||
def Wide_resnet101_2(pretrained=False, **kwargs):
|
||||
kwargs['width_per_group'] = (64 * 2)
|
||||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
model = _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
if pretrained: model.load("jittorhub://wide_resnet101_2.pkl")
|
||||
return model
|
||||
wide_resnet101_2 = Wide_resnet101_2
|
|
@ -93,14 +93,22 @@ def _shufflenetv2(arch, *args):
|
|||
model = ShuffleNetV2(*args)
|
||||
return model
|
||||
|
||||
def shufflenet_v2_x0_5():
|
||||
return _shufflenetv2('shufflenetv2_x0.5', [4, 8, 4], [24, 48, 96, 192, 1024])
|
||||
def shufflenet_v2_x0_5(pretrained=False):
|
||||
model = _shufflenetv2('shufflenetv2_x0.5', [4, 8, 4], [24, 48, 96, 192, 1024])
|
||||
if pretrained: model.load("jittorhub://shufflenet_v2_x0_5.pkl")
|
||||
return model
|
||||
|
||||
def shufflenet_v2_x1_0():
|
||||
return _shufflenetv2('shufflenetv2_x1.0', [4, 8, 4], [24, 116, 232, 464, 1024])
|
||||
def shufflenet_v2_x1_0(pretrained=False):
|
||||
model = _shufflenetv2('shufflenetv2_x1.0', [4, 8, 4], [24, 116, 232, 464, 1024])
|
||||
if pretrained: model.load("jittorhub://shufflenet_v2_x1_0.pkl")
|
||||
return model
|
||||
|
||||
def shufflenet_v2_x1_5():
|
||||
return _shufflenetv2('shufflenetv2_x1.5', [4, 8, 4], [24, 176, 352, 704, 1024])
|
||||
def shufflenet_v2_x1_5(pretrained=False):
|
||||
model = _shufflenetv2('shufflenetv2_x1.5', [4, 8, 4], [24, 176, 352, 704, 1024])
|
||||
if pretrained: model.load("jittorhub://shufflenet_v2_x1_5.pkl")
|
||||
return model
|
||||
|
||||
def shufflenet_v2_x2_0():
|
||||
return _shufflenetv2('shufflenetv2_x2.0', [4, 8, 4], [24, 244, 488, 976, 2048])
|
||||
def shufflenet_v2_x2_0(pretrained=False):
|
||||
model = _shufflenetv2('shufflenetv2_x2.0', [4, 8, 4], [24, 244, 488, 976, 2048])
|
||||
if pretrained: model.load("jittorhub://shufflenet_v2_x2_0.pkl")
|
||||
return model
|
||||
|
|
|
@ -83,8 +83,12 @@ def _squeezenet(version, **kwargs):
|
|||
model = SqueezeNet(version, **kwargs)
|
||||
return model
|
||||
|
||||
def squeezenet1_0(**kwargs):
|
||||
return _squeezenet('1_0', **kwargs)
|
||||
def squeezenet1_0(pretrained=False, **kwargs):
|
||||
model = _squeezenet('1_0', **kwargs)
|
||||
if pretrained: model.load("jittorhub://squeezenet1_0.pkl")
|
||||
return model
|
||||
|
||||
def squeezenet1_1(**kwargs):
|
||||
return _squeezenet('1_1', **kwargs)
|
||||
def squeezenet1_1(pretrained=False, **kwargs):
|
||||
model = _squeezenet('1_1', **kwargs)
|
||||
if pretrained: model.load("jittorhub://squeezenet1_1.pkl")
|
||||
return model
|
||||
|
|
|
@ -67,33 +67,49 @@ def _vgg(arch, cfg, batch_norm, **kwargs):
|
|||
return model
|
||||
|
||||
|
||||
def vgg11(**kwargs):
|
||||
return _vgg('vgg11', 'A', False, **kwargs)
|
||||
def vgg11(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg11', 'A', False, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg11.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg11_bn(**kwargs):
|
||||
return _vgg('vgg11_bn', 'A', True, **kwargs)
|
||||
def vgg11_bn(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg11_bn', 'A', True, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg11_bn.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg13(**kwargs):
|
||||
return _vgg('vgg13', 'B', False, **kwargs)
|
||||
def vgg13(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg13', 'B', False, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg13.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg13_bn(**kwargs):
|
||||
return _vgg('vgg13_bn', 'B', True, **kwargs)
|
||||
def vgg13_bn(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg13_bn', 'B', True, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg13_bn.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg16(**kwargs):
|
||||
return _vgg('vgg16', 'D', False, **kwargs)
|
||||
def vgg16(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg16', 'D', False, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg16.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg16_bn(**kwargs):
|
||||
return _vgg('vgg16_bn', 'D', True, **kwargs)
|
||||
def vgg16_bn(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg16_bn', 'D', True, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg16_bn.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg19(**kwargs):
|
||||
return _vgg('vgg19', 'E', False, **kwargs)
|
||||
def vgg19(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg19', 'E', False, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg19.pkl")
|
||||
return model
|
||||
|
||||
|
||||
def vgg19_bn(**kwargs):
|
||||
return _vgg('vgg19_bn', 'E', True, **kwargs)
|
||||
def vgg19_bn(pretrained=False, **kwargs):
|
||||
model = _vgg('vgg19_bn', 'E', True, **kwargs)
|
||||
if pretrained: model.load("jittorhub://vgg19_bn.pkl")
|
||||
return model
|
|
@ -151,9 +151,9 @@ jt.Var.__imatmul__ = lambda a,b: a.assign(matmul(a,b))
|
|||
def get_init_var_rand(shape, dtype):
|
||||
return jt.array(np.random.normal(0.0, 1.0, shape).astype(np.float32))
|
||||
|
||||
def relu(x): return jt.maximum(x, 0)
|
||||
def relu(x): return jt.ternary((x>0.0), x, jt.broadcast_var(0.0, x))
|
||||
def leaky_relu(x, scale=0.01): return jt.ternary(x>0, x, x*scale)
|
||||
def relu6(x): return jt.minimum(jt.maximum(x, 0), 6)
|
||||
def relu6(x): return jt.minimum(jt.maximum(x, 0.0), 6.0)
|
||||
def sign(x):
|
||||
one = jt.ones(x.shape)
|
||||
x = jt.ternary(x>0, one, x)
|
||||
|
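A quick numeric check of the reworked activations above (`relu` via `ternary`, float literals in `relu6`), using only functions defined in this file:

```python
# Expected outputs follow directly from the formulas above.
import jittor as jt
from jittor import nn

x = jt.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(nn.relu(x).data)          # [0.  0.  0.  0.5 2. ]
print(nn.leaky_relu(x).data)    # [-0.02  -0.005  0.  0.5  2. ]
print(nn.relu6(x * 4).data)     # [0. 0. 0. 2. 6.]
```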
@ -264,17 +264,29 @@ class L1Loss(Module):
|
|||
def execute(self, output, target):
|
||||
return l1_loss(output, target)
|
||||
|
||||
class BCEWithLogitsLoss(Module):
|
||||
def __init__(self, weight=None, size_average=True):
|
||||
self.sigmoid = Sigmoid()
|
||||
self.bce = BCELoss(weight, size_average)
|
||||
def execute(self, output, target):
|
||||
output = self.sigmoid(output)
|
||||
output = self.bce(output, target)
|
||||
return output
|
||||
def binary_cross_entropy_with_logits(output, target, weight=None, pos_weight=None, size_average=True):
|
||||
max_val = jt.clamp(-output,min_v=0)
|
||||
if pos_weight is not None:
|
||||
log_weight = (pos_weight-1)*target + 1
|
||||
loss = (1-target)*output+(log_weight*(((-max_val).exp()+(-output - max_val).exp()).log()+max_val))
|
||||
else:
|
||||
loss = (1-target)*output+max_val+((-max_val).exp()+(-output -max_val).exp()).log()
|
||||
if weight is not None:
|
||||
loss *=weight
|
||||
|
||||
def binary_cross_entropy_with_logits(input, target, weight=None, size_average=True):
|
||||
return BCEWithLogitsLoss(weight, size_average)(input, target)
|
||||
if size_average:
|
||||
return loss.mean()
|
||||
else:
|
||||
return loss.sum()
|
||||
|
||||
class BCEWithLogitsLoss(Module):
|
||||
def __init__(self, weight=None, pos_weight=None, size_average=True):
|
||||
self.pos_weight = pos_weight
|
||||
self.weight = weight
|
||||
self.size_average = size_average
|
||||
|
||||
def execute(self, output, target):
|
||||
return binary_cross_entropy_with_logits(output,target,self.weight,self.pos_weight,self.size_average)
|
||||
|
||||
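The rewritten loss never materializes `sigmoid(output)`; it uses the log-sum-exp rearrangement `(1-t)*x + m + log(exp(-m) + exp(-x-m))` with `m = max(-x, 0)`, which stays finite for large logits. A NumPy-only sketch of the same identity:

```python
# Reference check of the stable BCE-with-logits formula used above.
import numpy as np

def bce_with_logits(x, t):
    m = np.maximum(-x, 0)                                    # max_val above
    return ((1 - t) * x + m + np.log(np.exp(-m) + np.exp(-x - m))).mean()

def bce_naive(x, t):
    p = 1 / (1 + np.exp(-x))
    return -(t * np.log(p) + (1 - t) * np.log(1 - p)).mean()

x = np.array([-3.0, 0.0, 4.0]); t = np.array([1.0, 0.0, 1.0])
print(bce_with_logits(x, t), bce_naive(x, t))                # agree for moderate logits
print(bce_with_logits(np.array([1000.0]), np.array([0.0])))  # 1000.0, no overflow
```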
def softmax(x, dim = None):
|
||||
if dim is None:
|
||||
|
@ -340,82 +352,39 @@ class BatchNorm(Module):
|
|||
self.eps = eps
|
||||
self.momentum = momentum
|
||||
self.affine = affine
|
||||
if affine:
|
||||
self.weight = init.constant((num_features,), "float32", 1.0)
|
||||
self.bias = init.constant((num_features,), "float32", 0.0)
|
||||
self.weight = init.constant((num_features,), "float32", 1.0) if affine else 1.0
|
||||
self.bias = init.constant((num_features,), "float32", 0.0) if affine else 0.0
|
||||
self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
|
||||
self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
|
||||
|
||||
def execute(self, x):
|
||||
dims = [0]+list(range(2,x.ndim))
|
||||
if self.is_train:
|
||||
xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
|
||||
xmean = jt.mean(x, dims=dims)
|
||||
x2mean = jt.mean(x*x, dims=dims)
|
||||
if self.sync and jt.in_mpi:
|
||||
xmean = xmean.mpi_all_reduce("mean")
|
||||
x2mean = x2mean.mpi_all_reduce("mean")
|
||||
|
||||
xvar = x2mean-xmean*xmean
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
xvar = (x2mean-xmean*xmean).maximum(0.0)
|
||||
w = self.weight / jt.sqrt(xvar+self.eps)
|
||||
b = self.bias - xmean * w
|
||||
norm_x = x * w.broadcast(x, dims) + b.broadcast(x, dims)
|
||||
|
||||
self.running_mean.update(self.running_mean +
|
||||
(xmean.reshape((-1,)) - self.running_mean) * self.momentum)
|
||||
self.running_var.update(self.running_var +
|
||||
(xvar.reshape((-1,))-self.running_var)*self.momentum)
|
||||
else:
|
||||
running_mean = self.running_mean.broadcast(x, [0,2,3])
|
||||
running_var = self.running_var.broadcast(x, [0,2,3])
|
||||
norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
|
||||
if not self.affine:
|
||||
return norm_x
|
||||
w = self.weight.broadcast(x, [0,2,3])
|
||||
b = self.bias.broadcast(x, [0,2,3])
|
||||
return norm_x * w + b
|
||||
|
||||
BatchNorm2d = BatchNorm
|
||||
|
||||
class BatchNorm1d(Module):
|
||||
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, is_train=True, sync=True):
|
||||
self.sync = sync
|
||||
self.num_features = num_features
|
||||
self.is_train = is_train
|
||||
self.eps = eps
|
||||
self.momentum = momentum
|
||||
self.affine = affine
|
||||
if affine:
|
||||
self.weight = init.constant((num_features,), "float32", 1.0)
|
||||
self.bias = init.constant((num_features,), "float32", 0.0)
|
||||
self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
|
||||
self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
|
||||
|
||||
def execute(self, x):
|
||||
if len(x.shape) == 3:
|
||||
dims = [0, 2]
|
||||
else:
|
||||
dims = [0]
|
||||
if self.is_train:
|
||||
xmean = jt.mean(x, dims=dims, keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=dims, keepdims=1)
|
||||
|
||||
if self.sync and jt.in_mpi:
|
||||
xmean = xmean.mpi_all_reduce("mean")
|
||||
x2mean = x2mean.mpi_all_reduce("mean")
|
||||
|
||||
xvar = x2mean-xmean*xmean
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
self.running_mean.update(self.running_mean +
|
||||
(xmean.sum(dims)-self.running_mean)*self.momentum)
|
||||
self.running_var.update(self.running_var +
|
||||
(xvar.sum(dims)-self.running_var)*self.momentum)
|
||||
else:
|
||||
running_mean = self.running_mean.broadcast(x, dims)
|
||||
running_var = self.running_var.broadcast(x, dims)
|
||||
norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
|
||||
if not self.affine:
|
||||
w = self.weight / jt.sqrt(self.running_var+self.eps)
|
||||
b = self.bias - self.running_mean * w
|
||||
norm_x = x * w.broadcast(x, dims) + b.broadcast(x, dims)
|
||||
return norm_x
|
||||
w = self.weight.broadcast(x, dims)
|
||||
b = self.bias.broadcast(x, dims)
|
||||
return norm_x * w + b
|
||||
|
||||
class InstanceNorm2d(Module):
|
||||
BatchNorm2d = BatchNorm1d = BatchNorm
|
||||
|
||||
class InstanceNorm(Module):
|
||||
def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, is_train=True, sync=True):
|
||||
self.sync = sync
|
||||
self.num_features = num_features
|
||||
|
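The normalization layers above are refactored to fold the affine transform into one scale and shift: with `w = weight / sqrt(var + eps)` and `b = bias - mean * w`, the expression `x * w + b` equals the classic `(x - mean) / sqrt(var + eps) * weight + bias`. A NumPy check of that identity:

```python
# The fused scale/shift used by the rewritten norm layers is an exact rewrite.
import numpy as np

x = np.random.rand(8, 4).astype("float32")
mean, var = x.mean(0), x.var(0)
weight, bias, eps = np.random.rand(4), np.random.rand(4), 1e-5

classic = (x - mean) / np.sqrt(var + eps) * weight + bias
w = weight / np.sqrt(var + eps)
b = bias - mean * w
print(np.abs(classic - (x * w + b)).max())   # ~1e-7: identical up to rounding
```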
@ -424,47 +393,43 @@ class InstanceNorm2d(Module):
|
|||
self.momentum = momentum
|
||||
|
||||
self.affine = affine
|
||||
if self.affine:
|
||||
self.weight = init.constant((num_features,), "float32", 1.0)
|
||||
self.bias = init.constant((num_features,), "float32", 0.0)
|
||||
self.weight = init.constant((num_features,), "float32", 1.0) if affine else 1.0
|
||||
self.bias = init.constant((num_features,), "float32", 0.0) if affine else 0.0
|
||||
|
||||
def execute(self, x):
|
||||
xmean = jt.mean(x, dims=[2,3], keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=[2,3], keepdims=1)
|
||||
if self.sync and jt.in_mpi:
|
||||
xmean = xmean.mpi_all_reduce("mean")
|
||||
x2mean = x2mean.mpi_all_reduce("mean")
|
||||
dims = list(range(2,x.ndim))
|
||||
xmean = jt.mean(x, dims=dims)
|
||||
x2mean = jt.mean(x*x, dims=dims)
|
||||
|
||||
xvar = jt.maximum(x2mean-xmean*xmean, 0)
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
if not self.affine:
|
||||
return norm_x
|
||||
w = self.weight.broadcast(x, [0,2,3])
|
||||
b = self.bias.broadcast(x, [0,2,3])
|
||||
return norm_x * w + b
|
||||
xvar = (x2mean-xmean*xmean).maximum(0.0)
|
||||
w = self.weight / jt.sqrt(xvar+self.eps)
|
||||
b = self.bias - xmean * w
|
||||
return x * w.broadcast(x, dims) + b.broadcast(x, dims)
|
||||
|
||||
InstanceNorm2d = InstanceNorm1d = InstanceNorm
|
||||
|
||||
class LayerNorm(Module):
|
||||
def __init__(self, normalized_shape, eps: float = 1e-5, elementwise_affine: bool = True) -> None:
|
||||
super(LayerNorm, self).__init__()
|
||||
if isinstance(normalized_shape, int):
|
||||
normalized_shape = (normalized_shape,)
|
||||
self.normalized_shape = tuple(normalized_shape)
|
||||
self.eps = eps
|
||||
self.elementwise_affine = elementwise_affine
|
||||
if self.elementwise_affine:
|
||||
self.weight = init.constant(normalized_shape, "float32", 1.0)
|
||||
self.bias = init.constant(normalized_shape, "float32", 0.0)
|
||||
self.weight = init.constant(normalized_shape, "float32", 1.0) if elementwise_affine else 1.0
|
||||
self.bias = init.constant(normalized_shape, "float32", 0.0) if elementwise_affine else 0.0
|
||||
|
||||
def execute(self,x):
|
||||
def execute(self, x):
|
||||
dims = [-i for i in range(len(self.normalized_shape), 0, -1)]
|
||||
mean = jt.mean(x,dims=dims,keepdims=1)
|
||||
numerator = x-mean
|
||||
variance = jt.mean(numerator.sqr(),dims=dims,keepdims=1)
|
||||
denominator = jt.sqrt(variance+self.eps)
|
||||
norm_x = numerator/denominator
|
||||
if self.elementwise_affine:
|
||||
norm_x = norm_x * self.weight+self.bias
|
||||
return norm_x
|
||||
xmean = jt.mean(x, dims=dims, keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=dims, keepdims=1)
|
||||
|
||||
xvar = (x2mean-xmean*xmean).maximum(0.0)
|
||||
w = self.weight / jt.sqrt(xvar+self.eps)
|
||||
b = self.bias - xmean * w
|
||||
return x * w + b
|
||||
|
||||
|
||||
LayerNorm2d = LayerNorm1d = LayerNorm
|
||||
|
||||
class GroupNorm(Module):
|
||||
def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, is_train=True):
|
||||
|
@ -473,28 +438,32 @@ class GroupNorm(Module):
|
|||
self.eps = eps
|
||||
|
||||
self.affine = affine
|
||||
if self.affine:
|
||||
self.weight = init.constant((num_channels,), "float32", 1.0)
|
||||
self.bias = init.constant((num_channels,), "float32", 0.0)
|
||||
self.weight = init.constant((num_channels,), "float32", 1.0) if affine else 1.0
|
||||
self.bias = init.constant((num_channels,), "float32", 0.0) if affine else 0.0
|
||||
|
||||
def execute(self, x):
|
||||
N = x.shape[0]
|
||||
C = self.num_channels
|
||||
output_shape = (N,-1)
|
||||
# TODO: 3d group norm
|
||||
# TODO: 3d group norm
|
||||
if x.ndim==4:
|
||||
output_shape = x.shape
|
||||
assert C % self.num_groups == 0
|
||||
x = x.reshape((N, self.num_groups, int(C/self.num_groups), -1))
|
||||
xmean = jt.mean(x, dims=[2,3], keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=[2,3], keepdims=1)
|
||||
xvar = jt.maximum(x2mean-xmean*xmean, 0)
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
if not self.affine:
|
||||
return norm_x.reshape(output_shape)
|
||||
w = self.weight.reshape((1,self.num_groups,C//self.num_groups,1))
|
||||
b = self.bias.reshape((1,self.num_groups,C//self.num_groups,1))
|
||||
return (norm_x * w + b).reshape(output_shape)
|
||||
x = x.reshape((N, self.num_groups, C//self.num_groups, -1))
|
||||
xmean = jt.mean(x, dims=[2,3]).reshape((N, self.num_groups, 1))
|
||||
x2mean = jt.mean(x*x, dims=[2,3]).reshape((N, self.num_groups, 1))
|
||||
xvar = (x2mean-xmean*xmean).maximum(0.0)
|
||||
|
||||
if self.affine:
|
||||
w = self.weight.reshape((1, self.num_groups, -1))
|
||||
b = self.bias.reshape((1, self.num_groups, -1))
|
||||
else:
|
||||
w = 1
|
||||
b = 0
|
||||
w = w / jt.sqrt(xvar+self.eps)
|
||||
b = b - xmean * w
|
||||
x = x * w.broadcast(x, [3]) + b.broadcast(x, [3])
|
||||
return x.reshape(output_shape)
|
||||
|
||||
Relu = jt.make_module(relu)
|
||||
ReLU = Relu
|
||||
|
@ -504,6 +473,8 @@ ReLU6 = jt.make_module(relu6)
|
|||
Softmax = jt.make_module(softmax, 2)
|
||||
GELU = jt.make_module(gelu)
|
||||
|
||||
from jittor.depthwise_conv import DepthwiseConv
|
||||
|
||||
class Conv(Module):
|
||||
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
|
||||
self.in_channels = in_channels
|
||||
|
@ -513,6 +484,9 @@ class Conv(Module):
|
|||
self.padding = padding if isinstance(padding, tuple) else (padding, padding)
|
||||
self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
|
||||
self.groups = groups
|
||||
self.is_depthwise_conv = self.groups == self.out_channels and self.groups == self.in_channels
|
||||
if self.is_depthwise_conv and jt.flags.use_cuda:
|
||||
self.depthwise_conv = DepthwiseConv(stride, padding, dilation)
|
||||
assert in_channels % groups == 0, 'in_channels must be divisible by groups'
|
||||
assert out_channels % groups == 0, 'out_channels must be divisible by groups'
|
||||
Kh, Kw = self.kernel_size
|
||||
|
@ -532,7 +506,13 @@ class Conv(Module):
|
|||
self.bias = None
|
||||
|
||||
def execute(self, x):
|
||||
if self.groups == 1:
|
||||
if self.is_depthwise_conv and jt.flags.use_cuda:
|
||||
y = self.depthwise_conv(x, self.weight)
|
||||
if self.bias is not None:
|
||||
b = self.bias.broadcast(y.shape, [0,2,3])
|
||||
y = y + b
|
||||
return y
|
||||
elif self.groups == 1:
|
||||
N,C,H,W = x.shape
|
||||
Kh, Kw = self.kernel_size
|
||||
assert C==self.in_channels
|
||||
|
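The new branch above only fires when `groups == in_channels == out_channels` and CUDA is on, i.e. a depthwise convolution where each channel is filtered by its own kernel; other group counts still go through the reindex-based path. A usage sketch:

```python
# A convolution that takes the depthwise fast path when CUDA is available.
import jittor as jt
from jittor import nn

jt.flags.use_cuda = 1 if jt.has_cuda else 0
conv = nn.Conv(32, 32, 3, padding=1, groups=32)   # groups == in == out -> depthwise
y = conv(jt.random((4, 32, 56, 56)))
print(y.shape)                                     # [4, 32, 56, 56]
```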
@ -566,7 +546,6 @@ class Conv(Module):
|
|||
f'i4*{self.stride[0]}-{self.padding[0]}+i6*{self.dilation[0]}', # Hid+Khid
|
||||
f'i5*{self.stride[1]}-{self.padding[1]}+i7*{self.dilation[1]}', # Wid+KWid
|
||||
])
|
||||
xx.compile_options = {"G":G}
|
||||
# w: [oc, CpG, Kh, Kw]
|
||||
ww = self.weight.reindex([N, G, oc//G, CpG, oh, ow, Kh, Kw], [
|
||||
f'i1*{oc//G}+i2',
|
||||
|
@ -574,6 +553,7 @@ class Conv(Module):
|
|||
'i6',
|
||||
'i7'
|
||||
])
|
||||
ww.compile_options = xx.compile_options = {"G":G,"C":C}
|
||||
yy = xx*ww
|
||||
y = yy.reindex_reduce('add', [N, oc, oh, ow], [
|
||||
'i0',
|
||||
|
@ -727,6 +707,45 @@ class ConvTranspose(Module):
|
|||
y = y + b
|
||||
return y
|
||||
|
||||
def conv_transpose(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1):
|
||||
x = input
|
||||
N,C,H,W = x.shape
|
||||
i,o,h,w = weight.shape
|
||||
assert C==i
|
||||
assert groups==1, "Group conv not supported yet."
|
||||
stride = stride if isinstance(stride, tuple) else (stride, stride)
|
||||
dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
|
||||
# added
|
||||
padding = padding if isinstance(padding, tuple) else (padding, padding)
|
||||
output_padding = output_padding if isinstance (output_padding, tuple) else (output_padding, output_padding)
|
||||
assert output_padding[0] < max(stride[0], dilation[0]) and \
|
||||
output_padding[1] < max(stride[1], dilation[1]), \
|
||||
"output padding must be smaller than max(stride, dilation)"
|
||||
|
||||
stride_h, stride_w = stride
|
||||
padding_h, padding_w = padding
|
||||
dilation_h, dilation_w = dilation
|
||||
|
||||
h_out = (H-1) * stride_h + output_padding[0] - 2*padding_h + 1 + (h-1)*dilation_h
|
||||
w_out = (W-1) * stride_w + output_padding[1] - 2*padding_w + 1 + (w-1)*dilation_w
|
||||
out_shape = (N, o, h_out, w_out)
|
||||
shape = (N, i, o, H, W, h, w)
|
||||
xx = x.broadcast(shape, (2, 5, 6)) # i,h,w
|
||||
ww = weight.broadcast(shape, (0, 3, 4)) # N,H,W
|
||||
y = (ww*xx).reindex_reduce("add", out_shape, [
|
||||
'i0', # N
|
||||
'i2', # o
|
||||
f'i3*{stride_h}-{padding_h}+i5*{dilation_h}', # Hid+Khid
|
||||
f'i4*{stride_w}-{padding_w}+i6*{dilation_w}', # Wid+KWid
|
||||
])
|
||||
if isinstance(bias, jt.Var):
|
||||
b = bias.broadcast(y.shape, [0,2,3])
|
||||
y = y + b
|
||||
else:
|
||||
assert not bias, "Bias should be none or jittor var"
|
||||
return y
|
||||
|
||||
conv_transpose2d = conv_transpose
|
||||
|
||||
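The functional form mirrors the `ConvTranspose` module; its output size follows `h_out = (H-1)*stride - 2*padding + output_padding + 1 + (kh-1)*dilation` (and likewise for the width). A quick shape check:

```python
# Shape check for the functional transposed convolution defined above.
import jittor as jt
from jittor import nn

x = jt.random((2, 5, 10, 10))    # N, C_in, H, W
w = jt.random((5, 6, 3, 3))      # C_in, C_out, kh, kw
y = nn.conv_transpose2d(x, w, stride=2, padding=1, output_padding=1)
print(y.shape)                   # (10-1)*2 - 2 + 1 + 1 + 2 = 20 -> [2, 6, 20, 20]
```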
def pad(x,padding, mode='constant', value=0):
|
||||
assert mode in ['constant','replicate','reflect','circular'],'only support constant,replicate,reflect,circular pad'
|
||||
|
|
|
@ -33,6 +33,9 @@ class Optimizer(object):
|
|||
assert isinstance(pg, dict)
|
||||
self.param_groups.append(pg)
|
||||
self.n_step = 0
|
||||
|
||||
def add_param_group(self, group):
|
||||
self.param_groups.append(group)
|
||||
|
||||
@property
|
||||
def defaults(self):
|
||||
|
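`add_param_group` lets parameters join an existing optimizer with their own hyper-parameters. A sketch, assuming the same group-dict layout (`{"params": ..., "lr": ...}`) the constructor already accepts:

```python
# Sketch of add_param_group: a second group with its own learning rate.
# The {"params": ..., "lr": ...} dict layout is assumed from the constructor.
import jittor as jt
from jittor import nn, optim

backbone, head = nn.Linear(8, 8), nn.Linear(8, 2)
opt = optim.SGD(backbone.parameters(), lr=0.01)
opt.add_param_group({"params": head.parameters(), "lr": 0.1})

loss = head(backbone(jt.random((4, 8)))).sqr().mean()
opt.step(loss)
print(len(opt.param_groups))   # 2
```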
@ -210,3 +213,64 @@ class Adam(Optimizer):
|
|||
v.update(b1 * v + (1-b1) * g * g)
|
||||
step_size = lr * jt.sqrt(1-b1**n) / (1-b0 ** n)
|
||||
p.update(p - m * step_size / (jt.sqrt(v) + eps))
|
||||
|
||||
|
||||
class LRScheduler:
|
||||
def __init__(self,optimizer, last_epoch=-1):
|
||||
assert isinstance(optimizer,Optimizer)
|
||||
self.optimizer = optimizer
|
||||
|
||||
if last_epoch==-1:
|
||||
for gp in optimizer.param_groups:
|
||||
gp.setdefault('initial_lr',gp.get('lr',optimizer.lr))
|
||||
else:
|
||||
for gp in optimizer.param_groups:
|
||||
assert 'initial_lr' in gp
|
||||
|
||||
self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
|
||||
self.last_epoch = last_epoch
|
||||
self.optimizer._step_count = 0
|
||||
self._step_count = 0
|
||||
self.step()
|
||||
|
||||
def get_lr(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def get_last_lr(self):
|
||||
return self._last_lr
|
||||
|
||||
def step(self,epoch=None):
|
||||
self._step_count += 1
|
||||
|
||||
if epoch is None:
|
||||
self.last_epoch += 1
|
||||
values = self.get_lr()
|
||||
else:
|
||||
self.last_epoch = epoch
|
||||
values = self.get_lr()
|
||||
|
||||
for i, data in enumerate(zip(self.optimizer.param_groups, values)):
|
||||
param_group, lr = data
|
||||
param_group['lr'] = lr
|
||||
|
||||
self._last_lr = [group['lr'] for group in self.optimizer.param_groups]
|
||||
|
||||
|
||||
class LambdaLR(LRScheduler):
|
||||
|
||||
def __init__(self, optimizer, lr_lambda, last_epoch=-1):
|
||||
if not isinstance(lr_lambda, list) and not isinstance(lr_lambda, tuple):
|
||||
self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
|
||||
else:
|
||||
if len(lr_lambda) != len(optimizer.param_groups):
|
||||
raise ValueError("Expected {} lr_lambdas, but got {}".format(len(optimizer.param_groups), len(lr_lambda)))
|
||||
|
||||
self.lr_lambdas = list(lr_lambda)
|
||||
|
||||
super(LambdaLR, self).__init__(optimizer, last_epoch)
|
||||
|
||||
|
||||
|
||||
def get_lr(self):
|
||||
return [base_lr * lmbda(self.last_epoch)
|
||||
for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)]
|
|
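A minimal sketch of driving the new scheduler; the import location is assumed to be the module this hunk patches (alongside `SGD`/`Adam`), which may differ from where the packaged release exposes it:

```python
# Minimal LambdaLR sketch; the import path is an assumption.
import jittor as jt
from jittor import nn, optim

model = nn.Linear(10, 2)
opt = optim.SGD(model.parameters(), lr=0.1)
sched = optim.LambdaLR(opt, lr_lambda=lambda epoch: 0.95 ** epoch)

for epoch in range(3):
    loss = model(jt.random((4, 10))).sqr().mean()
    opt.step(loss)
    sched.step()
    print(epoch, sched.get_last_lr())   # lr decays by 5% per epoch
```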
@ -40,6 +40,7 @@ class Pool(Module):
|
|||
count = f"int count = {self.kernel_size*self.kernel_size};"
|
||||
else:
|
||||
count = "int count = (k2_ - k2) * (k3_ - k3);"
|
||||
count += "float32 rcount = 1.0f / count;"
|
||||
else:
|
||||
count = ""
|
||||
forward_body = f'''{{
|
||||
|
@ -168,7 +169,9 @@ class AdaptiveAvgPool2d(Module):
|
|||
oh = x.shape[2] if self.output_size[0] is None else self.output_size[0]
|
||||
ow = x.shape[3] if self.output_size[1] is None else self.output_size[1]
|
||||
else:
|
||||
raise TypeError(f"AdaptiveAvgPool2d only support int, typle or list input. Not support {type(self.output_size)} yet.")
|
||||
raise TypeError(f"AdaptiveAvgPool2d only support int, tuple or list input. Not support {type(self.output_size)} yet.")
|
||||
if oh == 1 and ow == 1:
|
||||
return x.reduce("mean", [2,3], keepdims=True)
|
||||
N,C,H,W = x.shape
|
||||
self.sh = math.floor(H / oh)
|
||||
self.sw = math.floor(W / ow)
|
||||
|
|
|
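With the fix above, asking `AdaptiveAvgPool2d` for a 1x1 output short-circuits to a plain mean over the spatial axes:

```python
# The 1x1 fast path added above is just a global mean over H and W.
import numpy as np
import jittor as jt
from jittor import nn

x = jt.random((2, 3, 7, 9))
y = nn.AdaptiveAvgPool2d(1)(x)       # reduce("mean", [2,3], keepdims=True)
print(y.shape)                        # [2, 3, 1, 1]
print(np.allclose(y.data.squeeze(), x.data.mean(axis=(2, 3))))   # True
```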
@ -0,0 +1,221 @@
|
|||
import sys, os
|
||||
|
||||
suffix = ""
|
||||
|
||||
import jittor as jt
|
||||
import time
|
||||
from pathlib import Path
|
||||
home_path = str(Path.home())
|
||||
perf_path = os.path.join(home_path, ".cache", "jittor_perf")
|
||||
|
||||
def main():
|
||||
os.makedirs(perf_path+"/src/jittor", exist_ok=True)
|
||||
os.makedirs(perf_path+"/src/jittor_utils", exist_ok=True)
|
||||
os.system(f"cp -rL {jt.flags.jittor_path} {perf_path+'/src/'}")
|
||||
os.system(f"cp -rL {jt.flags.jittor_path}/../jittor_utils {perf_path+'/src/'}")
|
||||
use_torch_1_4 = os.environ.get("use_torch_1_4", "0") == "1"
|
||||
dockerfile_src = r"""
|
||||
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install \
|
||||
pybind11 \
|
||||
numpy \
|
||||
tqdm \
|
||||
pillow \
|
||||
astunparse
|
||||
|
||||
RUN pip3 install torch torchvision
|
||||
"""
|
||||
global suffix
|
||||
if use_torch_1_4:
|
||||
suffix = "_1_4"
|
||||
dockerfile_src = dockerfile_src.replace("torch ", "torch==1.4.0 ")
|
||||
dockerfile_src = dockerfile_src.replace("torchvision", "torchvision==0.5.0")
|
||||
with open("/tmp/perf_dockerfile", 'w') as f:
|
||||
f.write(dockerfile_src)
|
||||
assert os.system("sudo nvidia-smi -lgc 1500") == 0
|
||||
assert os.system(f"sudo docker build --tag jittor/jittor-perf{suffix} -f /tmp/perf_dockerfile .") == 0
|
||||
# run once for compile source
|
||||
jt_fps = test_main("jittor", "resnet50", 1)
|
||||
|
||||
logs = ""
|
||||
# resnext50_32x4d with bs=8 cannot pass this test
|
||||
#### inference test
|
||||
for model_name in ["resnet50", "wide_resnet50_2", # "resnext50_32x4d",
|
||||
"resnet152", "wide_resnet101_2", "resnext101_32x8d",
|
||||
"alexnet", "vgg11", "squeezenet1_1", "mobilenet_v2",
|
||||
"densenet121", "densenet169", "densenet201",
|
||||
"res2net50", "res2net101"]:
|
||||
for bs in [1, 2, 4, 8, 16, 32, 64, 128]:
|
||||
jt_fps = test_main("jittor", model_name, bs)
|
||||
logs += f"jittor-{model_name}-{bs} {jt_fps}\n"
|
||||
tc_fps = test_main("torch", model_name, bs)
|
||||
logs += f"torch-{model_name}-{bs} {tc_fps}\n"
|
||||
logs += f"compare-{model_name}-{bs} {jt_fps/tc_fps}\n"
|
||||
print(logs)
|
||||
#### train test
|
||||
for model_name in ["train_resnet50", "train_resnet101"
|
||||
]:
|
||||
for bs in [1, 2, 4, 8, 16, 32, 64, 128]:
|
||||
jt_fps = test_main("jittor", model_name, bs)
|
||||
logs += f"jittor-{model_name}-{bs} {jt_fps}\n"
|
||||
tc_fps = test_main("torch", model_name, bs)
|
||||
logs += f"torch-{model_name}-{bs} {tc_fps}\n"
|
||||
logs += f"compare-{model_name}-{bs} {jt_fps/tc_fps}\n"
|
||||
print(logs)
|
||||
with open(f"{perf_path}/jittor-perf{suffix}-latest.txt", "w") as f:
|
||||
f.write(logs)
|
||||
from datetime import datetime
|
||||
with open(f"{perf_path}/jittor-perf{suffix}-{datetime.now()}.txt", "w") as f:
|
||||
f.write(logs)
|
||||
|
||||
def test_main(name, model_name, bs):
|
||||
cmd = f"sudo docker run --gpus all --rm -v {perf_path}:/root/.cache/jittor --network host jittor/jittor-perf{suffix} bash -c 'PYTHONPATH=/root/.cache/jittor/src python3.7 /root/.cache/jittor/src/jittor/test/perf/perf.py {name} {model_name} {bs}'"
|
||||
fps = -1
|
||||
try:
|
||||
print("run cmd:", cmd)
|
||||
if os.system(cmd) == 0:
|
||||
with open(f"{perf_path}/{name}-{model_name}-{bs}.txt", 'r') as f:
|
||||
fps = float(f.read().split()[3])
|
||||
except:
|
||||
pass
|
||||
return fps
|
||||
|
||||
def time_iter(duration=2, min_iter=5):
|
||||
start = time.time()
|
||||
for i in range(10000000):
|
||||
yield i
|
||||
end = time.time()
|
||||
if end-start>duration and i>=min_iter:
|
||||
return
|
||||
|
||||
def test(name, model_name, bs):
|
||||
print("hello", name, model_name, bs)
|
||||
import numpy as np
|
||||
import time
|
||||
is_train = False
|
||||
_model_name = model_name
|
||||
if model_name.startswith("train_"):
|
||||
is_train = True
|
||||
model_name = model_name[6:]
|
||||
if name == "torch":
|
||||
import torch
|
||||
import torchvision.models as tcmodels
|
||||
from torch import optim
|
||||
from torch import nn
|
||||
torch.backends.cudnn.deterministic = False
|
||||
torch.backends.cudnn.benchmark = True
|
||||
model = tcmodels.__dict__[model_name]()
|
||||
model = model.cuda()
|
||||
else:
|
||||
import jittor as jt
|
||||
from jittor import optim
|
||||
from jittor import nn
|
||||
jt.flags.use_cuda = 1
|
||||
jt.cudnn.set_algorithm_cache_size(10000)
|
||||
import jittor.models as jtmodels
|
||||
model = jtmodels.__dict__[model_name]()
|
||||
if (model_name == "resnet152" or model_name == "resnet101") and bs == 128 and is_train:
|
||||
jt.cudnn.set_max_workspace_ratio(0.05)
|
||||
if is_train:
|
||||
model.train()
|
||||
else:
|
||||
model.eval()
|
||||
img_size = 224
|
||||
if model_name == "inception_v3":
|
||||
img_size = 300
|
||||
test_img = np.random.random((bs, 3, img_size, img_size)).astype("float32")
|
||||
if is_train:
|
||||
label = (np.random.random((bs,)) * 1000).astype("int32")
|
||||
if name == "torch":
|
||||
test_img = torch.Tensor(test_img).cuda()
|
||||
if is_train:
|
||||
label = torch.LongTensor(label).cuda()
|
||||
opt = optim.SGD(model.parameters(), 0.001)
|
||||
sync = lambda: torch.cuda.synchronize()
|
||||
jt = torch
|
||||
else:
|
||||
test_img = jt.array(test_img).stop_grad()
|
||||
if is_train:
|
||||
label = jt.array(label).stop_grad()
|
||||
opt = optim.SGD(model.parameters(), 0.001)
|
||||
sync = lambda: jt.sync_all(True)
|
||||
|
||||
sync()
|
||||
use_profiler = os.environ.get("use_profiler", "0") == "1"
|
||||
if hasattr(jt, "nograd"):
|
||||
ng = jt.no_grad()
|
||||
ng.__enter__()
|
||||
def iter():
|
||||
x = model(test_img)
|
||||
if isinstance(x, tuple):
|
||||
x = x[0]
|
||||
if is_train:
|
||||
loss = nn.CrossEntropyLoss()(x, label)
|
||||
if name == "jittor":
|
||||
opt.step(loss)
|
||||
else:
|
||||
opt.zero_grad()
|
||||
loss.backward()
|
||||
opt.step()
|
||||
else:
|
||||
x.sync()
|
||||
sync()
|
||||
for i in time_iter():
|
||||
iter()
|
||||
sync()
|
||||
for i in time_iter():
|
||||
iter()
|
||||
sync()
|
||||
if use_profiler:
|
||||
if name == "torch":
|
||||
prof = torch.autograd.profiler.profile(use_cuda=True)
|
||||
else:
|
||||
prof = jt.profile_scope()
|
||||
prof.__enter__()
|
||||
if name == "jittor":
|
||||
if hasattr(jt.flags, "use_parallel_op_compiler"):
|
||||
jt.flags.use_parallel_op_compiler = 0
|
||||
start = time.time()
|
||||
for i in time_iter(10):
|
||||
iter()
|
||||
sync()
|
||||
end = time.time()
|
||||
if use_profiler:
|
||||
prof.__exit__(None,None,None)
|
||||
if name == "torch":
|
||||
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=30))
|
||||
total_iter = i+1
|
||||
print("duration:", end-start, "FPS:", total_iter*bs/(end-start))
|
||||
fpath = f"{home_path}/.cache/jittor/{name}-{_model_name}-{bs}.txt"
|
||||
with open(fpath, 'w') as f:
|
||||
f.write(f"duration: {end-start} FPS: {total_iter*bs/(end-start)}")
|
||||
os.chmod(fpath, 0o666)
|
||||
|
||||
if len(sys.argv) <= 1:
|
||||
main()
|
||||
else:
|
||||
name, model, bs = sys.argv[1:]
|
||||
bs = int(bs)
|
||||
test(name, model, bs)
|
|
@ -0,0 +1,6 @@
|
|||
bash python/jittor/test/system/test_cuda10.0_ubuntu16.04.sh
|
||||
bash python/jittor/test/system/test_cuda10.0_ubuntu18.04.sh
|
||||
bash python/jittor/test/system/test_cuda11.1_ubuntu16.04.sh
|
||||
bash python/jittor/test/system/test_cuda11.1_ubuntu18.04.sh
|
||||
bash python/jittor/test/system/test_cuda11.1_ubuntu20.04.sh
|
||||
bash python/jittor/test/system/test_nocuda_ubuntu18.04.sh
|
|
@ -0,0 +1,41 @@
|
|||
cat > /tmp/cuda10.0-ubuntu16.04.dockerfile <<\EOF
|
||||
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3.7 -m pip install ./jittor
|
||||
RUN python3.7 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor-cuda:10.0-16.04 -f /tmp/cuda10.0-ubuntu16.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor-cuda:10.0-16.04 bash -c \
|
||||
"python3.7 -m jittor.test.test_example && \
|
||||
python3.7 -m jittor.test.test_resnet && \
|
||||
python3.7 -m jittor.test.test_parallel_pass && \
|
||||
python3.7 -m jittor.test.test_atomic_tuner && \
|
||||
python3.7 -m jittor.test.test_where_op"
|
|
@ -0,0 +1,41 @@
|
|||
cat > /tmp/cuda10.0-ubuntu18.04.dockerfile <<\EOF
|
||||
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3.7 -m pip install ./jittor
|
||||
RUN python3.7 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor-cuda:10.0-18.04 -f /tmp/cuda10.0-ubuntu18.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor-cuda:10.0-18.04 bash -c \
|
||||
"python3.7 -m jittor.test.test_example && \
|
||||
python3.7 -m jittor.test.test_resnet && \
|
||||
python3.7 -m jittor.test.test_parallel_pass && \
|
||||
python3.7 -m jittor.test.test_atomic_tuner && \
|
||||
python3.7 -m jittor.test.test_where_op"
|
|
@ -0,0 +1,41 @@
|
|||
cat > /tmp/cuda11.1-ubuntu16.04.dockerfile <<\EOF
|
||||
FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu16.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3.7 -m pip install ./jittor
|
||||
RUN python3.7 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor-cuda:11.1-16.04 -f /tmp/cuda11.1-ubuntu16.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor-cuda:11.1-16.04 bash -c \
|
||||
"python3.7 -m jittor.test.test_example && \
|
||||
python3.7 -m jittor.test.test_resnet && \
|
||||
python3.7 -m jittor.test.test_parallel_pass && \
|
||||
python3.7 -m jittor.test.test_atomic_tuner && \
|
||||
python3.7 -m jittor.test.test_where_op"
|
|
@ -0,0 +1,41 @@
|
|||
cat > /tmp/cuda11.1-ubuntu18.04.dockerfile <<\EOF
|
||||
FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3.7 -m pip install ./jittor
|
||||
RUN python3.7 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor-cuda:11.1-18.04 -f /tmp/cuda11.1-ubuntu18.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor-cuda:11.1-18.04 bash -c \
|
||||
"python3.7 -m jittor.test.test_example && \
|
||||
python3.7 -m jittor.test.test_resnet && \
|
||||
python3.7 -m jittor.test.test_parallel_pass && \
|
||||
python3.7 -m jittor.test.test_atomic_tuner && \
|
||||
python3.7 -m jittor.test.test_where_op"
|
|
@ -0,0 +1,39 @@
|
|||
cat > /tmp/cuda11.1-ubuntu20.04.dockerfile <<\EOF
|
||||
FROM nvidia/cuda:11.1-devel-ubuntu20.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update || true
|
||||
RUN apt install g++ build-essential libomp-dev python3-dev python3-pip wget -y
|
||||
RUN python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
WORKDIR /usr/src/
|
||||
|
||||
RUN wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64/libcudnn8_8.0.5.39-1+cuda11.1_amd64.deb && \
|
||||
wget https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu2004/x86_64/libcudnn8-dev_8.0.5.39-1+cuda11.1_amd64.deb && \
|
||||
dpkg -i ./libcudnn8_8.0.5.39-1+cuda11.1_amd64.deb ./libcudnn8-dev_8.0.5.39-1+cuda11.1_amd64.deb && \
|
||||
rm *.deb
|
||||
RUN ls
|
||||
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3 -m pip install ./jittor
|
||||
RUN python3 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor-cuda:11.1-20.04 -f /tmp/cuda11.1-ubuntu20.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor-cuda:11.1-20.04 bash -c \
|
||||
"python3 -m jittor.test.test_example && \
|
||||
python3 -m jittor.test.test_resnet && \
|
||||
python3 -m jittor.test.test_parallel_pass && \
|
||||
python3 -m jittor.test.test_atomic_tuner && \
|
||||
python3 -m jittor.test.test_where_op"
|
|
@ -0,0 +1,40 @@
|
|||
cat > /tmp/ubuntu18.04.dockerfile <<\EOF
|
||||
FROM ubuntu:18.04
|
||||
|
||||
RUN apt update && apt install ca-certificates -y
|
||||
|
||||
RUN echo \
|
||||
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
|
||||
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
|
||||
|
||||
# RUN rm -rf /var/lib/apt/lists/*
|
||||
RUN apt update
|
||||
|
||||
RUN apt install wget \
|
||||
python3.7 python3.7-dev \
|
||||
g++ build-essential -y
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
|
||||
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
|
||||
|
||||
# change tsinghua mirror
|
||||
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
|
||||
RUN pip3 uninstall jittor -y
|
||||
|
||||
COPY . jittor
|
||||
RUN python3.7 -m pip install ./jittor
|
||||
RUN python3.7 -m jittor.test.test_core
|
||||
EOF
|
||||
|
||||
sudo docker build --tag jittor/jittor:18.04 -f /tmp/ubuntu18.04.dockerfile .
|
||||
sudo docker run --gpus all --rm jittor/jittor:18.04 bash -c \
|
||||
"python3.7 -m jittor.test.test_example && \
|
||||
python3.7 -m jittor.test.test_parallel_pass && \
|
||||
python3.7 -m jittor.test.test_atomic_tuner && \
|
||||
python3.7 -m jittor.test.test_where_op"
|
|
@ -60,6 +60,41 @@ class TestConvTranspose(unittest.TestCase):
|
|||
check((4, 5, 100, 100), (5, 6, 5, 5), 1, 2)
|
||||
check((4, 5, 100, 100), (5, 6, 5, 5), 2, 2)
|
||||
check((4, 5, 100, 100), (5, 6, 5, 5), 2, 3)
|
||||
|
||||
def test_function(self):
|
||||
def check(data_shape, weights_shape, stride=1, dilation=1):
|
||||
N,C,H,W = data_shape
|
||||
i,o,h,w = weights_shape
|
||||
img = np.random.rand(N,C,H,W).astype("float32")
|
||||
weights = np.random.rand(i,o,h,w).astype("float32")
|
||||
m1 = jt.nn.ConvTranspose(i,o,h, stride=stride, dilation=dilation, bias=False)
|
||||
m2 = torch.nn.ConvTranspose2d(i,o,h, stride=stride, dilation=dilation, bias=False)
|
||||
m1.weight.data = weights
|
||||
m2.weight.data = torch.Tensor(weights)
|
||||
x = jt.array(img)
|
||||
# out1 = m1(x)
|
||||
out1 = jt.nn.conv_transpose2d(x, m1.weight, stride=stride, dilation=dilation, bias=False)
|
||||
mask = jt.random(out1.shape)
|
||||
out1 = out1*mask
|
||||
tx = torch.Tensor(img)
|
||||
tx.requires_grad = True
|
||||
out2 = m2(tx) * torch.Tensor(mask.data)
|
||||
with jt.log_capture_scope(log_silent=1,
|
||||
log_vprefix="var_re=0,conv=0,op.cc=100") as logs:
|
||||
assert np.allclose(out1.data, out2.data)
|
||||
dx, dw = jt.grad(out1, [x, m1.weight])
|
||||
jt.sync([dx, dw])
|
||||
out2.sum().backward()
|
||||
assert np.allclose(dw.data, m2.weight.grad.numpy(), 1e-3)
|
||||
assert np.allclose(dx.data, tx.grad.numpy())
|
||||
assert len(find_log_with_re(logs, "conv")) == 3
|
||||
check((4, 5, 10, 10), (5, 6, 3, 3))
|
||||
check((4, 5, 10, 10), (5, 6, 3, 3), 2)
|
||||
check((4, 5, 100, 100), (5, 6, 4, 4), 2)
|
||||
check((4, 5, 100, 100), (5, 6, 4, 4), 3)
|
||||
check((4, 5, 100, 100), (5, 6, 5, 5), 1, 2)
|
||||
check((4, 5, 100, 100), (5, 6, 5, 5), 2, 2)
|
||||
check((4, 5, 100, 100), (5, 6, 5, 5), 2, 3)
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -30,7 +30,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
|||
for perm in perms:
|
||||
with jt.log_capture_scope(
|
||||
log_silent=1,
|
||||
log_v=0, log_vprefix="op.cc=100"
|
||||
log_v=0, log_vprefix="cutt=100"
|
||||
) as raw_log:
|
||||
if perm:
|
||||
x = np.transpose(a, perm)
|
||||
|
@ -39,7 +39,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
|||
x = np.transpose(a)
|
||||
y = jt.transpose(a).data
|
||||
self.assertEqual(x.shape, y.shape)
|
||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: " + "cutt_transpose" + ".*)")
|
||||
logs = find_log_with_re(raw_log, "(Run cutt_transpose with key.*)")
|
||||
if perm is None:
|
||||
continue
|
||||
last = -1
|
||||
|
@ -53,7 +53,7 @@ class TestCuttTransposeOp(unittest.TestCase):
|
|||
last = perm[i]
|
||||
if not in_order:
|
||||
assert len(logs)==1
|
||||
assert (x==y).all(), f"\n{x}\n{y}"
|
||||
assert (x==y).all(), f"\n{x}\n{y}\n{perm}\n{a.shape}"
|
||||
|
||||
ia = [gen_data([5, 7]), gen_data([2,2,2]), gen_data([2,3,4,5]), gen_data([5,3]), gen_data([3,1,5,3,1])]
|
||||
for a in ia: check(a)
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guoye Yang <498731903@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
import jittor.models as jtmodels
|
||||
|
||||
def load_parameters(m1, m2):
|
||||
m1.save('/tmp/temp.pk')
|
||||
m2.load('/tmp/temp.pk')
|
||||
|
||||
def compare_parameters(m1, m2):
|
||||
ps1 = m1.parameters()
|
||||
ps2 = m2.parameters()
|
||||
for i in range(len(ps1)):
|
||||
x = ps1[i].data + 1e-8
|
||||
y = ps2[i].data + 1e-8
|
||||
relative_error = abs(x - y) / abs(y)
|
||||
diff = relative_error.mean()
|
||||
assert diff < 1e-4, (diff, 'backward', ps2[i].name(), ps1[i].mean(), ps1[i].std(), ps2[i].mean(), ps2[i].std())
|
||||
|
||||
class TestDepthwiseConv(unittest.TestCase):
|
||||
@unittest.skipIf(not jt.has_cuda, "Cuda not found")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_data(self):
|
||||
test_img = np.random.random((64,3,224,224)).astype('float32')
|
||||
jittor_test_img = jt.array(test_img)
|
||||
lr = 100
|
||||
|
||||
jittor_model = jtmodels.__dict__['mobilenet_v2']()
|
||||
jittor_model2 = jtmodels.__dict__['mobilenet_v2']()
|
||||
# Set eval to avoid dropout layer & bn errors
|
||||
jittor_model.train()
|
||||
jittor_model.classifier[0].eval()
|
||||
for m in jittor_model.modules():
|
||||
if isinstance(m, jt.nn.BatchNorm):
|
||||
m.eval()
|
||||
|
||||
jittor_model2.train()
|
||||
jittor_model2.classifier[0].eval()
|
||||
for m in jittor_model2.modules():
|
||||
if isinstance(m, jt.nn.BatchNorm):
|
||||
m.eval()
|
||||
|
||||
load_parameters(jittor_model2, jittor_model)
|
||||
for m in jittor_model.modules():
|
||||
if isinstance(m, jt.nn.Conv):
|
||||
m.is_depthwise_conv = False
|
||||
cnt = 0
|
||||
for m in jittor_model2.modules():
|
||||
if isinstance(m, jt.nn.Conv):
|
||||
if (m.is_depthwise_conv):
|
||||
cnt += 1
|
||||
assert cnt == 17, (cnt, '!=', 17)
|
||||
jt_optimizer = jt.nn.SGD(jittor_model.parameters(), lr = lr)
|
||||
jt_optimizer2 = jt.nn.SGD(jittor_model2.parameters(), lr = lr)
|
||||
|
||||
jittor_result = jittor_model(jittor_test_img)
|
||||
mask = jt.random(jittor_result.shape, jittor_result.dtype)
|
||||
loss = jittor_result * mask
|
||||
jt_optimizer.step(loss)
|
||||
jt.sync_all(True)
|
||||
|
||||
jittor_result2 = jittor_model2(jittor_test_img)
|
||||
loss = jittor_result2 * mask
|
||||
|
||||
x = jittor_result2.data + 1e-8
|
||||
y = jittor_result.data + 1e-8
|
||||
relative_error = abs(x - y) / abs(y)
|
||||
diff = relative_error.mean()
|
||||
assert diff < 1e-4, (diff, 'forward')
|
||||
|
||||
jt_optimizer2.step(loss)
|
||||
jt.sync_all(True)
|
||||
compare_parameters(jittor_model, jittor_model2)
|
||||
|
||||
|
||||
jt.clean()
|
||||
jt.gc()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -26,6 +26,19 @@ class TestFunction(unittest.TestCase):
|
|||
da = jt.grad(b, a)
|
||||
assert da.data == -1
|
||||
|
||||
def test_apply(self):
|
||||
class MyFunc(Function):
|
||||
def execute(self, x):
|
||||
return x+1
|
||||
|
||||
def grad(self, grad):
|
||||
return grad-2
|
||||
a = jt.ones(1)
|
||||
func = MyFunc.apply
|
||||
b = func(a)
|
||||
da = jt.grad(b, a)
|
||||
assert da.data == -1
|
||||
|
||||
def test2(self):
|
||||
class MyFunc(Function):
|
||||
def execute(self, x):
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
|
||||
class TestMergeLoopVarPass(unittest.TestCase):
|
||||
def test(self):
|
||||
a = jt.ones([10,10,10,10])
|
||||
a.sync()
|
||||
with jt.profile_scope() as rep:
|
||||
b = a.sum([2,3])
|
||||
b.sync()
|
||||
with open(rep[1][1]) as f:
|
||||
src = f.read()
|
||||
assert "range01" in src
|
||||
assert "range23" in src
|
||||
|
||||
def test2(self):
|
||||
a = jt.ones([10,10,10,10])
|
||||
a.sync()
|
||||
with jt.profile_scope() as rep:
|
||||
b = a + 1
|
||||
b.sync()
|
||||
with open(rep[1][1]) as f:
|
||||
src = f.read()
|
||||
assert "range0123" in src
|
||||
|
||||
def test3(self):
|
||||
a = jt.ones([10,10,10,10])
|
||||
x = jt.ones([1,10,1,1])
|
||||
a.sync(), x.sync()
|
||||
with jt.profile_scope() as rep:
|
||||
b = a + x
|
||||
b.sync()
|
||||
with open(rep[1][1]) as f:
|
||||
src = f.read()
|
||||
assert "range23" in src
|
||||
|
||||
def test4(self):
|
||||
# don't optimize reindex like op yet
|
||||
a = jt.ones([10,10,10,10])
|
||||
a.sync()
|
||||
with jt.profile_scope() as rep:
|
||||
b = a.reindex_reduce("add", [10,10], ["i0","i1"])
|
||||
b.sync()
|
||||
with open(rep[1][1]) as f:
|
||||
src = f.read()
|
||||
assert "range23" not in src
|
||||
|
||||
def test5(self):
|
||||
a = jt.ones([10,10,10,10])
|
||||
a.sync()
|
||||
with jt.profile_scope() as rep:
|
||||
b = a.sum([1])
|
||||
b.sync()
|
||||
with open(rep[1][1]) as f:
|
||||
src = f.read()
|
||||
assert "range01" not in src
|
||||
assert "range23" in src
|
||||
|
||||
@unittest.skipIf(not jt.compiler.has_cuda, "No CUDA found")
|
||||
class TestMergeLoopVarPassCuda(TestMergeLoopVarPass):
|
||||
def setUp(self):
|
||||
jt.flags.use_cuda = 1
|
||||
def tearDown(self):
|
||||
jt.flags.use_cuda = 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -54,6 +54,7 @@ class TestPad(unittest.TestCase):
|
|||
check_equal(torch.Tensor(arr).flip(1), jt.array(arr).flip(1))
|
||||
check_equal(torch.Tensor(arr).flip(2), jt.array(arr).flip(2))
|
||||
check_equal(torch.Tensor(arr).flip(3), jt.array(arr).flip(3))
|
||||
check_equal(torch.Tensor(arr).flip([2,3]), jt.array(arr).flip([2,3]))
|
||||
print('pass flip test ...')
|
||||
|
||||
def test_cross(self):
|
||||
|
@ -85,6 +86,10 @@ class TestPad(unittest.TestCase):
|
|||
check_equal(torchvision.utils.make_grid(torch.Tensor(arr), nrow=3, normalize=True, padding=4, pad_value=-1), jt.make_grid(jt.array(arr), nrow=3, normalize=True, padding=4, pad_value=-1))
|
||||
print('pass make_grid test ...')
|
||||
|
||||
def test_save_image(self):
|
||||
arr = jt.array(np.random.randn(16,3,10,10))
|
||||
jt.save_image(arr, "/tmp/a.jpg")
|
||||
|
||||
def test_unbind(self):
|
||||
arr = np.random.randn(2,3,4)
|
||||
for dim in range(len(arr.shape)):
|
||||
|
|
|
@ -120,7 +120,7 @@ class TestMklConvOp(unittest.TestCase):
|
|||
|
||||
with jt.flag_scope(
|
||||
enable_tuner=0,
|
||||
compile_options={"test_mkl_conv":1}
|
||||
# compile_options={"test_mkl_conv":1}
|
||||
):
|
||||
c_jt = conv(a_jt, b_jt, 1, 1) * da
|
||||
gs=jt.grad(c_jt,[a_jt,b_jt])
|
||||
|
@ -166,7 +166,7 @@ class TestMklConvOp(unittest.TestCase):
|
|||
|
||||
with jt.flag_scope(
|
||||
enable_tuner=0,
|
||||
compile_options={"test_mkl_conv":1}
|
||||
# compile_options={"test_mkl_conv":1}
|
||||
):
|
||||
c_jt = conv_nhwc_hwio(a_jt, b_jt, 1, 1) * da
|
||||
gs=jt.grad(c_jt,[a_jt,b_jt])
|
||||
|
|
|
@ -120,7 +120,7 @@ class TestOpCompiler(unittest.TestCase):
|
|||
OP1
|
||||
1+2
|
||||
std::max(T(1), T(2))
|
||||
((1)+T(2)*(T(1)/T(count)))''')
|
||||
((1)+T(2)*(T(rcount)))''')
|
||||
expect_error(lambda: jit_precompile(vars, "@{a"))
|
||||
expect_error(lambda: jit_precompile(vars, "@for(a"))
|
||||
expect_error(lambda: jit_precompile(vars, "@for(i,l,r)"))
|
||||
|
|
|
@ -97,7 +97,7 @@ class TestParallelPass3(unittest.TestCase):
|
|||
def check(ndim, depth, tdim):
|
||||
a = jt.random([16]*ndim)
|
||||
a.sync()
|
||||
compile_options = {"parallel":1}
|
||||
compile_options = {"parallel":1, "merge_loop_var": self.merge_loop_var}
|
||||
if depth is not None:
|
||||
compile_options["max_parallel_depth"] = depth
|
||||
with jt.profile_scope(compile_options=compile_options) as rep:
|
||||
|
@ -110,6 +110,7 @@ class TestParallelPass3(unittest.TestCase):
|
|||
for i in range(tdim):
|
||||
assert f"tnum{i}" in src
|
||||
assert f"tnum{tdim}" not in src
|
||||
self.merge_loop_var = 0
|
||||
check(1, None, 0)
|
||||
check(2, None, 1)
|
||||
check(3, None, 2)
|
||||
|
@ -134,7 +135,7 @@ class TestParallelPass3(unittest.TestCase):
|
|||
a = jt.random(shape)
|
||||
a.sync()
|
||||
config = {
|
||||
"parallel":1, "max_parallel_depth":depth
|
||||
"parallel":1, "max_parallel_depth":depth, "merge_loop_var": self.merge_loop_var
|
||||
}
|
||||
for k in args:
|
||||
config[k] = args[k]
|
||||
|
@ -164,6 +165,7 @@ class TestParallelPass3(unittest.TestCase):
|
|||
assert np.allclose(a.data.sum(rdim), b), (b.sum(), a.data.sum())
|
||||
|
||||
def test_reduce(self):
|
||||
self.merge_loop_var = 0
|
||||
check = lambda *a, **kw: self.reduce_check(*a, **kw)
|
||||
check(1, 2, 1, 0, 1)
|
||||
check(2, 1, 1, 1, 0)
|
||||
|
@ -185,6 +187,29 @@ class TestParallelPass3(unittest.TestCase):
|
|||
check(4, 2, 2, [2,3], 0)
|
||||
check(4, 2, 2, [0,3], 1)
|
||||
|
||||
def test_reduce_with_merge_loop_var(self):
|
||||
self.merge_loop_var = 1
|
||||
check = lambda *a, **kw: self.reduce_check(*a, **kw)
|
||||
check(1, 2, 1, 0, 1)
|
||||
check(2, 1, 1, 1, 0)
|
||||
check(2, 1, 1, 0, 1)
|
||||
check(2, 1, 1, 0, 1, [0,0])
|
||||
check(2, 1, 1, 0, 0, [0,1])
|
||||
check(2, 1, 1, 0, 0, [0,1], [0,64])
|
||||
check(2, 1, 1, [0,1], 1, [0,1])
|
||||
check(3, 1, 1, [1,2], 0)
|
||||
check(3, 1, 1, [0,1], 1)
|
||||
check(3, 1, 1, [0,1], 0, [0,0,2])
|
||||
check(3, 2, 1, [2], 0)
|
||||
if jt.flags.use_cuda:
|
||||
# loop is not merged so parallel depth 2
|
||||
check(3, 2, 2, [1], 1)
|
||||
else:
|
||||
check(3, 2, 1, [1], 0)
|
||||
check(3, 2, 2, [1], 1, merge=0)
|
||||
check(4, 2, 1, [2,3], 0)
|
||||
check(4, 2, 2, [0,3], 1)
|
||||
|
||||
@unittest.skipIf(not jt.compiler.has_cuda, "No CUDA found")
|
||||
def test_reduce_cuda(self):
|
||||
with jt.flag_scope(use_cuda=1):
|
||||
|
|
|
@ -96,7 +96,7 @@ class TestResnet(unittest.TestCase):
|
|||
-jt.flags.stat_allocator_total_free_byte
|
||||
# assert mem_used < 4e9, mem_used
|
||||
# TODO: why bigger?
|
||||
assert mem_used < 5.5e9, mem_used
|
||||
assert mem_used < 5.6e9, mem_used
|
||||
# example log:
|
||||
# Train Epoch: 0 [0/100 (0%)] Loss: 2.352903 Acc: 0.110000
|
||||
# Train Epoch: 0 [1/100 (1%)] Loss: 2.840830 Acc: 0.080000
|
||||
|
@ -115,9 +115,9 @@ class TestResnet(unittest.TestCase):
|
|||
# Train Epoch: 0 [50/100 (50%)] Loss: 2.055014 Acc: 0.290000
|
||||
|
||||
if jt.in_mpi:
|
||||
assert jt.core.number_of_lived_vars() < 7500, jt.core.number_of_lived_vars()
|
||||
assert jt.core.number_of_lived_vars() < 7800, jt.core.number_of_lived_vars()
|
||||
else:
|
||||
assert jt.core.number_of_lived_vars() < 6500, jt.core.number_of_lived_vars()
|
||||
assert jt.core.number_of_lived_vars() < 6700, jt.core.number_of_lived_vars()
|
||||
|
||||
jt.sync_all(True)
|
||||
assert np.mean(loss_list[-50:])<0.5
|
||||
|
|
|
@ -43,7 +43,7 @@ class TestWhereOp(unittest.TestCase):
|
|||
x = a.reindex_var(self.where(a>0.1))
|
||||
x = x.reindex_var(self.where(x<0.9))
|
||||
na = a.data
|
||||
assert (na[np.logical_and(na>0.1, na<0.9)]==x.data).all()
|
||||
assert np.allclose(na[np.logical_and(na>0.1, na<0.9)], x.data)
|
||||
|
||||
def test_reduce_dep(self):
|
||||
a = jt.random([100,100])
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
from flask import Flask
|
||||
from flask import request
|
||||
from flask import jsonify
|
||||
app = Flask(__name__)
|
||||
import json
|
||||
|
||||
from jittor.utils.pytorch_converter import convert
|
||||
|
||||
@app.route('/', methods=["GET", "POST"])
|
||||
def hello():
|
||||
msg = request
|
||||
data = msg.data.decode("utf-8")
|
||||
try:
|
||||
data = json.loads(data)
|
||||
src = data["src"]
|
||||
pjmap = json.loads(data["pjmap"])
|
||||
jt_src = convert(src, pjmap)
|
||||
except Exception as e:
|
||||
jt_src = str(e)
|
||||
response = jsonify(jt_src=jt_src)
|
||||
|
||||
# Enable Access-Control-Allow-Origin
|
||||
response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
return response
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(host="0.0.0.0")
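For reference, a minimal client sketch for the converter service added above — a hedged example, assuming the server is reachable at http://localhost:5000 and that the `requests` package is installed; the payload simply mirrors the `src` and `pjmap` fields the handler reads.

```python
# Minimal client sketch for the converter service above (assumptions: the
# server listens on localhost:5000 and `requests` is installed).
import json
import requests

torch_src = "import torch.nn as nn\nlayer = nn.ReLU()"
payload = json.dumps({
    "src": torch_src,         # PyTorch source to convert
    "pjmap": json.dumps({}),  # extra mapping rules, JSON-encoded as the handler expects
})
resp = requests.post("http://localhost:5000/", data=payload)
print(resp.json()["jt_src"])  # converted Jittor source, or the error text
```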
|
|
@ -38,7 +38,7 @@ def download_url_to_local(url, filename, root_folder, md5):
|
|||
ensure_dir(root_folder)
|
||||
file_path = os.path.join(root_folder, filename)
|
||||
if check_file_exist(file_path, md5):
|
||||
print("Data file has been downloaded and verified")
|
||||
return
|
||||
else:
|
||||
try:
|
||||
print('Downloading ' + url + ' to ' + file_path)
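As a usage note for the download helper patched above, a hypothetical call is sketched below; the import path, URL, file name and md5 are placeholders rather than values taken from the repository.

```python
# Hypothetical call to the download helper above; the import path, URL,
# file name and md5 are placeholders for illustration only.
from jittor.dataset.utils import download_url_to_local

download_url_to_local(
    url="https://example.com/data/mnist.tgz",
    filename="mnist.tgz",
    root_folder="/tmp/jittor_data",
    md5="d41d8cd98f00b204e9800998ecf8427e",
)
```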
|
||||
|
|
|
@ -22,49 +22,36 @@ from jittor.compiler import run_cmd
|
|||
from jittor_utils import translator
|
||||
import sys
|
||||
|
||||
jittor_path = os.path.realpath(os.path.join(jt.flags.jittor_path, "..", ".."))
|
||||
|
||||
polish_path = os.path.join(jittor_path, "..", "jittor-polish")
|
||||
polish_path = os.path.realpath(polish_path)
|
||||
build_path = polish_path + "/build"
|
||||
LOG.i("Polish path:", polish_path)
|
||||
if not os.path.isdir(polish_path):
|
||||
# create jittor-polish repo
|
||||
os.mkdir(polish_path)
|
||||
jittor_path = jt.flags.jittor_path
|
||||
root_path = os.path.realpath(os.path.join(jt.flags.jittor_path, "..", ".."))
|
||||
data_path = os.path.join(jittor_path, "src", "__data__")
|
||||
build_path = os.path.join(data_path, "build")
|
||||
if not os.path.isdir(build_path):
|
||||
os.mkdir(build_path)
|
||||
run_cmd("git init . && git remote add origin git@github.com:Jittor/Jittor.git", polish_path)
|
||||
status = run_cmd("git status", data_path)
|
||||
print(status)
|
||||
if "working tree clean" not in status:
|
||||
LOG.f("__data__ has untracked files")
|
||||
|
||||
# copy jittor src into it
|
||||
names = "extern notebook python script src README.md README.src.md README.cn.md LICENSE.txt setup.py .gitignore".split()
|
||||
for name in names:
|
||||
run_cmd(f"rsync -a {jittor_path}/{name} {polish_path}/")
|
||||
|
||||
git_version = run_cmd("git rev-parse HEAD", jittor_path)
|
||||
git_version = run_cmd("git rev-parse HEAD", data_path)
|
||||
LOG.i("git_version", git_version)
|
||||
run_cmd(f"git rev-parse HEAD > {polish_path}/python/jittor/version", jittor_path)
|
||||
|
||||
run_cmd(f"git rev-parse HEAD > {jittor_path}/version", data_path)
|
||||
|
||||
# remove files
|
||||
files = jt.compiler.files
|
||||
file_to_delete = [ name for name in files
|
||||
if name.startswith("src") and \
|
||||
len(name.split("/"))==2 and name.endswith("node.cc")
|
||||
data_files = [ name for name in files
|
||||
if "__data__" in name
|
||||
]
|
||||
LOG.i("file_to_delete", file_to_delete)
|
||||
run_cmd(f"rm {' '.join(file_to_delete)}", polish_path)
|
||||
LOG.i("data_files", data_files)
|
||||
|
||||
# commit jittor-polish
|
||||
run_cmd(f"git add .", polish_path)
|
||||
status = run_cmd(f"git status", polish_path)
|
||||
if "new file" not in status:
|
||||
LOG.i("Nothing change, exit...")
|
||||
else:
|
||||
run_cmd(f"git commit -a -m 'version {git_version}'", polish_path)
|
||||
|
||||
# compile delete files
|
||||
# compile data files
|
||||
from pathlib import Path
|
||||
home = str(Path.home())
|
||||
for cc_type in ["g++", "clang"]:
|
||||
for device in ["cpu", "cuda"]:
|
||||
# for cc_type in ["g++", "clang"]:
|
||||
# for device in ["cpu", "cuda"]:
|
||||
for cc_type in ["g++"]:
|
||||
for device in ["cpu"]:
|
||||
key = f"{git_version}-{cc_type}-{device}"
|
||||
env = f"cache_name=build/{cc_type}/{device} cc_path="
|
||||
cname = "g++" if cc_type=="g++" else "clang-8"
|
||||
|
@ -84,7 +71,7 @@ for cc_type in ["g++", "clang"]:
|
|||
|
||||
obj_path = home + f"/.cache/jittor/build/{cc_type}/{device}/{cname}/obj_files"
|
||||
obj_files = []
|
||||
for name in file_to_delete:
|
||||
for name in data_files:
|
||||
name = name.split("/")[-1]
|
||||
fname = f"{obj_path}/{name}.o"
|
||||
assert os.path.isfile(fname), fname
|
||||
|
@ -94,14 +81,17 @@ for cc_type in ["g++", "clang"]:
|
|||
# compress source
|
||||
# tar -cvzf build/jittor.tgz . --exclude build --exclude .git --exclude .ipynb_checkpoints --exclude __pycache__
|
||||
# mkdir -p jittor && tar -xvf ./jittor.tgz -C jittor
|
||||
assert os.system(f"cd {polish_path} && tar --exclude=build --exclude=.git --exclude=.ipynb_checkpoints --exclude=__pycache__ -cvzf build/jittor.tgz . ")==0
|
||||
assert os.system(f"cd {root_path} && tar --exclude=build --exclude=.git --exclude=.ipynb_checkpoints --exclude=__pycache__ --exclude=__data__ --exclude=my --exclude=dist --exclude=.vscode --exclude=.github -cvzf {build_path}/jittor.tgz * ")==0
|
||||
|
||||
# rsync to build-server
|
||||
jittor_web_base_dir = "Documents/jittor-blog/assets/"
|
||||
jittor_web_build_dir = jittor_web_base_dir + "build/"
|
||||
assert os.system(f"rsync -avPu {polish_path}/build/ jittor-web:{jittor_web_build_dir}")==0
|
||||
jittor_web_build_dir = jittor_web_base_dir
|
||||
assert os.system(f"rsync -avPu {build_path} jittor-web:{jittor_web_build_dir}")==0
|
||||
assert os.system(f"ssh jittor-web Documents/jittor-blog.git/hooks/post-update")==0
|
||||
|
||||
|
||||
# sys.exit(0)
|
||||
|
||||
# push to github
|
||||
# assert os.system(f"cd {polish_path} && git push -f origin master")==0
|
||||
|
||||
|
|
|
@ -179,6 +179,18 @@ pjmap = {
|
|||
'links': {},
|
||||
'extras': {'affine': 'None'},
|
||||
},
|
||||
'Parameter':{
|
||||
'pytorch': {
|
||||
'args': "data,require_grad=True"
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'jt',
|
||||
'name': 'array',
|
||||
'args': 'data,dtype=None',
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'Dropout2d': {
|
||||
'pytorch': {
|
||||
'args': 'p=0.5, inplace=False',
|
||||
|
@ -351,6 +363,32 @@ pjmap = {
|
|||
}
|
||||
}
|
||||
|
||||
unsupport_ops = [
|
||||
# ***************************************************************
|
||||
# torch.nn
|
||||
# ***************************************************************
|
||||
'ModuleDict', 'ParameterList', 'ParameterDict',
|
||||
'Conv1d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose3d', 'Unfold', 'Fold',
|
||||
'MaxPool1d', 'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'AvgPool1d',
|
||||
'AvgPool3d', 'FractionalMaxPool2d', 'LPPool1d', 'LPPool2d', 'AdaptiveMaxPool1d',
|
||||
'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool3d',
|
||||
'ReflectionPad1d', 'ReplicationPad1d', 'ReplicationPad3d', 'ConstantPad1d', 'ConstantPad3d',
|
||||
'ELU', 'Hardshrink', 'Hardtanh', 'LogSigmoid', 'MultiheadAttention',
|
||||
'RReLU', 'SELU', 'CELU', 'GELU', 'Softshrink', 'Softsign', 'Tanhshrink',
|
||||
'Threshold', 'Softmin', 'Softmax2d', 'LogSoftmax', 'AdaptiveLogSoftmaxWithLoss',
|
||||
'BatchNorm3d', 'SyncBatchNorm', 'InstanceNorm1d', 'InstanceNorm3d', 'LocalResponseNorm',
|
||||
'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell', 'Transformer', 'TransformerEncoder',
|
||||
'TransformerDecoder', 'TransformerEncoderLayer', 'TransformerDecoderLayer', 'Identity', 'Bilinear',
|
||||
'Dropout3d', 'AlphaDropout', 'EmbeddingBag', 'CosineSimilarity', 'PairwiseDistance', 'CTCLoss', 'NLLLoss', 'PoissonNLLLoss', 'KLDivLoss', 'BCEWithLogitsLoss',
|
||||
'MarginRankingLoss', 'HingeEmbeddingLoss', 'MultiLabelMarginLoss', 'SmoothL1Loss', 'SoftMarginLoss',
|
||||
'MultiLabelSoftMarginLoss', 'CosineEmbeddingLoss', 'MultiMarginLoss', 'TripletMarginLoss', 'UpsamplingNearest2d',
|
||||
'UpsamplingBilinear2d', 'DataParallel', 'DistributedDataParallel', 'clip_grad_norm_', 'clip_grad_value_',
|
||||
'parameters_to_vector', 'vector_to_parameters', 'BasePruningMethod', 'PruningContainer', 'Identity',
|
||||
'RandomUnstructured', 'L1Unstructured', 'RandomStructured', 'LnStructured', 'CustomFromMask', 'identity',
|
||||
'random_unstructured', 'l1_unstructured', 'random_structured', 'ln_structured', 'global_unstructured',
|
||||
'custom_from_mask', 'remove', 'is_pruned', 'weight_norm', 'remove_weight_norm', 'spectral_norm',
|
||||
'remove_spectral_norm', 'PackedSequence', 'pack_padded_sequence', 'pad_packed_sequence', 'pad_sequence', 'pack_sequence'
|
||||
]
|
||||
|
||||
def pjmap_append(pytorch_func_name, pytorch_args, jittor_func_module, jittor_func_name, jittor_args, extras=None, links=None, delete=None):
|
||||
''' Add a mapping to pjmap for converting a new function. Example: convert AvgPool2d to Pool
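The hunk above is cut off mid-docstring; as a sketch of how `pjmap_append` might be used per the signature shown, a hypothetical AvgPool2d-to-Pool registration follows (the argument strings are illustrative, not taken from the repository).

```python
# Hypothetical registration following the pjmap_append signature above;
# all argument strings here are illustrative.
from jittor.utils.pytorch_converter import pjmap_append

pjmap_append(
    pytorch_func_name="AvgPool2d",
    pytorch_args="kernel_size, stride=None, padding=0",
    jittor_func_module="nn",
    jittor_func_name="Pool",
    jittor_args="kernel_size, stride=None, padding=0, op='mean'",
    extras={"op": "'mean'"},
)
```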
|
||||
|
@ -393,58 +431,268 @@ def pjmap_append(pytorch_func_name, pytorch_args, jittor_func_module, jittor_fun
|
|||
'delete': delete,
|
||||
}
|
||||
|
||||
unsupport_ops = [
|
||||
# ***************************************************************
|
||||
# torch.nn
|
||||
# ***************************************************************
|
||||
'Parameter', 'ModuleDict', 'ParameterList', 'ParameterDict',
|
||||
'Conv1d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose3d', 'Unfold', 'Fold',
|
||||
'MaxPool1d', 'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'AvgPool1d',
|
||||
'AvgPool3d', 'FractionalMaxPool2d', 'LPPool1d', 'LPPool2d', 'AdaptiveMaxPool1d',
|
||||
'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool3d',
|
||||
'ReflectionPad1d', 'ReplicationPad1d', 'ReplicationPad3d', 'ConstantPad1d', 'ConstantPad3d',
|
||||
'ELU', 'Hardshrink', 'Hardtanh', 'LogSigmoid', 'MultiheadAttention',
|
||||
'RReLU', 'SELU', 'CELU', 'GELU', 'Softshrink', 'Softsign', 'Tanhshrink',
|
||||
'Threshold', 'Softmin', 'Softmax2d', 'LogSoftmax', 'AdaptiveLogSoftmaxWithLoss',
|
||||
'BatchNorm3d', 'SyncBatchNorm', 'InstanceNorm1d', 'InstanceNorm3d', 'LocalResponseNorm',
|
||||
'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell', 'Transformer', 'TransformerEncoder',
|
||||
'TransformerDecoder', 'TransformerEncoderLayer', 'TransformerDecoderLayer', 'Identity', 'Bilinear',
|
||||
'Dropout3d', 'AlphaDropout', 'EmbeddingBag', 'CosineSimilarity', 'PairwiseDistance', 'CTCLoss', 'NLLLoss', 'PoissonNLLLoss', 'KLDivLoss', 'BCEWithLogitsLoss',
|
||||
'MarginRankingLoss', 'HingeEmbeddingLoss', 'MultiLabelMarginLoss', 'SmoothL1Loss', 'SoftMarginLoss',
|
||||
'MultiLabelSoftMarginLoss', 'CosineEmbeddingLoss', 'MultiMarginLoss', 'TripletMarginLoss', 'UpsamplingNearest2d',
|
||||
'UpsamplingBilinear2d', 'DataParallel', 'DistributedDataParallel', 'clip_grad_norm_', 'clip_grad_value_',
|
||||
'parameters_to_vector', 'vector_to_parameters', 'BasePruningMethod', 'PruningContainer', 'Identity',
|
||||
'RandomUnstructured', 'L1Unstructured', 'RandomStructured', 'LnStructured', 'CustomFromMask', 'identity',
|
||||
'random_unstructured', 'l1_unstructured', 'random_structured', 'ln_structured', 'global_unstructured',
|
||||
'custom_from_mask', 'remove', 'is_pruned', 'weight_norm', 'remove_weight_norm', 'spectral_norm',
|
||||
'remove_spectral_norm', 'PackedSequence', 'pack_padded_sequence', 'pad_packed_sequence', 'pad_sequence', 'pack_sequence'
|
||||
]
|
||||
|
||||
support_ops = {}
|
||||
for key in pjmap.keys():
|
||||
module = pjmap[key]['jittor']['module']
|
||||
name = pjmap[key]['jittor']['name']
|
||||
if module == 'nn':
|
||||
support_ops[key] = name
|
||||
def raise_unsupport(name, ori_src):
|
||||
ret = f"raise RuntimeError('''original source: <{ori_src.strip()}>, {name} is not supported in Jittor yet. We will appreciate it if you provide an implementation of {name} and make pull request at https://github.com/Jittor/jittor.''')"
|
||||
print(ret+'\n')
|
||||
ret = ast.parse(ret).body[0]
|
||||
return ret
|
||||
|
||||
def raise_unsupport(name):
|
||||
raise RuntimeError(f'{name} is not supported in Jittor yet. We will appreciate it if you provide an implementation of {name} and make pull request at https://github.com/Jittor/jittor.')
|
||||
class Converter:
|
||||
def __init__(self, ex_pjmap):
|
||||
import copy
|
||||
self.pjmap = copy.deepcopy(pjmap)
|
||||
if ex_pjmap:
|
||||
self.pjmap.update(ex_pjmap)
|
||||
self.unsupport_ops = set(unsupport_ops)
|
||||
support_ops = {}
|
||||
for key in self.pjmap.keys():
|
||||
module = self.pjmap[key]['jittor']['module']
|
||||
name = self.pjmap[key]['jittor']['name']
|
||||
if module == 'nn':
|
||||
support_ops[key] = name
|
||||
if key in self.unsupport_ops:
|
||||
self.unsupport_ops.remove(key)
|
||||
self.support_ops = support_ops
|
||||
self.import_flag = []
|
||||
|
||||
def replace(a):
|
||||
if hasattr(a, "attr") and a.attr in unsupport_ops:
|
||||
raise_unsupport(a.attr)
|
||||
|
||||
if hasattr(a, "id") and a.id in unsupport_ops:
|
||||
raise_unsupport(a.id)
|
||||
|
||||
if hasattr(a, "attr"):
|
||||
if a.attr in support_ops.keys(): a.attr = support_ops[a.attr]
|
||||
def replace(self, a):
|
||||
if hasattr(a, "attr") and a.attr in self.unsupport_ops:
|
||||
ori_src = astunparse.unparse(a)
|
||||
return raise_unsupport(a.attr, ori_src)
|
||||
|
||||
if hasattr(a, "id"):
|
||||
if a.id in support_ops.keys(): a.id = support_ops[a.id]
|
||||
if hasattr(a, "id") and a.id in self.unsupport_ops:
|
||||
ori_src = astunparse.unparse(a)
|
||||
return raise_unsupport(a.id, ori_src)
|
||||
|
||||
import_flag = []
|
||||
def convert(code):
|
||||
if hasattr(a, "attr"):
|
||||
if a.attr in self.support_ops.keys(): a.attr = self.support_ops[a.attr]
|
||||
|
||||
if hasattr(a, "id"):
|
||||
if a.id in self.support_ops.keys(): a.id = self.support_ops[a.id]
|
||||
|
||||
return None
|
||||
|
||||
def convert_(self, prefix, func_name, ags, kws, ori_src):
|
||||
info = self.pjmap[func_name]
|
||||
p_prefix = info['pytorch']['prefix'] if 'prefix' in info['pytorch'].keys() else None
|
||||
if p_prefix is not None and prefix in p_prefix:
|
||||
p_ags = info['pytorch']['args_prefix']
|
||||
j_ags = info['jittor']['args_prefix']
|
||||
else:
|
||||
p_ags = info['pytorch']['args']
|
||||
j_ags = info['jittor']['args']
|
||||
if 'delete' in info.keys():
|
||||
delete = info['delete']
|
||||
else:
|
||||
delete = None
|
||||
j_prefix = info['jittor']['prefix'] if 'prefix' in info['jittor'].keys() else None
|
||||
j_module = info['jittor']['module']
|
||||
j_name = info['jittor']['name']
|
||||
links = info['links']
|
||||
extras = info['extras']
|
||||
jj_ags = []
|
||||
jj_kws = {}
|
||||
pp_ags = []
|
||||
pp_kws = {}
|
||||
if j_ags == '' and p_ags == '':
|
||||
# no args in Pytorch and Jittor.
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}()"
|
||||
else:
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}()"
|
||||
else:
|
||||
return f"{prefix}.{j_name}()"
|
||||
else:
|
||||
j_ags = j_ags.replace(' ','').split(',')
|
||||
for j_ag in j_ags:
|
||||
if '=' in j_ag:
|
||||
k,v = j_ag.split('=')
|
||||
jj_kws[k] = v
|
||||
else:
|
||||
jj_ags.append(j_ag)
|
||||
p_ags = p_ags.replace(' ','').split(',')
|
||||
for p_ag in p_ags:
|
||||
if '=' in p_ag:
|
||||
k,v = p_ag.split('=')
|
||||
pp_kws[k] = v
|
||||
else:
|
||||
pp_ags.append(p_ag)
|
||||
if len(jj_ags) == 0 and len(pp_ags) != 0:
|
||||
return f"raise AttributeError('''origin source: <{ori_src.strip()}>, {func_name} in Jittor has no Attribute {pp_ags[0]}''')"
|
||||
# raise AttributeError(f"{func_name} in Jittor has no Attribute {pp_ags[0]}")
|
||||
if delete is not None:
|
||||
for d in delete:
|
||||
if d in pp_ags:
|
||||
jj_ags.append(d)
|
||||
if d in pp_kws.keys():
|
||||
jj_kws[d] = None
|
||||
if len(pp_ags) > len(ags) + len(kws):
|
||||
return f"raise RuntimeError('''origin source: <{ori_src.strip()}>, There are needed {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you only provide {len(ags) + len(kws)}''')"
|
||||
# raise RuntimeError(f'There are needed {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you only provide {len(ags) + len(kws)}')
|
||||
ags_ = []
|
||||
for i in range(len(pp_ags)):
|
||||
if i < len(ags):
|
||||
if '*' in pp_ags[i]:
|
||||
ags_.append('(' + ', '.join(ags[i:]) + ')')
|
||||
ags = ags_
|
||||
break
|
||||
else:
|
||||
ags_.append(ags[i])
|
||||
else:
|
||||
break
|
||||
if len(pp_ags) + len(list(pp_kws.keys())) < len(ags) + len(kws):
|
||||
return f"raise RuntimeError('''origin source: <{ori_src.strip()}>,There are only {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you provide {len(ags) + len(kws)}''')"
|
||||
# raise RuntimeError(f'There are only {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you provide {len(ags) + len(kws)}')
|
||||
j_ags_flag = np.zeros(len(jj_ags))
|
||||
j_ags_values = {}
|
||||
j_kws_values = {}
|
||||
for i,ag in enumerate(ags):
|
||||
if len(pp_ags) == 0:
|
||||
ag_name = list(pp_kws.keys())[i]
|
||||
elif i < len(pp_ags):
|
||||
ag_name = pp_ags[i]
|
||||
elif i >= len(pp_ags) and (i-len(pp_ags)) <= len(list(pp_kws.keys())):
|
||||
ag_name = list(pp_kws.keys())[i-len(pp_ags)]
|
||||
else:
|
||||
return f"raise RuntimeError('''origin source: <{ori_src.strip()}>,The args number is not matc{func_name} in Jittor has no Attribute {ag_name}''')"
|
||||
# raise RuntimeError(f'The args number is not matc{func_name} in Jittor has no Attribute {ag_name}')
|
||||
if ag_name in links.keys():
|
||||
ag_name = links[ag_name]
|
||||
if ag_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(ag_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(ag_name))] = ag
|
||||
elif ag_name in jj_kws.keys():
|
||||
j_kws_values[ag_name] = ag
|
||||
else:
|
||||
return f"raise AttributeError('''origin source: <{ori_src.strip()}>, {func_name} in Jittor has no Attribute {ag_name}''')"
|
||||
# raise AttributeError(f'{func_name} in Jittor has no Attribute {ag_name}')
|
||||
for i,kw in enumerate(kws):
|
||||
kw_name, kw_value = kw.split('=')
|
||||
if kw_name in links.keys():
|
||||
kw_name = links[kw_name]
|
||||
if kw_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(kw_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(kw_name))] = kw_value
|
||||
elif kw_name in jj_kws.keys():
|
||||
j_kws_values[kw_name] = kw_value
|
||||
else:
|
||||
return f"raise AttributeError('''origin source: <{ori_src.strip()}>, {func_name} in Jittor has no Attribute {kw_name}''')"
|
||||
# raise AttributeError(f'{func_name} in Jittor has no Attribute {kw_name}')
|
||||
len_jj_ags = len(jj_ags) if len(jj_ags) == 0 or jj_ags[0] != '' else 0
|
||||
if j_ags_flag.sum() < len_jj_ags:
|
||||
missing_args = []
|
||||
for i in range(len(jj_ags)):
|
||||
if j_ags_flag[i] == 0:
|
||||
missing_args.append(jj_ags[i])
|
||||
return f"raise AttributeError('''origin source: <{ori_src.strip()}>, the needed args of {func_name} in Jittor is {', '.join(jj_ags)}, so you need to give value of {', '.join(missing_args)}.''')"
|
||||
# raise AttributeError(f"the needed args of {func_name} in Jittor is {', '.join(jj_ags)}, so you need to give value of {', '.join(missing_args)}.")
|
||||
if extras:
|
||||
for k in extras.keys():
|
||||
if k in jj_ags:
|
||||
j_ags_values[str(jj_ags.index(k))] = extras[k]
|
||||
elif k in jj_kws.keys():
|
||||
j_kws_values[k] = extras[k]
|
||||
else:
|
||||
return f"raise AttributeError('''origin source: <{ori_src.strip()}>, there is not attribute named {k} in Jittor {func_name}, you should delete it in {func_name} extras.''')"
|
||||
# raise AttributeError(f"there is not attribute named {k} in Jittor {func_name}, you should delete it in {func_name} extras.")
|
||||
if delete is not None:
|
||||
for d in delete:
|
||||
if d in j_ags_values:
|
||||
del j_ags_values[d]
|
||||
if d in j_kws_values.keys():
|
||||
j_kws_values.pop(d)
|
||||
j_ags_ = [j_ags_values[str(i)] for i in range(len(list(j_ags_values.keys())))]
|
||||
j_kws_ = [key + "=" + j_kws_values[key] for key in j_kws_values.keys()]
|
||||
j_func = f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
return f"{prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
return j_func
|
||||
|
||||
def dfs(self, a):
|
||||
if isinstance(a, ast.Import):
|
||||
if 'torch' in astunparse.unparse(a) and 'init' in astunparse.unparse(a):
|
||||
self.import_flag.append('init')
|
||||
return ast.parse('from jittor import init').body[0]
|
||||
if 'torch' in astunparse.unparse(a) and a.names[0].asname == 'nn':
|
||||
self.import_flag.append('nn')
|
||||
return ast.parse('from jittor import nn').body[0]
|
||||
if 'torch' in a.names[0].name:
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.ImportFrom):
|
||||
if 'torch' in a.module:
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.Call):
|
||||
for idx, ag in enumerate(a.args):
|
||||
ret = self.dfs(ag)
|
||||
if ret is not None:
|
||||
a.args[idx] = ret
|
||||
for idx, kw in enumerate(a.keywords):
|
||||
ret = self.dfs(kw)
|
||||
if ret is not None:
|
||||
a.keywords[idx] = ret
|
||||
ori_src = astunparse.unparse(a)
|
||||
func = astunparse.unparse(a.func).strip('\n').split('.')
|
||||
prefix = '.'.join(func[0:-1])
|
||||
func_name = func[-1]
|
||||
if func_name in self.unsupport_ops:
|
||||
ret = raise_unsupport(func_name, ori_src)
|
||||
return ret
|
||||
if func_name in self.pjmap:
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
kws = [astunparse.unparse(kw).strip('\n') for kw in a.keywords]
|
||||
ret = self.convert_(prefix, func_name, ags, kws, ori_src)
|
||||
ret_tmp = ret
|
||||
ret = ast.parse(ret).body[0]
|
||||
if hasattr(ret,'value'):
|
||||
return ret.value
|
||||
else:
|
||||
print(ret_tmp+'\n')
|
||||
return ret
|
||||
if ".load_state_dict" in astunparse.unparse(a.func):
|
||||
a.func.attr = 'load_parameters'
|
||||
if astunparse.unparse(a.func).strip('\n').endswith(".size"):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
if len(ags) != 0:
|
||||
con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']'
|
||||
else:
|
||||
con = astunparse.unparse(a.func).replace('size', 'shape')
|
||||
return ast.parse(con).body[0].value
|
||||
elif isinstance(a, ast.Expr): pass
|
||||
elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name):
|
||||
ret = self.replace(a)
|
||||
if ret is not None:
|
||||
print(ret)
|
||||
return ret
|
||||
elif isinstance(a, ast.FunctionDef):
|
||||
if a.name == 'forward': a.name = 'execute'
|
||||
if hasattr(a, '__dict__'):
|
||||
for k in a.__dict__.keys():
|
||||
if isinstance(a.__dict__[k], list):
|
||||
delete_flag = []
|
||||
for i,a_ in enumerate(a.__dict__[k]):
|
||||
ret = self.dfs(a_)
|
||||
if ret == 'delete':
|
||||
delete_flag.append(True)
|
||||
continue
|
||||
if ret is not None:
|
||||
a.__dict__[k][i] = ret
|
||||
delete_flag.append(False)
|
||||
tmp = [a_ for i,a_ in enumerate(a.__dict__[k]) if delete_flag[i] == False]
|
||||
a.__dict__[k] = tmp
|
||||
else:
|
||||
ret = self.dfs(a.__dict__[k])
|
||||
if ret is not None:
|
||||
a.__dict__[k] = ret
|
||||
|
||||
|
||||
def convert(code, ex_pjmaps=None):
|
||||
''' Model code converter, example:
|
||||
|
||||
from jittor.utils.pytorch_converter import convert
|
||||
|
@ -469,209 +717,13 @@ def convert(code):
|
|||
model = Model()
|
||||
print("## Jittor model:", model)
|
||||
'''
|
||||
|
||||
a = ast.parse(code)
|
||||
dfs(a)
|
||||
converter = Converter(ex_pjmaps)
|
||||
converter.dfs(a)
|
||||
a.body.insert(0, ast.parse('import jittor as jt').body[0])
|
||||
if 'init' not in import_flag:
|
||||
if 'init' not in converter.import_flag:
|
||||
a.body.insert(1, ast.parse('from jittor import init').body[0])
|
||||
if 'nn' not in import_flag:
|
||||
if 'nn' not in converter.import_flag:
|
||||
a.body.insert(2, ast.parse('from jittor import nn').body[0])
|
||||
return astunparse.unparse(a)
|
||||
|
||||
def convert_(prefix, func_name, ags, kws):
|
||||
info = pjmap[func_name]
|
||||
p_prefix = info['pytorch']['prefix'] if 'prefix' in info['pytorch'].keys() else None
|
||||
if p_prefix is not None and prefix in p_prefix:
|
||||
p_ags = info['pytorch']['args_prefix']
|
||||
j_ags = info['jittor']['args_prefix']
|
||||
else:
|
||||
p_ags = info['pytorch']['args']
|
||||
j_ags = info['jittor']['args']
|
||||
if 'delete' in info.keys():
|
||||
delete = info['delete']
|
||||
else:
|
||||
delete = None
|
||||
j_prefix = info['jittor']['prefix'] if 'prefix' in info['jittor'].keys() else None
|
||||
j_module = info['jittor']['module']
|
||||
j_name = info['jittor']['name']
|
||||
links = info['links']
|
||||
extras = info['extras']
|
||||
jj_ags = []
|
||||
jj_kws = {}
|
||||
pp_ags = []
|
||||
pp_kws = {}
|
||||
if j_ags == '' and p_ags == '':
|
||||
# no args in Pytorch and Jittor.
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}()"
|
||||
else:
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}()"
|
||||
else:
|
||||
return f"{prefix}.{j_name}()"
|
||||
else:
|
||||
j_ags = j_ags.replace(' ','').split(',')
|
||||
for j_ag in j_ags:
|
||||
if '=' in j_ag:
|
||||
k,v = j_ag.split('=')
|
||||
jj_kws[k] = v
|
||||
else:
|
||||
jj_ags.append(j_ag)
|
||||
p_ags = p_ags.replace(' ','').split(',')
|
||||
for p_ag in p_ags:
|
||||
if '=' in p_ag:
|
||||
k,v = p_ag.split('=')
|
||||
pp_kws[k] = v
|
||||
else:
|
||||
pp_ags.append(p_ag)
|
||||
if len(jj_ags) == 0 and len(pp_ags) != 0:
|
||||
raise AttributeError(f"{func_name} in Jittor has no Attribute {pp_ags[0]}")
|
||||
if delete is not None:
|
||||
for d in delete:
|
||||
if d in pp_ags:
|
||||
jj_ags.append(d)
|
||||
if d in pp_kws.keys():
|
||||
jj_kws[d] = None
|
||||
if len(pp_ags) > len(ags) + len(kws):
|
||||
raise RuntimeError(f'There are needed {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you only provide {len(ags) + len(kws)}')
|
||||
ags_ = []
|
||||
for i in range(len(pp_ags)):
|
||||
if i < len(ags):
|
||||
if '*' in pp_ags[i]:
|
||||
ags_.append('(' + ', '.join(ags[i:]) + ')')
|
||||
ags = ags_
|
||||
break
|
||||
else:
|
||||
ags_.append(ags[i])
|
||||
else:
|
||||
break
|
||||
if len(pp_ags) + len(list(pp_kws.keys())) < len(ags) + len(kws):
|
||||
raise RuntimeError(f'There are only {len(pp_ags) + len(list(pp_kws.keys()))} args in Pytorch {func_name} function, but you provide {len(ags) + len(kws)}')
|
||||
j_ags_flag = np.zeros(len(jj_ags))
|
||||
j_ags_values = {}
|
||||
j_kws_values = {}
|
||||
for i,ag in enumerate(ags):
|
||||
if len(pp_ags) == 0:
|
||||
ag_name = list(pp_kws.keys())[i]
|
||||
elif i < len(pp_ags):
|
||||
ag_name = pp_ags[i]
|
||||
elif i >= len(pp_ags) and (i-len(pp_ags)) <= len(list(pp_kws.keys())):
|
||||
ag_name = list(pp_kws.keys())[i-len(pp_ags)]
|
||||
else:
|
||||
raise RuntimeError(f'The args number does not match: {func_name} in Jittor has no Attribute {ag_name}')
|
||||
if ag_name in links.keys():
|
||||
ag_name = links[ag_name]
|
||||
if ag_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(ag_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(ag_name))] = ag
|
||||
elif ag_name in jj_kws.keys():
|
||||
j_kws_values[ag_name] = ag
|
||||
else:
|
||||
raise AttributeError(f'{func_name} in Jittor has no Attribute {ag_name}')
|
||||
for i,kw in enumerate(kws):
|
||||
kw_name, kw_value = kw.split('=')
|
||||
if kw_name in links.keys():
|
||||
kw_name = links[kw_name]
|
||||
if kw_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(kw_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(kw_name))] = kw_value
|
||||
elif kw_name in jj_kws.keys():
|
||||
j_kws_values[kw_name] = kw_value
|
||||
else:
|
||||
raise AttributeError(f'{func_name} in Jittor has no Attribute {kw_name}')
|
||||
len_jj_ags = len(jj_ags) if len(jj_ags) == 0 or jj_ags[0] != '' else 0
|
||||
if j_ags_flag.sum() < len_jj_ags:
|
||||
missing_args = []
|
||||
for i in range(len(jj_ags)):
|
||||
if j_ags_flag[i] == 0:
|
||||
missing_args.append(jj_ags[i])
|
||||
raise AttributeError(f"the needed args of {func_name} in Jittor is {', '.join(jj_ags)}, so you need to give value of {', '.join(missing_args)}.")
|
||||
if extras:
|
||||
for k in extras.keys():
|
||||
if k in jj_ags:
|
||||
j_ags_values[str(jj_ags.index(k))] = extras[k]
|
||||
elif k in jj_kws.keys():
|
||||
j_kws_values[k] = extras[k]
|
||||
else:
|
||||
raise AttributeError(f"there is not attribute named {k} in Jittor {func_name}, you should delete it in {func_name} extras.")
|
||||
if delete is not None:
|
||||
for d in delete:
|
||||
if d in j_ags_values:
|
||||
j_ags_values.remove(d)
|
||||
if d in j_kws_values.keys():
|
||||
j_kws_values.pop(d)
|
||||
j_ags_ = [j_ags_values[str(i)] for i in range(len(list(j_ags_values.keys())))]
|
||||
j_kws_ = [key + "=" + j_kws_values[key] for key in j_kws_values.keys()]
|
||||
j_func = f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
return f"{prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
return j_func
|
||||
|
||||
def dfs(a):
|
||||
if isinstance(a, ast.Import):
|
||||
if 'torch' in astunparse.unparse(a) and 'init' in astunparse.unparse(a):
|
||||
import_flag.append('init')
|
||||
return ast.parse('from jittor import init').body[0]
|
||||
if 'torch' in astunparse.unparse(a) and a.names[0].asname == 'nn':
|
||||
import_flag.append('nn')
|
||||
return ast.parse('from jittor import nn').body[0]
|
||||
if 'torch' in a.names[0].name:
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.ImportFrom):
|
||||
if 'torch' in a.module:
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.Call):
|
||||
for idx, ag in enumerate(a.args):
|
||||
ret = dfs(ag)
|
||||
if ret is not None:
|
||||
a.args[idx] = ret
|
||||
for idx, kw in enumerate(a.keywords):
|
||||
ret = dfs(kw)
|
||||
if ret is not None:
|
||||
a.keywords[idx] = ret
|
||||
func = astunparse.unparse(a.func).strip('\n').split('.')
|
||||
prefix = '.'.join(func[0:-1])
|
||||
func_name = func[-1]
|
||||
if func_name in unsupport_ops:
|
||||
raise_unsupport(func_name)
|
||||
if func_name in pjmap.keys():
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
kws = [astunparse.unparse(kw).strip('\n') for kw in a.keywords]
|
||||
ret = convert_(prefix, func_name, ags, kws)
|
||||
return ast.parse(ret).body[0].value
|
||||
if ".load_state_dict" in astunparse.unparse(a.func):
|
||||
a.func.attr = 'load_parameters'
|
||||
if astunparse.unparse(a.func).strip('\n').endswith(".size"):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
if len(ags) != 0:
|
||||
con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']'
|
||||
else:
|
||||
con = astunparse.unparse(a.func).replace('size', 'shape')
|
||||
return ast.parse(con).body[0].value
|
||||
elif isinstance(a, ast.Expr): pass
|
||||
elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name): replace(a)
|
||||
elif isinstance(a, ast.FunctionDef):
|
||||
if a.name == 'forward': a.name = 'execute'
|
||||
if hasattr(a, '__dict__'):
|
||||
for k in a.__dict__.keys():
|
||||
if isinstance(a.__dict__[k], list):
|
||||
delete_flag = []
|
||||
for i,a_ in enumerate(a.__dict__[k]):
|
||||
ret = dfs(a_)
|
||||
if ret is 'delete':
|
||||
delete_flag.append(True)
|
||||
continue
|
||||
if ret is not None:
|
||||
a.__dict__[k][i] = ret
|
||||
delete_flag.append(False)
|
||||
tmp = [a_ for i,a_ in enumerate(a.__dict__[k]) if delete_flag[i] == False]
|
||||
a.__dict__[k] = tmp
|
||||
else:
|
||||
ret = dfs(a.__dict__[k])
|
||||
if ret is not None:
|
||||
a.__dict__[k] = ret
|
|
@ -1 +1 @@
|
|||
f9e290160bead0d5892754da56b9ad63bc316320
|
||||
84596508776983dce645fc4ef77c7f35700549d5
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
cat > /tmp/converter_server.dockerfile <<\EOF
|
||||
FROM jittor/jittor
|
||||
|
||||
RUN python3.7 -m pip install flask
|
||||
RUN apt update && apt install git -y
|
||||
EOF
|
||||
|
||||
docker build --tag jittor/converter_server -f /tmp/converter_server.dockerfile .
|
||||
|
||||
# docker run --rm -it -m 16g --cpus=8 -p 0.0.0.0:5000:5000 jittor/converter_server bash -c "python3.7 -m pip install -U git+https://github.com/Jittor/jittor.git && python3.7 -m jittor.utils.converter_server"
|
||||
while true; do
|
||||
timeout --foreground 24h docker run --rm -it -m 16g --cpus=8 -p 0.0.0.0:5000:5000 jittor/converter_server bash -c "python3.7 -m pip install -U git+https://github.com/Jittor/jittor.git && python3.7 -m jittor.utils.converter_server"
|
||||
sleep 10
|
||||
done
|
|
@ -7,7 +7,7 @@
|
|||
#include <functional>
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator/cuda_dual_allocator.h"
|
||||
#include "event_queue.h"
|
||||
#endif
|
||||
|
@ -41,29 +41,46 @@ void load_fused_op(FusedOp& fused_op, vector<int>& fuse_ops, vector<Op*>& ops, i
|
|||
op->tflag = ntt;
|
||||
fused_op.ops.push_back(op);
|
||||
}
|
||||
for (Op* op : fused_op.ops) {
|
||||
uint fid1 = op->custom_data;
|
||||
uint oid = 0;
|
||||
for (Var* v : op->outputs()) {
|
||||
oid++;
|
||||
if (v->tflag != tt) {
|
||||
// this var node does not belong to the current execution
|
||||
// this will happen in a multi-output fuseable op
|
||||
// v->custom_data = 0 represents this var cannot be fused
|
||||
v->custom_data = 0;
|
||||
continue;
|
||||
}
|
||||
for (auto o : v->outputs_with_index()) {
|
||||
Op* op2 = o.op;
|
||||
uint iid = o.index;
|
||||
if (op2->tflag != ntt) continue;
|
||||
uint fid2 = op2->custom_data;
|
||||
fused_op.edges.emplace_back(fid1, oid-1, fid2, iid);
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGvvv << "Prepare fused_op" << fused_op.ops;
|
||||
fused_op.update_ops();
|
||||
for (Op* op : fused_op.ops) {
|
||||
uint fid1 = op->custom_data;
|
||||
int iid = 0;
|
||||
for (Var* v : op->inputs()) {
|
||||
iid++;
|
||||
int iop_id;
|
||||
int iv_id;
|
||||
if (v->_inputs.size() && v->input()->tflag == ntt) {
|
||||
auto e = v->_inputs.front();
|
||||
iop_id = e.node->custom_data;
|
||||
iv_id = e.back->index;
|
||||
} else {
|
||||
iv_id = v->custom_data >> 2;
|
||||
// add iv_id, prevent iv_id jit key overflow
|
||||
iop_id = fused_op.ops.size() + iv_id;
|
||||
}
|
||||
fused_op.edges.emplace_back(iop_id, iv_id, fid1, iid-1);
|
||||
}
|
||||
// TODO: can we remove this?
|
||||
// uint oid = 0;
|
||||
// for (Var* v : op->outputs()) {
|
||||
// oid++;
|
||||
// if (v->tflag != tt) {
|
||||
// // this var node not belong to current execution
|
||||
// // this will happend in multiple outputs fuseable op
|
||||
// // v->custom_data = 0 represents this var cannot be fused
|
||||
// v->custom_data = 0;
|
||||
// continue;
|
||||
// }
|
||||
// // for (auto o : v->outputs_with_index()) {
|
||||
// // Op* op2 = o.op;
|
||||
// // uint iid = o.index;
|
||||
// // if (op2->tflag != ntt) continue;
|
||||
// // uint fid2 = op2->custom_data;
|
||||
// // fused_op.edges.emplace_back(fid1, oid-1, fid2, iid);
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
||||
|
@ -429,6 +446,10 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
|
|||
// record trace data
|
||||
if (PREDICT_BRANCH_NOT_TAKEN(trace_py_var==2)) {
|
||||
trace_data.record_execution(op, is_fused_op, jkl);
|
||||
#ifdef HAS_CUDA
|
||||
if (use_cuda)
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
#endif
|
||||
}
|
||||
LOGvvv << "Finished Op(" >> op->name() << rid >>
|
||||
"/" >> queue.size() >> ") output:" << op->outputs();
|
||||
|
|
|
@ -7,10 +7,6 @@
|
|||
#pragma once
|
||||
#include "common.h"
|
||||
#include "mem/allocator.h"
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#endif
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
|
196
src/fuser.cc
|
@ -1,196 +0,0 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. Authors:
|
||||
// Guowei Yang <471184555@qq.com>
|
||||
// Dun Liang <randonlang@gmail.com>.
|
||||
// All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include "fuser.h"
|
||||
#include "var.h"
|
||||
#include "op.h"
|
||||
#include "mem/allocator.h"
|
||||
#include "graph.h"
|
||||
#include "fused_op.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
#define PREVENT_LARGE_FUSED_OP 16
|
||||
|
||||
void count_fuse(int64_t tt, int start_var_num, const vector<Op*>& ops, const vector<Var*>& vars, vector<int> &father, vector<int> &var_fused) {
|
||||
vector<int> dis(ops.size(), -1);
|
||||
|
||||
auto find_fa = [&](int i) -> int {
|
||||
int j=i;
|
||||
while (father[j] != j) j = father[j];
|
||||
while (i != j) {
|
||||
int tmp = father[i];
|
||||
father[i] = j;
|
||||
i = tmp;
|
||||
}
|
||||
return j;
|
||||
};
|
||||
|
||||
auto can_fuse = [&](Var* v, Op* op1, Op* op2, int fuse_type) -> bool {
|
||||
if (v->flags.get(NodeFlags::_stop_fuse))
|
||||
return false;
|
||||
if (fuse_type == 1) {
|
||||
// if v is output, do not fuse
|
||||
if (v->custom_data < start_var_num)
|
||||
return false;
|
||||
// op2 ---> v ---> op1
|
||||
if (op1->type() == OpType::other || op2->type() == OpType::other)
|
||||
return false;
|
||||
if (v->flags.get(NodeFlags::_force_fuse))
|
||||
return true;
|
||||
// Do not fuse op after reduce(has reduce)
|
||||
// TODO: better fuse strategy
|
||||
if (op2->type() == OpType::reduce)
|
||||
return false;
|
||||
// Do not fuse op before broadcast
|
||||
// TODO: better fuse strategy
|
||||
if (op1->type() == OpType::broadcast)
|
||||
return false;
|
||||
return op2->type() == OpType::element ||
|
||||
op2->type() == OpType::broadcast;
|
||||
} else if (fuse_type == 0) {
|
||||
#ifdef PREVENT_LARGE_FUSED_OP
|
||||
// This statement prevent fuse large ops
|
||||
if (v->outputs().size()>=PREVENT_LARGE_FUSED_OP) return false;
|
||||
#endif
|
||||
|
||||
// v ---> op1
|
||||
// |
|
||||
// +----> op2 ( prev of op1 )
|
||||
if (op1->type() == OpType::other || op2->type() == OpType::other)
|
||||
return false;
|
||||
// Do not fuse op after reduce(has reduce)
|
||||
// TODO: better fuse strategy
|
||||
if (op2->type() == OpType::broadcast || op1->type() == OpType::broadcast)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
auto for_each_edge = [&](Op* op, int forward, auto&& func){
|
||||
auto e=op->_inputs.begin();
|
||||
for (Var* v : op->inputs()) {
|
||||
if ((forward && (*e).back!=std::prev(v->_outputs.end())) ||
|
||||
(!forward && (*e).back!=v->_outputs.begin())){
|
||||
Op* next_op = forward ? std::next((*e).back)->node->op() : std::prev((*e).back)->node->op();
|
||||
if (next_op && next_op->tflag==tt
|
||||
&& next_op->custom_data != op->custom_data
|
||||
&& can_fuse(v, next_op, op, 0))
|
||||
func(v, next_op, 0);
|
||||
}
|
||||
e = std::next(e);
|
||||
}
|
||||
|
||||
if (forward) {
|
||||
for (Var* sv : op->outputs())
|
||||
if (sv && sv->tflag == tt)
|
||||
for (Op* next_op: sv->outputs())
|
||||
if (next_op && next_op->tflag==tt) func(sv, next_op, 1);
|
||||
} else {
|
||||
for (Var* sv : op->inputs())
|
||||
if (sv && sv->tflag == tt) func(sv, sv->input(), 1);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
vector<int> queue;
|
||||
vector<int> deps;
|
||||
deps.reserve(ops.size());
|
||||
queue.reserve(ops.size());
|
||||
for (uint i=0; i<ops.size(); i++) {
|
||||
deps.push_back(0);
|
||||
Op* op = ops[i];
|
||||
|
||||
for_each_edge(op, 1, [&](Var* v, Op* next_op, int real_edge) {
|
||||
deps[i]++;
|
||||
});
|
||||
|
||||
if (!deps[i]) {
|
||||
queue.push_back(i);
|
||||
dis[i]=0;
|
||||
}
|
||||
}
|
||||
|
||||
uint head=0;
|
||||
while (head<queue.size()) {
|
||||
int op_id=queue[head++];
|
||||
Op* op = ops[op_id];
|
||||
|
||||
for_each_edge(op, 1, [&](Var* v, Op* next_op, int real_edge) {
|
||||
int next_id = next_op->custom_data;
|
||||
if (dis[next_id] == dis[op_id]){
|
||||
int next_fa = find_fa(next_id);
|
||||
father[next_fa] = op_id;
|
||||
}
|
||||
});
|
||||
|
||||
for_each_edge(op, 0, [&](Var* v, Op* next_op, int real_edge) {
|
||||
int next_id = next_op->custom_data;
|
||||
int lon=0;
|
||||
if (real_edge && !can_fuse(v, op, next_op, 1)) lon=1;
|
||||
if (dis[op_id]+lon>dis[next_id])
|
||||
dis[next_id]=dis[op_id]+lon;
|
||||
if (!--deps[next_id]) queue.push_back(next_id);
|
||||
});
|
||||
}
|
||||
|
||||
if (V_ON(1000)) {
|
||||
for (uint i=0; i<ops.size(); i++)
|
||||
LOGvvvv << ops[i] << dis[i] << deps[i];
|
||||
}
|
||||
for (uint i=0; i<vars.size(); i++) {
|
||||
Var* v = vars[i];
|
||||
if (!v || v->tflag!=tt) {
|
||||
var_fused[i]=1;
|
||||
continue;
|
||||
}
|
||||
// sf: input op's father id
|
||||
int sf = -1;
|
||||
// vf: whether the input op can be fused with all of its output ops
|
||||
int vf = 1;
|
||||
// all outputs are reduce
|
||||
int all_reduce = 1;
|
||||
Op* iop = v->input();
|
||||
// if (iop && iop->tflag==tt)
|
||||
sf = find_fa(iop->custom_data);
|
||||
|
||||
for (Op* sop : v->outputs())
|
||||
if (sop->tflag==tt) {
|
||||
if (vf && !can_fuse(v,sop,iop,1))
|
||||
vf = 0;
|
||||
if (sop->type()!=OpType::reduce)
|
||||
all_reduce = 0;
|
||||
// in two different fused op
|
||||
if (find_fa(sop->custom_data)!=sf) {
|
||||
var_fused[i]=1;
|
||||
}
|
||||
}
|
||||
if (vf==0)
|
||||
// cannot fused
|
||||
var_fused[i]=1;
|
||||
else if (var_fused[i]) {
|
||||
if (iop->type()==OpType::broadcast ||
|
||||
all_reduce ||
|
||||
v->flags.get(NodeFlags::_force_fuse))
|
||||
// strong fused
|
||||
var_fused[i] = 3;
|
||||
else
|
||||
// weak fused
|
||||
var_fused[i] = 2;
|
||||
// var_fused[i] = 3;
|
||||
}
|
||||
}
|
||||
// output vars can not be fused
|
||||
for (int i=0; i<start_var_num; i++)
|
||||
var_fused[i] = 1;
|
||||
}
|
||||
|
||||
} // jittor
|
|
@ -177,7 +177,8 @@ vector<VarPtr> grad(Var* loss, vector<Var*> targets) {
|
|||
Var* dout = grads[id];
|
||||
trace_grad_op = op;
|
||||
VarPtr dvar = make_grad(op, out, dout, var, index);
|
||||
if (dvar && dvar->num>=0 && var->num)
|
||||
if (dvar && dvar->num>=0 && var->num>0)
|
||||
// var->num == 0 represents an any-match var
|
||||
ASSERT(dvar->num==var->num && dvar->shape.size()==var->shape.size())
|
||||
<< "dvar" << dvar << "var" << var;
|
||||
if (!grad)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#endif
|
||||
#include <random>
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator/cuda_device_allocator.h"
|
||||
|
||||
namespace jittor {
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include <mutex>
|
||||
#include <cstring>
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include "var.h"
|
||||
#include "mem/allocator.h"
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator/cuda_host_allocator.h"
|
||||
|
||||
namespace jittor {
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator/cuda_managed_allocator.h"
|
||||
|
||||
namespace jittor {
|
||||
|
|
|
@ -280,4 +280,20 @@ inline bool operator!=(const NanoVector& a, const NanoVector& b) {
|
|||
return ne(a, b);
|
||||
}
|
||||
|
||||
inline bool operator<(const NanoVector& a, const NanoVector& b) {
|
||||
return a.data < b.data || (a.data == b.data && a.offset < b.offset);
|
||||
}
|
||||
|
||||
} // jittor
|
||||
|
||||
|
||||
namespace std {
|
||||
template<> struct hash<jittor::NanoVector> {
|
||||
inline std::size_t operator()(jittor::NanoVector const& s) const noexcept {
|
||||
std::size_t h1 = std::hash<jittor::int64>{}(s.data);
|
||||
std::size_t h2 = std::hash<jittor::int64>{}(s.offset);
|
||||
return h1 ^ (h2 << 1);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ JIT_TEST(ring_buffer_benchmark) {
|
|||
LOGi << tt << tt*1.0/n;
|
||||
LOGi << s << (n*(n-1)/2);
|
||||
ASSERTop(s,==,(n*(n-1)/2));
|
||||
ASSERTop(tt*1.0/n,<=,50);
|
||||
ASSERTop(tt*1.0/n,<=,100);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -88,6 +88,8 @@ struct RingBuffer {
|
|||
static RingBuffer* make_ring_buffer(uint64 size, bool multiprocess);
|
||||
static void free_ring_buffer(RingBuffer* rb);
|
||||
|
||||
inline void clear() { l = r = is_stop = 0; }
|
||||
|
||||
inline void wait() {
|
||||
if (is_stop) {
|
||||
throw std::runtime_error("stop");
|
||||
|
|
|
@ -17,6 +17,7 @@ struct StackVector {
|
|||
inline T& front() { return a[0]; }
|
||||
inline T& back() { return a[n-1]; }
|
||||
inline int size() { return n;}
|
||||
inline T* data() { return a;}
|
||||
inline StackVector(int n=0) : n(n) {}
|
||||
|
||||
struct Iter {
|
||||
|
|
|
@ -287,8 +287,12 @@ std::ostream& operator<<(std::ostream& os, const Op* op) {
|
|||
os << ')';
|
||||
#ifdef NODE_MEMCHECK
|
||||
os << '<' << op->__id() << '>';
|
||||
print_node_trace(op, os);
|
||||
#endif
|
||||
if (trace_py_var) {
|
||||
os << '{';
|
||||
print_node_trace(op, os);
|
||||
os << '}';
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "mem/allocator.h"
|
||||
#include "mem/allocator/cuda_dual_allocator.h"
|
||||
#include "event_queue.h"
|
||||
|
@ -75,8 +75,19 @@ ArrayOp::ArrayOp(ArrayArgs&& args) {
|
|||
}
|
||||
|
||||
void ArrayOp::jit_prepare(JK& jk) {
|
||||
if (output->flags.get(NodeFlags::_force_fuse))
|
||||
if (output->flags.get(NodeFlags::_force_fuse)) {
|
||||
jk << _CS("[T:") << output->dtype() << ']';
|
||||
|
||||
// fill or find cbuffer for const var pass
|
||||
if (output->dtype().dsize() == 4) {
|
||||
auto x = abs(ptr<int32>()[0]);
|
||||
auto y = abs(ptr<float32>()[0]);
|
||||
auto z = ptr<uint32>()[0];
|
||||
if ((x<=2) || (y==1.0f || y==2.0f))
|
||||
jk << _CS("[o:") << z << ']';
|
||||
}
|
||||
// end of fill cbuffer
|
||||
}
|
||||
}
|
||||
|
||||
void ArrayOp::run() {
|
||||
|
|
|
@ -93,6 +93,9 @@ VarPtr dirty_clone_broadcast(Var* v) {
|
|||
if (op && !v->is_finished() && v->shape.size() > 4 && op->type() == OpType::broadcast) {
|
||||
auto vp = op->duplicate();
|
||||
if (vp) {
|
||||
// TODO: loop options should be set to op, rather than var
|
||||
if (v->loop_options)
|
||||
vp->loop_options = v->loop_options;
|
||||
return vp;
|
||||
}
|
||||
}
|
||||
|
@ -126,8 +129,8 @@ VarPtr BinaryOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
|||
}
|
||||
if (ns == ns_maximum || ns == ns_minimum) {
|
||||
auto zeros = make_number(0, dout);
|
||||
auto cond = make_binary(x, y, ns_greater_equal);
|
||||
if ((ns == ns_maximum) == (v_index==0))
|
||||
auto cond = make_binary(y, z, ns_equal);
|
||||
if (v_index==1)
|
||||
return make_ternary(cond, dout, zeros);
|
||||
else
|
||||
return make_ternary(cond, zeros, dout);
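A quick way to exercise this maximum/minimum gradient path from Python is sketched below, assuming the usual `jt.array`, `jt.maximum` and `jt.grad` APIs; it is a sanity check, not part of the commit.

```python
# Sanity-check sketch for the maximum/minimum gradient path touched above;
# assumes standard jittor APIs (jt.array, jt.maximum, jt.grad).
import jittor as jt

a = jt.array([1.0, 5.0, 2.0])
b = jt.array([3.0, 2.0, 2.0])
loss = jt.maximum(a, b).sum()
ga, gb = jt.grad(loss, [a, b])
print(ga.numpy(), gb.numpy())  # gradient flows to the side selected as the maximum
```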
|
||||
|
|
|
@ -38,7 +38,7 @@ namespace jittor {
|
|||
#define bitwise_and(T,a,b) ((a)&(b))
|
||||
#define bitwise_or(T,a,b) ((a)|(b))
|
||||
#define bitwise_xor(T,a,b) ((a)^(b))
|
||||
#define mean(T,a,b) ((a)+T(b)*(T(1)/T(count)))
|
||||
#define mean(T,a,b) ((a)+T(b)*(T(rcount)))
|
||||
|
||||
#ifdef JIT_cuda
|
||||
#define init_maximum(T) ::numeric_min<T>()
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include "var.h"
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
#include "ops/copy_op.h"
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#endif
|
||||
|
||||
namespace jittor {
|
||||
|
@ -36,14 +37,14 @@ void CopyOp::run() {
|
|||
auto size = x->size;
|
||||
auto x_ptr = x->mem_ptr;
|
||||
auto y_ptr = outputs().front()->mem_ptr;
|
||||
if (flags.get(NodeFlags::_cpu)) {
|
||||
#ifdef HAS_CUDA
|
||||
if (flags.get(NodeFlags::_cuda)) {
|
||||
checkCudaErrors(cudaMemcpyAsync(y_ptr, x_ptr, size, cudaMemcpyDefault, 0));
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
std::memcpy(y_ptr, x_ptr, size);
|
||||
}
|
||||
#ifdef HAS_CUDA
|
||||
else {
|
||||
checkCudaErrors(cudaMemcpyAsync(y_ptr, x_ptr, size, cudaMemcpyDefault, 0));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#include <mutex>
|
||||
#include "misc/cuda_flags.h"
|
||||
#include "mem/allocator/sfrl_allocator.h"
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include "ops/op_register.h"
|
||||
#ifdef JIT_cuda
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#endif
|
||||
#ifndef JIT
|
||||
#include "misc/stack_vector.h"
|
||||
|
|
|
@ -72,9 +72,9 @@ void IndexOp::jit_run() {
|
|||
@for(d, 0, XDIM, for (index_t i@d=0; i@d < x0shape@d; i@d++)) {
|
||||
auto xid = @for(d, 0, XDIM, + i@d * x0stride@d);
|
||||
@if(DIM==XDIM,
|
||||
@for(i,0,XDIM, x@i@@p[xid] = i@i;)
|
||||
@for(i,0,XDIM, T x@i@@id = i@i; x@i@@p[xid] = x@i@@id;)
|
||||
,
|
||||
x0p[xid] = i@DIM;
|
||||
T x@DIM@@id = i@DIM; x0p[xid] = x@DIM@@id;
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,9 +34,9 @@ unordered_set<string> reduce_ops = {
|
|||
"add",
|
||||
// @pybind(prod, product, reduce_multiply)
|
||||
"multiply",
|
||||
// @pybind(reduce_logical_and, all)
|
||||
// @pybind(reduce_logical_and, all_)
|
||||
"logical_and",
|
||||
// @pybind(reduce_logical_or, any)
|
||||
// @pybind(reduce_logical_or, any_)
|
||||
"logical_or",
|
||||
"logical_xor",
|
||||
"bitwise_and",
|
||||
|
@ -65,7 +65,8 @@ ReduceOp::ReduceOp(Var* x, NanoString op, NanoVector dims, bool keepdims)
|
|||
reduce_mask |= 1<<dim;
|
||||
}
|
||||
}
|
||||
if (x->dtype() == ns_bool && ns == ns_add)
|
||||
// if (x->dtype() == ns_bool && ns == ns_add)
|
||||
if (x->dtype() == ns_bool)
|
||||
y = create_output(nullptr, ns_int32);
|
||||
else
|
||||
y = create_output(nullptr, binary_dtype_infer(ns, x, x));
|
||||
|
@ -157,6 +158,7 @@ void ReduceOp::jit_run() {
|
|||
index_t xstride@{DIM-1} = 1;
|
||||
@for(i, DIM-2, -1, -1, auto xstride@i = xstride@{i+1} * xshape@{i+1};)
|
||||
Ty count = Ty(x->num) / Ty(y->num);
|
||||
Ty rcount = Ty(y->num) / Ty(x->num);
|
||||
@for(d, 0, DIM,@if(REDUCE>>d&1,, for (index_t xi@d=0; xi@d < xshape@d; xi@d++))) {
|
||||
auto yid = 0 @for(d, 0, DIM,@if(REDUCE>>d&1,, + xi@d * ystride@d));
|
||||
yp[yid] = @expand_macro(init_@OP, Ty);
|
||||
|
@ -169,7 +171,7 @@ void ReduceOp::jit_run() {
|
|||
yp[yid] = @expand_macro(@OP, Ty, yp[yid], xp[xid]);
|
||||
}
|
||||
}
|
||||
(void)count, (void)yshape0, (void)ystride0;
|
||||
(void)count, (void)rcount, (void)yshape0, (void)ystride0;
|
||||
}
|
||||
#endif // JIT
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "ops/binary_op_defs.h"
|
||||
#ifdef JIT_cuda
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#include "helper_cuda.h"
|
||||
#endif
|
||||
#else
|
||||
#include "ops/op_register.h"
|
||||
|
@ -69,7 +69,7 @@ void SetitemOp::infer_shape() {
|
|||
for (int i=0; i<data_dim; i++) {
|
||||
int j = i - data_dim + out_shape.size();
|
||||
if (!(data_shape[i]==1 && out_shape[j]!=-1)) {
|
||||
CHECK(data_shape[i]<0 || data_shape[i]==out_shape[j])
|
||||
CHECK(data_shape[i]<0 || out_shape[j]<0 || data_shape[i]==out_shape[j])
|
||||
<< "Data shape not match" << data_shape << out_shape;
|
||||
bmask |= 1<<j;
|
||||
}
|
||||
|
|
|
@ -64,8 +64,11 @@ void TernaryOp::jit_run() {
|
|||
auto* __restrict__ yp = y->ptr<Ty>();
|
||||
auto* __restrict__ zp = z->ptr<Tz>();
|
||||
index_t num = z->num;
|
||||
for (index_t i=0; i<num; i++)
|
||||
zp[i] = condp[i] ? xp[i] : yp[i];
|
||||
for (index_t i=0; i<num; i++) {
|
||||
Tz xd_ = xp[i];
|
||||
Tz yd_ = yp[i];
|
||||
zp[i] = condp[i] ? xd_ : yd_;
|
||||
}
|
||||
}
|
||||
#endif // JIT
|
||||
|
||||
|
|
|
@ -40,38 +40,8 @@ TransposeOp::TransposeOp(Var* x, NanoVector axes_) : x(x), axes(axes_) {
|
|||
.get_constructor<VarPtr, Var*, NanoVector>();
|
||||
}
|
||||
if (cutt_transpose) {
|
||||
bool need_reshape = false;
|
||||
int dims = x->shape.size();
|
||||
vector<int64> in_axes;
|
||||
vector<int64> in_shape;
|
||||
vector<int64> out_shape;
|
||||
vector<int64> trans;
|
||||
int cnt = 0;
|
||||
for (int i = 0; i < dims; ++i) {
|
||||
if (x->shape[i] == 1) {
|
||||
need_reshape = true;
|
||||
trans.push_back(-1);
|
||||
} else {
|
||||
trans.push_back(cnt);
|
||||
cnt += 1;
|
||||
in_shape.push_back(x->shape[i]);
|
||||
}
|
||||
out_shape.push_back(x->shape[axes[i]]);
|
||||
}
|
||||
for (int i = 0; i < dims; ++i) {
|
||||
if (x->shape[axes[i]] != 1) {
|
||||
in_axes.push_back(trans[axes[i]]);
|
||||
}
|
||||
}
|
||||
if (need_reshape) {
|
||||
auto x1 = make_reshape(x, NanoVector(in_shape));
|
||||
auto x2 = cutt_transpose(x1, in_axes);
|
||||
auto x3 = make_reshape(x2, NanoVector(out_shape));
|
||||
forward(x3);
|
||||
} else {
|
||||
auto var = cutt_transpose(x, axes);
|
||||
forward(var);
|
||||
}
|
||||
auto var = cutt_transpose(x, axes);
|
||||
forward(var);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#ifdef JIT_cuda
|
||||
#include "executor.h"
|
||||
#include <assert.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include "helper_cuda.h"
|
||||
#endif
|
||||
|
||||
namespace jittor {
|
||||
|
|