mirror of https://github.com/Jittor/Jittor
commit acfceca90f: merge nn.py
@ -0,0 +1,57 @@
# This is a basic workflow to help you get started with Actions

name: CI

# Controls when the action will run. Triggers the workflow on push or pull request
# events but only for the master branch
on: [ push ]
#   push:
#     branches: [ master ]
#   pull_request:
#     branches: [ master ]

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  test_clang_8_cuda_10:
    # The type of runner that the job will run on
    runs-on: self-hosted

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
    # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
    - uses: actions/checkout@v2

    - name: test
      run: |
        export cache_name=github_${GITHUB_REF##*/}
        export cc_path="clang++-8"
        export cc_flags=" -g "
        export log_sync=0
        export log_v=0
        export PYTHONIOENCODING=utf8
        export PYTHONPATH=`pwd`/python
        export nvcc_path=/usr/local/cuda/bin/nvcc
        python3.7 -c "import jittor"
        python3.7 -m jittor.test -v

  test_gcc:
    # The type of runner that the job will run on
    runs-on: self-hosted

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
    # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
    - uses: actions/checkout@v2

    - name: test
      run: |
        export cache_name=github_${GITHUB_REF##*/}
        export cc_path="g++"
        export cc_flags=" -g "
        export log_sync=0
        export log_v=0
        export PYTHONIOENCODING=utf8
        export PYTHONPATH=`pwd`/python
        export nvcc_path=
        python3.7 -c "import jittor"
        python3.7 -m jittor.test -v
@ -19,4 +19,6 @@ venv/
*.md
!*.src.md
!README.md
!README.cn.md
python/jittor.egg-info
dist/
README.cn.md
@ -16,6 +16,8 @@ Jittor前端语言为Python。前端使用了模块化的设计,这是目前
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np

class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)

@ -33,13 +35,18 @@ def get_data(n): # generate random data for training test.
        y = x*x
        yield jt.float32(x), jt.float32(y)

model = Model()

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i,(x,y) in enumerate(get_data(n)):
    pred_y = model(x)
    loss = ((pred_y - y)**2)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")

@ -74,16 +81,19 @@ Jittor使用Python和C++编写。 它需要用于即时编译的编译器。当

* CPU 编译器 (需要下列至少一个)
    - g++ (>=5.4.0)
    - clang (>=8.0)推荐
    - clang (>=8.0)
* GPU 编译器(可选)
    - nvcc(>=10.0)
    - nvcc (>=10.0 for g++ 或者 >=10.2 for clang)

Jittor的环境要求如下:

* 操作系统: Ubuntu >= 16.04
* 操作系统: **Ubuntu** >= 16.04 (or **Windows** Subsystem of Linux)
* Python版本 >= 3.7
* C++编译器(g++ or clang)

注意:目前Jittor通过WSL的方式在Windows操作系统上运行,WSL的安装方法请参考[微软官网](https://docs.microsoft.com/en-us/windows/wsl/install-win10),目前WSL尚不支持CUDA。

@ -98,10 +108,6 @@ Jittor 一共提供三种方式安装: pip安装, 一键脚本安装 和 手动
```bash
sudo apt install python3.7-dev libomp-dev
sudo python3.7 -m pip install git+https://github.com/Jittor/jittor.git
# if you cannot access github, please download code from our website:
# wget https://cg.cs.tsinghua.edu.cn/jittor/assets/build/jittor.tgz
# mkdir -p jittor && tar -xvf ./jittor.tgz -C jittor
# sudo pip install ./jittor
python3.7 -m jittor.test.test_example
```

@ -145,7 +151,7 @@ wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install.
sudo apt install g++ build-essential libomp-dev

# OR clang++-8
wget -O - https://apt.llvm.org/llvm.sh > /tmp/llvm.sh
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```

@ -266,7 +272,7 @@ print(type(a), type(b), type(c))
除此之外,我们使用的所有算子`jt.xxx(Var,...)`都具有别名`Var.xxx(...)`。 例如:

```python
c.max() # alias of jt.max(a)
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
README.md
@ -16,6 +16,8 @@ The following example shows how to model a two-layer neural network step by step
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np

class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)

@ -33,13 +35,18 @@ def get_data(n): # generate random data for training test.
        y = x*x
        yield jt.float32(x), jt.float32(y)

model = Model()

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i,(x,y) in enumerate(get_data(n)):
    pred_y = model(x)
    loss = ((pred_y - y)**2)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")

@ -74,18 +81,21 @@ Jittor is written in Python and C++. It requires a compiler for JIT compilation,

* CPU compiler (require at least one of the following)
    * g++ (>=5.4.0)
    * clang (>=8.0) recommend
    * clang (>=8.0)
* GPU compiler (optional)
    * nvcc (>=10.0)
    * nvcc (>=10.0 for g++ or >=10.2 for clang)

Jittor environment requirements:

* System: Ubuntu >= 16.04
* System: **Ubuntu** >= 16.04 (or **Windows** Subsystem of Linux)
* Python version >= 3.7
* C++ compiler(g++ or clang)

Note: Currently Jittor runs on the Windows operating system through WSL. For the installation method of WSL, please refer to [Microsoft official website](https://docs.microsoft.com/en-us/windows/wsl/install-win10). WSL does not yet support CUDA.

Jittor offers three ways to install: pip, script or manual.

@ -96,10 +106,6 @@ Jittor offers three ways to install: pip, script or manual.
```bash
sudo apt install python3.7-dev libomp-dev
sudo python3.7 -m pip install git+https://github.com/Jittor/jittor.git
# if you cannot access github, please download code from our website:
# wget https://cg.cs.tsinghua.edu.cn/jittor/assets/build/jittor.tgz
# mkdir -p jittor && tar -xvf ./jittor.tgz -C jittor
# sudo pip install ./jittor
python3.7 -m jittor.test.test_example
```

@ -140,7 +146,7 @@ We will show how to install Jittor in Ubuntu 16.04 step by step, Other Linux dis
sudo apt install g++ build-essential libomp-dev

# OR clang++-8
wget -O - https://apt.llvm.org/llvm.sh > /tmp/llvm.sh
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```
### Step 2: Install Python and python-dev

@ -261,7 +267,7 @@ Beside that, All the operators we used `jt.xxx(Var, ...)` have alias `Var.xxx(..

```python
c.max() # alias of jt.max(a)
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
@ -21,6 +21,8 @@ The following example shows how to model a two-layer neural network step by step
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np

class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)

@ -38,13 +40,18 @@ def get_data(n): # generate random data for training test.
        y = x*x
        yield jt.float32(x), jt.float32(y)

model = Model()

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i,(x,y) in enumerate(get_data(n)):
    pred_y = model(x)
    loss = ((pred_y - y)**2)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")

@ -93,27 +100,31 @@ Jittor使用Python和C++编写。 它需要用于即时编译的编译器。当

* CPU compiler (require at least one of the following)
    * g++ (>=5.4.0)
    * clang (>=8.0) recommend
    * clang (>=8.0)
* CPU 编译器 (需要下列至少一个)
    - g++ (>=5.4.0)
    - clang (>=8.0)推荐
    - clang (>=8.0)
* GPU compiler (optional)
    * nvcc (>=10.0)
    * nvcc (>=10.0 for g++ or >=10.2 for clang)
* GPU 编译器(可选)
    - nvcc(>=10.0)
    - nvcc (>=10.0 for g++ 或者 >=10.2 for clang)

Jittor的环境要求如下:

* 操作系统: Ubuntu >= 16.04
* 操作系统: **Ubuntu** >= 16.04 (or **Windows** Subsystem of Linux)
* Python版本 >= 3.7
* C++编译器(g++ or clang)

注意:目前Jittor通过WSL的方式在Windows操作系统上运行,WSL的安装方法请参考[微软官网](https://docs.microsoft.com/en-us/windows/wsl/install-win10),目前WSL尚不支持CUDA。

Jittor environment requirements:

* System: Ubuntu >= 16.04
* System: **Ubuntu** >= 16.04 (or **Windows** Subsystem of Linux)
* Python version >= 3.7
* C++ compiler(g++ or clang)

Note: Currently Jittor runs on the Windows operating system through WSL. For the installation method of WSL, please refer to [Microsoft official website](https://docs.microsoft.com/en-us/windows/wsl/install-win10). WSL does not yet support CUDA.

Jittor offers three ways to install: pip, script or manual.

Jittor 一共提供三种方式安装: pip安装, 一键脚本安装 和 手动安装.

@ -128,10 +139,6 @@ Jittor 一共提供三种方式安装: pip安装, 一键脚本安装 和 手动
```bash
sudo apt install python3.7-dev libomp-dev
sudo python3.7 -m pip install git+https://github.com/Jittor/jittor.git
# if you cannot access github, please download code from our website:
# wget https://cg.cs.tsinghua.edu.cn/jittor/assets/build/jittor.tgz
# mkdir -p jittor && tar -xvf ./jittor.tgz -C jittor
# sudo pip install ./jittor
python3.7 -m jittor.test.test_example
```

@ -182,7 +189,7 @@ We will show how to install Jittor in Ubuntu 16.04 step by step, Other Linux dis
sudo apt install g++ build-essential libomp-dev

# OR clang++-8
wget -O - https://apt.llvm.org/llvm.sh > /tmp/llvm.sh
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```
### Step 2: Install Python and python-dev

@ -325,7 +332,7 @@ Beside that, All the operators we used `jt.xxx(Var, ...)` have alias `Var.xxx(..
除此之外,我们使用的所有算子`jt.xxx(Var,...)`都具有别名`Var.xxx(...)`。 例如:

```python
c.max() # alias of jt.max(a)
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
@ -17,6 +17,8 @@

#pragma once

#include "utils/log.h"

#include <stdexcept>
#include <stdint.h>
#include <stdio.h>

@ -79,6 +81,11 @@ const char *_cudaGetErrorEnum(cusolverStatus_t error);
const char *_cudaGetErrorEnum(curandStatus_t error);
#endif

#ifdef NCCL_H_
// cuRAND API errors
const char *_cudaGetErrorEnum(ncclResult_t error);
#endif

#ifdef NV_NPPIDEFS_H
// NPP API errors
const char *_cudaGetErrorEnum(NppStatus error);

@ -98,10 +105,10 @@ template <typename T>
void check(T result, char const *const func, const char *const file,
           int const line) {
  if (result) {
    fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
            static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
    DEVICE_RESET
    throw std::runtime_error("CUDA error");
    LOGf << "CUDA error at" << file >> ":" >> line << " code="
        >> static_cast<unsigned int>(result) >> "(" << _cudaGetErrorEnum(result) << ")"
        << func;
  }
}

@ -118,13 +125,10 @@ inline void __getLastCudaError(const char *errorMessage, const char *file,
  cudaError_t err = cudaGetLastError();

  if (cudaSuccess != err) {
    fprintf(stderr,
            "%s(%i) : getLastCudaError() CUDA error :"
            " %s : (%d) %s.\n",
            file, line, errorMessage, static_cast<int>(err),
            cudaGetErrorString(err));
    DEVICE_RESET
    exit(EXIT_FAILURE);
    LOGf << "CUDA error at" << file >> ":" >> line << " code="
        >> static_cast<unsigned int>(err) >> "(" << _cudaGetErrorEnum(err) << ")"
        << errorMessage;
  }
}

@ -137,11 +141,10 @@ inline void __printLastCudaError(const char *errorMessage, const char *file,
  cudaError_t err = cudaGetLastError();

  if (cudaSuccess != err) {
    fprintf(stderr,
            "%s(%i) : getLastCudaError() CUDA error :"
            " %s : (%d) %s.\n",
            file, line, errorMessage, static_cast<int>(err),
            cudaGetErrorString(err));
    DEVICE_RESET
    LOGf << "CUDA error at" << file >> ":" >> line << " code="
        >> static_cast<unsigned int>(err) >> "(" << _cudaGetErrorEnum(err) << ")"
        << errorMessage;
  }
}
#endif
@ -0,0 +1,21 @@
// ***************************************************************
// Copyright (c) 2020 Jittor.
// Authors:
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "mpi_warper.h"

#include <cuda_runtime.h>
#include <nccl.h>
#include <helper_cuda.h>

namespace jittor {

extern ncclComm_t comm;
extern ncclUniqueId id;

} // jittor
@ -0,0 +1,61 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "var.h"
#include "nccl_all_reduce_op.h"
#include "misc/str_utils.h"

#include <nccl.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "nccl_warper.h"
#include "ops/op_register.h"
namespace jittor {

#ifndef JIT

static auto nccl_all_reduce =
    get_op_info("nccl_all_reduce").get_constructor<VarPtr, Var*>();

NcclAllReduceOp::NcclAllReduceOp(Var* x) : x(x) {
    flags.set(NodeFlags::_cpu, 0);
    flags.set(NodeFlags::_cuda, 1);
    y = create_output(nullptr, x->dtype());
}

void NcclAllReduceOp::infer_shape() {
    y->set_shape(x->shape);
}

VarPtr NcclAllReduceOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    return nccl_all_reduce(dout);
}

void NcclAllReduceOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
}

#else // JIT
#ifdef JIT_cuda

void NcclAllReduceOp::jit_run() {
    @define(T_NCCL,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, ncclFloat)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, ncclInt)
        @if(@strcmp(@Tx,float64)==0, ncclFloat64)
        @if(@strcmp(@Tx,int64)==0, ncclInt64)
    )
    auto* __restrict__ xp = x->ptr<Tx>();
    auto* __restrict__ yp = y->ptr<Tx>();
    checkCudaErrors(ncclAllReduce(xp, yp, y->num, @T_NCCL, ncclSum, comm, 0));
}

#endif
#endif // JIT

} // jittor
@ -0,0 +1,25 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct NcclAllReduceOp : Op {
    Var* x, * y;

    NcclAllReduceOp(Var* x);
    void infer_shape() override;

    const char* name() const override { return "nccl_all_reduce"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
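The header above defines only the C++ interface. As a rough illustration of how such a compiled op is reached from the Python side, the sketch below assumes the extension was built by `setup_nccl()` (added to `compile_extern.py` later in this commit), that CUDA is enabled, and that the script runs under an MPI launcher; the `getattr` lookup and the sample tensor are illustrative, not part of the commit.

```python
# Hedged sketch: invoking nccl_all_reduce from Python (assumes a multi-rank
# launch, e.g. `mpirun -np 2 python3.7 demo.py`, and a CUDA build of Jittor).
import jittor as jt
from jittor import compile_extern

nccl_ops = getattr(compile_extern, "nccl_ops", None)  # None when NCCL is unavailable
jt.flags.use_cuda = 1

x = jt.float32([1.0, 2.0, 3.0])
if nccl_ops:
    y = nccl_ops.nccl_all_reduce(x)  # element-wise ncclSum across all ranks
    print(y.data)
```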
@ -0,0 +1,59 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "var.h"
#include "nccl_broadcast_op.h"
#include "misc/str_utils.h"

#include <nccl.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "nccl_warper.h"
#include "ops/op_register.h"
namespace jittor {

#ifndef JIT
NcclBroadcastOp::NcclBroadcastOp(Var* x, int root) : x(x), root(root) {
    flags.set(NodeFlags::_cpu, 0);
    flags.set(NodeFlags::_cuda, 1);
    y = create_output(nullptr, x->dtype());
}

void NcclBroadcastOp::infer_shape() {
    y->set_shape(x->shape);
}

VarPtr NcclBroadcastOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    static auto nccl_reduce =
        get_op_info("nccl_reduce").get_constructor<VarPtr, Var*, int>();
    return nccl_reduce(dout,root);
}

void NcclBroadcastOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
}

#else // JIT
#ifdef JIT_cuda

void NcclBroadcastOp::jit_run() {
    @define(T_NCCL,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, ncclFloat)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, ncclInt)
        @if(@strcmp(@Tx,float64)==0, ncclFloat64)
        @if(@strcmp(@Tx,int64)==0, ncclInt64)
    )
    auto* __restrict__ xp = x->ptr<Tx>();
    auto* __restrict__ yp = y->ptr<Tx>();
    checkCudaErrors(ncclBroadcast(xp, yp, y->num, @T_NCCL, root, comm, 0));
}

#endif
#endif // JIT

} // jittor
@ -0,0 +1,26 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct NcclBroadcastOp : Op {
    Var* x, * y;
    int root;

    NcclBroadcastOp(Var* x, int root=0);
    void infer_shape() override;

    const char* name() const override { return "nccl_broadcast"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
@ -0,0 +1,61 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "var.h"
#include "nccl_reduce_op.h"
#include "misc/str_utils.h"

#include <nccl.h>
#include <cuda_runtime.h>
#include <helper_cuda.h>
#include "nccl_warper.h"
#include "ops/op_register.h"
namespace jittor {

#ifndef JIT
NcclReduceOp::NcclReduceOp(Var* x, int root) : x(x), root(root) {
    flags.set(NodeFlags::_cpu, 0);
    flags.set(NodeFlags::_cuda, 1);
    y = create_output(nullptr, x->dtype());
}

void NcclReduceOp::infer_shape() {
    y->set_shape(x->shape);
}

VarPtr NcclReduceOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    static auto nccl_broadcast =
        get_op_info("nccl_broadcast").get_constructor<VarPtr, Var*, int>();
    return nccl_broadcast(dout,root);
}

void NcclReduceOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
}

#else // JIT
#ifdef JIT_cuda

void NcclReduceOp::jit_run() {
    @define(T_NCCL,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, ncclFloat)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, ncclInt)
        @if(@strcmp(@Tx,float64)==0, ncclFloat64)
        @if(@strcmp(@Tx,int64)==0, ncclInt64)
    )
    auto* __restrict__ xp = x->ptr<Tx>();
    auto* __restrict__ yp = y->ptr<Tx>();
    checkCudaErrors(ncclReduce(xp, yp, y->num, @T_NCCL, ncclSum, root, comm, 0));
    if (root != mpi_world_rank)
        checkCudaErrors(cudaMemsetAsync(yp, 0, y->size));
}

#endif
#endif // JIT

} // jittor
@ -0,0 +1,26 @@
// ***************************************************************
// Copyright (c) 2020
//     Guoye Yang <498731903@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct NcclReduceOp : Op {
    Var* x, * y;
    int root;

    NcclReduceOp(Var* x, int root=0);
    void infer_shape() override;

    const char* name() const override { return "nccl_reduce"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
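Taken together, the three NCCL ops form a closed set under differentiation: `nccl_all_reduce` is its own gradient, while `nccl_broadcast` and `nccl_reduce` are gradients of each other (see the `grad()` methods above). The NumPy sketch below only simulates the forward semantics on one machine to make the root/zeroing behaviour concrete; it is an illustration, not code from this commit.

```python
import numpy as np

def simulate_reduce(xs, root=0):
    # Mimics nccl_reduce plus the cudaMemsetAsync in jit_run:
    # the root rank keeps the sum, every other rank keeps zeros.
    total = np.sum(xs, axis=0)
    return [total if r == root else np.zeros_like(total) for r in range(len(xs))]

def simulate_broadcast(xs, root=0):
    # Mimics nccl_broadcast: every rank receives the root's buffer.
    return [xs[root].copy() for _ in range(len(xs))]

xs = [np.array([1., 2.]), np.array([10., 20.])]    # one buffer per rank
print(simulate_reduce(xs))     # [array([11., 22.]), array([0., 0.])]
print(simulate_broadcast(xs))  # [array([1., 2.]), array([1., 2.])]
```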
@ -0,0 +1,127 @@
// ***************************************************************
// Copyright (c) 2019 Dun Liang <randonlang@gmail.com>. All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "var.h"
#include "nccl_test_op.h"
#include "misc/str_utils.h"

#include "nccl_warper.h"

namespace jittor {

#ifndef JIT
NcclTestOp::NcclTestOp(string cmd) : cmd(cmd) {
    flags.set(NodeFlags::_cpu, 0);
    flags.set(NodeFlags::_cuda, 1);
    output = create_output(1, ns_float32);
}

void NcclTestOp::jit_prepare() {
    add_jit_define("T", ns_float32);
}

#else // JIT
#ifdef JIT_cuda

static void test_with_mpi() {
    int size = 32*1024*1024;
    int myRank = mpi_world_rank;
    int nRanks = mpi_world_size;
    int localRank = mpi_local_rank;

    float *sendbuff, *recvbuff;
    cudaStream_t s;
    checkCudaErrors(cudaMalloc(&sendbuff, size * sizeof(float)));
    checkCudaErrors(cudaMalloc(&recvbuff, size * sizeof(float)));
    checkCudaErrors(cudaStreamCreate(&s));

    //communicating using NCCL
    checkCudaErrors(ncclAllReduce((const void*)sendbuff, (void*)recvbuff, size, ncclFloat, ncclSum,
        comm, s));

    //completing NCCL operation by synchronizing on the CUDA stream
    checkCudaErrors(cudaStreamSynchronize(s));

    //free device buffers
    checkCudaErrors(cudaFree(sendbuff));
    checkCudaErrors(cudaFree(recvbuff));
    checkCudaErrors(cudaStreamDestroy(s));

    LOGi << "MPI rank" << myRank << "Success";
}

void NcclTestOp::jit_run() {
    output->ptr<T>()[0] = 123;
    if (cmd == "test_with_mpi") {
        test_with_mpi();
        return;
    }

    //managing 4 devices
    int nDev;
    checkCudaErrors(cudaGetDeviceCount(&nDev));
    nDev = std::min(nDev, 2);

    ncclComm_t comms[nDev];
    int size = 32*1024*1024;
    int devs[4] = { 0, 1, 2, 3 };

    //allocating and initializing device buffers
    float** sendbuff = (float**)malloc(nDev * sizeof(float*));
    float** recvbuff = (float**)malloc(nDev * sizeof(float*));
    cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);

    for (int i = 0; i < nDev; ++i) {
        checkCudaErrors(cudaSetDevice(i));
        checkCudaErrors(cudaMalloc(sendbuff + i, size * sizeof(float)));
        checkCudaErrors(cudaMalloc(recvbuff + i, size * sizeof(float)));
        checkCudaErrors(cudaMemset(sendbuff[i], 1, size * sizeof(float)));
        checkCudaErrors(cudaMemset(recvbuff[i], 0, size * sizeof(float)));
        checkCudaErrors(cudaStreamCreate(s+i));
    }

    //initializing NCCL
    checkCudaErrors(ncclCommInitAll(comms, nDev, devs));

    //calling NCCL communication API. Group API is required when using
    //multiple devices per thread
    checkCudaErrors(ncclGroupStart());
    for (int i = 0; i < nDev; ++i)
        checkCudaErrors(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], size, ncclFloat, ncclSum,
            comms[i], s[i]));
    checkCudaErrors(ncclGroupEnd());

    //synchronizing on CUDA streams to wait for completion of NCCL operation
    for (int i = 0; i < nDev; ++i) {
        checkCudaErrors(cudaSetDevice(i));
        checkCudaErrors(cudaStreamSynchronize(s[i]));
    }

    //free device buffers
    for (int i = 0; i < nDev; ++i) {
        checkCudaErrors(cudaSetDevice(i));
        checkCudaErrors(cudaFree(sendbuff[i]));
        checkCudaErrors(cudaFree(recvbuff[i]));
    }

    //finalizing NCCL
    for(int i = 0; i < nDev; ++i)
        ncclCommDestroy(comms[i]);
    checkCudaErrors(cudaSetDevice(0));
}

#endif
#endif // JIT

} // jittor
@ -0,0 +1,24 @@
// ***************************************************************
// Copyright (c) 2020 Jittor.
// Authors:
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct NcclTestOp : Op {
    Var* output;
    string cmd;

    NcclTestOp(string cmd);

    const char* name() const override { return "nccl_test"; }
    DECLARE_jit_run;
};

} // jittor
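The test op above is meant to be driven from the Python test suite. The exact generated binding is not shown in this commit, so the call below is a guess based on the op's `name()` and its single `cmd` argument; treat it as a sketch.

```python
# Hedged sketch: triggering the NCCL self-test (module path and binding assumed).
import jittor as jt
from jittor import compile_extern

nccl_ops = getattr(compile_extern, "nccl_ops", None)
jt.flags.use_cuda = 1
if nccl_ops:
    r = nccl_ops.nccl_test("")                 # single-process, multi-GPU path
    # r = nccl_ops.nccl_test("test_with_mpi")  # per-rank path when run under mpirun
    print(r.data)                              # jit_run writes 123 into the output
```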
@ -0,0 +1,44 @@
// ***************************************************************
// Copyright (c) 2020 Jittor.
// Authors:
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "nccl_warper.h"
#include "event_queue.h"

const char *_cudaGetErrorEnum(ncclResult_t error) {
    return ncclGetErrorString(error);
}

namespace jittor {

ncclComm_t comm;
ncclUniqueId id;

struct nccl_initer {

    nccl_initer() {
        if (mpi_world_rank == 0)
            checkCudaErrors(ncclGetUniqueId(&id));
        MPI_CHECK(MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD));
        LOGv << "NCCL init in device" << mpi_local_rank;
        checkCudaErrors(cudaSetDevice(mpi_local_rank));
        event_queue.run_sync([]() {
            checkCudaErrors(cudaSetDevice(mpi_local_rank));
        });
        checkCudaErrors(ncclCommInitRank(&comm, mpi_world_size, id, mpi_world_rank));
    }

    ~nccl_initer() {
        checkCudaErrors(ncclCommDestroy(comm));
    }

};

static nccl_initer nccl_init;

} // jittor
@ -0,0 +1,45 @@
// ***************************************************************
// Copyright (c) 2020 Jittor.
// Authors:
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#define OMPI_SKIP_MPICXX
#include <mpi.h>

extern void throw_mpi_error(int result,
    char const *const func, const char *const file, int const line);

static inline void mpi_check(int result,
    char const *const func, const char *const file, int const line) {
    if (result != MPI_SUCCESS) {
        throw_mpi_error(result, func, file, line);
    }
}

#define MPI_CHECK(val) mpi_check((val), #val, __FILE__, __LINE__)

namespace jittor {

extern int mpi_world_size;
extern int mpi_world_rank;
extern int mpi_local_rank;

// @pyjt(world_size)
int _mpi_world_size();

// @pyjt(world_rank)
int _mpi_world_rank();

// @pyjt(local_rank)
int _mpi_local_rank();

struct ArrayArgs;

// @pyjt(broadcast)
void _mpi_broadcast(ArrayArgs&& args, int i);

} // jittor
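The `// @pyjt(...)` annotations above are what expose these helpers to Python. A minimal sketch of how they would be used, assuming the compiled module is published as `jittor.compile_extern.mpi` (the name `setup_mpi()` uses later in this commit); the call signature of `broadcast` is inferred from `_mpi_broadcast(ArrayArgs&&, int)` and may differ in detail.

```python
# Hedged sketch: querying the MPI topology from Python under mpirun.
import numpy as np
from jittor import compile_extern

mpi = getattr(compile_extern, "mpi", None)  # None outside an MPI launch
if mpi:
    print(mpi.world_size(), mpi.world_rank(), mpi.local_rank())
    buf = np.arange(4, dtype=np.float32) * mpi.world_rank()
    mpi.broadcast(buf, 0)   # overwrite every rank's buffer with rank 0's copy
    print(buf)
```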
@ -0,0 +1,85 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "mpi_warper.h"
#include "var.h"
#include "mpi_all_reduce_op.h"
#include "ops/op_register.h"
#include "misc/str_utils.h"
#include "misc/cuda_flags.h"

namespace jittor {

#ifndef JIT

static auto make_array = get_op_info("array")
    .get_constructor<VarPtr, const void*, NanoVector, NanoString>();
static auto make_binary = get_op_info("binary")
    .get_constructor<VarPtr, Var*, Var*, NanoString>();
static auto make_mpi_all_reduce = get_op_info("mpi_all_reduce")
    .get_constructor<VarPtr, Var*, NanoString>();

MpiAllReduceOp::MpiAllReduceOp(Var* x, NanoString op) : x(x), op(op) {
    if (op == ns_mean) {
        auto var = make_mpi_all_reduce(x, ns_add);
        var = make_binary(var, make_array(&mpi_world_size, 1, ns_int32), ns_divide);
        forward(var);
        return;
    }
    ASSERT(op == ns_add) << "Not supported MPI op" << op;
#ifdef HAS_CUDA
    if (use_cuda) {
        static auto nccl_all_reduce = has_op("nccl_all_reduce")
            ? get_op_info("nccl_all_reduce").get_constructor<VarPtr, Var*>()
            : nullptr;
        if (nccl_all_reduce) {
            auto var = nccl_all_reduce(x);
            forward(var);
            return;
        }
    }
#endif
    y = create_output(nullptr, x->dtype());
}

void MpiAllReduceOp::infer_shape() {
    y->set_shape(x->shape);
}

VarPtr MpiAllReduceOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    static auto mpi_all_reduce =
        get_op_info("mpi_all_reduce").get_constructor<VarPtr, Var*,NanoString>();
    return mpi_all_reduce(dout, ns_add);
}

void MpiAllReduceOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
    add_jit_define("OP", op.to_cstring());
}

#else // JIT
#ifdef JIT_cpu
void MpiAllReduceOp::jit_run() {
    @define(T_MPI,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, MPI_FLOAT)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, MPI_INT)
        @if(@strcmp(@Tx,float64)==0 || @strcmp(@Tx,double)==0, MPI_DOUBLE)
        @if(@strcmp(@Tx,int64)==0, MPI_DOUBLE_INT)
    )
    @define(OP_MPI,
        @if(@strcmp(@OP,add)==0, MPI_SUM)
    )
    auto* __restrict__ xp = x->ptr<Tx>();
    auto* __restrict__ yp = y->ptr<Tx>();
    index_t num = y->num;
    MPI_Allreduce(xp, yp, num, T_MPI, OP_MPI, MPI_COMM_WORLD);
}
#endif // JIT_cpu
#endif // JIT

} // jittor
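Two details of the op above are worth spelling out: `mean` is rewritten as an `add` all-reduce followed by a divide by `mpi_world_size`, and on CUDA the op forwards to `nccl_all_reduce` when that extension exists. The sketch below shows the intended Python-side call, assuming the ops module compiled by `setup_mpi()` is available as `mpi_ops` and that the `NanoString` argument is passed as a plain string.

```python
# Hedged sketch: CPU MPI all-reduce from Python (run under mpirun).
import jittor as jt
from jittor import compile_extern

mpi_ops = getattr(compile_extern, "mpi_ops", None)
if mpi_ops:
    x = jt.float32([1.0, 2.0, 3.0])
    s = mpi_ops.mpi_all_reduce(x)            # default op: "add"
    m = mpi_ops.mpi_all_reduce(x, "mean")    # add, then divide by world size
    print(s.data, m.data)
```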
@ -0,0 +1,26 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct MpiAllReduceOp : Op {
    Var* x, * y;
    NanoString op;

    MpiAllReduceOp(Var* x, NanoString op=ns_add);
    void infer_shape() override;

    const char* name() const override { return "mpi_all_reduce"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
@ -0,0 +1,66 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "mpi_warper.h"
#include "var.h"
#include "mpi_broadcast_op.h"
#include "ops/op_register.h"
#include "misc/str_utils.h"
#include "misc/cuda_flags.h"

namespace jittor {

#ifndef JIT
MpiBroadcastOp::MpiBroadcastOp(Var* x, int root) : x(x), root(root) {
#ifdef HAS_CUDA
    if (use_cuda) {
        static auto nccl_broadcast = has_op("nccl_broadcast")
            ? get_op_info("nccl_broadcast").get_constructor<VarPtr, Var*, int>()
            : nullptr;
        if (nccl_broadcast) {
            auto var = nccl_broadcast(x, root);
            forward(var);
            return;
        }
    }
#endif
    y = create_output(nullptr, x->dtype());
}

void MpiBroadcastOp::infer_shape() {
    y->set_shape(x->shape);
    if (root == mpi_world_rank)
        y->share_with(x);
}

VarPtr MpiBroadcastOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    static auto mpi_reduce =
        get_op_info("mpi_reduce").get_constructor<VarPtr, Var*, NanoString, int>();
    return mpi_reduce(dout, ns_add, root);
}

void MpiBroadcastOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
}

#else // JIT
#ifdef JIT_cpu
void MpiBroadcastOp::jit_run() {
    @define(T_MPI,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, MPI_FLOAT)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, MPI_INT)
        @if(@strcmp(@Tx,float64)==0 || @strcmp(@Tx,double)==0, MPI_DOUBLE)
        @if(@strcmp(@Tx,int64)==0, MPI_DOUBLE_INT)
    )
    auto* __restrict__ yp = y->ptr<Tx>();
    MPI_Bcast(yp, y->num, T_MPI, root, MPI_COMM_WORLD);
}
#endif // JIT_cpu
#endif // JIT

} // jittor
@ -0,0 +1,26 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct MpiBroadcastOp : Op {
    Var* x, * y;
    int root;

    MpiBroadcastOp(Var* x, int root=0);
    void infer_shape() override;

    const char* name() const override { return "mpi_broadcast"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
@ -0,0 +1,87 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "mpi_warper.h"
#include "var.h"
#include "mpi_reduce_op.h"
#include "ops/op_register.h"
#include "misc/str_utils.h"
#include "misc/cuda_flags.h"

namespace jittor {

#ifndef JIT

static auto make_array = get_op_info("array")
    .get_constructor<VarPtr, const void*, NanoVector, NanoString>();
static auto make_binary = get_op_info("binary")
    .get_constructor<VarPtr, Var*, Var*, NanoString>();
static auto make_mpi_reduce = get_op_info("mpi_reduce")
    .get_constructor<VarPtr, Var*, NanoString, int>();

MpiReduceOp::MpiReduceOp(Var* x, NanoString op, int root) : x(x), op(op), root(root) {
    if (op == ns_mean) {
        auto var = make_mpi_reduce(x, ns_add, root);
        var = make_binary(var, make_array(&mpi_world_size, 1, ns_int32), ns_divide);
        forward(var);
        return;
    }
    ASSERT(op == ns_add) << "Not supported MPI op" << op;
#ifdef HAS_CUDA
    if (use_cuda) {
        static auto nccl_reduce = has_op("nccl_reduce")
            ? get_op_info("nccl_reduce").get_constructor<VarPtr, Var*, int>()
            : nullptr;
        if (nccl_reduce) {
            auto var = nccl_reduce(x, root);
            forward(var);
            return;
        }
    }
#endif
    y = create_output(nullptr, x->dtype());
}

void MpiReduceOp::infer_shape() {
    y->set_shape(x->shape);
}

VarPtr MpiReduceOp::grad(Var* out, Var* dout, Var* v, int v_index) {
    static VarPtr(*mpi_broadcast)(Var*, int) =
        get_op_info("mpi_broadcast").get_constructor<VarPtr, Var*, int>();
    return mpi_broadcast(dout,root);
}

void MpiReduceOp::jit_prepare() {
    add_jit_define("Tx", x->dtype());
    add_jit_define("OP", op.to_cstring());
}

#else // JIT
#ifdef JIT_cpu
void MpiReduceOp::jit_run() {
    @define(T_MPI,
        @if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, MPI_FLOAT)
        @if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, MPI_INT)
        @if(@strcmp(@Tx,float64)==0 || @strcmp(@Tx,double)==0, MPI_DOUBLE)
        @if(@strcmp(@Tx,int64)==0, MPI_DOUBLE_INT)
    )
    @define(OP_MPI,
        @if(@strcmp(@OP,add)==0, MPI_SUM)
    )
    auto* __restrict__ xp = x->ptr<Tx>();
    auto* __restrict__ yp = y->ptr<Tx>();
    index_t num = y->num;
    MPI_CHECK(MPI_Reduce(xp, yp, num, T_MPI, OP_MPI, root, MPI_COMM_WORLD));
    if (root != mpi_world_rank)
        for (index_t i=0; i<num; i++) yp[i] = 0;
}
#endif // JIT_cpu
#endif // JIT

} // jittor
@ -0,0 +1,27 @@
// ***************************************************************
// Copyright (c) 2020
//     Guowei Yang <471184555@qq.com>.
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct MpiReduceOp : Op {
    Var* x, * y;
    NanoString op;
    int root;

    MpiReduceOp(Var* x, NanoString op=ns_add, int root=0);
    void infer_shape() override;

    const char* name() const override { return "mpi_reduce"; }
    VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
    DECLARE_jit_run;
};

} // jittor
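As with the NCCL pair, `mpi_broadcast` and `mpi_reduce` are gradients of each other, and `mpi_reduce` zeroes the output on non-root ranks so that each worker only keeps its own contribution to the sum. A hedged usage sketch (module path and string form of the reduce op assumed as before):

```python
# Hedged sketch: parameter sync and gradient aggregation with the MPI ops.
import jittor as jt
from jittor import compile_extern

mpi = getattr(compile_extern, "mpi", None)
mpi_ops = getattr(compile_extern, "mpi_ops", None)
if mpi and mpi_ops:
    w = jt.float32([0.5, -0.5]) * mpi.world_rank()  # differs per rank
    w = mpi_ops.mpi_broadcast(w, 0)                 # identical on every rank now
    g = mpi_ops.mpi_reduce(w, "add", 0)             # root holds the sum, others zeros
    print(g.data)
```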
@ -0,0 +1,42 @@
// ***************************************************************
// Copyright (c) 2019 Dun Liang <randonlang@gmail.com>. All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include "mpi_warper.h"

#include "var.h"
#include "mpi_test_op.h"
#include "misc/str_utils.h"

namespace jittor {

#ifndef JIT
MpiTestOp::MpiTestOp(string cmd) : cmd(cmd) {
    output = create_output(1, ns_float32);
}

void MpiTestOp::jit_prepare() {
    add_jit_define("T", ns_float32);
}

#else // JIT

void MpiTestOp::jit_run() {
    output->ptr<T>()[0] = 123;

    int world_size = mpi_world_size;

    int world_rank = mpi_world_rank;

    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_CHECK(MPI_Get_processor_name(processor_name, &name_len));

    printf("Hello world from processor %s, rank %d out of %d processors\\n",processor_name, world_rank, world_size);

}

#endif // JIT

} // jittor
@ -0,0 +1,23 @@
// ***************************************************************
// Copyright (c) 2020
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "op.h"

namespace jittor {

struct MpiTestOp : Op {
    Var* output;
    string cmd;

    MpiTestOp(string cmd);

    const char* name() const override { return "mpi_test"; }
    DECLARE_jit_run;
};

} // jittor
@ -0,0 +1,105 @@
// ***************************************************************
// Copyright (c) 2020 Jittor.
// Authors:
//     Dun Liang <randonlang@gmail.com>.
// All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

#include "mpi_warper.h"
#include "common.h"
#include "ops/array_op.h"

char jt_mpi_err_buffer[MPI_MAX_ERROR_STRING];

void throw_mpi_error(int result,
    char const *const func, const char *const file, int const line) {
    int resultlen;
    MPI_Error_string(result, jt_mpi_err_buffer, &resultlen);
    LOGf << "MPI error at " >> file >> ":" >> line << "code="
        >> result >> '(' >> jt_mpi_err_buffer >> ')' << func;
}

namespace jittor {

int mpi_world_size = 1;
int mpi_world_rank = 0;
int mpi_local_rank = 0;

int _mpi_world_size() {
    return mpi_world_size;
}

int _mpi_world_rank() {
    return mpi_world_rank;
}

int _mpi_local_rank() {
    return mpi_local_rank;
}

void _mpi_broadcast(ArrayArgs&& args, int i) {
    int64 size = args.dtype.dsize();
    for (auto j : args.shape)
        size *= j;
    MPI_CHECK(MPI_Bcast((void *)args.ptr, size, MPI_BYTE, i, MPI_COMM_WORLD));
}

static uint64_t getHostHash(const char* string) {
    // Based on DJB2, result = result * 33 + char
    uint64_t result = 5381;
    for (int c = 0; string[c] != '\0'; c++){
        result = ((result << 5) + result) + string[c];
    }
    return result;
}

static void getHostName(char* hostname, int maxlen) {
    gethostname(hostname, maxlen);
    for (int i=0; i< maxlen; i++) {
        if (hostname[i] == '.') {
            hostname[i] = '\0';
            return;
        }
    }
}

struct mpi_initer {

    mpi_initer() {
        LOGvv << "MPI init...";
        MPI_CHECK(MPI_Init(NULL, NULL));
        MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size));
        MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank));

        //calculating localRank based on hostname which is used in selecting a GPU
        uint64_t hostHashs[mpi_world_rank];
        char hostname[1024];
        getHostName(hostname, 1024);
        hostHashs[mpi_world_rank] = getHostHash(hostname);
        MPI_CHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD));
        mpi_local_rank = 0;
        for (int p=0; p<mpi_world_size; p++) {
            if (p == mpi_world_rank) break;
            if (hostHashs[p] == hostHashs[mpi_world_rank]) mpi_local_rank++;
        }
        LOGv << "MPI init finished: local" << mpi_local_rank
            << "global" << mpi_world_rank
            << "size" << mpi_world_size;
    }

    ~mpi_initer() {
        MPI_CHECK(MPI_Finalize());
    }

};

static mpi_initer mpi_init;

} // jittor
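The local-rank computation above is the part that most often needs explaining: every process hashes its hostname, the hashes are all-gathered, and a process's local rank is the number of lower global ranks that share its host. The standalone Python rendering below reproduces just that counting rule for illustration; it is not part of the commit.

```python
def local_rank(host_hashes, my_rank):
    # Count earlier global ranks that live on the same host as my_rank.
    rank = 0
    for p, h in enumerate(host_hashes):
        if p == my_rank:
            break
        if h == host_hashes[my_rank]:
            rank += 1
    return rank

# Two hosts with two processes each: global ranks 0..3 -> local ranks 0,1,0,1.
hashes = ["hostA", "hostA", "hostB", "hostB"]
print([local_rank(hashes, r) for r in range(4)])  # [0, 1, 0, 1]
```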
@ -51,7 +51,7 @@ for mdname in all_md:
        else:
            cell["cell_type"] = "code"
            cell["outputs"] = []
            cell["execution_count"] = None
            cell["execution_count"] = 0
        cells.append(cell)
    ipynb = {
        "cells":cells,

@ -60,7 +60,8 @@ for mdname in all_md:
        "metadata": {
        },
    }
    ipynb_name = mdname[:-2]+"ipynb"
    ipynb_name = os.path.basename(mdname[:-2])+"ipynb"
    ipynb_name = os.path.join(notebook_dir, ipynb_name)
    print(mdname, len(src), len(blocks), len(cells), "--->", ipynb_name)
    with open(os.path.join(notebook_dir, ipynb_name), "w") as f:
    with open(ipynb_name, "w") as f:
        f.write(json.dumps(ipynb))
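The second hunk fixes a path bug in the notebook converter: `mdname` can carry directory components, and the old code both kept them and joined `notebook_dir` a second time when opening the file. A small illustration of the difference (the paths are made up):

```python
import os

notebook_dir = "notebook"
mdname = "doc/source/example.src.md"   # hypothetical input path

old = os.path.join(notebook_dir, mdname[:-2] + "ipynb")
new = os.path.join(notebook_dir, os.path.basename(mdname[:-2]) + "ipynb")
print(old)  # notebook/doc/source/example.src.ipynb  (directories leak through)
print(new)  # notebook/example.src.ipynb
```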
@ -7,18 +7,21 @@
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
from . import compiler
from .compiler import LOG, has_cuda
from .compiler import compile_custom_ops, compile_custom_op
import jittor_core as core
from jittor_core import *
from jittor_core.ops import *
from . import compile_extern
from .compile_extern import mkl_ops
from . import lock
with lock.lock_scope():
    from . import compiler
    from .compiler import LOG, has_cuda
    from .compiler import compile_custom_ops, compile_custom_op
    import jittor_core as core
    from jittor_core import *
    from jittor_core.ops import *
    from . import compile_extern
    from .compile_extern import mkl_ops, mpi, mpi_ops

import contextlib
import numpy as np
from collections import OrderedDict
from collections.abc import Sequence, Mapping
import types
import pickle
import sys

@ -338,6 +341,37 @@ def detach(x):
    return x.clone().stop_grad().clone()
Var.detach = detach

origin_reshape = reshape
def reshape(x, *shape):
    if len(shape) == 1 and isinstance(shape[0], Sequence):
        shape = shape[0]
    return origin_reshape(x, shape)
reshape.__doc__ = origin_reshape.__doc__
Var.view = Var.reshape = view = reshape

origin_transpose = transpose
def transpose(x, *dim):
    if len(dim) == 1 and isinstance(dim[0], Sequence):
        dim = dim[0]
    return origin_transpose(x, dim)
transpose.__doc__ = origin_transpose.__doc__
Var.transpose = Var.permute = permute = transpose

def flatten(input, start_dim=0, end_dim=-1):
    '''flatten dimentions by reshape'''
    in_shape = input.shape
    start_dim = len(in_shape) + start_dim if start_dim < 0 else start_dim
    end_dim = len(in_shape) + end_dim if end_dim < 0 else end_dim
    assert end_dim > start_dim, "end_dim should be larger than start_dim for flatten function"
    out_shape = []
    for i in range(0,start_dim,1): out_shape.append(in_shape[i])
    dims = 1
    for i in range(start_dim, end_dim+1, 1): dims *= in_shape[i]
    out_shape.append(dims)
    for i in range(end_dim+1,len(in_shape),1): out_shape.append(in_shape[i])
    return input.reshape(out_shape)
Var.flatten = flatten

def detach_inplace(x):
    return x.swap(x.stop_grad().clone())
Var.start_grad = Var.detach_inplace = detach_inplace

@ -507,8 +541,9 @@ class Module:

    def extra_repr(self):
        ss = []
        n = len(self.__init__.__code__.co_varnames) - \
            len(self.__init__.__defaults__)
        n = len(self.__init__.__code__.co_varnames)
        if self.__init__.__defaults__ is not None:
            n -= len(self.__init__.__defaults__)
        for i, k in enumerate(self.__init__.__code__.co_varnames[1:]):
            v = getattr(self, k) if hasattr(self, k) else None
            if isinstance(v, Var): v = v.peek()

@ -535,7 +570,8 @@ class Module:
                end = 1
                break
        if end ==1:
            print(f'init {key} fail ...')
            # print(f'init {key} fail ...')
            pass
        else:
            # print(f'init {key} success ...')
            if isinstance(params[key], np.ndarray) or isinstance(params[key], list):

@ -577,6 +613,11 @@ class Module:
        for p in self.parameters():
            if id(p) in self.backup_grad_state and self.backup_grad_state[id(p)]:
                p.start_grad()

    def mpi_param_broadcast(self, root=0):
        if mpi is None: return
        for p in self.parameters():
            p.assign(p.mpi_broadcast(root).detach())

def make_module(func, exec_n_args=1):
    class MakeModule(Module):

@ -639,8 +680,9 @@ def jittor_exit():
    core.sync_all(True)
atexit.register(jittor_exit)

Var.__repr__ = Var.__str__ = lambda x: str(x.data)
Var.peek = lambda x: str(x.dtype)+str(x.shape)
Var.__str__ = lambda x: str(x.data)
Var.__repr__ = lambda x: f"jt.Var:{x.dtype}{x.uncertain_shape}"
Var.peek = lambda x: f"{x.dtype}{x.shape}"

from . import nn
from .nn import matmul
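The Python-side additions above are mostly user-facing conveniences: `reshape`/`transpose` now accept dimensions as separate positional arguments, `flatten` collapses a dimension range via reshape, and `Module.mpi_param_broadcast` syncs parameters from a root rank when MPI is available. A short hedged sketch (shapes and values are examples; the distributed call is a no-op without MPI):

```python
import numpy as np
import jittor as jt

x = jt.float32(np.random.rand(2, 3, 4))
print(x.reshape(6, 4).shape)     # same as x.reshape((6, 4))
print(x.permute(2, 0, 1).shape)  # alias of transpose
print(x.flatten(1).shape)        # [2, 12]: dims 1..-1 collapsed by reshape

# In a distributed run, a Module can align its initial weights across ranks:
# model.mpi_param_broadcast(root=0)   # returns immediately when mpi is None
```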
|
@ -3,8 +3,9 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import os, sys
|
||||
import os, sys, shutil
|
||||
from .compiler import *
|
||||
from jittor_utils import run_cmd, get_version
|
||||
from jittor.dataset.utils import download_url_to_local
|
||||
|
||||
def search_file(dirs, name):
|
||||
|
@ -171,10 +172,10 @@ def install_cutt(root_folder):
|
|||
true_md5 = "a6f4f7f75310a69b131e21f1ebec768a"
|
||||
|
||||
if os.path.exists(fullname):
|
||||
md5 = os.popen('md5sum ' + fullname).read().split()[0]
|
||||
md5 = run_cmd('md5sum '+fullname).split()[0]
|
||||
if md5 != true_md5:
|
||||
os.system('rm ' + fullname)
|
||||
os.system('rm -rf ' + dirname)
|
||||
os.remove(fullname)
|
||||
shutil.rmtree(dirname)
|
||||
if not os.path.isfile(os.path.join(dirname, "bin", "cutt_test")):
|
||||
LOG.i("Downloading cutt...")
|
||||
download_url_to_local(url, filename, root_folder, true_md5)
|
||||
|
@ -186,11 +187,11 @@ def install_cutt(root_folder):
|
|||
zf.extractall(path=root_folder)
|
||||
except RuntimeError as e:
|
||||
print(e)
|
||||
raise
|
||||
zf.close()
|
||||
|
||||
from jittor_utils import run_cmd
|
||||
LOG.i("installing cutt...")
|
||||
run_cmd(f"cd {dirname} && make")
|
||||
run_cmd(f"make", cwd=dirname)
|
||||
return dirname
|
||||
|
||||
def setup_cutt():
|
||||
|
@ -233,6 +234,158 @@ def setup_cutt():
|
|||
LOG.vv("Get cutt_ops: "+str(dir(cutt_ops)))
|
||||
|
||||
|
||||
def install_nccl(root_folder):
|
||||
url = "https://github.com/NVIDIA/nccl/archive/v2.6.4-1.tar.gz"
|
||||
|
||||
filename = "nccl.tgz"
|
||||
fullname = os.path.join(root_folder, filename)
|
||||
dirname = os.path.join(root_folder, "nccl-2.6.4-1")
|
||||
true_md5 = "38d7a9e98d95a99df0a4f1ad6fb50fa7"
|
||||
|
||||
if os.path.exists(fullname):
|
||||
md5 = run_cmd('md5sum '+fullname).split()[0]
|
||||
if md5 != true_md5:
|
||||
os.remove(fullname)
|
||||
if os.path.isdir(dirname):
|
||||
shutil.rmtree(dirname)
|
||||
if not os.path.isfile(os.path.join(dirname, "build", "lib", "libnccl.so")):
|
||||
LOG.i("Downloading nccl...")
|
||||
download_url_to_local(url, filename, root_folder, true_md5)
|
||||
|
||||
import tarfile
|
||||
with tarfile.open(fullname, "r") as tar:
|
||||
tar.extractall(root_folder)
|
||||
|
||||
LOG.i("installing nccl...")
|
||||
arch_flag = f" -arch={','.join(map(lambda x:'sm_'+str(x),flags.cuda_archs))} "
|
||||
run_cmd(f"make -j8 src.build CUDA_HOME='{cuda_home}' NVCC_GENCODE='{arch_flag}' ", cwd=dirname)
|
||||
return dirname
|
||||
|
||||
def setup_nccl():
    global nccl_ops, use_nccl
    use_nccl = os.environ.get("use_nccl", "1")=="1"
    nccl_ops = None
    if not has_cuda or mpi is None:
        use_nccl = False
        return
    if not use_nccl: return
    nccl_include_path = os.environ.get("nccl_include_path")
    nccl_lib_path = os.environ.get("nccl_lib_path")

    if nccl_lib_path is None or nccl_include_path is None:
        LOG.v("setup nccl...")
        # nccl_path is decoupled from cc_path
        from pathlib import Path
        nccl_path = os.path.join(str(Path.home()), ".cache", "jittor", "nccl")

        make_cache_dir(nccl_path)
        nccl_home = install_nccl(nccl_path)
        nccl_include_path = os.path.join(nccl_home, "build", "include")
        nccl_lib_path = os.path.join(nccl_home, "build", "lib")

    nccl_lib_name = os.path.join(nccl_lib_path, "libnccl.so")
    assert os.path.isdir(nccl_include_path)
    assert os.path.isdir(nccl_lib_path)
    assert os.path.isfile(nccl_lib_name), nccl_lib_name
    LOG.v(f"nccl_include_path: {nccl_include_path}")
    LOG.v(f"nccl_lib_path: {nccl_lib_path}")
    LOG.v(f"nccl_lib_name: {nccl_lib_name}")
    # We do not link manually here; the custom ops link against it
    ctypes.CDLL(nccl_lib_name, dlopen_flags)

    nccl_src_dir = os.path.join(jittor_path, "extern", "cuda", "nccl")
    nccl_src_files = []
    for r, _, f in os.walk(nccl_src_dir):
        for fname in f:
            nccl_src_files.append(os.path.join(r, fname))

    nccl_ops = compile_custom_ops(nccl_src_files,
        extra_flags=f" -I'{nccl_include_path}' {mpi_compile_flags} ")
    LOG.vv("Get nccl_ops: "+str(dir(nccl_ops)))

def manual_link(flags):
    lib_dirs = []
    libs = []
    for f in flags.split():
        if f.startswith("-l"):
            libs.append(f[2:])
        elif f.startswith("-L"):
            lib_dirs.append(f[2:])
    LOG.v("manual_link:", flags)
    LOG.v("lib_dirs:", lib_dirs)
    LOG.v("libs:", libs)
    for lib in libs:
        for d in lib_dirs:
            libname = os.path.join(d, f"lib{lib}.so")
            if os.path.isfile(libname):
                LOG.v("link:", libname)
                ctypes.CDLL(libname, dlopen_flags)
                break

def inside_mpi():
    return "OMPI_COMM_WORLD_SIZE" in os.environ

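manual_link only dlopens libraries it can resolve from -L/-l pairs; the flag parsing itself is plain string handling, for example (the flag string is illustrative, not taken from a real mpicc):

# illustrative sketch of the -L/-l parsing done by manual_link above
flags = "-L/usr/lib/x86_64-linux-gnu -lmpi -pthread"
libs = [f[2:] for f in flags.split() if f.startswith("-l")]      # ['mpi']
lib_dirs = [f[2:] for f in flags.split() if f.startswith("-L")]  # ['/usr/lib/x86_64-linux-gnu']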
def setup_mpi():
    global mpi_ops, mpi, use_mpi
    global mpicc_path, has_mpi
    use_mpi = os.environ.get("use_mpi", "1")=="1"
    mpi_ops = None
    mpi = None
    has_mpi = False
    mpicc_path = env_or_try_find('mpicc_path', 'mpicc')
    if mpicc_path == "":
        LOG.i("mpicc not found, distribution disabled.")
        use_mpi = False
    else:
        use_mpi = True
        has_mpi = True
    if not inside_mpi():
        use_mpi = False
    if not use_mpi:
        return

    global mpi_compile_flags, mpi_link_flags, mpi_flags
    mpi_compile_flags = run_cmd(mpicc_path+" --showme:compile")
    mpi_link_flags = run_cmd(mpicc_path+" --showme:link")
    mpi_flags = mpi_compile_flags + " " + mpi_link_flags
    LOG.v("mpi_flags: "+mpi_flags)

    # find all source files
    mpi_src_dir = os.path.join(jittor_path, "extern", "mpi")
    mpi_src_files = []
    for r, _, f in os.walk(mpi_src_dir):
        for fname in f:
            mpi_src_files.append(os.path.join(r, fname))

    # add the mpi include dir to the compile flags (also used by nccl)
    mpi_compile_flags += f" -I'{os.path.join(mpi_src_dir, 'inc')}' "
    mpi_compile_flags = mpi_compile_flags.replace("-pthread", "")

    mpi_version = get_version(mpicc_path)
    if mpi_version.startswith("(1.") or mpi_version.startswith("(2."):
        # mpi 1.x/2.x needs to be linked manually like this
        manual_link(mpi_flags)
    # mpi (4.x) cannot use deepbind, it needs to
    # share the 'environ' symbol.
    mpi = compile_custom_ops(mpi_src_files,
        extra_flags=f" {mpi_flags} ", return_module=True,
        dlopen_flags=os.RTLD_GLOBAL | os.RTLD_NOW)
    mpi_ops = mpi.ops
    LOG.vv("Get mpi: "+str(mpi.__dict__.keys()))
    LOG.vv("Get mpi_ops: "+str(mpi_ops.__dict__.keys()))
    def warper(func):
        def inner(self, *args, **kw):
            return func(self, *args, **kw)
        inner.__doc__ = func.__doc__
        return inner
    for k in mpi_ops.__dict__:
        if not k.startswith("mpi_"): continue
        if k == "mpi_test": continue
        setattr(core.Var, k, warper(mpi_ops.__dict__[k]))

setup_mpi()
setup_nccl()

setup_cutt()
setup_mkl()
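The warper closure in setup_mpi exists so that each attribute attached to core.Var keeps its own func binding. The pattern in isolation (the Dummy class and the fake op dict are illustrative only):

class Dummy: pass

def make_method(func):
    # each call creates a new inner() that closes over its own func
    def inner(self, *args, **kw):
        return func(self, *args, **kw)
    inner.__doc__ = func.__doc__
    return inner

fake_ops = {"mpi_all_reduce": lambda self: ("all_reduce", self)}
for name, func in fake_ops.items():
    setattr(Dummy, name, make_method(func))

print(Dummy().mpi_all_reduce()[0])   # -> all_reduce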
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@ from ctypes.util import find_library
|
|||
import jittor_utils as jit_utils
|
||||
from jittor_utils import LOG, run_cmd, cache_path, find_exe, cc_path, cc_type, cache_path
|
||||
from . import pyjt_compiler
|
||||
from . import lock
|
||||
|
||||
def find_jittor_path():
|
||||
return os.path.dirname(__file__)
|
||||
|
@ -518,6 +519,7 @@ def gen_jit_op_maker(op_headers, export=False, extra_flags=""):
|
|||
"""
|
||||
return jit_src
|
||||
|
||||
@lock.lock_scope()
|
||||
def compile_custom_op(header, source, op_name, warp=True):
|
||||
"""Compile a single custom op
|
||||
header: code of op header, not path
|
||||
|
@ -554,22 +556,36 @@ def compile_custom_op(header, source, op_name, warp=True):
|
|||
m = compile_custom_ops([hname, ccname])
|
||||
return getattr(m, op_name)
|
||||
|
||||
def compile_custom_ops(filenames, extra_flags=""):
|
||||
@lock.lock_scope()
|
||||
def compile_custom_ops(
|
||||
filenames,
|
||||
extra_flags="",
|
||||
return_module=False,
|
||||
dlopen_flags=os.RTLD_GLOBAL | os.RTLD_NOW | os.RTLD_DEEPBIND):
|
||||
"""Compile custom ops
|
||||
filenames: path of op source files, filenames must be
|
||||
pairs of xxx_xxx_op.cc and xxx_xxx_op.h, and the
|
||||
type name of op must be XxxXxxOp.
|
||||
extra_flags: extra compile flags
|
||||
return_module: return module rather than ops(default: False)
|
||||
return: compiled ops
|
||||
"""
|
||||
srcs = {}
|
||||
headers = {}
|
||||
builds = []
|
||||
includes = []
|
||||
pyjt_includes = []
|
||||
for name in filenames:
|
||||
name = os.path.realpath(name)
|
||||
if name.endswith(".cc") or name.endswith(".cpp") or name.endswith(".cu"):
|
||||
builds.append(name)
|
||||
if name.endswith(".h"):
|
||||
dirname = os.path.dirname(name)
|
||||
if dirname.endswith("inc"):
|
||||
includes.append(dirname)
|
||||
with open(name, "r") as f:
|
||||
if "@pyjt" in f.read():
|
||||
pyjt_includes.append(name)
|
||||
bname = os.path.basename(name)
|
||||
bname = os.path.splitext(bname)[0]
|
||||
if bname.endswith("_op"):
|
||||
|
@ -597,23 +613,47 @@ def compile_custom_ops(filenames, extra_flags=""):
|
|||
gen_src_fname = os.path.join(cache_path, "custom_ops", gen_name+".cc")
|
||||
gen_head_fname = os.path.join(cache_path, "custom_ops", gen_name+".h")
|
||||
gen_lib = os.path.join("custom_ops", gen_name+extension_suffix)
|
||||
with open(gen_head_fname, "w") as f:
|
||||
f.write(gen_src)
|
||||
pyjt_compiler.compile_single(gen_head_fname, gen_src_fname)
|
||||
pyjt_compiler.compile_single(gen_head_fname, gen_src_fname, src=gen_src)
|
||||
# gen src initialize first
|
||||
builds.insert(0, gen_src_fname)
|
||||
|
||||
def insert_anchor(gen_src, anchor_str, insert_str):
|
||||
# insert insert_str after anchor_str into gen_src
|
||||
return gen_src.replace(anchor_str, anchor_str+insert_str, 1)
|
||||
|
||||
for name in pyjt_includes:
|
||||
LOG.i("handle pyjt_include", name)
|
||||
bname = name.split("/")[-1].split(".")[0]
|
||||
gen_src_fname = os.path.join(cache_path, "custom_ops", gen_name+"_"+bname+".cc")
|
||||
pyjt_compiler.compile_single(name, gen_src_fname)
|
||||
builds.insert(1, gen_src_fname)
|
||||
gen_src = insert_anchor(gen_src,
|
||||
"namespace jittor {",
|
||||
f"extern void pyjt_def_{bname}(PyObject* m);")
|
||||
gen_src = insert_anchor(gen_src,
|
||||
"init_module(PyModuleDef* mdef, PyObject* m) {",
|
||||
f"jittor::pyjt_def_{bname}(m);")
|
||||
|
||||
with open(gen_head_fname, "w") as f:
|
||||
f.write(gen_src)
|
||||
|
||||
LOG.vvv(f"Build custum ops lib:{gen_lib}")
|
||||
LOG.vvvv(f"Build sources:{builds}")
|
||||
compile(cc_path, cc_flags+opt_flags+includes+extra_flags, builds, gen_lib)
|
||||
compile(cc_path, extra_flags+cc_flags+opt_flags+includes, builds, gen_lib)
|
||||
|
||||
# add python path and import
|
||||
LOG.vvv(f"Import custum ops lib:{gen_lib}")
|
||||
lib_path = os.path.join(cache_path, "custom_ops")
|
||||
if lib_path not in os.sys.path:
|
||||
os.sys.path.append(lib_path)
|
||||
with jit_utils.import_scope(os.RTLD_GLOBAL | os.RTLD_NOW | os.RTLD_DEEPBIND):
|
||||
exec(f"import {gen_name}")
|
||||
return (locals()[gen_name]).ops
|
||||
# unlock scope when initialize
|
||||
with lock.unlock_scope():
|
||||
with jit_utils.import_scope(dlopen_flags):
|
||||
exec(f"import {gen_name}")
|
||||
mod = locals()[gen_name]
|
||||
if return_module:
|
||||
return mod
|
||||
return mod.ops
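A hypothetical call of compile_custom_ops as documented above; the file names are placeholders and assume a MyAddOp class defined in my_add_op.h / my_add_op.cc:

# hypothetical usage sketch, file names are placeholders
from jittor import compiler

my_ops = compiler.compile_custom_ops(
    ["my_add_op.h", "my_add_op.cc"],
    extra_flags=" -O2 ")
# the generated module exposes MyAddOp as a snake_case function:
# y = my_ops.my_add(x1, x2)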
|
||||
|
||||
|
||||
def get_full_path_of_executable(name):
|
||||
|
@ -689,8 +729,9 @@ def compile_extern():
|
|||
def check_cuda():
|
||||
if nvcc_path == "":
|
||||
return
|
||||
global cc_flags, has_cuda, core_link_flags, cuda_dir, cuda_lib, cuda_include
|
||||
global cc_flags, has_cuda, core_link_flags, cuda_dir, cuda_lib, cuda_include, cuda_home
|
||||
cuda_dir = os.path.dirname(get_full_path_of_executable(nvcc_path))
|
||||
cuda_home = os.path.abspath(os.path.join(cuda_dir, ".."))
|
||||
assert cuda_dir.endswith("bin") and "cuda" in cuda_dir.lower(), f"Wrong cuda_dir: {cuda_dir}"
|
||||
cuda_include = os.path.abspath(os.path.join(cuda_dir, "..", "include"))
|
||||
cuda_lib = os.path.abspath(os.path.join(cuda_dir, "..", "lib64"))
|
||||
|
@ -764,6 +805,7 @@ dlopen_flags = os.RTLD_NOW | os.RTLD_GLOBAL | os.RTLD_DEEPBIND
|
|||
|
||||
with jit_utils.import_scope(import_flags):
|
||||
jit_utils.try_import_jit_utils_core()
|
||||
|
||||
jittor_path = find_jittor_path()
|
||||
check_debug_flags()
|
||||
|
||||
|
@ -785,6 +827,7 @@ addr2line_path = try_find_exe('addr2line')
|
|||
has_pybt = check_pybt(gdb_path, python_path)
|
||||
|
||||
cc_flags += " -Wall -Werror -Wno-unknown-pragmas -std=c++14 -fPIC -march=native "
|
||||
cc_flags += " -fdiagnostics-color=always "
|
||||
link_flags = " -lstdc++ -ldl -shared "
|
||||
core_link_flags = ""
|
||||
opt_flags = ""
|
||||
|
@ -832,6 +875,7 @@ if has_cuda:
|
|||
nvcc_flags = nvcc_flags.replace("-march", "-Xcompiler -march")
|
||||
nvcc_flags = nvcc_flags.replace("-Werror", "")
|
||||
nvcc_flags = nvcc_flags.replace("-fPIC", "-Xcompiler -fPIC")
|
||||
nvcc_flags = nvcc_flags.replace("-fdiagnostics", "-Xcompiler -fdiagnostics")
|
||||
nvcc_flags += f" -x cu --cudart=shared -ccbin='{cc_path}' --use_fast_math "
|
||||
# nvcc warning is noise
|
||||
nvcc_flags += " -w "
|
||||
|
@ -914,7 +958,11 @@ compile_extern()
|
|||
|
||||
with jit_utils.import_scope(import_flags):
|
||||
import jittor_core as core
|
||||
|
||||
flags = core.flags()
|
||||
if has_cuda:
|
||||
nvcc_flags += f" -arch={','.join(map(lambda x:'sm_'+str(x),flags.cuda_archs))} "
|
||||
|
||||
flags.cc_path = cc_path
|
||||
flags.cc_type = cc_type
|
||||
flags.cc_flags = cc_flags + link_flags + kernel_opt_flags
|
||||
|
@ -926,3 +974,5 @@ flags.jittor_path = jittor_path
|
|||
flags.gdb_path = gdb_path
|
||||
flags.addr2line_path = addr2line_path
|
||||
flags.has_pybt = has_pybt
|
||||
|
||||
core.set_lock_path(lock.lock_path)
|
||||
|
|
|
@ -17,36 +17,36 @@ def argmax_pool(x, size, stride, padding=0):
|
|||
|
||||
y = jt.code(y_shape, x.dtype, [x],
|
||||
cpu_src=f'''
|
||||
for (int i=0; i<outshape0; i++)
|
||||
for (int j=0; j<outshape1; j++)
|
||||
for (int k=0; k<outshape2; k++)
|
||||
for (int l=0; l<outshape3; l++) {{
|
||||
for (int i=0; i<out_shape0; i++)
|
||||
for (int j=0; j<out_shape1; j++)
|
||||
for (int k=0; k<out_shape2; k++)
|
||||
for (int l=0; l<out_shape3; l++) {{
|
||||
int kx=k*{stride}+{size}/2-{padding};
|
||||
int ky=l*{stride}+{size}/2-{padding};
|
||||
@out(i,j,k,l) = @in0(i,j,kx,ky);
|
||||
for (int p=kx-{size}/2;p<=kx+{size}/2;p++)
|
||||
for (int q=ky-{size}/2;q<=ky+{size}/2;q++)
|
||||
if (p>=0 && q>=0 && p<in0shape2 && q<in0shape3)
|
||||
if (p>=0 && q>=0 && p<in0_shape2 && q<in0_shape3)
|
||||
if (@out(i,j,k,l) < @in0(i,j,p,q))
|
||||
@out(i,j,k,l) = @in0(i,j,p,q);
|
||||
}}
|
||||
''',
|
||||
cpu_grad_src = [f'''
|
||||
for (int i=0; i<outshape0; i++)
|
||||
for (int j=0; j<outshape1; j++)
|
||||
for (int k=0; k<outshape2; k++)
|
||||
for (int l=0; l<outshape3; l++) @out(i,j,k,l) = 0;
|
||||
for (int i=0; i<out_shape0; i++)
|
||||
for (int j=0; j<out_shape1; j++)
|
||||
for (int k=0; k<out_shape2; k++)
|
||||
for (int l=0; l<out_shape3; l++) @out(i,j,k,l) = 0;
|
||||
|
||||
for (int i=0; i<poutshape0; i++)
|
||||
for (int j=0; j<poutshape1; j++)
|
||||
for (int k=0; k<poutshape2; k++)
|
||||
for (int l=0; l<poutshape3; l++) {{
|
||||
for (int i=0; i<pout_shape0; i++)
|
||||
for (int j=0; j<pout_shape1; j++)
|
||||
for (int k=0; k<pout_shape2; k++)
|
||||
for (int l=0; l<pout_shape3; l++) {{
|
||||
int kx=k*{stride}+{size}/2-{padding};
|
||||
int ky=l*{stride}+{size}/2-{padding};
|
||||
int bo=1;
|
||||
for (int p=kx-{size}/2;p<=kx+{size}/2 && bo;p++)
|
||||
for (int q=ky-{size}/2;q<=ky+{size}/2 && bo;q++)
|
||||
if (p>=0 && q>=0 && p<in0shape2 && q<in0shape3)
|
||||
if (p>=0 && q>=0 && p<in0_shape2 && q<in0_shape3)
|
||||
if (@pout(i,j,k,l) == @in0(i,j,p,q)) {{
|
||||
@out(i,j,p,q) += @dout(i,j,k,l);
|
||||
bo=0;
|
||||
|
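This hunk renames the generated shape accessors from outshapeN / in0shapeN to out_shapeN / in0_shapeN. A minimal jt.code example using the new names, a sketch following the jt.code call shape shown in argmax_pool above:

import jittor as jt

a = jt.random([10])
b = jt.code(a.shape, a.dtype, [a],
    cpu_src='''
        for (int i=0; i<in0_shape0; i++)
            @out(i) = @in0(i)*@in0(i)*2;
    ''',
    cpu_grad_src=['''
        for (int i=0; i<in0_shape0; i++)
            @out(i) = @dout(i)*@in0(i)*4;
    '''])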
|
|
@ -23,6 +23,7 @@ import jittor as jt
|
|||
|
||||
dataset_root = os.path.join(pathlib.Path.home(), ".cache", "jittor", "dataset")
|
||||
mp_log_v = os.environ.get("mp_log_v", 0)
|
||||
mpi = jt.compile_extern.mpi
|
||||
|
||||
class Worker:
|
||||
def __init__(self, target, args, buffer_size):
|
||||
|
@ -130,8 +131,8 @@ class Dataset(object):
|
|||
self.gidc.notify()
|
||||
batch = []
|
||||
if mp_log_v:
|
||||
print(f"#{worker_id} {os.getpid()} load batch", cid*self.batch_size, min(self.total_len, (cid+1)*self.batch_size))
|
||||
for i in range(cid*self.batch_size, min(self.total_len, (cid+1)*self.batch_size)):
|
||||
print(f"#{worker_id} {os.getpid()} load batch", cid*self.real_batch_size, min(self.real_len, (cid+1)*self.real_batch_size))
|
||||
for i in range(cid*self.real_batch_size, min(self.real_len, (cid+1)*self.real_batch_size)):
|
||||
batch.append(self[self.index_list[i]])
|
||||
batch = self.collate_batch(batch)
|
||||
if mp_log_v:
|
||||
|
@ -157,7 +158,7 @@ class Dataset(object):
|
|||
w.buffer.clear()
|
||||
|
||||
def _init_workers(self):
|
||||
self.index_list = mp.Array('i', self.total_len, lock=False)
|
||||
self.index_list = mp.Array('i', self.real_len, lock=False)
|
||||
workers = []
|
||||
# batch id to worker id
|
||||
self.idmap = mp.Array('i', self.batch_len, lock=False)
|
||||
|
@ -174,7 +175,7 @@ class Dataset(object):
|
|||
buffer_size=self.buffer_size)
|
||||
workers.append(w)
|
||||
self.workers = workers
|
||||
self.index_list_numpy = np.ndarray(dtype='int32', shape=self.total_len, buffer=self.index_list)
|
||||
self.index_list_numpy = np.ndarray(dtype='int32', shape=self.real_len, buffer=self.index_list)
|
||||
|
||||
def __del__(self):
|
||||
if mp_log_v:
|
||||
|
@ -186,10 +187,57 @@ class Dataset(object):
|
|||
index_list = get_order_list(self.total_len)
|
||||
else:
|
||||
index_list = get_random_list(self.total_len)
|
||||
|
||||
# scatter index_list for all mpi process
|
||||
# scatter rule:
|
||||
# batch 1 batch 2
|
||||
# [........] [........] ...
|
||||
# 00011122 00011122
|
||||
# if last batch is smaller than world_size
|
||||
# pad to world_size
|
||||
# last batch
|
||||
# [.] -> [012]
|
||||
if mpi:
|
||||
world_size = mpi.world_size()
|
||||
world_rank = mpi.world_rank()
|
||||
index_list = np.int32(index_list)
|
||||
mpi.broadcast(index_list, 0)
|
||||
|
||||
assert self.batch_size >= world_size, \
|
||||
f"Batch size({self.batch_size}) is smaller than MPI world_size({world_size})"
|
||||
real_batch_size = (self.batch_size-1) // world_size + 1
|
||||
if real_batch_size * world_size != self.batch_size:
|
||||
LOG.w("Batch size is not divisible by MPI world size, "
|
||||
"The distributed version may be different from "
|
||||
"the single-process version.")
|
||||
fix_batch = self.total_len // self.batch_size
|
||||
last_batch = self.total_len - fix_batch * self.batch_size
|
||||
fix_batch_l = index_list[0:fix_batch*self.batch_size] \
|
||||
.reshape(-1,self.batch_size)
|
||||
fix_batch_l = fix_batch_l[
|
||||
:,real_batch_size*world_rank:real_batch_size*(world_rank+1)]
|
||||
real_batch_size = fix_batch_l.shape[1]
|
||||
fix_batch_l = fix_batch_l.flatten()
|
||||
if not self.drop_last and last_batch > 0:
|
||||
last_batch_l = index_list[-last_batch:]
|
||||
real_last_batch = (last_batch-1)//world_size+1
|
||||
l = real_last_batch * world_rank
|
||||
r = l + real_last_batch
|
||||
if r > last_batch: r = last_batch
|
||||
if l >= r: l = r-1
|
||||
index_list = np.concatenate([fix_batch_l, last_batch_l[l:r]])
|
||||
else:
|
||||
index_list = fix_batch_l
|
||||
|
||||
self.real_len = len(index_list)
|
||||
self.real_batch_size = real_batch_size
|
||||
assert self.total_len // self.batch_size == \
|
||||
self.real_len // self.real_batch_size
|
||||
else:
|
||||
self.real_len = self.total_len
|
||||
self.real_batch_size = self.batch_size
|
||||
|
||||
self.batch_len = len(self)
|
||||
if "batch_len" in os.environ:
|
||||
self.batch_len = int(os.environ["batch_len"])
|
||||
|
||||
if not hasattr(self, "workers") and self.num_workers:
|
||||
self._init_workers()
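The scatter rule in the comment above can be checked with plain NumPy; an illustrative run with total_len=10, batch_size=4, world_size=2, so each rank takes real_batch_size = 2 indices from every full batch and the short last batch is split across ranks:

import numpy as np

total_len, batch_size, world_size = 10, 4, 2
index_list = np.arange(total_len, dtype=np.int32)
real_batch_size = (batch_size - 1) // world_size + 1       # 2
fix_batch = total_len // batch_size                        # 2 full batches
last_batch = total_len - fix_batch * batch_size            # 2 leftover samples
full = index_list[:fix_batch * batch_size].reshape(-1, batch_size)
for rank in range(world_size):
    part = full[:, real_batch_size*rank : real_batch_size*(rank+1)].flatten()
    real_last = (last_batch - 1) // world_size + 1
    l = real_last * rank
    r = min(l + real_last, last_batch)
    if l >= r: l = r - 1
    print(rank, np.concatenate([part, index_list[-last_batch:][l:r]]))
# rank 0 -> [0 1 4 5 8], rank 1 -> [2 3 6 7 9]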
|
||||
|
@ -223,7 +271,7 @@ class Dataset(object):
|
|||
batch_data = []
|
||||
for idx in index_list:
|
||||
batch_data.append(self[int(idx)])
|
||||
if len(batch_data) == self.batch_size:
|
||||
if len(batch_data) == self.real_batch_size:
|
||||
batch_data = self.collate_batch(batch_data)
|
||||
batch_data = self.to_jittor(batch_data)
|
||||
yield batch_data
|
||||
|
|
|
@ -15,6 +15,7 @@ from tqdm import tqdm
|
|||
import numpy as np
|
||||
from collections.abc import Sequence, Mapping
|
||||
from PIL import Image
|
||||
from .. import lock
|
||||
|
||||
def ensure_dir(dir_path):
|
||||
if not os.path.isdir(dir_path):
|
||||
|
@ -36,7 +37,7 @@ def _progress():
|
|||
|
||||
return bar_update
|
||||
|
||||
|
||||
@lock.lock_scope()
|
||||
def download_url_to_local(url, filename, root_folder, md5):
|
||||
ensure_dir(root_folder)
|
||||
file_path = os.path.join(root_folder, filename)
|
||||
|
|
|
@ -0,0 +1,63 @@
import fcntl
import os
from jittor_utils import cache_path, LOG

class Lock:
    def __init__(self, filename):
        self.handle = open(filename, 'w')
        LOG.v(f'OPEN LOCK path: {filename} PID: {os.getpid()}')
        self.is_locked = False

    def lock(self):
        fcntl.flock(self.handle, fcntl.LOCK_EX)
        self.is_locked = True
        LOG.vv(f'LOCK PID: {os.getpid()}')

    def unlock(self):
        fcntl.flock(self.handle, fcntl.LOCK_UN)
        self.is_locked = False
        LOG.vv(f'UNLOCK PID: {os.getpid()}')

    def __del__(self):
        self.handle.close()


class _base_scope:
    '''base_scope for support @xxx syntax'''
    def __enter__(self): pass
    def __exit__(self, *exc): pass
    def __call__(self, func):
        def inner(*args, **kw):
            with self:
                ret = func(*args, **kw)
            return ret
        return inner

class lock_scope(_base_scope):
    def __enter__(self):
        self.is_locked = jittor_lock.is_locked
        if not self.is_locked:
            jittor_lock.lock()

    def __exit__(self, *exc):
        if not self.is_locked:
            jittor_lock.unlock()

class unlock_scope(_base_scope):
    def __enter__(self):
        self.is_locked = jittor_lock.is_locked
        if self.is_locked:
            jittor_lock.unlock()

    def __exit__(self, *exc):
        if self.is_locked:
            jittor_lock.lock()

lock_path = os.path.abspath(os.path.join(cache_path, "../jittor.lock"))
if not os.path.exists(lock_path):
    LOG.i("Create lock file:", lock_path)
    try:
        os.mknod(lock_path)
    except:
        pass
jittor_lock = Lock(lock_path)
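lock_scope works both as a decorator (via _base_scope.__call__) and as a context manager. A small usage sketch, assuming the module is importable as jittor.lock:

from jittor import lock

@lock.lock_scope()
def compile_step():
    pass   # only one jittor process at a time runs this body

def other_step():
    with lock.lock_scope():
        pass   # same exclusion written as a with-block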
|
|
@ -1,2 +1,18 @@
from . import resnet
from . import vgg
from .resnet import *
from . import vgg
from .vgg import *
from . import alexnet
from .alexnet import *
from . import squeezenet
from .squeezenet import *
from . import inception
from .inception import *
from . import googlenet
from .googlenet import *
from . import mobilenet
from .mobilenet import *
from . import mnasnet
from .mnasnet import *
from . import shufflenetv2
from .shufflenetv2 import *
|
@ -0,0 +1,53 @@
# ***************************************************************
# Copyright (c) 2020 Jittor. Authors:
# Wenyang Zhou <576825820@qq.com>
# Dun Liang <randonlang@gmail.com>.
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
# This model is generated by pytorch converter.
import jittor as jt
import jittor.nn as nn

__all__ = ['AlexNet', 'alexnet']

class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv(3, 64, kernel_size=11, stride=4, padding=2),
            nn.Relu(),
            nn.Pool(kernel_size=3, stride=2, op='maximum'),
            nn.Conv(64, 192, kernel_size=5, padding=2),
            nn.Relu(), nn.Pool(kernel_size=3, stride=2, op='maximum'),
            nn.Conv(192, 384, kernel_size=3, padding=1),
            nn.Relu(),
            nn.Conv(384, 256, kernel_size=3, padding=1),
            nn.Relu(),
            nn.Conv(256, 256, kernel_size=3, padding=1),
            nn.Relu(),
            nn.Pool(kernel_size=3, stride=2, op='maximum')
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(((256 * 6) * 6), 4096),
            nn.Relu(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.Relu(),
            nn.Linear(4096, num_classes)
        )

    def execute(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = jt.reshape(x, (x.shape[0], (- 1)))
        x = self.classifier(x)
        return x

def alexnet(**kwargs):
    model = AlexNet(**kwargs)
    return model
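A quick shape check of the converted model (random weights only, assuming the jittor.models package layout set up in __init__.py above):

import jittor as jt
from jittor.models import alexnet

net = alexnet(num_classes=10)
x = jt.random([2, 3, 224, 224])   # NCHW input for nn.Conv
y = net(x)
print(y.shape)                    # [2, 10]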
|
|
@ -0,0 +1,143 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
|
||||
__all__ = ['GoogLeNet', 'googlenet']
|
||||
|
||||
def googlenet(**kwargs):
|
||||
return GoogLeNet(**kwargs)
|
||||
|
||||
class GoogLeNet(nn.Module):
|
||||
|
||||
def __init__(self, num_classes=1000, aux_logits=True, init_weights=True, blocks=None):
|
||||
super(GoogLeNet, self).__init__()
|
||||
if (blocks is None):
|
||||
blocks = [BasicConv2d, Inception, InceptionAux]
|
||||
assert (len(blocks) == 3)
|
||||
conv_block = blocks[0]
|
||||
inception_block = blocks[1]
|
||||
inception_aux_block = blocks[2]
|
||||
self.aux_logits = aux_logits
|
||||
self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
|
||||
self.maxpool1 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum')
|
||||
self.conv2 = conv_block(64, 64, kernel_size=1)
|
||||
self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
|
||||
self.maxpool2 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum')
|
||||
self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
|
||||
self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
|
||||
self.maxpool3 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum')
|
||||
self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
|
||||
self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
|
||||
self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
|
||||
self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
|
||||
self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
|
||||
self.maxpool4 = nn.Pool(2, stride=2, ceil_mode=True, op='maximum')
|
||||
self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
|
||||
self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)
|
||||
if aux_logits:
|
||||
self.aux1 = inception_aux_block(512, num_classes)
|
||||
self.aux2 = inception_aux_block(528, num_classes)
|
||||
else:
|
||||
self.aux1 = None
|
||||
self.aux2 = None
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.dropout = nn.Dropout(0.2)
|
||||
self.fc = nn.Linear(1024, num_classes)
|
||||
|
||||
def _forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.maxpool1(x)
|
||||
x = self.conv2(x)
|
||||
x = self.conv3(x)
|
||||
x = self.maxpool2(x)
|
||||
x = self.inception3a(x)
|
||||
x = self.inception3b(x)
|
||||
x = self.maxpool3(x)
|
||||
x = self.inception4a(x)
|
||||
if (self.aux1 is not None):
|
||||
aux1 = self.aux1(x)
|
||||
x = self.inception4b(x)
|
||||
x = self.inception4c(x)
|
||||
x = self.inception4d(x)
|
||||
if (self.aux2 is not None):
|
||||
aux2 = self.aux2(x)
|
||||
x = self.inception4e(x)
|
||||
x = self.maxpool4(x)
|
||||
x = self.inception5a(x)
|
||||
x = self.inception5b(x)
|
||||
x = self.avgpool(x)
|
||||
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = self.dropout(x)
|
||||
x = self.fc(x)
|
||||
return (x, aux2, aux1)
|
||||
|
||||
def eager_outputs(self, x, aux2, aux1):
|
||||
return x
|
||||
|
||||
def execute(self, x):
|
||||
(x, aux1, aux2) = self._forward(x)
|
||||
aux_defined = (self.aux_logits)
|
||||
return self.eager_outputs(x, aux2, aux1)
|
||||
|
||||
class Inception(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, conv_block=None):
|
||||
super(Inception, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)
|
||||
self.branch2 = nn.Sequential(conv_block(in_channels, ch3x3red, kernel_size=1), conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1))
|
||||
self.branch3 = nn.Sequential(conv_block(in_channels, ch5x5red, kernel_size=1), conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1))
|
||||
self.branch4 = nn.Sequential(nn.Pool(kernel_size=3, stride=1, padding=1, ceil_mode=True, op='maximum'), conv_block(in_channels, pool_proj, kernel_size=1))
|
||||
|
||||
def _forward(self, x):
|
||||
branch1 = self.branch1(x)
|
||||
branch2 = self.branch2(x)
|
||||
branch3 = self.branch3(x)
|
||||
branch4 = self.branch4(x)
|
||||
outputs = [branch1, branch2, branch3, branch4]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionAux(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, num_classes, conv_block=None):
|
||||
super(InceptionAux, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.conv = conv_block(in_channels, 128, kernel_size=1)
|
||||
self.fc1 = nn.Linear(2048, 1024)
|
||||
self.fc2 = nn.Linear(1024, num_classes)
|
||||
|
||||
def execute(self, x):
|
||||
x = nn.AdaptiveAvgPool2d(4)(x)
|
||||
x = self.conv(x)
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = nn.relu(self.fc1(x))
|
||||
x = nn.Dropout(0.7)(x)
|
||||
x = self.fc2(x)
|
||||
return x
|
||||
|
||||
class BasicConv2d(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, out_channels, **kwargs):
|
||||
super(BasicConv2d, self).__init__()
|
||||
self.conv = nn.Conv(in_channels, out_channels, bias=False, **kwargs)
|
||||
self.bn = nn.BatchNorm(out_channels, eps=0.001)
|
||||
|
||||
def execute(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
return nn.relu(x)
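All of the Inception blocks above follow the same branch-and-concat pattern; a stripped-down sketch with illustrative channel counts:

import jittor as jt
from jittor import nn

x = jt.random([1, 192, 28, 28])
b1 = nn.Conv(192, 64, 1)(x)               # 1x1 branch
b2 = nn.Conv(192, 32, 3, padding=1)(x)    # 3x3 branch keeps the spatial size
out = jt.contrib.concat([b1, b2], dim=1)  # channels concatenate: 64 + 32
print(out.shape)                          # [1, 96, 28, 28]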
|
|
@ -0,0 +1,268 @@
|
|||
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
__all__ = ['Inception3', 'inception_v3']
|
||||
|
||||
def inception_v3(pretrained=False, progress=True, **kwargs):
|
||||
return Inception3(**kwargs)
|
||||
|
||||
class Inception3(nn.Module):
|
||||
|
||||
def __init__(self, num_classes=1000, aux_logits=True, inception_blocks=None, init_weights=True):
|
||||
super(Inception3, self).__init__()
|
||||
if (inception_blocks is None):
|
||||
inception_blocks = [BasicConv2d, InceptionA, InceptionB, InceptionC, InceptionD, InceptionE, InceptionAux]
|
||||
assert (len(inception_blocks) == 7)
|
||||
conv_block = inception_blocks[0]
|
||||
inception_a = inception_blocks[1]
|
||||
inception_b = inception_blocks[2]
|
||||
inception_c = inception_blocks[3]
|
||||
inception_d = inception_blocks[4]
|
||||
inception_e = inception_blocks[5]
|
||||
inception_aux = inception_blocks[6]
|
||||
self.aux_logits = aux_logits
|
||||
self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2)
|
||||
self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3)
|
||||
self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1)
|
||||
self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1)
|
||||
self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3)
|
||||
self.Mixed_5b = inception_a(192, pool_features=32)
|
||||
self.Mixed_5c = inception_a(256, pool_features=64)
|
||||
self.Mixed_5d = inception_a(288, pool_features=64)
|
||||
self.Mixed_6a = inception_b(288)
|
||||
self.Mixed_6b = inception_c(768, channels_7x7=128)
|
||||
self.Mixed_6c = inception_c(768, channels_7x7=160)
|
||||
self.Mixed_6d = inception_c(768, channels_7x7=160)
|
||||
self.Mixed_6e = inception_c(768, channels_7x7=192)
|
||||
if aux_logits:
|
||||
self.AuxLogits = inception_aux(768, num_classes)
|
||||
self.Mixed_7a = inception_d(768)
|
||||
self.Mixed_7b = inception_e(1280)
|
||||
self.Mixed_7c = inception_e(2048)
|
||||
self.fc = nn.Linear(2048, num_classes)
|
||||
|
||||
def _forward(self, x):
|
||||
x = self.Conv2d_1a_3x3(x)
|
||||
x = self.Conv2d_2a_3x3(x)
|
||||
x = self.Conv2d_2b_3x3(x)
|
||||
x = nn.pool(x, 3, "maximum", stride=2)
|
||||
x = self.Conv2d_3b_1x1(x)
|
||||
x = self.Conv2d_4a_3x3(x)
|
||||
x = nn.pool(x, 3, "maximum", stride=2)
|
||||
x = self.Mixed_5b(x)
|
||||
x = self.Mixed_5c(x)
|
||||
x = self.Mixed_5d(x)
|
||||
x = self.Mixed_6a(x)
|
||||
x = self.Mixed_6b(x)
|
||||
x = self.Mixed_6c(x)
|
||||
x = self.Mixed_6d(x)
|
||||
x = self.Mixed_6e(x)
|
||||
aux_defined = self.aux_logits
|
||||
if aux_defined:
|
||||
aux = self.AuxLogits(x)
|
||||
else:
|
||||
aux = None
|
||||
x = self.Mixed_7a(x)
|
||||
x = self.Mixed_7b(x)
|
||||
x = self.Mixed_7c(x)
|
||||
x = nn.AdaptiveAvgPool2d(1)(x)
|
||||
x = nn.Dropout()(x)
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = self.fc(x)
|
||||
return (x, aux)
|
||||
|
||||
def eager_outputs(self, x, aux):
|
||||
return x
|
||||
|
||||
def execute(self, x):
|
||||
(x, aux) = self._forward(x)
|
||||
aux_defined = self.aux_logits
|
||||
return self.eager_outputs(x, aux)
|
||||
|
||||
class InceptionA(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, pool_features, conv_block=None):
|
||||
super(InceptionA, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch1x1 = conv_block(in_channels, 64, kernel_size=1)
|
||||
self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1)
|
||||
self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2)
|
||||
self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
|
||||
self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
|
||||
self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1)
|
||||
self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1)
|
||||
|
||||
def _forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
branch5x5 = self.branch5x5_1(x)
|
||||
branch5x5 = self.branch5x5_2(branch5x5)
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
||||
branch_pool = nn.pool(x, 3, "mean", stride=1, padding=1)
|
||||
branch_pool = self.branch_pool(branch_pool)
|
||||
outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionB(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, conv_block=None):
|
||||
super(InceptionB, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2)
|
||||
self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1)
|
||||
self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1)
|
||||
self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2)
|
||||
|
||||
def _forward(self, x):
|
||||
branch3x3 = self.branch3x3(x)
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
||||
branch_pool = nn.pool(x, 3, "maximum", stride=2)
|
||||
outputs = [branch3x3, branch3x3dbl, branch_pool]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionC(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, channels_7x7, conv_block=None):
|
||||
super(InceptionC, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch1x1 = conv_block(in_channels, 192, kernel_size=1)
|
||||
c7 = channels_7x7
|
||||
self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1)
|
||||
self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
|
||||
self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0))
|
||||
self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1)
|
||||
self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
|
||||
self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3))
|
||||
self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0))
|
||||
self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3))
|
||||
self.branch_pool = conv_block(in_channels, 192, kernel_size=1)
|
||||
|
||||
def _forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
branch7x7 = self.branch7x7_1(x)
|
||||
branch7x7 = self.branch7x7_2(branch7x7)
|
||||
branch7x7 = self.branch7x7_3(branch7x7)
|
||||
branch7x7dbl = self.branch7x7dbl_1(x)
|
||||
branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
|
||||
branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
|
||||
branch_pool = nn.pool(x, kernel_size=3, op="mean", stride=1, padding=1)
|
||||
branch_pool = self.branch_pool(branch_pool)
|
||||
outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionD(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, conv_block=None):
|
||||
super(InceptionD, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1)
|
||||
self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2)
|
||||
self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1)
|
||||
self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3))
|
||||
self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0))
|
||||
self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2)
|
||||
|
||||
def _forward(self, x):
|
||||
branch3x3 = self.branch3x3_1(x)
|
||||
branch3x3 = self.branch3x3_2(branch3x3)
|
||||
branch7x7x3 = self.branch7x7x3_1(x)
|
||||
branch7x7x3 = self.branch7x7x3_2(branch7x7x3)
|
||||
branch7x7x3 = self.branch7x7x3_3(branch7x7x3)
|
||||
branch7x7x3 = self.branch7x7x3_4(branch7x7x3)
|
||||
branch_pool = nn.pool(x, kernel_size=3, op="maximum", stride=2)
|
||||
outputs = [branch3x3, branch7x7x3, branch_pool]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionE(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, conv_block=None):
|
||||
super(InceptionE, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.branch1x1 = conv_block(in_channels, 320, kernel_size=1)
|
||||
self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1)
|
||||
self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
|
||||
self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))
|
||||
self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1)
|
||||
self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1)
|
||||
self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1))
|
||||
self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0))
|
||||
self.branch_pool = conv_block(in_channels, 192, kernel_size=1)
|
||||
|
||||
def _forward(self, x):
|
||||
branch1x1 = self.branch1x1(x)
|
||||
branch3x3 = self.branch3x3_1(x)
|
||||
branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)]
|
||||
branch3x3 = jt.contrib.concat(branch3x3, dim=1)
|
||||
branch3x3dbl = self.branch3x3dbl_1(x)
|
||||
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
||||
branch3x3dbl = [self.branch3x3dbl_3a(branch3x3dbl), self.branch3x3dbl_3b(branch3x3dbl)]
|
||||
branch3x3dbl = jt.contrib.concat(branch3x3dbl, dim=1)
|
||||
branch_pool = nn.pool(x, kernel_size=3, op="mean", stride=1, padding=1)
|
||||
branch_pool = self.branch_pool(branch_pool)
|
||||
outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
|
||||
return outputs
|
||||
|
||||
def execute(self, x):
|
||||
outputs = self._forward(x)
|
||||
return jt.contrib.concat(outputs, dim=1)
|
||||
|
||||
class InceptionAux(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, num_classes, conv_block=None):
|
||||
super(InceptionAux, self).__init__()
|
||||
if (conv_block is None):
|
||||
conv_block = BasicConv2d
|
||||
self.conv0 = conv_block(in_channels, 128, kernel_size=1)
|
||||
self.conv1 = conv_block(128, 768, kernel_size=5)
|
||||
self.conv1.stddev = 0.01
|
||||
self.fc = nn.Linear(768, num_classes)
|
||||
self.fc.stddev = 0.001
|
||||
|
||||
def execute(self, x):
|
||||
x = nn.pool(x, kernel_size=5, op="mean", stride=3)
|
||||
x = self.conv0(x)
|
||||
x = self.conv1(x)
|
||||
|
||||
|
||||
x = nn.AdaptiveAvgPool2d(1)(x)
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
class BasicConv2d(nn.Module):
|
||||
|
||||
def __init__(self, in_channels, out_channels, **kwargs):
|
||||
super(BasicConv2d, self).__init__()
|
||||
self.conv = nn.Conv(in_channels, out_channels, bias=False, **kwargs)
|
||||
self.bn = nn.BatchNorm(out_channels, eps=0.001)
|
||||
|
||||
def execute(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
return nn.relu(x)
|
|
@ -0,0 +1,99 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
__all__ = ['MNASNet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3']
|
||||
_BN_MOMENTUM = (1 - 0.9997)
|
||||
|
||||
class _InvertedResidual(nn.Module):
|
||||
|
||||
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, bn_momentum=0.1):
|
||||
super(_InvertedResidual, self).__init__()
|
||||
assert (stride in [1, 2])
|
||||
assert (kernel_size in [3, 5])
|
||||
mid_ch = (in_ch * expansion_factor)
|
||||
self.apply_residual = ((in_ch == out_ch) and (stride == 1))
|
||||
self.layers = nn.Sequential(nn.Conv(in_ch, mid_ch, 1, bias=False), nn.BatchNorm(mid_ch, momentum=bn_momentum), nn.Relu(), nn.Conv(mid_ch, mid_ch, kernel_size, padding=(kernel_size // 2), stride=stride, groups=mid_ch, bias=False), nn.BatchNorm(mid_ch, momentum=bn_momentum), nn.Relu(), nn.Conv(mid_ch, out_ch, 1, bias=False), nn.BatchNorm(out_ch, momentum=bn_momentum))
|
||||
|
||||
def execute(self, input):
|
||||
if self.apply_residual:
|
||||
return (self.layers(input) + input)
|
||||
else:
|
||||
return self.layers(input)
|
||||
|
||||
def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats, bn_momentum):
|
||||
assert (repeats >= 1)
|
||||
first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, bn_momentum=bn_momentum)
|
||||
remaining = []
|
||||
for _ in range(1, repeats):
|
||||
remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, bn_momentum=bn_momentum))
|
||||
return nn.Sequential(first, *remaining)
|
||||
|
||||
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
|
||||
assert (0.0 < round_up_bias < 1.0)
|
||||
new_val = max(divisor, ((int((val + (divisor / 2))) // divisor) * divisor))
|
||||
return (new_val if (new_val >= (round_up_bias * val)) else (new_val + divisor))
|
||||
|
||||
def _get_depths(alpha):
|
||||
depths = [24, 40, 80, 96, 192, 320]
|
||||
return [_round_to_multiple_of((depth * alpha), 8) for depth in depths]
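The depth rounding above is pure arithmetic; a worked example with an illustrative width multiplier alpha = 0.75 (logic copied from the helper above):

def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
    new_val = max(divisor, (int(val + divisor / 2) // divisor) * divisor)
    return new_val if new_val >= round_up_bias * val else new_val + divisor

depths = [24, 40, 80, 96, 192, 320]
print([_round_to_multiple_of(d * 0.75, 8) for d in depths])
# 24*0.75 = 18 rounds down to 16, but 16 < 0.9*18, so it is bumped to 24:
# -> [24, 32, 64, 72, 144, 240]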
|
||||
|
||||
class MNASNet(nn.Module):
|
||||
_version = 2
|
||||
|
||||
def __init__(self, alpha, num_classes=1000, dropout=0.2):
|
||||
super(MNASNet, self).__init__()
|
||||
assert (alpha > 0.0)
|
||||
self.alpha = alpha
|
||||
self.num_classes = num_classes
|
||||
depths = _get_depths(alpha)
|
||||
layers = [
|
||||
nn.Conv(3, 32, 3, padding=1, stride=2, bias=False),
|
||||
nn.BatchNorm(32, momentum=_BN_MOMENTUM),
|
||||
nn.Relu(),
|
||||
nn.Conv(32, 32, 3, padding=1, stride=1, groups=32, bias=False),
|
||||
nn.BatchNorm(32, momentum=_BN_MOMENTUM),
|
||||
nn.Relu(),
|
||||
nn.Conv(32, 16, 1, padding=0, stride=1, bias=False),
|
||||
nn.BatchNorm(16, momentum=_BN_MOMENTUM),
|
||||
_stack(16, depths[0], 3, 2, 3, 3, _BN_MOMENTUM),
|
||||
_stack(depths[0], depths[1], 5, 2, 3, 3, _BN_MOMENTUM),
|
||||
_stack(depths[1], depths[2], 5, 2, 6, 3, _BN_MOMENTUM),
|
||||
_stack(depths[2], depths[3], 3, 1, 6, 2, _BN_MOMENTUM),
|
||||
_stack(depths[3], depths[4], 5, 2, 6, 4, _BN_MOMENTUM),
|
||||
_stack(depths[4], depths[5], 3, 1, 6, 1, _BN_MOMENTUM),
|
||||
nn.Conv(depths[5], 1280, 1, padding=0, stride=1, bias=False),
|
||||
nn.BatchNorm(1280, momentum=_BN_MOMENTUM),
|
||||
nn.Relu()
|
||||
]
|
||||
self.layers = nn.Sequential(*layers)
|
||||
self.classifier = nn.Sequential(nn.Dropout(p=dropout), nn.Linear(1280, num_classes))
|
||||
|
||||
def execute(self, x):
|
||||
x = self.layers(x)
|
||||
x = x.mean([2, 3])
|
||||
return self.classifier(x)
|
||||
|
||||
def mnasnet0_5(**kwargs):
|
||||
model = MNASNet(0.5, **kwargs)
|
||||
return model
|
||||
|
||||
def mnasnet0_75(**kwargs):
|
||||
model = MNASNet(0.75, **kwargs)
|
||||
return model
|
||||
|
||||
def mnasnet1_0(**kwargs):
|
||||
model = MNASNet(1.0, **kwargs)
|
||||
return model
|
||||
|
||||
def mnasnet1_3(**kwargs):
|
||||
model = MNASNet(1.3, **kwargs)
|
||||
return model
|
|
@ -0,0 +1,88 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
|
||||
import jittor as jt
|
||||
from jittor import init
|
||||
from jittor import nn
|
||||
__all__ = ['MobileNetV2', 'mobilenet_v2']
|
||||
|
||||
def _make_divisible(v, divisor, min_value=None):
|
||||
if (min_value is None):
|
||||
min_value = divisor
|
||||
new_v = max(min_value, ((int((v + (divisor / 2))) // divisor) * divisor))
|
||||
if (new_v < (0.9 * v)):
|
||||
new_v += divisor
|
||||
return new_v
|
||||
|
||||
class ConvBNReLU(nn.Sequential):
|
||||
|
||||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
|
||||
padding = ((kernel_size - 1) // 2)
|
||||
super(ConvBNReLU, self).__init__(nn.Conv(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), nn.BatchNorm(out_planes), nn.ReLU6())
|
||||
|
||||
class InvertedResidual(nn.Module):
|
||||
|
||||
def __init__(self, inp, oup, stride, expand_ratio):
|
||||
super(InvertedResidual, self).__init__()
|
||||
self.stride = stride
|
||||
assert (stride in [1, 2])
|
||||
hidden_dim = int(round((inp * expand_ratio)))
|
||||
self.use_res_connect = ((self.stride == 1) and (inp == oup))
|
||||
layers = []
|
||||
if (expand_ratio != 1):
|
||||
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
|
||||
layers.extend([ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), nn.Conv(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm(oup)])
|
||||
self.conv = nn.Sequential(*layers)
|
||||
|
||||
def execute(self, x):
|
||||
if self.use_res_connect:
|
||||
return (x + self.conv(x))
|
||||
else:
|
||||
return self.conv(x)
|
||||
|
||||
class MobileNetV2(nn.Module):
|
||||
|
||||
def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8, block=None):
|
||||
super(MobileNetV2, self).__init__()
|
||||
if (block is None):
|
||||
block = InvertedResidual
|
||||
input_channel = 32
|
||||
last_channel = 1280
|
||||
if (inverted_residual_setting is None):
|
||||
inverted_residual_setting = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1]]
|
||||
if ((len(inverted_residual_setting) == 0) or (len(inverted_residual_setting[0]) != 4)):
|
||||
raise ValueError('inverted_residual_setting should be non-empty or a 4-element list, got {}'.format(inverted_residual_setting))
|
||||
input_channel = _make_divisible((input_channel * width_mult), round_nearest)
|
||||
self.last_channel = _make_divisible((last_channel * max(1.0, width_mult)), round_nearest)
|
||||
features = [ConvBNReLU(3, input_channel, stride=2)]
|
||||
for (t, c, n, s) in inverted_residual_setting:
|
||||
output_channel = _make_divisible((c * width_mult), round_nearest)
|
||||
for i in range(n):
|
||||
stride = (s if (i == 0) else 1)
|
||||
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
|
||||
input_channel = output_channel
|
||||
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
|
||||
self.features = nn.Sequential(*features)
|
||||
self.classifier = nn.Sequential(nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes))
|
||||
|
||||
def _forward_impl(self, x):
|
||||
x = self.features(x)
|
||||
x = nn.AdaptiveAvgPool2d(1)(x)
|
||||
x = jt.reshape(x, (x.shape[0], -1))
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
def execute(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
def mobilenet_v2():
|
||||
model = MobileNetV2()
|
||||
return model
|
||||
|
|
@ -7,200 +7,128 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
from jittor import Module
|
||||
|
||||
@jt.var_scope('basic_block')
|
||||
def basic_block(x, is_train, in_planes, out_planes, stride = 1):
|
||||
identity = x
|
||||
x = nn.conv(x, in_planes, out_planes, 3, 1, stride)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
x = nn.relu(x)
|
||||
x = nn.conv(x, out_planes, out_planes, 3, 1)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
if in_planes!=out_planes:
|
||||
identity = nn.conv(identity, in_planes, out_planes, 1, 0, stride)
|
||||
identity = nn.batch_norm(identity, is_train)
|
||||
x = x+identity
|
||||
x = nn.relu(x)
|
||||
return x
|
||||
__all__ = ['ResNet', 'Resnet18', 'Resnet34', 'Resnet50', 'Resnet101', 'Resnet152', 'Resnext50_32x4d', 'Resnext101_32x8d', 'Wide_resnet50_2', 'Wide_resnet101_2',
|
||||
'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2']
|
||||
|
||||
@jt.var_scope('make_layer')
|
||||
def make_layer(x, is_train, out_planes, blocks, layer_in_planes, stride = 1):
|
||||
x = basic_block(x, is_train, layer_in_planes, out_planes, stride)
|
||||
layer_in_planes = out_planes
|
||||
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
|
||||
return nn.Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, groups=groups, bias=False, dilation=dilation)
|
||||
|
||||
for i in range(1, blocks):
|
||||
x = basic_block(x, is_train, layer_in_planes, out_planes)
|
||||
return x, layer_in_planes
|
||||
def conv1x1(in_planes, out_planes, stride=1):
|
||||
return nn.Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
|
||||
|
||||
@jt.var_scope('bottleneck_block')
|
||||
def bottleneck_block(x, is_train, in_planes, out_planes, stride = 1):
|
||||
expansion = 4
|
||||
width = out_planes
|
||||
identity = x
|
||||
|
||||
x = nn.conv(x, in_planes, width, 1, 0)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
x = nn.relu(x)
|
||||
|
||||
x = nn.conv(x, width, width, 3, 1, stride)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
x = nn.relu(x)
|
||||
|
||||
x = nn.conv(x, width, out_planes * expansion, 1, 0)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
|
||||
if in_planes != out_planes * expansion:
|
||||
identity = nn.conv(identity, in_planes, out_planes * expansion, 1, 0, stride)
|
||||
identity = nn.batch_norm(identity, is_train)
|
||||
|
||||
x = x+identity
|
||||
x = nn.relu(x)
|
||||
return x
|
||||
|
||||
@jt.var_scope('make_layer_bottleneck')
|
||||
def make_layer_bottleneck(x, is_train, out_planes, blocks, layer_in_planes, stride = 1):
|
||||
expansion = 4
|
||||
x = bottleneck_block(x, is_train, layer_in_planes, out_planes, stride)
|
||||
layer_in_planes = out_planes * expansion
|
||||
for i in range(1, blocks):
|
||||
x = bottleneck_block(x, is_train, layer_in_planes, out_planes)
|
||||
return x, layer_in_planes
|
||||
|
||||
@jt.var_scope('resnet')
|
||||
def resnet(x, is_train, block, layers, num_classes = 1000):
|
||||
layer_in_planes = 64
|
||||
x = nn.conv(x, 3, layer_in_planes, 7, 3, 2)
|
||||
x = nn.batch_norm(x, is_train)
|
||||
x = nn.relu(x)
|
||||
x = nn.pool(x, 3, "maximum", 1, 2)
|
||||
x, layer_in_planes = block(x, is_train, 64, layers[0], layer_in_planes)
|
||||
x, layer_in_planes = block(x, is_train, 128, layers[1], layer_in_planes, 2)
|
||||
x, layer_in_planes = block(x, is_train, 256, layers[2], layer_in_planes, 2)
|
||||
x, layer_in_planes = block(x, is_train, 512, layers[3], layer_in_planes, 2)
|
||||
|
||||
x = x.reindex_reduce("add", [x.shape[0],x.shape[1]], ["i0","i1"])/x.shape[2]/x.shape[3]
|
||||
x = nn.linear(x, num_classes)
|
||||
|
||||
return x
|
||||
|
||||
@jt.var_scope('resnet18', unique=True)
|
||||
def resnet18(x, is_train):
|
||||
return resnet(x, is_train, make_layer, [2, 2, 2, 2])
|
||||
|
||||
@jt.var_scope('resnet34', unique=True)
|
||||
def resnet34(x, is_train):
|
||||
return resnet(x, is_train, make_layer, [3, 4, 6, 3])
|
||||
|
||||
@jt.var_scope('resnet50', unique=True)
|
||||
def resnet50(x, is_train):
|
||||
return resnet(x, is_train, make_layer_bottleneck, [3, 4, 6, 3])
|
||||
|
||||
@jt.var_scope('resnet101', unique=True)
|
||||
def resnet101(x, is_train):
|
||||
return resnet(x, is_train, make_layer_bottleneck, [3, 4, 23, 3])
|
||||
|
||||
@jt.var_scope('resnet152', unique=True)
|
||||
def resnet152(x, is_train):
|
||||
return resnet(x, is_train, make_layer_bottleneck, [3, 8, 36, 3])
|
||||
|
||||
class BasicBlock(Module):
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
self.conv1 = nn.Conv(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm(planes)
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
if (norm_layer is None):
|
||||
norm_layer = nn.BatchNorm
|
||||
if ((groups != 1) or (base_width != 64)):
|
||||
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
|
||||
if (dilation > 1):
|
||||
raise NotImplementedError('Dilation > 1 not supported in BasicBlock')
|
||||
self.conv1 = conv3x3(inplanes, planes, stride)
|
||||
self.bn1 = norm_layer(planes)
|
||||
self.relu = nn.Relu()
|
||||
self.conv2 = nn.Conv(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm(planes)
|
||||
self.conv2 = conv3x3(planes, planes)
|
||||
self.bn2 = norm_layer(planes)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
self.planes = planes
|
||||
|
||||
def execute(self, x):
|
||||
residual = x
|
||||
identity = x
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
if (self.downsample is not None):
|
||||
identity = self.downsample(x)
|
||||
out += identity
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
class Bottleneck(Module):
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
self.conv1 = nn.Conv(inplanes, planes, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm(planes)
|
||||
self.conv2 = nn.Conv(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm(planes)
|
||||
self.conv3 = nn.Conv(planes, planes * self.expansion, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm(planes * self.expansion)
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
|
||||
super(Bottleneck, self).__init__()
|
||||
if (norm_layer is None):
|
||||
norm_layer = nn.BatchNorm
|
||||
width = (int((planes * (base_width / 64.0))) * groups)
|
||||
self.conv1 = conv1x1(inplanes, width)
|
||||
self.bn1 = norm_layer(width)
|
||||
self.conv2 = conv3x3(width, width, stride, groups, dilation)
|
||||
self.bn2 = norm_layer(width)
|
||||
self.conv3 = conv1x1(width, (planes * self.expansion))
|
||||
self.bn3 = norm_layer((planes * self.expansion))
|
||||
self.relu = nn.Relu()
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
|
||||
def execute(self, x):
|
||||
residual = x
|
||||
|
||||
identity = x
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
if (self.downsample is not None):
|
||||
identity = self.downsample(x)
|
||||
out += identity
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
class ResNet(Module):
|
||||
def __init__(self, block, layers, num_classes=1000):
|
||||
class ResNet(nn.Module):
|
||||
|
||||
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None):
|
||||
super(ResNet, self).__init__()
|
||||
if (norm_layer is None):
|
||||
norm_layer = nn.BatchNorm
|
||||
self._norm_layer = norm_layer
|
||||
self.inplanes = 64
|
||||
self.conv1 = nn.Conv(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
|
||||
self.bn1 = nn.BatchNorm(64)
|
||||
self.dilation = 1
|
||||
if (replace_stride_with_dilation is None):
|
||||
replace_stride_with_dilation = [False, False, False]
|
||||
if (len(replace_stride_with_dilation) != 3):
|
||||
raise ValueError('replace_stride_with_dilation should be None or a 3-element tuple, got {}'.format(replace_stride_with_dilation))
|
||||
self.groups = groups
|
||||
self.base_width = width_per_group
|
||||
self.conv1 = nn.Conv(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
|
||||
self.bn1 = norm_layer(self.inplanes)
|
||||
self.relu = nn.Relu()
|
||||
self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1)
|
||||
self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum')
|
||||
self.layer1 = self._make_layer(block, 64, layers[0])
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
|
||||
self.avgpool = nn.Pool(7, stride=1, op="mean")
|
||||
self.fc = nn.Linear(512 * block.expansion, num_classes)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1):
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
self.fc = nn.Linear((512 * block.expansion), num_classes)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
|
||||
norm_layer = self._norm_layer
|
||||
downsample = None
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv(self.inplanes, planes * block.expansion,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm(planes * block.expansion),
|
||||
)
|
||||
|
||||
previous_dilation = self.dilation
|
||||
if dilate:
|
||||
self.dilation *= stride
|
||||
stride = 1
|
||||
if ((stride != 1) or (self.inplanes != (planes * block.expansion))):
|
||||
downsample = nn.Sequential(conv1x1(self.inplanes, (planes * block.expansion), stride), norm_layer((planes * block.expansion)))
|
||||
layers = []
|
||||
layers.append(block(self.inplanes, planes, stride, downsample))
|
||||
self.inplanes = planes * block.expansion
|
||||
for i in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes))
|
||||
|
||||
layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer))
|
||||
self.inplanes = (planes * block.expansion)
|
||||
for _ in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer))
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def execute(self, x):
|
||||
|
||||
def _forward_impl(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
|
@ -209,29 +137,56 @@ class ResNet(Module):
|
|||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
x = self.avgpool(x)
|
||||
x = jt.reshape(x, [x.shape[0],-1])
|
||||
x = jt.reshape(x, (x.shape[0], (- 1)))
|
||||
x = self.fc(x)
|
||||
|
||||
return x
|
||||
|
||||
def Resnet18():
|
||||
model = ResNet(BasicBlock, [2,2,2,2])
|
||||
def execute(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
def _resnet(block, layers, **kwargs):
|
||||
model = ResNet(block, layers, **kwargs)
|
||||
return model
|
||||
|
||||
def Resnet34():
|
||||
model = ResNet(BasicBlock, [3,4,6,3])
|
||||
return model
|
||||
def Resnet18(**kwargs):
|
||||
return _resnet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
||||
resnet18 = Resnet18
|
||||
|
||||
def Resnet50():
|
||||
model = ResNet(Bottleneck, [3,4,6,3])
|
||||
return model
|
||||
def Resnet34(**kwargs):
|
||||
return _resnet( BasicBlock, [3, 4, 6, 3], **kwargs)
|
||||
resnet34 = Resnet34
|
||||
|
||||
def Resnet101():
|
||||
model = ResNet(Bottleneck, [3,4,23,3])
|
||||
return model
|
||||
def Resnet50(**kwargs):
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
resnet50 = Resnet50
|
||||
|
||||
def Resnet152():
|
||||
model = ResNet(Bottleneck, [3,8,36,3])
|
||||
return model
|
||||
def Resnet101(**kwargs):
|
||||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
resnet101 = Resnet101
|
||||
|
||||
def Resnet152(**kwargs):
|
||||
return _resnet(Bottleneck, [3, 8, 36, 3], **kwargs)
|
||||
resnet152 = Resnet152
|
||||
|
||||
def Resnext50_32x4d(**kwargs):
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 4
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
resnext50_32x4d = Resnext50_32x4d
|
||||
|
||||
def Resnext101_32x8d(**kwargs):
|
||||
kwargs['groups'] = 32
|
||||
kwargs['width_per_group'] = 8
|
||||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
resnext101_32x8d = Resnext101_32x8d
|
||||
|
||||
def Wide_resnet50_2(**kwargs):
|
||||
kwargs['width_per_group'] = (64 * 2)
|
||||
return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
wide_resnet50_2 = Wide_resnet50_2
|
||||
|
||||
def Wide_resnet101_2(**kwargs):
|
||||
kwargs['width_per_group'] = (64 * 2)
|
||||
return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
wide_resnet101_2 = Wide_resnet101_2
|
|
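A minimal forward-pass check of the factories above; the sketch assumes this module is re-exported through jittor.models, as the test suite later in this change does.
import numpy as np
import jittor as jt
import jittor.models as jtmodels

model = jtmodels.resnet18()
model.eval()
x = jt.array(np.random.random((2, 3, 224, 224)).astype('float32'))
y = model(x)
assert y.shape == [2, 1000]          # default num_classes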
@ -0,0 +1,106 @@
|
|||
|
||||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
|
||||
__all__ = ['ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0']
|
||||
|
||||
def channel_shuffle(x, groups):
|
||||
(batchsize, num_channels, height, width) = x.data.shape
|
||||
channels_per_group = (num_channels // groups)
|
||||
x = jt.reshape(x, [batchsize, groups, channels_per_group, height, width])
|
||||
x = jt.transpose(x, (0,2,1,3,4))
|
||||
x = jt.reshape(x, [batchsize, (- 1), height, width])
|
||||
return x
|
||||
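A small check of channel_shuffle above: it reshapes (N, C, H, W) to (N, groups, C/groups, H, W), swaps the two channel axes and flattens back, so the channel groups end up interleaved. The sketch assumes it runs in the context of this module.
import numpy as np
import jittor as jt

x = jt.array(np.arange(8, dtype='float32').reshape(1, 8, 1, 1))
y = channel_shuffle(x, 2)
# groups [0,1,2,3] and [4,5,6,7] are interleaved
assert (y.data.reshape(-1) == [0, 4, 1, 5, 2, 6, 3, 7]).all()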
|
||||
class InvertedResidual(nn.Module):
|
||||
|
||||
def __init__(self, inp, oup, stride):
|
||||
super(InvertedResidual, self).__init__()
|
||||
if (not (1 <= stride <= 3)):
|
||||
raise ValueError('illegal stride value')
|
||||
self.stride = stride
|
||||
branch_features = (oup // 2)
|
||||
assert ((self.stride != 1) or (inp == (branch_features << 1)))
|
||||
if (self.stride > 1):
|
||||
self.branch1 = nn.Sequential(self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm(inp), nn.Conv(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu())
|
||||
else:
|
||||
self.branch1 = nn.Sequential()
|
||||
self.branch2 = nn.Sequential(nn.Conv((inp if (self.stride > 1) else branch_features), branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu(), self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm(branch_features), nn.Conv(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu())
|
||||
|
||||
@staticmethod
|
||||
def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
|
||||
return nn.Conv(i, o, kernel_size, stride, padding, bias=bias, groups=i)
|
||||
|
||||
def execute(self, x):
|
||||
if (self.stride == 1):
|
||||
x1 = x[:,0:x.shape[1]//2]
|
||||
x2 = x[:,x.shape[1]//2:x.shape[1]]
|
||||
out = jt.contrib.concat([x1, self.branch2(x2)], dim=1)
|
||||
else:
|
||||
out = jt.contrib.concat([self.branch1(x), self.branch2(x)], dim=1)
|
||||
out = channel_shuffle(out, 2)
|
||||
return out
|
||||
|
||||
class ShuffleNetV2(nn.Module):
|
||||
|
||||
def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual):
|
||||
super(ShuffleNetV2, self).__init__()
|
||||
if (len(stages_repeats) != 3):
|
||||
raise ValueError('expected stages_repeats as list of 3 positive ints')
|
||||
if (len(stages_out_channels) != 5):
|
||||
raise ValueError('expected stages_out_channels as list of 5 positive ints')
|
||||
self._stage_out_channels = stages_out_channels
|
||||
input_channels = 3
|
||||
output_channels = self._stage_out_channels[0]
|
||||
self.conv1 = nn.Sequential(nn.Conv(input_channels, output_channels, 3, 2, 1, bias=False), nn.BatchNorm(output_channels), nn.Relu())
|
||||
input_channels = output_channels
|
||||
self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum')
|
||||
stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]
|
||||
for (name, repeats, output_channels) in zip(stage_names, stages_repeats, self._stage_out_channels[1:]):
|
||||
seq = [inverted_residual(input_channels, output_channels, 2)]
|
||||
for i in range((repeats - 1)):
|
||||
seq.append(inverted_residual(output_channels, output_channels, 1))
|
||||
setattr(self, name, nn.Sequential(*seq))
|
||||
input_channels = output_channels
|
||||
output_channels = self._stage_out_channels[(- 1)]
|
||||
self.conv5 = nn.Sequential(nn.Conv(input_channels, output_channels, 1, 1, 0, bias=False), nn.BatchNorm(output_channels), nn.Relu())
|
||||
self.fc = nn.Linear(output_channels, num_classes)
|
||||
|
||||
def _forward_impl(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.stage2(x)
|
||||
x = self.stage3(x)
|
||||
x = self.stage4(x)
|
||||
x = self.conv5(x)
|
||||
x = x.mean([2, 3])
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
def execute(self, x):
|
||||
return self._forward_impl(x)
|
||||
|
||||
def _shufflenetv2(arch, *args):
|
||||
model = ShuffleNetV2(*args)
|
||||
return model
|
||||
|
||||
def shufflenet_v2_x0_5():
|
||||
return _shufflenetv2('shufflenetv2_x0.5', [4, 8, 4], [24, 48, 96, 192, 1024])
|
||||
|
||||
def shufflenet_v2_x1_0():
|
||||
return _shufflenetv2('shufflenetv2_x1.0', [4, 8, 4], [24, 116, 232, 464, 1024])
|
||||
|
||||
def shufflenet_v2_x1_5():
|
||||
return _shufflenetv2('shufflenetv2_x1.5', [4, 8, 4], [24, 176, 352, 704, 1024])
|
||||
|
||||
def shufflenet_v2_x2_0():
|
||||
return _shufflenetv2('shufflenetv2_x2.0', [4, 8, 4], [24, 244, 488, 976, 2048])
|
|
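A quick end-to-end shape check for the factories above (a sketch run in the context of this module): conv1 and the max pool each halve the resolution, every stage starts with a stride-2 block, and the head averages over the remaining feature map before the linear classifier.
import jittor as jt

model = shufflenet_v2_x1_0()
model.eval()
x = jt.random([1, 3, 224, 224])
assert model(x).shape == [1, 1000]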
@ -0,0 +1,90 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']
|
||||
|
||||
class Fire(nn.Module):
|
||||
|
||||
def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes):
|
||||
super(Fire, self).__init__()
|
||||
self.inplanes = inplanes
|
||||
self.squeeze = nn.Conv(inplanes, squeeze_planes, kernel_size=1)
|
||||
self.squeeze_activation = nn.Relu()
|
||||
self.expand1x1 = nn.Conv(squeeze_planes, expand1x1_planes, kernel_size=1)
|
||||
self.expand1x1_activation = nn.Relu()
|
||||
self.expand3x3 = nn.Conv(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)
|
||||
self.expand3x3_activation = nn.Relu()
|
||||
|
||||
def execute(self, x):
|
||||
x = self.squeeze_activation(self.squeeze(x))
|
||||
return jt.contrib.concat([self.expand1x1_activation(self.expand1x1(x)), self.expand3x3_activation(self.expand3x3(x))], dim=1)
|
||||
|
||||
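A shape sketch for the Fire module above (run in the context of this module): the squeeze convolution narrows the input to squeeze_planes channels, then the 1x1 and 3x3 expand branches are concatenated, so the output has expand1x1_planes + expand3x3_planes channels at the same resolution.
import jittor as jt

fire = Fire(96, 16, 64, 64)
x = jt.random([2, 96, 28, 28])
assert fire(x).shape == [2, 128, 28, 28]   # 64 + 64 expanded channels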
class SqueezeNet(nn.Module):
|
||||
|
||||
def __init__(self, version='1_0', num_classes=1000):
|
||||
super(SqueezeNet, self).__init__()
|
||||
self.num_classes = num_classes
|
||||
if (version == '1_0'):
|
||||
self.features = nn.Sequential(
|
||||
nn.Conv(3, 96, kernel_size=7, stride=2),
|
||||
nn.Relu(),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(96, 16, 64, 64),
|
||||
Fire(128, 16, 64, 64),
|
||||
Fire(128, 32, 128, 128),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(256, 32, 128, 128),
|
||||
Fire(256, 48, 192, 192),
|
||||
Fire(384, 48, 192, 192),
|
||||
Fire(384, 64, 256, 256),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(512, 64, 256, 256)
|
||||
)
|
||||
elif (version == '1_1'):
|
||||
self.features = nn.Sequential(
|
||||
nn.Conv(3, 64, kernel_size=3, stride=2),
|
||||
nn.Relu(),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(64, 16, 64, 64),
|
||||
Fire(128, 16, 64, 64),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(128, 32, 128, 128),
|
||||
Fire(256, 32, 128, 128),
|
||||
nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'),
|
||||
Fire(256, 48, 192, 192),
|
||||
Fire(384, 48, 192, 192),
|
||||
Fire(384, 64, 256, 256),
|
||||
Fire(512, 64, 256, 256)
|
||||
)
|
||||
else:
|
||||
raise ValueError('Unsupported SqueezeNet version {version}: 1_0 or 1_1 expected'.format(version=version))
|
||||
final_conv = nn.Conv(512, self.num_classes, kernel_size=1)
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Dropout(p=0.5),
|
||||
final_conv,
|
||||
nn.Relu(),
|
||||
nn.AdaptiveAvgPool2d((1, 1))
|
||||
)
|
||||
|
||||
def execute(self, x):
|
||||
x = self.features(x)
|
||||
x = self.classifier(x)
|
||||
return jt.reshape(x, (x.shape[0], (- 1)))
|
||||
|
||||
def _squeezenet(version, **kwargs):
|
||||
model = SqueezeNet(version, **kwargs)
|
||||
return model
|
||||
|
||||
def squeezenet1_0(**kwargs):
|
||||
return _squeezenet('1_0', **kwargs)
|
||||
|
||||
def squeezenet1_1(**kwargs):
|
||||
return _squeezenet('1_1', **kwargs)
|
|
@ -6,21 +6,21 @@
|
|||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
# This model is generated by pytorch converter.
|
||||
import jittor as jt
|
||||
from jittor import nn
|
||||
|
||||
|
||||
__all__ = [
|
||||
'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
|
||||
'vgg19_bn', 'vgg19',
|
||||
]
|
||||
|
||||
|
||||
class VGG(nn.Module):
|
||||
|
||||
def __init__(self, features, num_classes=1000, init_weights=True):
|
||||
super(VGG, self).__init__()
|
||||
self.features = features
|
||||
self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Linear(512 * 7 * 7, 4096),
|
||||
nn.ReLU(),
|
||||
|
@ -33,6 +33,7 @@ class VGG(nn.Module):
|
|||
|
||||
def execute(self, x):
|
||||
x = self.features(x)
|
||||
x = self.avgpool(x)
|
||||
x = jt.reshape(x, [x.shape[0],-1])
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
@ -66,57 +67,33 @@ def _vgg(arch, cfg, batch_norm, **kwargs):
|
|||
return model
|
||||
|
||||
|
||||
def VGG11(**kwargs):
|
||||
r"""VGG 11-layer model (configuration "A") from
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg11(**kwargs):
|
||||
return _vgg('vgg11', 'A', False, **kwargs)
|
||||
|
||||
|
||||
def VGG11_bn(**kwargs):
|
||||
r"""VGG 11-layer model (configuration "A") with batch normalization
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg11_bn(**kwargs):
|
||||
return _vgg('vgg11_bn', 'A', True, **kwargs)
|
||||
|
||||
|
||||
def VGG13(**kwargs):
|
||||
r"""VGG 13-layer model (configuration "B")
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg13(**kwargs):
|
||||
return _vgg('vgg13', 'B', False, **kwargs)
|
||||
|
||||
|
||||
def VGG13_bn(**kwargs):
|
||||
r"""VGG 13-layer model (configuration "B") with batch normalization
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg13_bn(**kwargs):
|
||||
return _vgg('vgg13_bn', 'B', True, **kwargs)
|
||||
|
||||
|
||||
def VGG16(**kwargs):
|
||||
r"""VGG 16-layer model (configuration "D")
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg16(**kwargs):
|
||||
return _vgg('vgg16', 'D', False, **kwargs)
|
||||
|
||||
|
||||
def VGG16_bn(**kwargs):
|
||||
r"""VGG 16-layer model (configuration "D") with batch normalization
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg16_bn(**kwargs):
|
||||
return _vgg('vgg16_bn', 'D', True, **kwargs)
|
||||
|
||||
|
||||
def VGG19(**kwargs):
|
||||
r"""VGG 19-layer model (configuration "E")
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg19(**kwargs):
|
||||
return _vgg('vgg19', 'E', False, **kwargs)
|
||||
|
||||
|
||||
def VGG19_bn(**kwargs):
|
||||
r"""VGG 19-layer model (configuration 'E') with batch normalization
|
||||
`"Very Deep Convolutional Networks For Large-Scale Image Recognition" <https://arxiv.org/pdf/1409.1556.pdf>`_
|
||||
"""
|
||||
def vgg19_bn(**kwargs):
|
||||
return _vgg('vgg19_bn', 'E', True, **kwargs)
|
|
@ -1,6 +1,7 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Guoye Yang <498731903@qq.com>
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Meng-Hao Guo <guomenghao1997@gmail.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
|
@ -13,7 +14,7 @@ import jittor as jt
|
|||
from jittor import init, Module
|
||||
import numpy as np
|
||||
import math
|
||||
from jittor.pool import Pool, pool
|
||||
from jittor.pool import Pool, pool, AdaptiveAvgPool2d
|
||||
|
||||
def matmul_transpose(a, b):
|
||||
'''
|
||||
|
@ -41,30 +42,6 @@ jt.Var.__imatmul__ = lambda a,b: a.assign(matmul(a,b))
|
|||
def get_init_var_rand(shape, dtype):
|
||||
return jt.array(np.random.normal(0.0, 1.0, shape).astype(np.float32))
|
||||
|
||||
@jt.var_scope('batch_norm')
|
||||
def batch_norm(x, is_train, eps=1e-5, momentum=0.1):
|
||||
w = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))
|
||||
b = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
|
||||
running_mean = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 0.0))
|
||||
running_var = jt.make_var([x.shape[1]], init=lambda *a: init.constant(*a, 1.0))
|
||||
|
||||
w = w.broadcast(x, [0,2,3])
|
||||
b = b.broadcast(x, [0,2,3])
|
||||
if is_train:
|
||||
xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
|
||||
xvar = x2mean-xmean*xmean
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+eps)
|
||||
|
||||
running_mean += (xmean.sum([0,2,3])-running_mean)*momentum
|
||||
running_var += (xvar.sum([0,2,3])-running_var)*momentum
|
||||
else:
|
||||
running_mean = running_mean.broadcast(x, [0,2,3])
|
||||
running_var = running_var.broadcast(x, [0,2,3])
|
||||
norm_x = (x-running_mean)/jt.sqrt(running_var+eps)
|
||||
|
||||
return norm_x * w + b
|
||||
|
||||
@jt.var_scope('conv')
|
||||
def conv(x, in_planes, out_planes, kernel_size, padding, stride = 1, init_method=None):
|
||||
Kw = kernel_size
|
||||
|
@ -99,6 +76,7 @@ def linear(x, n):
|
|||
|
||||
def relu(x): return jt.maximum(x, 0)
|
||||
def leaky_relu(x, scale): return jt.ternary(x>0, x, x*scale)
|
||||
def relu6(x): return jt.minimum(jt.maximum(x, 0), 6)
|
||||
|
||||
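A numeric sanity sketch for the activations above (plain module-level functions, runnable in the context of this module): relu clamps negatives to 0, relu6 additionally clamps to 6, and leaky_relu scales negatives by the given factor.
import numpy as np
import jittor as jt

x = jt.float32([-2.0, 0.5, 8.0])
assert np.allclose(relu(x).data, [0.0, 0.5, 8.0])
assert np.allclose(relu6(x).data, [0.0, 0.5, 6.0])
assert np.allclose(leaky_relu(x, 0.1).data, [-0.2, 0.5, 8.0])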
# TODO: dims of 4 will cause slow execution
|
||||
def cross_entropy_loss(output, target, ignore_index=None):
|
||||
|
@ -113,7 +91,7 @@ def cross_entropy_loss(output, target, ignore_index=None):
|
|||
target = target.reshape((-1, ))
|
||||
target = target.broadcast(output, [1])
|
||||
target = target.index(1) == target
|
||||
|
||||
|
||||
output = output - output.max([1], keepdims=True)
|
||||
loss = output.exp().sum(1).log()
|
||||
loss = loss - (output*target).sum(1)
|
||||
|
@ -127,17 +105,22 @@ class SGD(object):
|
|||
optimizer = nn.SGD(model.parameters(), lr)
|
||||
optimizer.step(loss)
|
||||
"""
|
||||
def __init__(self, parameters, lr, momentum=0, weight_decay=0, dampening=0, nesterov=False):
|
||||
def __init__(self, parameters, lr, momentum=0, weight_decay=0, dampening=0, nesterov=False, param_sync_iter=10000):
|
||||
self.lr = lr
|
||||
self.momentum = momentum
|
||||
self.weight_decay = weight_decay
|
||||
self.dampening = dampening
|
||||
self.nesterov = nesterov
|
||||
self.sgd_step = 0
|
||||
self.param_sync_iter = param_sync_iter
|
||||
|
||||
self.no_grad_parameters = []
|
||||
self.parameters = []
|
||||
self.values = []
|
||||
for p in parameters:
|
||||
# broadcast parameters from node 0 at init
|
||||
if jt.mpi:
|
||||
p.assign(p.mpi_broadcast().detach())
|
||||
if p.is_stop_grad():
|
||||
self.no_grad_parameters.append(p)
|
||||
continue
|
||||
|
@ -145,8 +128,15 @@ class SGD(object):
|
|||
self.values.append(jt.zeros(p.shape, p.dtype).stop_fuse().stop_grad())
|
||||
|
||||
def step(self, loss):
|
||||
self.sgd_step += 1
|
||||
ps = self.parameters
|
||||
gs = jt.grad(loss, ps)
|
||||
if jt.mpi:
|
||||
for g in gs:
|
||||
g.assign(g.mpi_all_reduce("mean"))
|
||||
if self.sgd_step%self.param_sync_iter==0:
|
||||
for p in ps:
|
||||
p.assign(p.mpi_all_reduce("mean"))
|
||||
for p, g, v in zip(ps, gs, self.values):
|
||||
dp = p * self.weight_decay + g
|
||||
v.assign(self.momentum * v + dp * (1 - self.dampening))
|
||||
|
@ -166,19 +156,22 @@ class Adam(object):
|
|||
optimizer = nn.Adam(model.parameters(), lr)
|
||||
optimizer.step(loss)
|
||||
"""
|
||||
def __init__(self, parameters, lr, eps=1e-8, betas=(0.9, 0.999), weight_decay=0):
|
||||
def __init__(self, parameters, lr, eps=1e-8, betas=(0.9, 0.999), weight_decay=0, param_sync_iter=10000):
|
||||
self.lr = lr
|
||||
self.eps = eps
|
||||
self.betas = betas
|
||||
# self.weight_decay = weight_decay
|
||||
assert weight_decay==0, "weight_decay is not supported yet"
|
||||
self.adam_step = 0
|
||||
self.param_sync_iter = param_sync_iter
|
||||
|
||||
self.no_grad_parameters = []
|
||||
self.parameters = []
|
||||
self.values = []
|
||||
self.m = []
|
||||
for p in parameters:
|
||||
if jt.mpi:
|
||||
p.assign(p.mpi_broadcast().detach())
|
||||
if p.is_stop_grad():
|
||||
self.no_grad_parameters.append(p)
|
||||
continue
|
||||
|
@ -187,9 +180,15 @@ class Adam(object):
|
|||
self.m.append(jt.zeros(p.shape, p.dtype).stop_fuse().stop_grad())
|
||||
|
||||
def step(self, loss):
|
||||
self.adam_step += 1
|
||||
ps = self.parameters
|
||||
gs = jt.grad(loss, ps)
|
||||
self.adam_step += 1
|
||||
if jt.mpi:
|
||||
for g in gs:
|
||||
g.assign(g.mpi_all_reduce("mean"))
|
||||
if self.adam_step%self.param_sync_iter==0:
|
||||
for p in ps:
|
||||
p.assign(p.mpi_all_reduce("mean"))
|
||||
n, (b0, b1) = float(self.adam_step), self.betas
|
||||
for p, g, v, m in zip(ps, gs, self.values, self.m):
|
||||
m.assign(b0 * m + (1-b0) * g)
|
||||
|
@ -219,10 +218,11 @@ class Dropout(Module):
|
|||
if self.p > 0 and self.is_train:
|
||||
if self.p == 1:
|
||||
noise = jt.zeros(input.shape)
|
||||
output = output * noise
|
||||
else:
|
||||
noise = jt.random(input.shape)
|
||||
noise = (noise > self.p).int()
|
||||
output = output * noise
|
||||
output = output * noise / (1.0 - self.p) # div keep prob
|
||||
return output
|
||||
|
||||
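A sketch of the inverted-dropout scaling introduced above: at training time the kept activations are divided by the keep probability 1 - p, so every surviving value of a constant input becomes 1/(1-p) and the expected activation is unchanged. Toggling is_train directly is only an assumption for this sketch.
import jittor as jt
from jittor import nn

drop = nn.Dropout(p=0.5)
drop.is_train = True                 # assumption: enable training behaviour for the sketch
x = jt.float32([1.0] * 1000)
y = drop(x)
# kept entries become 1 / (1 - 0.5) = 2.0, dropped entries become 0.0
assert set(map(float, y.data.reshape(-1))) <= {0.0, 2.0}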
class Linear(Module):
|
||||
|
@ -240,9 +240,10 @@ class Linear(Module):
|
|||
return x
|
||||
|
||||
class BatchNorm(Module):
|
||||
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True):
|
||||
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True, sync=True):
|
||||
assert affine == None
|
||||
|
||||
self.sync = sync
|
||||
self.num_features = num_features
|
||||
self.is_train = is_train
|
||||
self.eps = eps
|
||||
|
@ -256,6 +257,10 @@ class BatchNorm(Module):
|
|||
if self.is_train:
|
||||
xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
|
||||
x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
|
||||
if self.sync and jt.mpi:
|
||||
xmean = xmean.mpi_all_reduce("mean")
|
||||
x2mean = x2mean.mpi_all_reduce("mean")
|
||||
|
||||
xvar = x2mean-xmean*xmean
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
self.running_mean += (xmean.sum([0,2,3])-self.running_mean)*self.momentum
|
||||
|
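The sync flag above only matters under mpirun: with sync=False the layer keeps per-process batch statistics instead of all-reducing xmean and x2mean. A one-line sketch:
from jittor import nn

bn = nn.BatchNorm(64, sync=False)    # skip the MPI all-reduce of batch statistics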
@ -270,7 +275,9 @@ class BatchNorm(Module):
|
|||
|
||||
Relu = jt.make_module(relu)
|
||||
ReLU = Relu
|
||||
Leaky_relu = jt.make_module(leaky_relu, 2)
|
||||
Leaky_relu = jt.make_module(leaky_relu, 0.01)
|
||||
LeakyReLU = Leaky_relu
|
||||
ReLU6 = jt.make_module(relu6)
|
||||
Softmax = jt.make_module(softmax, 2)
|
||||
|
||||
class Conv(Module):
|
||||
|
@ -281,6 +288,9 @@ class Conv(Module):
|
|||
self.stride = stride if isinstance(stride, tuple) else (stride, stride)
|
||||
self.padding = padding if isinstance(padding, tuple) else (padding, padding)
|
||||
self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
|
||||
self.groups = groups
|
||||
assert in_channels % groups == 0, 'in_channels must be divisible by groups'
|
||||
assert out_channels % groups == 0, 'out_channels must be divisible by groups'
|
||||
Kh, Kw = self.kernel_size
|
||||
self.groups = groups
|
||||
assert in_channels % groups == 0, 'in_channels must be divisible by groups'
|
||||
|
@ -403,7 +413,7 @@ class Tanh(Module):
|
|||
def __init__(self):
|
||||
super().__init__()
|
||||
def execute(self, x) :
|
||||
return ((jt.exp (x) - jt.exp(-x)) / (jt.exp(x) + jt.exp (-x)))
|
||||
return x.tanh()
|
||||
|
||||
class Sigmoid(Module):
|
||||
def __init__(self):
|
||||
|
|
|
@ -48,12 +48,12 @@ class Pool(Module):
|
|||
int s2 = blockDim.y * gridDim.x;
|
||||
int i1 = blockIdx.y;
|
||||
int i0 = blockIdx.z;
|
||||
for (int i3 = p3; i3 < outshape3; i3 += s3)
|
||||
for (int i2 = p2; i2 < outshape2; i2 += s2) {{
|
||||
for (int i3 = p3; i3 < out_shape3; i3 += s3)
|
||||
for (int i2 = p2; i2 < out_shape2; i2 += s2) {{
|
||||
int k3 = i3*{self.stride}-{self.padding};
|
||||
int k2 = i2*{self.stride}-{self.padding};
|
||||
int k3_ = min(k3 + {self.kernel_size}, in0shape3);
|
||||
int k2_ = min(k2 + {self.kernel_size}, in0shape2);
|
||||
int k3_ = min(k3 + {self.kernel_size}, in0_shape3);
|
||||
int k2_ = min(k2 + {self.kernel_size}, in0_shape2);
|
||||
k3 = max(0, k3);
|
||||
k2 = max(0, k2);
|
||||
@out(i0, i1, i2, i3) = @in0(i0, i1, k2, k3);
|
||||
|
@ -62,11 +62,11 @@ class Pool(Module):
|
|||
@out(i0, i1, i2, i3) = {op}(@out(i0, i1, i2, i3), @in0(i0, i1, p, q));
|
||||
}}
|
||||
}}
|
||||
int tx = min(1024, outshape3);
|
||||
int ty = min(1024 / tx, outshape2);
|
||||
int bx = (outshape2 - 1) / ty + 1;
|
||||
int by = outshape1;
|
||||
int bz = outshape0;
|
||||
int tx = min(1024, out_shape3);
|
||||
int ty = min(1024 / tx, out_shape2);
|
||||
int bx = (out_shape2 - 1) / ty + 1;
|
||||
int by = out_shape1;
|
||||
int bz = out_shape0;
|
||||
dim3 s1(bx, by, bz);
|
||||
dim3 s2(tx, ty);
|
||||
kernel1<<<s1, s2>>>(@ARGS);
|
||||
|
@ -80,12 +80,12 @@ class Pool(Module):
|
|||
int s2 = blockDim.y * gridDim.x;
|
||||
int i1 = blockIdx.y;
|
||||
int i0 = blockIdx.z;
|
||||
for (int i3 = p3; i3 < poutshape3; i3 += s3)
|
||||
for (int i2 = p2; i2 < poutshape2; i2 += s2) {{
|
||||
for (int i3 = p3; i3 < pout_shape3; i3 += s3)
|
||||
for (int i2 = p2; i2 < pout_shape2; i2 += s2) {{
|
||||
int k3 = i3*{self.stride}-{self.padding};
|
||||
int k2 = i2*{self.stride}-{self.padding};
|
||||
int k3_ = min(k3 + {self.kernel_size}, in0shape3);
|
||||
int k2_ = min(k2 + {self.kernel_size}, in0shape2);
|
||||
int k3_ = min(k3 + {self.kernel_size}, in0_shape3);
|
||||
int k2_ = min(k2 + {self.kernel_size}, in0_shape2);
|
||||
k3 = max(0, k3);
|
||||
k2 = max(0, k2);
|
||||
int bo=1;
|
||||
|
@ -98,25 +98,25 @@ class Pool(Module):
|
|||
}}
|
||||
}}
|
||||
}}
|
||||
cudaMemsetAsync(outp, 0, out->size);
|
||||
int tx = min(1024, poutshape3);
|
||||
int ty = min(1024 / tx, poutshape2);
|
||||
int bx = (poutshape2 - 1) / ty + 1;
|
||||
int by = poutshape1;
|
||||
int bz = poutshape0;
|
||||
cudaMemsetAsync(out_p, 0, out->size);
|
||||
int tx = min(1024, pout_shape3);
|
||||
int ty = min(1024 / tx, pout_shape2);
|
||||
int bx = (pout_shape2 - 1) / ty + 1;
|
||||
int by = pout_shape1;
|
||||
int bz = pout_shape0;
|
||||
dim3 s1_(bx, by, bz);
|
||||
dim3 s2_(tx, ty);
|
||||
kernel3<<<s1_, s2_>>>(@ARGS);
|
||||
'''],
|
||||
cpu_src=f'''
|
||||
for (int i0=0; i0<outshape0; i0++)
|
||||
for (int i1=0; i1<outshape1; i1++)
|
||||
for (int i2=0; i2<outshape2; i2++)
|
||||
for (int i3=0; i3<outshape3; i3++) {{
|
||||
for (int i0=0; i0<out_shape0; i0++)
|
||||
for (int i1=0; i1<out_shape1; i1++)
|
||||
for (int i2=0; i2<out_shape2; i2++)
|
||||
for (int i3=0; i3<out_shape3; i3++) {{
|
||||
int k2 = i2*{self.stride}-{self.padding};
|
||||
int k3 = i3*{self.stride}-{self.padding};
|
||||
int k2_ = std::min(k2 + {self.kernel_size}, in0shape2);
|
||||
int k3_ = std::min(k3 + {self.kernel_size}, in0shape3);
|
||||
int k2_ = std::min(k2 + {self.kernel_size}, in0_shape2);
|
||||
int k3_ = std::min(k3 + {self.kernel_size}, in0_shape3);
|
||||
k2 = std::max(0, k2);
|
||||
k3 = std::max(0, k3);
|
||||
@out(i0, i1, i2, i3) = @in0(i0, i1, k2, k3);
|
||||
|
@ -126,19 +126,19 @@ class Pool(Module):
|
|||
}}
|
||||
''',
|
||||
cpu_grad_src = [f'''
|
||||
for (int i=0; i<outshape0; i++)
|
||||
for (int j=0; j<outshape1; j++)
|
||||
for (int k=0; k<outshape2; k++)
|
||||
for (int l=0; l<outshape3; l++) @out(i,j,k,l) = 0;
|
||||
for (int i=0; i<out_shape0; i++)
|
||||
for (int j=0; j<out_shape1; j++)
|
||||
for (int k=0; k<out_shape2; k++)
|
||||
for (int l=0; l<out_shape3; l++) @out(i,j,k,l) = 0;
|
||||
|
||||
for (int i0=0; i0<poutshape0; i0++)
|
||||
for (int i1=0; i1<poutshape1; i1++)
|
||||
for (int i2=0; i2<poutshape2; i2++)
|
||||
for (int i3=0; i3<poutshape3; i3++) {{
|
||||
for (int i0=0; i0<pout_shape0; i0++)
|
||||
for (int i1=0; i1<pout_shape1; i1++)
|
||||
for (int i2=0; i2<pout_shape2; i2++)
|
||||
for (int i3=0; i3<pout_shape3; i3++) {{
|
||||
int k3 = i3*{self.stride}-{self.padding};
|
||||
int k2 = i2*{self.stride}-{self.padding};
|
||||
int k3_ = std::min(k3 + {self.kernel_size}, in0shape3);
|
||||
int k2_ = std::min(k2 + {self.kernel_size}, in0shape2);
|
||||
int k3_ = std::min(k3 + {self.kernel_size}, in0_shape3);
|
||||
int k2_ = std::min(k2 + {self.kernel_size}, in0_shape2);
|
||||
k3 = std::max(0, k3);
|
||||
k2 = std::max(0, k2);
|
||||
int bo=1;
|
||||
|
@ -161,5 +161,34 @@ class Pool(Module):
|
|||
])
|
||||
return xx.reduce(self.op, [4,5])
|
||||
|
||||
def pool(x, size, op, padding, stride = 1):
|
||||
return Pool(size, stride, padding, op=op)(x)
|
||||
|
||||
class AdaptiveAvgPool2d(Module):
|
||||
def __init__(self, output_size):
|
||||
self.output_size = output_size
|
||||
|
||||
def execute(self, x):
|
||||
if isinstance(self.output_size, int):
|
||||
oh = self.output_size
|
||||
ow = self.output_size
|
||||
elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list):
|
||||
oh = x.shape[2] if self.output_size[0] is None else self.output_size[0]
|
||||
ow = x.shape[3] if self.output_size[1] is None else self.output_size[1]
|
||||
else:
|
||||
raise TypeError(f"AdaptiveAvgPool2d only support int, typle or list input. Not support {type(self.output_size)} yet.")
|
||||
N,C,H,W = x.shape
|
||||
self.sh = math.floor(H / oh)
|
||||
self.sw = math.floor(W / ow)
|
||||
self.ksh = H - (oh - 1) * self.sh
|
||||
self.ksw = W - (ow - 1) * self.sw
|
||||
h = (H-self.ksh)//self.sh+1
|
||||
w = (W-self.ksw)//self.sw+1
|
||||
xx = x.reindex([N,C,h,w,self.ksh,self.ksw], [
|
||||
"i0", # Nid
|
||||
"i1", # Cid
|
||||
f"i2*{self.sh}+i4", # Hid
|
||||
f"i3*{self.sw}+i5", # Wid
|
||||
])
|
||||
return xx.reduce("mean", [4,5])
|
||||
|
||||
def pool(x, kernel_size, op, padding=0, stride = 1):
|
||||
return Pool(kernel_size, stride, padding, op=op)(x)
|
|
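A shape sketch for AdaptiveAvgPool2d above: the stride is floor(H/oh) and the kernel size is H - (oh-1)*stride per axis, so any input resolution is reduced to exactly the requested output size (a None entry keeps that axis unchanged).
import jittor as jt
from jittor import nn

x = jt.random([2, 3, 13, 9])
assert nn.AdaptiveAvgPool2d((1, 1))(x).shape == [2, 3, 1, 1]
assert nn.AdaptiveAvgPool2d(4)(x).shape == [2, 3, 4, 4]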
@ -239,6 +239,46 @@ reg = re.compile(
|
|||
# attrs args $5
|
||||
, re.DOTALL)
|
||||
|
||||
def generate_error_code_from_func_header(func_head, target_scope_name, name, dfs, basename, h, class_info):
|
||||
# func_head is a string like:
|
||||
# (PyObject* self, PyObject** args, int64 n, PyObject* kw) -> PyObject*
|
||||
lib_name = os.path.basename(h).split("_")[0]
|
||||
# TODO: fix/add var help
|
||||
if target_scope_name == "Var": target_scope_name = None
|
||||
if target_scope_name:
|
||||
if target_scope_name == "flags":
|
||||
help_name = "flags"
|
||||
else:
|
||||
help_name = ""+target_scope_name+'.'+name
|
||||
else:
|
||||
help_name = name
|
||||
if lib_name in ["mpi", "nccl", "cudnn", "curand", "cublas", "mkl"]:
|
||||
help_name = lib_name+'.'+help_name
|
||||
help_cmd = f"help(jt.{help_name})"
|
||||
|
||||
LOG.vvv("gen err from func_head", func_head)
|
||||
args = func_head[1:].split(")")[0].split(",")
|
||||
error_code = f" << \"Wrong inputs arguments, Please refer to examples(e.g. {help_cmd}).\""
|
||||
error_code += r' << "\n\nTypes of your inputs are:\n"'
|
||||
for arg in args:
|
||||
arg = arg.strip()
|
||||
if arg.startswith("PyObject* "):
|
||||
t, n = arg.split(' ')
|
||||
if n == "args" or n == "_args":
|
||||
error_code += f" << PyTupleArgPrinter{{{n}, \"args\"}} "
|
||||
elif n == "kw":
|
||||
error_code += f" << PyKwArgPrinter{{{n}}} "
|
||||
else:
|
||||
error_code += f" << PyArgPrinter{{{n}, \"{n}\"}} "
|
||||
elif arg.startswith("PyObject** "):
|
||||
t, n = arg.split(' ')
|
||||
error_code += f" << PyFastCallArgPrinter{{{n}, n, kw}} "
|
||||
break
|
||||
else:
|
||||
LOG.vvv("Unhandled arg", arg)
|
||||
LOG.vvv("gen err from func_head", func_head, " -> ", error_code)
|
||||
return error_code
|
||||
|
||||
def compile_src(src, h, basename):
|
||||
res = list(reg.finditer(src, re.S))
|
||||
if len(res)==0: return
|
||||
|
@ -586,7 +626,7 @@ def compile_src(src, h, basename):
|
|||
|
||||
arr_func_return = []
|
||||
doc_all = ""
|
||||
decs = "Declarations:\n"
|
||||
decs = "The function declarations are:\n"
|
||||
for did, has_return in enumerate(arr_has_return):
|
||||
df = dfs[did]
|
||||
func_call = arr_func_call[did]
|
||||
|
@ -595,7 +635,7 @@ def compile_src(src, h, basename):
|
|||
doc_all += df["doc"]
|
||||
doc_all += "\nDeclaration:\n"
|
||||
doc_all += df["dec"]
|
||||
decs += df["dec"]+'\n'
|
||||
decs += " " + df["dec"]+'\n'
|
||||
if has_return:
|
||||
assert "-> int" not in func_head
|
||||
if "-> PyObject*" in func_head:
|
||||
|
@ -618,6 +658,8 @@ def compile_src(src, h, basename):
|
|||
assert "-> void" in func_head
|
||||
arr_func_return.append(f"{func_call};return")
|
||||
func_return_failed = "return"
|
||||
# generate error msg when not a valid call
|
||||
error_log_code = generate_error_code_from_func_header(func_head, target_scope_name, name, dfs, basename ,h, class_info)
|
||||
func = f"""
|
||||
{func_cast}[]{func_head} {{
|
||||
try {{
|
||||
|
@ -633,11 +675,15 @@ def compile_src(src, h, basename):
|
|||
'''
|
||||
for did in range(len(arr_func_return))
|
||||
])}
|
||||
LOGf << "Not a valid call";
|
||||
LOGf << "Not a valid call.";
|
||||
}} catch (const std::exception& e) {{
|
||||
PyErr_Format(PyExc_RuntimeError, "%s\\n%s",
|
||||
e.what(),
|
||||
R""({decs})""
|
||||
std::stringstream ss;
|
||||
ss {error_log_code};
|
||||
PyErr_Format(PyExc_RuntimeError,
|
||||
"%s\\n%s\\nFailed reason:%s",
|
||||
ss.str().c_str(),
|
||||
R""({decs})"",
|
||||
e.what()
|
||||
);
|
||||
}}
|
||||
{func_return_failed};
|
||||
|
@ -711,6 +757,7 @@ def compile_src(src, h, basename):
|
|||
has_seq = class_name == "NanoVector"
|
||||
code = f"""
|
||||
#include "pyjt/py_converter.h"
|
||||
#include "pyjt/py_arg_printer.h"
|
||||
#include "common.h"
|
||||
#include "{include_name}"
|
||||
|
||||
|
|
|
@ -5,23 +5,33 @@
|
|||
# ***************************************************************
|
||||
|
||||
if __name__ == "__main__":
|
||||
import unittest
|
||||
import unittest, os
|
||||
|
||||
suffix = "__main__.py"
|
||||
assert __file__.endswith(suffix)
|
||||
test_dir = __file__[:-len(suffix)]
|
||||
import os
|
||||
|
||||
skip_l = int(os.environ.get("test_skip_l", "0"))
|
||||
skip_r = int(os.environ.get("test_skip_r", "1000000"))
|
||||
test_only = None
|
||||
if "test_only" in os.environ:
|
||||
test_only = set(os.environ.get("test_only").split(","))
|
||||
|
||||
test_files = os.listdir(test_dir)
|
||||
for test_file in test_files:
|
||||
test_files = sorted(test_files)
|
||||
suite = unittest.TestSuite()
|
||||
|
||||
for _, test_file in enumerate(test_files):
|
||||
if not test_file.startswith("test_"):
|
||||
continue
|
||||
if _ < skip_l or _ > skip_r:
|
||||
continue
|
||||
test_name = test_file.split(".")[0]
|
||||
exec(f"from . import {test_name}")
|
||||
test_mod = globals()[test_name]
|
||||
print(test_name)
|
||||
for i in dir(test_mod):
|
||||
obj = getattr(test_mod, i)
|
||||
if isinstance(obj, type) and issubclass(obj, unittest.TestCase):
|
||||
globals()[test_name+"_"+i] = obj
|
||||
if test_only and test_name not in test_only:
|
||||
continue
|
||||
|
||||
unittest.main()
|
||||
print("Add Test", _, test_name)
|
||||
suite.addTest(unittest.defaultTestLoader.loadTestsFromName(
|
||||
"jittor.test."+test_name))
|
||||
|
||||
unittest.TextTestRunner(verbosity=3).run(suite)
|
|
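A sketch of driving the selective runner above from a parent process: test_skip_l and test_skip_r bound the (sorted) index range, and test_only names an explicit comma-separated subset of test modules; the module names below are just examples.
import os, subprocess, sys

env = dict(os.environ, test_only="test_array,test_code")     # example subset
subprocess.run([sys.executable, "-m", "jittor.test", "-v"], env=env, check=True)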
@ -18,6 +18,7 @@ import pickle as pk
|
|||
skip_this_test = False
|
||||
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
from torch.nn import MaxPool2d, Sequential
|
||||
except:
|
||||
|
@ -45,6 +46,7 @@ def check(jt_model, torch_model, shape, near_data):
|
|||
|
||||
@unittest.skipIf(skip_this_test, "No Torch found")
|
||||
class TestArgPoolOp(unittest.TestCase):
|
||||
@unittest.skipIf(not jt.compiler.has_cuda, "No cuda found")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_cuda(self):
|
||||
jt_model = jt.nn.Sequential(Pool(2, 2, 0), Pool(2, 2, 0), Pool(2, 2, 0, ceil_mode=True), Pool(2, 2, 0), Pool(2, 2, 0), Pool(3, 1, 1))
|
||||
|
@ -57,15 +59,18 @@ class TestArgPoolOp(unittest.TestCase):
|
|||
check(jt_model, torch_model, [1,1,300,300], True)
|
||||
|
||||
def test_cpu_(self):
|
||||
x = jt.random([32, 128, 157, 300])
|
||||
# x = jt.random([32, 128, 157, 300])
|
||||
x = jt.random([4, 128, 157, 300])
|
||||
x = jt.nn.pool(x, 2, "maximum", 0, 2)
|
||||
|
||||
def test_cpu(self):
|
||||
jt_model = jt.nn.Sequential(Pool(2, 2, 0), Pool(2, 2, 0), Pool(2, 2, 0, ceil_mode=True), Pool(2, 2, 0), Pool(2, 2, 0), Pool(3, 1, 1))
|
||||
torch_model = Sequential(MaxPool2d(2, 2, 0), MaxPool2d(2, 2, 0), MaxPool2d(2, 2, 0, ceil_mode=True), MaxPool2d(2, 2, 0), MaxPool2d(2, 2, 0), MaxPool2d(3, 1, 1))
|
||||
shape = [64, 64, 300, 300]
|
||||
# shape = [64, 64, 300, 300]
|
||||
shape = [4, 64, 300, 300]
|
||||
check(jt_model, torch_model, shape, False)
|
||||
shape = [32, 128, 157, 300]
|
||||
# shape = [32, 128, 157, 300]
|
||||
shape = [4, 128, 157, 300]
|
||||
check(jt_model, torch_model, shape, False)
|
||||
for i in range(10):
|
||||
check(jt_model, torch_model, [1,1,300,300], True)
|
||||
|
|
|
@ -106,7 +106,7 @@ class TestArray(unittest.TestCase):
|
|||
with jt.flag_scope(use_cuda=1):
|
||||
a = jt.array(np.float32([1,2,3]))
|
||||
b = jt.code(a.shape, a.dtype, [a], cpu_src="""
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @in0(i)*@in0(i)*2;
|
||||
""")
|
||||
assert (b.data==[2,8,18]).all()
|
||||
|
|
|
@ -11,6 +11,7 @@ import numpy as np
|
|||
|
||||
class TestClone(unittest.TestCase):
|
||||
def test(self):
|
||||
jt.clean()
|
||||
b = a = jt.array(1)
|
||||
for i in range(10):
|
||||
b = b.clone()
|
||||
|
|
|
@ -12,11 +12,11 @@ class TestCodeOp(unittest.TestCase):
|
|||
a = jt.random([10])
|
||||
b = jt.code(a.shape, a.dtype, [a],
|
||||
cpu_src='''
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @in0(i)*@in0(i)*2;
|
||||
''',
|
||||
cpu_grad_src = ['''
|
||||
for (int i=0; i<in0shape0; i++) {
|
||||
for (int i=0; i<in0_shape0; i++) {
|
||||
@out(i) = @dout(i)*@in0(i)*4;
|
||||
}
|
||||
'''])
|
||||
|
@ -32,15 +32,15 @@ class TestCodeOp(unittest.TestCase):
|
|||
b = jt.random([10])
|
||||
c = jt.code(a.shape, a.dtype, [a,b],
|
||||
cpu_src='''
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @in0(i)*@in1(i);
|
||||
''',
|
||||
cpu_grad_src = ['''
|
||||
for (int i=0; i<in0shape0; i++) {
|
||||
for (int i=0; i<in0_shape0; i++) {
|
||||
@out(i) = @dout(i)*@in1(i);
|
||||
}
|
||||
''', '''
|
||||
for (int i=0; i<in0shape0; i++) {
|
||||
for (int i=0; i<in0_shape0; i++) {
|
||||
@out(i) = @dout(i)*@in0(i);
|
||||
}
|
||||
'''])
|
||||
|
@ -52,11 +52,102 @@ class TestCodeOp(unittest.TestCase):
|
|||
def test_header(self):
|
||||
a = jt.array([3,2,1])
|
||||
b = jt.code(a.shape, a.dtype, [a],
|
||||
cpu_header='#include <algorithm>',
|
||||
cpu_header="""
|
||||
#include <algorithm>
|
||||
@alias(a, in0)
|
||||
@alias(b, out)
|
||||
""",
|
||||
cpu_src="""
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
@out(i) = @in0(i);
|
||||
std::sort(&@out(0), &@out(in0shape0));
|
||||
for (int i=0; i<a_shape0; i++)
|
||||
@b(i) = @a(i);
|
||||
std::sort(&@b(0), &@b(in0_shape0));
|
||||
"""
|
||||
)
|
||||
assert (b.data==[1,2,3]).all()
|
||||
|
||||
def test_multi_output(self):
|
||||
a = jt.array([3,2,1])
|
||||
b,c = jt.code([[2],[4]], ["float32", "float64"], [a],
|
||||
cpu_src="""
|
||||
@alias(a, in0)
|
||||
@alias(b, out0)
|
||||
@alias(c, out1)
|
||||
for (int i=0; i<a_shape0; i++) {
|
||||
if (i<b_shape0) @b(i) = @a(i);
|
||||
if (i<c_shape0) @c(i) = @a(i);
|
||||
}
|
||||
"""
|
||||
)
|
||||
assert b.shape == [2]
|
||||
assert c.shape == [4]
|
||||
assert b.dtype == "float32"
|
||||
assert c.dtype == "float64"
|
||||
assert (b.data == [3,2]).all()
|
||||
assert (c.data[:3] == [3,2,1]).all()
|
||||
|
||||
def test_multi_output2(self):
|
||||
a = jt.array([3,2,1])
|
||||
b,c = jt.code([(1,), (1,)], [a.dtype, a.dtype], [a],
|
||||
cpu_header="""
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
""",
|
||||
cpu_src="""
|
||||
@alias(a, in0)
|
||||
@alias(b, out0)
|
||||
@alias(c, out1)
|
||||
@b(0) = @c(0) = @a(0);
|
||||
for (int i=0; i<a_shape0; i++) {
|
||||
@b(0) = std::min(@b(0), @a(i));
|
||||
@c(0) = std::max(@c(0), @a(i));
|
||||
}
|
||||
cout << "min:" << @b(0) << " max:" << @c(0) << endl;
|
||||
"""
|
||||
)
|
||||
assert b.data == 1, b
|
||||
assert c.data == 3, c
|
||||
|
||||
def test_vary_shape(self):
|
||||
a = jt.array([5,-4,3,-2,1])
|
||||
|
||||
# a negative shape gives the max size of a varying dimension
|
||||
b,c = jt.code([(-5,), (-5,)], [a.dtype, a.dtype], [a],
|
||||
cpu_src="""
|
||||
@alias(a, in0)
|
||||
@alias(b, out0)
|
||||
@alias(c, out1)
|
||||
int num_b=0, num_c=0;
|
||||
for (int i=0; i<a_shape0; i++) {
|
||||
if (@a(i)>0)
|
||||
@b(num_b++) = @a(i);
|
||||
else
|
||||
@c(num_c++) = @a(i);
|
||||
}
|
||||
b->set_shape({num_b});
|
||||
c->set_shape({num_c});
|
||||
"""
|
||||
)
|
||||
assert (b.data == [5,3,1]).all()
|
||||
assert (c.data == [-4,-2]).all()
|
||||
|
||||
def test_comment(self):
|
||||
a = jt.array([3,2,1])
|
||||
b = jt.code(a.shape, a.dtype, [a],
|
||||
cpu_header='''
|
||||
#include <algorithm>
|
||||
// asd
|
||||
/* asd
|
||||
*/
|
||||
''',
|
||||
cpu_src="""
|
||||
// test comment
|
||||
/*
|
||||
multi line
|
||||
*/
|
||||
@alias(a, in0)
|
||||
for (int i=0; i<a_shape0; i++)
|
||||
@out(i) = @a(i);
|
||||
std::sort(&@out(0), &@out(a_shape0));
|
||||
"""
|
||||
)
|
||||
assert (b.data==[1,2,3]).all()
|
||||
|
@ -72,29 +163,29 @@ class TestCodeOp(unittest.TestCase):
|
|||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @in0(i)*@in1(i);
|
||||
}
|
||||
kernel1<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel1<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
''',
|
||||
cuda_grad_src = ['''
|
||||
__global__ static void kernel2(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @dout(i)*@in1(i);
|
||||
}
|
||||
kernel2<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel2<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
''', '''
|
||||
__global__ static void kernel3(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @dout(i)*@in0(i);
|
||||
}
|
||||
kernel3<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel3<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
'''])
|
||||
da, db = jt.grad(c, [a, b])
|
||||
assert np.allclose(c.data, a.data*b.data), (c.data, a.data*b.data)
|
||||
|
@ -110,8 +201,8 @@ class TestCodeOp(unittest.TestCase):
|
|||
cuda_src='''
|
||||
__global__ static void kernel1(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @in0(i,j)*@in1(i,j);
|
||||
}
|
||||
kernel1<<<32, 32>>>(@ARGS);
|
||||
|
@ -119,8 +210,8 @@ class TestCodeOp(unittest.TestCase):
|
|||
cuda_grad_src = ['''
|
||||
__global__ static void kernel(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @dout(i,j)*@in1(i,j);
|
||||
}
|
||||
kernel<<<32, 32>>>(@ARGS);
|
||||
|
@ -128,8 +219,8 @@ class TestCodeOp(unittest.TestCase):
|
|||
__global__ static void kernel(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
@pout(0,0);
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @dout(i,j)*@in0(i,j);
|
||||
}
|
||||
kernel<<<32, 32>>>(@ARGS);
|
||||
|
|
|
@ -133,8 +133,8 @@ def check_backward(xshape, wshape, stride, padding, dilation, use_cuda, nhwc):
|
|||
jt.sync([cy, closs, cdx, cdw])
|
||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: " + op_name + ".*)")
|
||||
assert len(logs)==3 and "oihw" in logs[0][0], (logs)
|
||||
assert np.allclose(y.data, cy.data)
|
||||
assert np.allclose(dw.data, cdw.data), (dw.data, cdw.data)
|
||||
assert np.allclose(y.data, cy.data, 1e-3)
|
||||
assert np.allclose(dw.data, cdw.data, 1e-3), (dw.data, cdw.data)
|
||||
assert np.allclose(dx.data, cdx.data, 1e-3), (dx.data, cdx.data, np.abs(cdx.data).max(), np.abs(dx.data - cdx.data).max())
|
||||
|
||||
class TestConvTuner(unittest.TestCase):
|
||||
|
|
|
@ -18,11 +18,12 @@ def test_cuda(use_cuda=1):
|
|||
|
||||
@unittest.skipIf(not jt.compiler.has_cuda, "No CUDA found")
|
||||
class TestCuda(unittest.TestCase):
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_cuda_flags(self):
|
||||
with jt.var_scope(use_cuda=1):
|
||||
a = jt.random((10, 10))
|
||||
a.sync()
|
||||
a = jt.random((10, 10))
|
||||
a.sync()
|
||||
|
||||
@jt.flag_scope(use_cuda=2)
|
||||
def test_no_cuda_op(self):
|
||||
no_cuda_op = jt.compile_custom_op("""
|
||||
struct NoCudaOp : Op {
|
||||
|
@ -49,10 +50,10 @@ class TestCuda(unittest.TestCase):
|
|||
""",
|
||||
"no_cuda")
|
||||
# force use cuda
|
||||
with jt.var_scope(use_cuda=2):
|
||||
a = no_cuda_op([3,4,5], 'float')
|
||||
expect_error(lambda: a())
|
||||
a = no_cuda_op([3,4,5], 'float')
|
||||
expect_error(lambda: a())
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_cuda_custom_op(self):
|
||||
my_op = jt.compile_custom_op("""
|
||||
struct MyCudaOp : Op {
|
||||
|
@ -94,9 +95,8 @@ class TestCuda(unittest.TestCase):
|
|||
#endif // JIT
|
||||
""",
|
||||
"my_cuda")
|
||||
with jt.var_scope(use_cuda=1):
|
||||
a = my_op([3,4,5], 'float')
|
||||
na = a.data
|
||||
a = my_op([3,4,5], 'float')
|
||||
na = a.data
|
||||
assert a.shape == [3,4,5] and a.dtype == 'float'
|
||||
assert (-na.flatten() == range(3*4*5)).all(), na
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ class TestCudnnConvOp(unittest.TestCase):
|
|||
assert len(logs)==3 and "oihw" in logs[0][0], logs
|
||||
assert np.allclose(y.data, cy.data)
|
||||
assert np.allclose(dx.data, cdx.data, 1e-2)
|
||||
assert np.allclose(dw.data, cdw.data)
|
||||
assert np.allclose(dw.data, cdw.data, 1e-2)
|
||||
check([10,3,100,100], [5,3,3,3], stride=2, padding=0, dilation=1)
|
||||
check([10,4,40,50], [5,4,5,5], stride=1, padding=1, dilation=1)
|
||||
check([10,4,40,50], [5,4,4,4], stride=3, padding=1, dilation=1)
|
||||
|
|
|
@ -22,7 +22,6 @@ class TestCutt(unittest.TestCase):
|
|||
@jt.flag_scope(use_cuda=1)
|
||||
def test(self):
|
||||
t = cutt_ops.cutt_test("213")
|
||||
jt.sync_all(True)
|
||||
print(t.data)
|
||||
assert t.data == 123
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,27 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import os, sys
|
||||
import jittor as jt
|
||||
from pathlib import Path
|
||||
|
||||
class TestLock(unittest.TestCase):
|
||||
def test(self):
|
||||
if os.environ.get('lock_full_test', '0') == '1':
|
||||
cache_path = os.path.join(str(Path.home()), ".cache", "jittor", "lock")
|
||||
assert os.system(f"rm -rf {cache_path}") == 0
|
||||
cmd = f"cache_name=lock {sys.executable} -m jittor.test.test_example"
|
||||
else:
|
||||
cmd = f"{sys.executable} -m jittor.test.test_example"
|
||||
print("run cmd twice", cmd)
|
||||
assert os.system(f"{cmd} & {cmd} & wait %1 && wait %2") == 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -11,6 +11,7 @@ import numpy as np
|
|||
class TestMiscIssue(unittest.TestCase):
|
||||
def test_issue4(self):
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
except:
|
||||
return
|
||||
|
@ -42,6 +43,7 @@ b.sync()
|
|||
|
||||
def test_mkl_conflict1(self):
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
except:
|
||||
return
|
||||
|
@ -67,6 +69,7 @@ m(torch.rand(*nchw))
|
|||
|
||||
def test_mkl_conflict2(self):
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
except:
|
||||
return
|
||||
|
@ -126,5 +129,18 @@ jt.mkl_ops.mkl_conv(x, w, 1, 2).sync()
|
|||
assert a.min().data == a.data.min(), (a.min(), a.data.min())
|
||||
assert a.max().data == a.data.max(), (a.max(), a.data.max())
|
||||
|
||||
@unittest.skipIf(not jt.compiler.has_cuda, "No CUDA found")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_cuda_pow_grad_nan(self):
|
||||
a = jt.float32([1,-1, -1000.1])
|
||||
da = jt.grad(a**2, a)
|
||||
assert np.isnan(da.data).sum()==0, da.data
|
||||
|
||||
def test_tanh_nan(self):
|
||||
m=jt.nn.Tanh()
|
||||
a = m(jt.array([1000]))
|
||||
assert np.isnan(a.data).sum()==0, a
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,101 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
import jittor.models as jtmodels
|
||||
|
||||
skip_this_test = False
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
import torchvision.models as tcmodels
|
||||
from torch import nn
|
||||
except:
|
||||
torch = None
|
||||
skip_this_test = True
|
||||
|
||||
@unittest.skipIf(skip_this_test, "skip_this_test")
|
||||
class test_models(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
self.models = [
|
||||
'inception_v3',
|
||||
'squeezenet1_0',
|
||||
'squeezenet1_1',
|
||||
'alexnet',
|
||||
'resnet18',
|
||||
'resnet34',
|
||||
'resnet50',
|
||||
'resnet101',
|
||||
'resnet152',
|
||||
'resnext50_32x4d',
|
||||
'resnext101_32x8d',
|
||||
'vgg11',
|
||||
'vgg11_bn',
|
||||
'vgg13',
|
||||
'vgg13_bn',
|
||||
'vgg16',
|
||||
'vgg16_bn',
|
||||
'vgg19',
|
||||
'vgg19_bn',
|
||||
'wide_resnet50_2',
|
||||
'wide_resnet101_2',
|
||||
'googlenet',
|
||||
'mobilenet_v2',
|
||||
'mnasnet0_5',
|
||||
'mnasnet0_75',
|
||||
'mnasnet1_0',
|
||||
'mnasnet1_3',
|
||||
'shufflenet_v2_x0_5',
|
||||
'shufflenet_v2_x1_0',
|
||||
'shufflenet_v2_x1_5',
|
||||
'shufflenet_v2_x2_0',
|
||||
]
|
||||
|
||||
@unittest.skipIf(not jt.has_cuda, "Cuda not found")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_models(self):
|
||||
def to_cuda(x):
|
||||
if jt.has_cuda:
|
||||
return x.cuda()
|
||||
return x
|
||||
threshold = 1e-2
|
||||
# Define numpy input image
|
||||
bs = 1
|
||||
test_img = np.random.random((bs,3,224,224)).astype('float32')
|
||||
# Define pytorch & jittor input image
|
||||
pytorch_test_img = to_cuda(torch.Tensor(test_img))
|
||||
jittor_test_img = jt.array(test_img)
|
||||
for test_model in self.models:
|
||||
if test_model == "inception_v3":
|
||||
test_img = np.random.random((bs,3,300,300)).astype('float32')
|
||||
pytorch_test_img = to_cuda(torch.Tensor(test_img))
|
||||
jittor_test_img = jt.array(test_img)
|
||||
# Define pytorch & jittor model
|
||||
pytorch_model = to_cuda(tcmodels.__dict__[test_model]())
|
||||
jittor_model = jtmodels.__dict__[test_model]()
|
||||
# Set eval to avoid dropout layer
|
||||
pytorch_model.eval()
|
||||
jittor_model.eval()
|
||||
# Jittor loads pytorch parameters to ensure forward alignment
|
||||
jittor_model.load_parameters(pytorch_model.state_dict())
|
||||
# Compare the pytorch & jittor forward results; if the relative error is lower than the threshold, this test passes.
|
||||
pytorch_result = pytorch_model(pytorch_test_img)
|
||||
jittor_result = jittor_model(jittor_test_img)
|
||||
x = pytorch_result.detach().cpu().numpy() + 1
|
||||
y = jittor_result.data + 1
|
||||
relative_error = abs(x - y) / abs(y)
|
||||
diff = relative_error.mean()
|
||||
assert diff < threshold, f"[*] {test_model} forward fails..., Relative Error: {diff}"
|
||||
print(f"[*] {test_model} forword passes with Relative Error {diff}")
|
||||
print('all models pass test.')
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,77 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import os, sys
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
mpi = jt.compile_extern.mpi
|
||||
|
||||
@unittest.skipIf(mpi is None, "not inside mpirun")
|
||||
class TestMpi(unittest.TestCase):
|
||||
def test_mpi_test_op(self):
|
||||
assert jt.compile_extern.mpi_ops.mpi_test("").data == 123
|
||||
|
||||
@unittest.skipIf(jt.compile_extern.nccl_ops is None, "no nccl")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_nccl_with_mpi(self):
|
||||
assert jt.compile_extern.nccl_ops.nccl_test("test_with_mpi").data == 123
|
||||
|
||||
def test_mpi_broadcast(self):
|
||||
for i in range(mpi.world_size()):
|
||||
a = np.zeros(100) + mpi.world_rank()
|
||||
mpi.broadcast(a, i)
|
||||
assert (a == i).all()
|
||||
|
||||
def test_mpi_dataset(self):
|
||||
from jittor.dataset.dataset import Dataset
|
||||
class ToyDataset(Dataset):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.set_attrs(batch_size=21, total_len=211)
|
||||
|
||||
def __getitem__(self, index):
|
||||
return index, index*index
|
||||
|
||||
toy = ToyDataset()
|
||||
offset = ((toy.batch_size-1) // mpi.world_size() + 1) * mpi.world_rank()
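# each rank consumes a contiguous slice of every batch; offset is where this rank's slice starts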
|
||||
|
||||
for _ in range(2):
|
||||
for i,(a,b) in enumerate(toy):
|
||||
assert (a.data*a.data == b.data).all()
|
||||
if mpi.world_rank() == 0:
|
||||
if i == len(toy)-1:
|
||||
assert a.shape[0] == 1
|
||||
c = np.array([210])
|
||||
else:
|
||||
assert toy.real_batch_size == 11
|
||||
c = np.array(range(offset+i*toy.batch_size, offset+i*toy.batch_size + toy.real_batch_size))
|
||||
else:
|
||||
if i == len(toy)-1:
|
||||
assert a.shape[0] == 1
|
||||
c = np.array([210])
|
||||
else:
|
||||
assert toy.real_batch_size == 10
|
||||
c = np.array(range(offset+i*toy.batch_size, offset+i*toy.batch_size + toy.real_batch_size))
|
||||
|
||||
assert (c==a.data).all(), (c, a.data)
|
||||
|
||||
def run_mpi_test(num_procs, name):
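# if not already running inside mpirun, re-launch this test module under mpirun with num_procs processes;
# roughly equivalent to running (assuming an MPI launcher is installed):
#   mpirun -np 2 python3.7 -m jittor.test.test_mpi -v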
|
||||
if not jt.compile_extern.inside_mpi():
|
||||
mpirun_path = jt.compile_extern.mpicc_path.replace("mpicc", "mpirun")
|
||||
cmd = f"{mpirun_path} -np {num_procs} {sys.executable} -m jittor.test.{name} -v"
|
||||
print("run cmd:", cmd)
|
||||
assert os.system(cmd)==0, "run cmd failed: "+cmd
|
||||
|
||||
@unittest.skipIf(not jt.compile_extern.has_mpi, "no mpi found")
|
||||
class TestMpiEntry(unittest.TestCase):
|
||||
def test_entry(self):
|
||||
run_mpi_test(2, "test_mpi")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,104 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import os, sys
|
||||
import jittor as jt
|
||||
from jittor import init
|
||||
from jittor import nn
|
||||
import numpy as np
|
||||
from jittor.test.test_mpi import run_mpi_test
|
||||
|
||||
mpi = jt.compile_extern.mpi
|
||||
|
||||
class FakeMpiBatchNorm(nn.Module):
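# reference batch norm that takes the full (global) batch explicitly, so the MPI-synchronized
# nn.BatchNorm(sync=True) used in the tests below can be checked against a plain single-process computation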
|
||||
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True):
|
||||
assert affine is None
|
||||
|
||||
self.num_features = num_features
|
||||
self.is_train = is_train
|
||||
self.eps = eps
|
||||
self.momentum = momentum
|
||||
self.weight = init.constant((num_features,), "float32", 1.0)
|
||||
self.bias = init.constant((num_features,), "float32", 0.0)
|
||||
self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
|
||||
self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()
|
||||
|
||||
def execute(self, x, global_x):
|
||||
if self.is_train:
|
||||
xmean = jt.mean(global_x, dims=[0,2,3], keepdims=1)
|
||||
x2mean = jt.mean(global_x*global_x, dims=[0,2,3], keepdims=1)
|
||||
|
||||
xvar = x2mean-xmean*xmean
|
||||
norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
|
||||
self.running_mean += (xmean.sum([0,2,3])-self.running_mean)*self.momentum
|
||||
self.running_var += (xvar.sum([0,2,3])-self.running_var)*self.momentum
|
||||
else:
|
||||
running_mean = self.running_mean.broadcast(x, [0,2,3])
|
||||
running_var = self.running_var.broadcast(x, [0,2,3])
|
||||
norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
|
||||
w = self.weight.broadcast(x, [0,2,3])
|
||||
b = self.bias.broadcast(x, [0,2,3])
|
||||
return norm_x * w + b
|
||||
|
||||
@unittest.skipIf(mpi is None, "no inside mpirun")
|
||||
class TestMpiBatchnorm(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
np.random.seed(0)
|
||||
jt.seed(3)
|
||||
|
||||
def test_batchnorm(self):
|
||||
mpi = jt.compile_extern.mpi
|
||||
data = np.random.rand(30,3,10,10).astype("float32")
|
||||
x1 = jt.array(data)
|
||||
x2 = jt.array(data[mpi.world_rank()*10:(mpi.world_rank()+1)*10,...])
|
||||
|
||||
bn1 = nn.BatchNorm(3, sync=False)
|
||||
bn2 = nn.BatchNorm(3, sync=True)
|
||||
bn3 = FakeMpiBatchNorm(3)
|
||||
y1 = bn1(x1).data
|
||||
y2 = bn2(x2).data
|
||||
y3 = bn3(x2,x1).data
|
||||
|
||||
assert np.allclose(y2, y3, atol=1e-4), (y2, y3)
|
||||
assert np.allclose(bn1.running_mean.data, bn2.running_mean.data), \
|
||||
(bn1.running_mean.data, bn2.running_mean.data)
|
||||
assert np.allclose(bn1.running_var.data, bn2.running_var.data)
|
||||
|
||||
def test_batchnorm_backward(self):
|
||||
mpi = jt.compile_extern.mpi
|
||||
data = np.random.rand(30,3,10,10).astype("float32")
|
||||
global_x = jt.array(data)
|
||||
x = jt.array(data[mpi.world_rank()*10:(mpi.world_rank()+1)*10,...])
|
||||
|
||||
bn1 = nn.BatchNorm(3, sync=True)
|
||||
bn2 = FakeMpiBatchNorm(3)
|
||||
y1 = bn1(x)
|
||||
y2 = bn2(x,global_x)
|
||||
gs1 = jt.grad(y1,bn1.parameters())
|
||||
gs2 = jt.grad(y2,bn2.parameters())
|
||||
|
||||
assert np.allclose(y1.data, y2.data, atol=1e-5),(mpi.world_rank(),y1.data, y2.data, y1.data-y2.data)
|
||||
for i in range(len(gs1)):
|
||||
assert np.allclose(gs1[i].data, gs2[i].data, rtol=1e-3),(mpi.world_rank(),gs1[i].data, gs2[i].data,gs1[i].data-gs2[i].data)
|
||||
|
||||
@unittest.skipIf(not jt.has_cuda, "no cuda")
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_batchnorm_cuda(self):
|
||||
self.test_batchnorm()
|
||||
self.test_batchnorm_backward()
|
||||
|
||||
|
||||
@unittest.skipIf(not jt.compile_extern.has_mpi, "no mpi found")
|
||||
class TestMpiBatchnormEntry(unittest.TestCase):
|
||||
def test(self):
|
||||
run_mpi_test(3, "test_mpi_batchnorm")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,70 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import os, sys
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
from jittor.test.test_mpi import run_mpi_test
|
||||
|
||||
mpi = jt.compile_extern.mpi
|
||||
|
||||
@unittest.skipIf(mpi is None, "no inside mpirun")
|
||||
class TestMpiOps(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
np.random.seed(0)
|
||||
jt.seed(3)
|
||||
|
||||
def test_all_reduce(self):
|
||||
x = jt.random([5, 5])
|
||||
y = x.mpi_all_reduce()
|
||||
assert np.allclose(y.data, (x*3).data)
|
||||
g = jt.grad(y,x)
|
||||
assert np.allclose(g.data, np.ones([5,5])*3)
|
||||
|
||||
def test_all_reduce_mean(self):
|
||||
x = jt.random([5, 5])
|
||||
y = x.mpi_all_reduce("mean")
|
||||
assert np.allclose(y.data, x.data)
|
||||
g = jt.grad(y,x)
|
||||
assert np.allclose(g.data, np.ones([5,5]))
|
||||
|
||||
def test_broadcast(self):
|
||||
data = jt.random([5, 5])
|
||||
if mpi.world_rank() == 0:
|
||||
x = data
|
||||
else:
|
||||
x = jt.zeros([5, 5])
|
||||
y = x.mpi_broadcast(0)
|
||||
assert np.allclose(y.data, data.data)
|
||||
g = jt.grad(y,x)
|
||||
if mpi.world_rank() == 0:
|
||||
assert np.allclose(g.data, np.ones([5,5])*3)
|
||||
else:
|
||||
assert np.allclose(g.data, np.zeros([5,5]))
|
||||
|
||||
def test_reduce(self):
|
||||
x = jt.random([5, 5])
|
||||
y = x.mpi_reduce(root=0)
|
||||
y.sync()
|
||||
if mpi.world_rank() == 0:
|
||||
assert np.allclose(y.data, (x*3).data)
|
||||
else:
|
||||
assert np.allclose(y.data, np.zeros([5,5]))
|
||||
g = jt.grad(y,x)
|
||||
assert np.allclose(g.data, np.ones([5,5]))
|
||||
|
||||
|
||||
@unittest.skipIf(not jt.compile_extern.has_mpi, "no mpi found")
|
||||
class TestMpiOpsEntry(unittest.TestCase):
|
||||
def test(self):
|
||||
run_mpi_test(3, "test_mpi_op")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -23,6 +23,7 @@ class TestNanoString(unittest.TestCase):
|
|||
t = (time.time() - t)/n
|
||||
# t is about 0.01 for 1,000,000 loops
|
||||
# about 92 ns per loop
|
||||
print("nanostring time", t)
|
||||
assert t < [1.5e-7, 1.7e-7][mid], t
|
||||
|
||||
assert (jt.hash("asdasd") == 4152566416)
|
||||
|
@ -34,6 +35,39 @@ class TestNanoString(unittest.TestCase):
|
|||
# int init: 1.2
|
||||
# dtype init(cache): 0.75
|
||||
# final: 1.0
|
||||
|
||||
def test_type(self):
|
||||
import numpy as np
|
||||
assert str(jt.NanoString(float)) == "float"
|
||||
assert str(jt.NanoString(np.float)) == "float"
|
||||
assert str(jt.NanoString(np.float32)) == "float32"
|
||||
assert str(jt.NanoString(np.float64)) == "float64"
|
||||
assert str(jt.NanoString(np.int8)) == "int8"
|
||||
assert str(jt.NanoString(np.array([1,2,3]).dtype)) == "int64"
|
||||
|
||||
assert str(jt.NanoString(jt.float)) == "float"
|
||||
assert str(jt.NanoString(jt.float32)) == "float32"
|
||||
assert str(jt.NanoString(jt.float64)) == "float64"
|
||||
assert str(jt.NanoString(jt.int8)) == "int8"
|
||||
assert str(jt.NanoString(jt.array([1,2,3]).dtype)) == "int64"
|
||||
assert str(jt.NanoString(jt.sum)) == "add"
|
||||
|
||||
def get_error_str(call):
|
||||
es = ""
|
||||
try:
|
||||
call()
|
||||
except Exception as e:
|
||||
es = str(e)
|
||||
return es
|
||||
|
||||
e = get_error_str(lambda: jt.code([1,], {}, [1], cpu_header=""))
|
||||
assert "help(jt.ops.code)" in e
|
||||
assert "cpu_header=str" in e
|
||||
e = get_error_str(lambda: jt.NanoString([1,2,3], fuck=1))
|
||||
assert "fuck=int" in str(e)
|
||||
assert "(list, )" in str(e)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,18 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import jittor as jt
|
||||
import unittest
|
||||
|
||||
@unittest.skipIf(jt.compile_extern.nccl_ops is None, "no nccl found")
|
||||
class TestNccl(unittest.TestCase):
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_nccl(self):
|
||||
assert jt.compile_extern.nccl_ops.nccl_test("").data == 123
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -0,0 +1,143 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guoye Yang <498731903@qq.com>
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import os, sys
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
from jittor import nn
|
||||
from jittor import nn, Module
|
||||
import copy
|
||||
from jittor.test.test_log import find_log_with_re
|
||||
from jittor.test.test_mpi import run_mpi_test
|
||||
from jittor.compile_extern import mpi, nccl_ops
|
||||
n = 2
|
||||
|
||||
@unittest.skipIf(nccl_ops is None, "nccl not found")
|
||||
class TestNcclOps(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
np.random.seed(0)
|
||||
jt.seed(3)
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_all_reduce(self):
|
||||
with jt.log_capture_scope(enable_tuner=1, log_silent=1,
|
||||
log_v=1, log_vprefix="op.cc=100,exe=1000"
|
||||
) as raw_log:
|
||||
x = jt.random([5, 5])
|
||||
y = x.mpi_all_reduce()
|
||||
assert np.allclose(y.data, (x*n).data)
|
||||
g = jt.grad(y,x)
|
||||
assert np.allclose(g.data, np.ones([5,5])*n)
|
||||
|
||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: nccl_all_reduce.*)")
|
||||
assert len(logs)==2, len(logs)
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_broadcast(self):
|
||||
with jt.log_capture_scope(enable_tuner=1, log_silent=1,
|
||||
log_v=1, log_vprefix="op.cc=100,exe=1000"
|
||||
) as raw_log:
|
||||
data = jt.random([5, 5])
|
||||
if mpi.world_rank() == 0:
|
||||
x = data
|
||||
else:
|
||||
x = jt.zeros([5, 5])
|
||||
y = x.mpi_broadcast(0)
|
||||
assert np.allclose(y.data, data.data)
|
||||
g = jt.grad(y.sum(),x)
|
||||
g_ = g.data
|
||||
if mpi.world_rank() == 0:
|
||||
assert np.allclose(g_, np.ones([5,5])*n)
|
||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: nccl_broadcast.*)")
|
||||
assert len(logs)==1, len(logs)
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_reduce(self):
|
||||
with jt.log_capture_scope(enable_tuner=1, log_silent=1,
|
||||
log_v=1, log_vprefix="op.cc=100,exe=1000"
|
||||
) as raw_log:
|
||||
x = jt.random([5, 5])
|
||||
y = x.mpi_reduce(root=0)
|
||||
y_ = y.data
|
||||
x_ = (x*n).data
|
||||
if mpi.world_rank() == 0:
|
||||
assert np.allclose(y_, x_)
|
||||
g = jt.grad(y,x)
|
||||
assert np.allclose(g.data, np.ones([5,5]))
|
||||
logs = find_log_with_re(raw_log, "(Jit op key (not )?found: nccl_reduce.*)")
|
||||
assert len(logs)==1, len(logs)
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_sync(self):
|
||||
|
||||
class Model(Module):
|
||||
def __init__(self):
|
||||
self.linear1 = nn.Linear(3, 3)
|
||||
self.linear2 = nn.Linear(3, 1024, False)
|
||||
|
||||
def execute(self, x):
|
||||
x = self.linear1(x)
|
||||
x = nn.relu(x)
|
||||
return self.linear2(x)
|
||||
|
||||
net = Model()
|
||||
if mpi.world_rank() == 0:
|
||||
net.linear1.weight *= 0
|
||||
net.linear2.weight *= 0
|
||||
net.linear1.bias *= 0
|
||||
net.linear1.weight += 1
|
||||
net.linear2.weight += 1
|
||||
net.linear1.bias += 1
|
||||
net.mpi_param_broadcast()
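# the broadcast copies rank 0's parameters (set to all ones above) to every other rank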
|
||||
assert np.allclose(net.linear1.weight.data, jt.ones(net.linear1.weight.shape).data)
|
||||
assert np.allclose(net.linear2.weight.data, jt.ones(net.linear2.weight.shape).data)
|
||||
assert np.allclose(net.linear1.bias.data, jt.ones(net.linear1.bias.shape).data)
|
||||
|
||||
@jt.flag_scope(use_cuda=1)
|
||||
def test_optimizer(self):
|
||||
|
||||
class Model2(Module):
|
||||
def __init__(self, input_size):
|
||||
self.linear1 = nn.Linear(input_size, 10)
|
||||
self.relu1 = nn.Relu()
|
||||
self.linear2 = nn.Linear(10, 1)
|
||||
def execute(self, x):
|
||||
x = self.linear1(x)
|
||||
x = self.relu1(x)
|
||||
return self.linear2(x)
|
||||
|
||||
def get_data(n):
|
||||
for i in range(n):
|
||||
x = np.random.rand(50, 1)
|
||||
y = x*x
|
||||
yield jt.float32(x), jt.float32(y)
|
||||
|
||||
num = 2000
|
||||
model = Model2(1)
|
||||
model.mpi_param_broadcast()
|
||||
optimizer = nn.SGD(model.parameters(), 0.1)
|
||||
dataset = list(enumerate(get_data(num)))
|
||||
for i in range(mpi.world_rank(), num, n):
|
||||
id, (x, y) = dataset[i]
|
||||
pred_y = model(x)
|
||||
loss = (pred_y - y)**2
|
||||
loss_mean = loss.mean()
|
||||
optimizer.step(loss_mean)
|
||||
assert loss_mean.data < 0.0025, loss_mean.data
|
||||
jt.clean()
|
||||
|
||||
@unittest.skipIf(not jt.compile_extern.has_mpi, "no mpi found")
|
||||
class TestNcclOpsEntry(unittest.TestCase):
|
||||
def test(self):
|
||||
run_mpi_test(2, "test_nccl_ops")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -6,13 +6,16 @@
|
|||
import unittest, os
|
||||
import jittor as jt
|
||||
from jittor import LOG
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
dirname = os.path.join(jt.flags.jittor_path, "notebook")
|
||||
notebook_dir = os.path.join(jt.flags.cache_path, "notebook")
|
||||
notebook_dir = os.path.join(str(Path.home()), ".cache","jittor","notebook")
|
||||
tests = []
|
||||
for mdname in os.listdir(dirname):
|
||||
if not mdname.endswith(".src.md"): continue
|
||||
# temporarily disable model_test
|
||||
if "LSGAN" in mdname: continue
|
||||
tests.append(mdname[:-3])
|
||||
|
||||
try:
|
||||
|
@ -27,7 +30,9 @@ def test(name):
|
|||
jt.compiler.run_cmd("ipython "+ipynb_name)
|
||||
|
||||
def init():
|
||||
jt.compiler.run_cmd("python3 "+os.path.join(dirname, "md_to_ipynb.py"))
|
||||
cmd = sys.executable+" "+os.path.join(dirname, "md_to_ipynb.py")
|
||||
LOG.i("init notebooks:", cmd)
|
||||
jt.compiler.run_cmd(cmd)
|
||||
|
||||
src = """class TestNodebooks(unittest.TestCase):
|
||||
@classmethod
|
||||
|
|
|
@ -133,5 +133,34 @@ class TestOpCompiler(unittest.TestCase):
|
|||
expect_error(lambda: jit_precompile(vars, "@if(1)"))
|
||||
expect_error(lambda: jit_precompile(vars, "#define OP1(a,b) a+b\n@expand_macro(OP1,1)"))
|
||||
|
||||
def test_strcmp(self):
|
||||
vars = {"Tx":"float"}
|
||||
check = lambda expr, result: \
|
||||
self.assertEqual(jit_precompile(vars, expr), result)
|
||||
check("@strcmp(aaa,aaa)", "0")
|
||||
check("@strcmp(aaa,bbb)", "-1")
|
||||
check("@strcmp(ccc,bbb)", "1")
|
||||
check("@{@strcmp(aaa,aaa)}", "0")
|
||||
check("@{@strcmp(aaa,bbb)}", "-1")
|
||||
check("@{@strcmp(ccc,bbb)}", "1")
|
||||
|
||||
code = \
|
||||
"""@define(T_NCCL,
|
||||
@if(@strcmp(@Tx,float)==0 || @strcmp(@Tx,float32)==0, ncclFloat)
|
||||
@if(@strcmp(@Tx,int)==0 || @strcmp(@Tx,int32)==0, ncclInt)
|
||||
@if(@strcmp(@Tx,float64)==0, ncclFloat64)
|
||||
@if(@strcmp(@Tx,int64)==0, ncclInt64)
|
||||
)
|
||||
ncclBcast(..., @T_NCCL, ...)
|
||||
"""
|
||||
assert "ncclFloat" in jit_precompile({"Tx":"float"}, code)
|
||||
assert "ncclFloat" in jit_precompile({"Tx":"float32"}, code)
|
||||
assert "ncclFloat64" in jit_precompile({"Tx":"float64"}, code)
|
||||
assert "ncclInt" in jit_precompile({"Tx":"int"}, code)
|
||||
assert "ncclInt" in jit_precompile({"Tx":"int32"}, code)
|
||||
assert "ncclInt64" in jit_precompile({"Tx":"int64"}, code)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -5,325 +5,233 @@
|
|||
# ***************************************************************
|
||||
import unittest
|
||||
import jittor as jt
|
||||
import math
|
||||
import numpy as np
|
||||
from jittor.utils.pytorch_converter import convert
|
||||
import os
|
||||
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
from torch import nn
|
||||
from jittor.utils import pytorch_converter
|
||||
except:
|
||||
torch = None
|
||||
|
||||
code="""
|
||||
import torch.nn as nn
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
|
||||
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
|
||||
|
||||
|
||||
model_urls = {
|
||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
|
||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
|
||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
|
||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
|
||||
'''3x3 convolution with padding'''
|
||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.conv1 = conv3x3(inplanes, planes, stride)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.relu = nn.ReLU()
|
||||
self.conv2 = conv3x3(planes, planes)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
super(Bottleneck, self).__init__()
|
||||
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
|
||||
padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
|
||||
self.relu = nn.ReLU()
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Module):
|
||||
|
||||
def __init__(self, block, layers, num_classes=1000):
|
||||
self.inplanes = 64
|
||||
super(ResNet, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
|
||||
bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.relu = nn.ReLU()
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
self.layer1 = self._make_layer(block, 64, layers[0])
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
|
||||
self.avgpool = nn.AvgPool2d(7, stride=1)
|
||||
self.fc = nn.Linear(512 * block.expansion, num_classes)
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out')
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1):
|
||||
downsample = None
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.inplanes, planes * block.expansion,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(planes * block.expansion),
|
||||
)
|
||||
layers = []
|
||||
layers.append(block(self.inplanes, planes, stride, downsample))
|
||||
self.inplanes = planes * block.expansion
|
||||
for i in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
x = self.avgpool(x)
|
||||
x = x.view(x.size(0), -1)
|
||||
x = self.fc(x)
|
||||
|
||||
return x
|
||||
|
||||
def resnet18(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-18 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet34(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-34 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet50(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-50 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet101(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-101 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet152(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-152 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
|
||||
return model
|
||||
"""
|
||||
|
||||
@unittest.skipIf(torch is None, "pytorch not found.")
|
||||
class TestPytorchConverter(unittest.TestCase):
|
||||
def test_simple(self):
|
||||
def model(c):
|
||||
a = torch.Tensor([1,2,3,4,0])
|
||||
b = a+a
|
||||
b = b*2
|
||||
b = b[:2]
|
||||
a = a[1<a]
|
||||
return a[0]+b[0]+c[0]
|
||||
|
||||
c = torch.Tensor([1,2,3])
|
||||
r1 = model(c)
|
||||
with pytorch_converter.trace_scope(["model"]):
|
||||
r2 = model(c)
|
||||
assert r1.numpy()==r2.numpy()
|
||||
r3 = model(c)
|
||||
assert r1.numpy() == r2.numpy() and r2.numpy() == r3.numpy(), (r1,r2,r3)
|
||||
ans = """root in:[] out:[]
|
||||
model in:[input_0] out:[out_11]
|
||||
inj_torch_Tensor___init__ in:[array_1] out:[] args:[array_1, [1, 2, 3, 4, 0]]
|
||||
inj_torch_Tensor___add__ in:[array_1, array_1] out:[out_2] args:[array_1, array_1]
|
||||
inj_torch_Tensor___mul__ in:[out_2] out:[out_3] args:[out_2, 2]
|
||||
inj_torch_Tensor___getitem__ in:[out_3] out:[out_4] args:[out_3, slice(None, 2, None)]
|
||||
inj_torch_Tensor___gt__ in:[array_1] out:[out_5] args:[array_1, 1]
|
||||
inj_torch_Tensor___getitem__ in:[array_1, out_5] out:[out_6] args:[array_1, out_5]
|
||||
inj_torch_Tensor___getitem__ in:[out_6] out:[out_7] args:[out_6, 0]
|
||||
inj_torch_Tensor___getitem__ in:[out_4] out:[out_8] args:[out_4, 0]
|
||||
inj_torch_Tensor___add__ in:[out_7, out_8] out:[out_9] args:[out_7, out_8]
|
||||
inj_torch_Tensor___getitem__ in:[input_0] out:[out_10] args:[input_0, 0]
|
||||
inj_torch_Tensor___add__ in:[out_9, out_10] out:[out_11] args:[out_9, out_10]
|
||||
model in:[input_0] out:[out_11] end
|
||||
root in:[] out:[] end"""
|
||||
ct = pytorch_converter.call_tree
|
||||
assert str(ct) == ans
|
||||
code = ct.to_jt()
|
||||
lc = {}
|
||||
exec(code, globals(), lc)
|
||||
print(code)
|
||||
jt_model = lc["model"]
|
||||
assert jt_model(jt.array([1,2,3])).data == r1.numpy()
|
||||
|
||||
def test_resnet(self):
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1):
|
||||
super(Bottleneck, self).__init__()
|
||||
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, dilation=dilation,
|
||||
padding=dilation, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(planes * 4)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
class ResNet(nn.Module):
|
||||
def __init__(self, block, layers=(3, 4, 23, 3)):
|
||||
self.inplanes = 64
|
||||
super(ResNet, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
|
||||
bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
self.layer1 = self._make_layer(block, 64, layers[0])
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2)
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4)
|
||||
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
||||
m.weight.data.normal_(0, math.sqrt(2. / n))
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
m.weight.data.fill_(1)
|
||||
m.bias.data.zero_()
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
|
||||
downsample = None
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.inplanes, planes * block.expansion,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(planes * block.expansion),
|
||||
)
|
||||
|
||||
layers = [block(self.inplanes, planes, stride, downsample)]
|
||||
self.inplanes = planes * block.expansion
|
||||
for i in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes, dilation=dilation))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x):
|
||||
x1 = self.conv1(x)
|
||||
x2 = self.bn1(x1)
|
||||
x2 = self.relu(x2)
|
||||
x2 = self.maxpool(x2)
|
||||
|
||||
x2 = self.layer1(x2)
|
||||
x3 = self.layer2(x2)
|
||||
x3 = self.layer3(x3)
|
||||
x3 = self.layer4(x3)
|
||||
|
||||
return x1, x2, x3
|
||||
|
||||
return
|
||||
resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])
|
||||
x = torch.Tensor(np.random.rand(2, 3, 224, 224))
|
||||
with pytorch_converter.trace_scope():
|
||||
y = resnet50(x)
|
||||
|
||||
ct = pytorch_converter.call_tree
|
||||
code = ct.to_jt()
|
||||
print(code)
|
||||
|
||||
def test_convert_batchnorm(self):
|
||||
class TestModel(nn.Module):
|
||||
def __init__(self):
|
||||
super(TestModel, self).__init__()
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.bn2 = nn.BatchNorm2d(64)
|
||||
self.bn3 = nn.BatchNorm2d(64)
|
||||
def forward(self, x):
|
||||
y = self.bn1(x)
|
||||
z = self.bn2(x*x)
|
||||
x = self.bn3(y+z)
|
||||
return x
|
||||
model = TestModel()
|
||||
|
||||
""" test_code:
|
||||
x = torch.Tensor(np.random.rand(16, 64, 15, 15).astype("float32"))
|
||||
jt_array = jt.array(x.numpy())
|
||||
jt_result = jt.nn.batch_norm(jt_array, is_train=False, eps=1e-05, momentum=0.1)
|
||||
torch_result = nn.BatchNorm2d(64)(x)
|
||||
"""
|
||||
|
||||
x = torch.Tensor(np.random.rand(16, 64, 15, 15).astype("float32"))
|
||||
with pytorch_converter.trace_scope():
|
||||
y = model(x)
|
||||
|
||||
ct = pytorch_converter.call_tree
|
||||
ans = """root in:[] out:[]
|
||||
TestModel.forward in:[input_0] out:[out_27] args:{'self': TestModel(
|
||||
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
(bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
), 'x': input_0}
|
||||
BatchNorm2d.forward in:[input_0] out:[out_7] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': input_0}
|
||||
functional.batch_norm in:[input_0, out_3, out_4, out_5, out_6] out:[out_7]
|
||||
BatchNorm2d.forward in:[input_0] out:[out_7] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': input_0} end
|
||||
inj_torch_Tensor___mul__ in:[input_0, input_0] out:[out_10] args:[input_0, input_0]
|
||||
BatchNorm2d.forward in:[out_10] out:[out_17] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': out_10}
|
||||
functional.batch_norm in:[out_10, out_13, out_14, out_15, out_16] out:[out_17]
|
||||
BatchNorm2d.forward in:[out_10] out:[out_17] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': out_10} end
|
||||
inj_torch_Tensor___add__ in:[out_7, out_17] out:[out_20] args:[out_7, out_17]
|
||||
BatchNorm2d.forward in:[out_20] out:[out_27] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': out_20}
|
||||
functional.batch_norm in:[out_20, out_23, out_24, out_25, out_26] out:[out_27]
|
||||
BatchNorm2d.forward in:[out_20] out:[out_27] args:{'self': BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), 'input': out_20} end
|
||||
TestModel.forward in:[input_0] out:[out_27] args:{'self': TestModel(
|
||||
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
(bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
|
||||
), 'x': input_0} end
|
||||
root in:[] out:[] end"""
|
||||
assert str(ct) == ans
|
||||
code = ct.to_jt()
|
||||
lc = {}
|
||||
exec(code, globals(), lc)
|
||||
print(code)
|
||||
def test_pytorch_converter(self):
|
||||
name1 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter_1.py')
|
||||
print(f"save source code into {name1}")
|
||||
with open(name1, 'w') as f:
|
||||
f.write(code)
|
||||
|
||||
jt_model = lc["TestModel"]
|
||||
assert (jt_model(jt.array(x.numpy())).data - y.detach().numpy()).mean() < 1e-5
|
||||
ret = convert(code)
|
||||
|
||||
def test_convert_relu(self):
|
||||
class TestModel(nn.Module):
|
||||
def __init__(self):
|
||||
super(TestModel, self).__init__()
|
||||
self.rl1 = nn.ReLU(inplace=True)
|
||||
self.rl2 = nn.ReLU(inplace=True)
|
||||
self.rl3 = nn.ReLU(inplace=True)
|
||||
def forward(self, x):
|
||||
y = self.rl1(x)
|
||||
z = self.rl2(x*x)
|
||||
x = self.rl3(y+z)
|
||||
return x
|
||||
model = TestModel()
|
||||
|
||||
""" test_code:
|
||||
x = torch.Tensor(np.random.rand(16, 64, 15, 15).astype("float32"))
|
||||
jt_array = jt.array(x.numpy())
|
||||
jt_result = jt.nn.batch_norm(jt_array, is_train=False, eps=1e-05, momentum=0.1)
|
||||
torch_result = nn.BatchNorm2d(64)(x)
|
||||
"""
|
||||
|
||||
x = torch.Tensor(np.random.rand(16, 3, 15, 15).astype("float32"))
|
||||
with pytorch_converter.trace_scope():
|
||||
y = model(x)
|
||||
|
||||
ct = pytorch_converter.call_tree
|
||||
ans = """root in:[] out:[]
|
||||
TestModel.forward in:[input_0] out:[out_8] args:{'self': TestModel(
|
||||
(rl1): ReLU(inplace=True)
|
||||
(rl2): ReLU(inplace=True)
|
||||
(rl3): ReLU(inplace=True)
|
||||
), 'x': input_0}
|
||||
ReLU.forward in:[input_0] out:[input_0] args:{'self': ReLU(inplace=True), 'input': input_0}
|
||||
functional.relu in:[input_0] out:[input_0]
|
||||
ReLU.forward in:[input_0] out:[input_0] args:{'self': ReLU(inplace=True), 'input': input_0} end
|
||||
inj_torch_Tensor___mul__ in:[input_0, input_0] out:[out_4] args:[input_0, input_0]
|
||||
ReLU.forward in:[out_4] out:[out_4] args:{'self': ReLU(inplace=True), 'input': out_4}
|
||||
functional.relu in:[out_4] out:[out_4]
|
||||
ReLU.forward in:[out_4] out:[out_4] args:{'self': ReLU(inplace=True), 'input': out_4} end
|
||||
inj_torch_Tensor___add__ in:[input_0, out_4] out:[out_8] args:[input_0, out_4]
|
||||
ReLU.forward in:[out_8] out:[out_8] args:{'self': ReLU(inplace=True), 'input': out_8}
|
||||
functional.relu in:[out_8] out:[out_8]
|
||||
ReLU.forward in:[out_8] out:[out_8] args:{'self': ReLU(inplace=True), 'input': out_8} end
|
||||
TestModel.forward in:[input_0] out:[out_8] args:{'self': TestModel(
|
||||
(rl1): ReLU(inplace=True)
|
||||
(rl2): ReLU(inplace=True)
|
||||
(rl3): ReLU(inplace=True)
|
||||
), 'x': input_0} end
|
||||
root in:[] out:[] end"""
|
||||
assert str(ct) == ans
|
||||
code = ct.to_jt()
|
||||
lc = {}
|
||||
exec(code, globals(), lc)
|
||||
print(code)
|
||||
name2 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter_2.py')
|
||||
print(f"save destination code into {name2}")
|
||||
with open(name2, 'w') as f:
|
||||
f.write(ret)
|
||||
|
||||
jt_model = lc["TestModel"]
|
||||
assert (jt_model(jt.array(x.numpy())).data == y.detach().numpy()).all()
|
||||
from test_pytorch_converter_1 import resnet18 as torch_resnet18
|
||||
from test_pytorch_converter_2 import resnet18 as jittor_resnet18
|
||||
model_torch = torch_resnet18(False)
|
||||
model_jittor = jittor_resnet18(False)
|
||||
model_jittor.load_parameters(model_torch.state_dict())
|
||||
|
||||
def test_convert_pool(self):
|
||||
class TestModel(nn.Module):
|
||||
def __init__(self):
|
||||
super(TestModel, self).__init__()
|
||||
self.mp1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
|
||||
self.mp2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
|
||||
self.mp3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
|
||||
def forward(self, x):
|
||||
y = self.mp1(x)
|
||||
z = self.mp2(x*x)
|
||||
x = self.mp3(y+z)
|
||||
return x
|
||||
model = TestModel()
|
||||
|
||||
x = torch.Tensor(np.random.rand(16, 3, 15, 15).astype("float32"))
|
||||
with pytorch_converter.trace_scope():
|
||||
y = model(x)
|
||||
ct = pytorch_converter.call_tree
|
||||
ans = """root in:[] out:[]
|
||||
TestModel.forward in:[input_0] out:[out_11] args:{'self': TestModel(
|
||||
(mp1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
(mp2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
(mp3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
), 'x': input_0}
|
||||
MaxPool2d.forward in:[input_0] out:[out_1] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': input_0}
|
||||
functional._max_pool2d in:[input_0] out:[out_1]
|
||||
MaxPool2d.forward in:[input_0] out:[out_1] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': input_0} end
|
||||
inj_torch_Tensor___mul__ in:[input_0, input_0] out:[out_5] args:[input_0, input_0]
|
||||
MaxPool2d.forward in:[out_5] out:[out_6] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': out_5}
|
||||
functional._max_pool2d in:[out_5] out:[out_6]
|
||||
MaxPool2d.forward in:[out_5] out:[out_6] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': out_5} end
|
||||
inj_torch_Tensor___add__ in:[out_1, out_6] out:[out_10] args:[out_1, out_6]
|
||||
MaxPool2d.forward in:[out_10] out:[out_11] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': out_10}
|
||||
functional._max_pool2d in:[out_10] out:[out_11]
|
||||
MaxPool2d.forward in:[out_10] out:[out_11] args:{'self': MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), 'input': out_10} end
|
||||
TestModel.forward in:[input_0] out:[out_11] args:{'self': TestModel(
|
||||
(mp1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
(mp2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
(mp3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
|
||||
), 'x': input_0} end
|
||||
root in:[] out:[] end"""
|
||||
assert str(ct) == ans
|
||||
code = ct.to_jt()
|
||||
lc = {}
|
||||
exec(code, globals(), lc)
|
||||
print(code)
|
||||
|
||||
jt_model = lc["TestModel"]
|
||||
assert (jt_model(jt.array(x.numpy())).data == y.detach().numpy()).all()
|
||||
img = np.random.randn(1,3,224,224).astype("float32")
|
||||
img_torch = torch.Tensor(img)
|
||||
img_jittor = jt.array(img)
|
||||
|
||||
out_torch = model_torch(img_torch)
|
||||
out_jittor = model_jittor(img_jittor)
|
||||
assert abs((out_torch.cpu().detach().numpy() - out_jittor.data)).mean() < 1e-4
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -1,264 +0,0 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import unittest
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
from jittor.utils.pytorch_converter2 import convert
|
||||
import os
|
||||
|
||||
try:
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
from torch import nn
|
||||
except:
|
||||
torch = None
|
||||
|
||||
code="""
|
||||
import torch.nn as nn
|
||||
import torch.utils.model_zoo as model_zoo
|
||||
|
||||
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
|
||||
|
||||
|
||||
model_urls = {
|
||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
|
||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
|
||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
|
||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
|
||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
|
||||
}
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1):
|
||||
'''3x3 convolution with padding'''
|
||||
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
super(BasicBlock, self).__init__()
|
||||
self.conv1 = conv3x3(inplanes, planes, stride)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.conv2 = conv3x3(planes, planes)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, stride=1, downsample=None):
|
||||
super(Bottleneck, self).__init__()
|
||||
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
|
||||
padding=1, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class ResNet(nn.Module):
|
||||
|
||||
def __init__(self, block, layers, num_classes=1000):
|
||||
self.inplanes = 64
|
||||
super(ResNet, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
|
||||
bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
self.layer1 = self._make_layer(block, 64, layers[0])
|
||||
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
|
||||
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
|
||||
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
|
||||
self.avgpool = nn.AvgPool2d(7, stride=1)
|
||||
self.fc = nn.Linear(512 * block.expansion, num_classes)
|
||||
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
nn.init.constant_(m.weight, 1)
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, stride=1):
|
||||
downsample = None
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.inplanes, planes * block.expansion,
|
||||
kernel_size=1, stride=stride, bias=False),
|
||||
nn.BatchNorm2d(planes * block.expansion),
|
||||
)
|
||||
|
||||
layers = []
|
||||
layers.append(block(self.inplanes, planes, stride, downsample))
|
||||
self.inplanes = planes * block.expansion
|
||||
for i in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
|
||||
x = self.avgpool(x)
|
||||
x = x.view(x.size(0), -1)
|
||||
x = self.fc(x)
|
||||
|
||||
return x
|
||||
|
||||
def resnet18(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-18 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet34(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-34 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet50(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-50 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet101(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-101 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
|
||||
return model
|
||||
|
||||
|
||||
def resnet152(pretrained=False, **kwargs):
|
||||
'''Constructs a ResNet-152 model.
|
||||
Args:
|
||||
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
||||
'''
|
||||
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
|
||||
if pretrained:
|
||||
model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
|
||||
return model
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import random
|
||||
|
||||
# setup random seed
|
||||
def setup_seed(seed):
|
||||
np.random.seed(seed)
|
||||
random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
torch.cuda.manual_seed_all(seed)
|
||||
torch.backends.cudnn.deterministic = True
|
||||
torch.backends.cudnn.benchmark = True
|
||||
"""
|
||||
|
||||
@unittest.skipIf(torch is None, "pytorch not found.")
|
||||
class TestPytorchConverter2(unittest.TestCase):
|
||||
def test_pytorch_converter2(self):
|
||||
name1 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter2_1.py')
|
||||
print(f"save source code into {name1}")
|
||||
with open(name1, 'w') as f:
|
||||
f.write(code)
|
||||
|
||||
ret = convert(code)
|
||||
|
||||
name2 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter2_2.py')
|
||||
print(f"save destination code into {name2}")
|
||||
with open(name2, 'w') as f:
|
||||
f.write(ret)
|
||||
|
||||
from test_pytorch_converter2_1 import resnet18 as torch_resnet18
|
||||
from test_pytorch_converter2_2 import resnet18 as jittor_resnet18
|
||||
model_torch = torch_resnet18(False)
|
||||
model_jittor = jittor_resnet18(False)
|
||||
model_jittor.load_parameters(model_torch.state_dict())
|
||||
|
||||
img = np.random.randn(1,3,224,224).astype("float32")
|
||||
img_torch = torch.Tensor(img)
|
||||
img_jittor = jt.array(img)
|
||||
|
||||
out_torch = model_torch(img_torch)
|
||||
out_jittor = model_jittor(img_jittor)
|
||||
assert abs((out_torch.cpu().detach().numpy() - out_jittor.data)).mean() < 1e-4
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -60,5 +60,16 @@ class TestReshapeOp(unittest.TestCase):
|
|||
assert node_dict['a'] == node_dict['d']
|
||||
assert node_dict['a'] == node_dict['e']
|
||||
|
||||
def test_view(self):
|
||||
a = jt.ones([2,3,4])
|
||||
assert a.view(2,-1).shape == [2,12]
|
||||
|
||||
def test_flatten(self):
|
||||
a = jt.ones([2,3,4])
|
||||
assert a.flatten().shape == [24]
|
||||
assert a.flatten(1).shape == [2,12]
|
||||
assert a.flatten(0,-2).shape == [6,4]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -116,7 +116,7 @@ class TestResnet(unittest.TestCase):
|
|||
assert jt.core.number_of_lived_vars() < 3500
|
||||
|
||||
jt.sync_all(True)
|
||||
assert np.mean(loss_list[-50:])<0.2
|
||||
assert np.mean(loss_list[-50:])<0.3
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -61,5 +61,10 @@ class TestTransposeOp(unittest.TestCase):
|
|||
assert ((da-jda.data)<1e-5).all(), (da, jda.data, da-jda.data)
|
||||
assert ((db-jdb.data)<1e-5).all(), (db-jdb.data)
|
||||
|
||||
def test_permute(self):
|
||||
a = jt.ones([2,3,4])
|
||||
assert a.permute().shape == [4,3,2]
|
||||
assert a.permute(0,2,1).shape == [2,4,3]
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
|
@ -16,7 +16,9 @@ def check(op, *args):
|
|||
x = convert(x)
|
||||
y = convert(y)
|
||||
# compare as strings so nan and inf values also match
|
||||
assert x.dtype == y.dtype and x.shape == y.shape and str(x)==str(y), f"{x}\n{y}"
|
||||
assert x.dtype == y.dtype and x.shape == y.shape
|
||||
for a,b in zip(x.flatten(), y.flatten()):
|
||||
assert str(a)[:5] == str(b)[:5], (a,b)
|
||||
|
||||
class TestUnaryOp(unittest.TestCase):
|
||||
def test_unary_op(self):
|
||||
|
@ -34,11 +36,19 @@ class TestUnaryOp(unittest.TestCase):
|
|||
check("sqrt", a)
|
||||
|
||||
def test_grad(self):
|
||||
ops = ["abs", "negative", "log", "exp", "sqrt"]
|
||||
ops = ["abs", "negative", "log", "exp", "sqrt",
|
||||
"sin", "arcsin", "sinh", "arcsinh",
|
||||
"tan", "arctan", "tanh", "arctanh",
|
||||
"cos", "arccos", "cosh", "arccosh",
|
||||
]
|
||||
a = [1.1, 2.2, 3.3, 4.4]
|
||||
for op in ops:
|
||||
if op == "abs":
|
||||
b = np.array(a+[-1,])
|
||||
elif op == "arccosh":
|
||||
b = np.array(a)
|
||||
elif "sin" in op or "cos" in op or "tan" in op:
|
||||
b = np.array(a) / 5
|
||||
else:
|
||||
b = np.array(a)
|
||||
func = lambda x: eval(f"np.{op}(x[0]).sum()")
|
||||
|
@ -46,7 +56,7 @@ class TestUnaryOp(unittest.TestCase):
|
|||
ja = jt.array(b)
|
||||
jb = eval(f"jt.{op}(ja)")
|
||||
jda = jt.grad(jb, ja)
|
||||
assert (np.abs(jda.data-da)<1e-5).all(), (jda.data,da,op)
|
||||
assert (np.allclose(jda.data, da)), (jda.data,da,op)
|
||||
|
||||
class TestUnaryOpCuda(TestUnaryOp, test_cuda(2)):
|
||||
pass
|
||||
|
|
|
@ -24,14 +24,14 @@ skip_model_test = not model_test
|
|||
|
||||
class MnistNet(Module):
|
||||
def __init__(self):
|
||||
self.model = vgg.VGG16_bn()
|
||||
self.model = vgg.vgg16_bn()
|
||||
self.layer = nn.Linear(1000,10)
|
||||
def execute(self, x):
|
||||
x = self.model(x)
|
||||
x = self.layer(x)
|
||||
return x
|
||||
|
||||
@unittest.skipIf(skip_model_test, "skip_this_test")
|
||||
@unittest.skipIf(skip_model_test, "skip_this_test, model_test != 1")
|
||||
class TestVGGClass(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(self):
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
# ***************************************************************
|
||||
import sys
|
||||
import os
|
||||
os.environ["log_silent"] = "1"
|
||||
import re
|
||||
import jittor_utils as jit_utils
|
||||
from jittor_utils import LOG
|
||||
|
|
|
@ -1,288 +1,473 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Wenyang Zhou <576825820@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import sys
|
||||
import contextlib
|
||||
import os
|
||||
import signal
|
||||
import jittor as jt
|
||||
jt.dirty_fix_pytorch_runtime_error()
|
||||
import torch
|
||||
import ast, astunparse
|
||||
import numpy as np
|
||||
|
||||
class CallTree:
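# a node in the traced call graph: records the intercepted call's name, inputs, outputs and raw args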
|
||||
def __init__(self, parent, name):
|
||||
self.parent = parent
|
||||
self.name = name
|
||||
self.children = []
|
||||
self.input = []
|
||||
self.output = []
|
||||
self.args = None
|
||||
if parent is not None:
|
||||
parent.children.append(self)
|
||||
pjmap = {
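# mapping from pytorch modules/functions to their jittor counterparts: 'args' gives each side's
# signature, 'links' renames parameters, and 'extras' supplies fixed extra arguments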
|
||||
# ***************************************************************
|
||||
# Module
|
||||
# ***************************************************************
|
||||
'Conv2d': {
|
||||
'pytorch': {
|
||||
'args': "in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'"
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'Conv',
|
||||
'args': 'in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'ConvTranspose2d': {
|
||||
'pytorch': {
|
||||
'args': "in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'"
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'ConvTranspose',
|
||||
'args': 'in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True, dilation=1'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'MaxPool2d': {
|
||||
'pytorch': {
|
||||
'args': 'kernel_size, stride=None, padding=0, dilation=1, return_indices=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'Pool',
|
||||
'args': 'kernel_size, stride=None, padding=0, dilation=None, return_indices=None, ceil_mode=False, op="maximum"'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {
|
||||
"op": "'maximum'",
|
||||
},
|
||||
},
|
||||
'AvgPool2d': {
|
||||
'pytorch': {
|
||||
'args': 'kernel_size, stride=None, padding=0, dilation=1, return_indices=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'Pool',
|
||||
'args': 'kernel_size, stride=None, padding=0, dilation=None, return_indices=None, ceil_mode=False, op="maximum"'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {
|
||||
"op": "'mean'",
|
||||
},
|
||||
},
|
||||
'ReLU': {
|
||||
'pytorch': {
|
||||
'args': 'inplace=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'ReLU',
|
||||
'args': ''
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'ReLU6': {
|
||||
'pytorch': {
|
||||
'args': 'inplace=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'ReLU6',
|
||||
'args': ''
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'LeakyReLU': {
|
||||
'pytorch': {
|
||||
'args': 'negative_slope=0.01, inplace=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'LeakyReLU',
|
||||
'args': 'scale'
|
||||
},
|
||||
'links': {'negative_slope': 'scale'},
|
||||
'extras': {},
|
||||
},
|
||||
'BatchNorm2d': {
|
||||
'pytorch': {
|
||||
'args': 'num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'BatchNorm',
|
||||
'args': 'num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'Dropout2d': {
|
||||
'pytorch': {
|
||||
'args': 'p=0.5, inplace=False',
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'nn',
|
||||
'name': 'Dropout',
|
||||
'args': 'p=0.5, is_train=False'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'kaiming_normal_': {
|
||||
'pytorch': {
|
||||
'args': "tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'",
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'init',
|
||||
'name': 'relu_invariant_gauss_',
|
||||
'args': 'var, mode="fan_in"'
|
||||
},
|
||||
'links': {'tensor': 'var'},
|
||||
'extras': {},
|
||||
},
|
||||
'constant_': {
|
||||
'pytorch': {
|
||||
'args': "tensor, val",
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'init',
|
||||
'name': 'constant_',
|
||||
'args': 'var, value=0.0'
|
||||
},
|
||||
'links': {'tensor': 'var', 'val': 'value'},
|
||||
'extras': {},
|
||||
},
|
||||
'normal_': {
|
||||
'pytorch': {
|
||||
'args': "tensor, mean=0.0, std=1.0",
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'init',
|
||||
'name': 'gauss_',
|
||||
'args': 'var, mean=0.0, std=1.0'
|
||||
},
|
||||
'links': {'tensor': 'var'},
|
||||
'extras': {},
|
||||
},
|
||||
'cat': {
|
||||
'pytorch': {
|
||||
'args': "tensors, dim=0, out=None",
|
||||
},
|
||||
'jittor': {
|
||||
'module': 'jt.contrib',
|
||||
'name': 'concat',
|
||||
'args': 'vars, dim=0'
|
||||
},
|
||||
'links': {'tensors': 'vars'},
|
||||
'extras': {},
|
||||
},
|
||||
# ***************************************************************
|
||||
# Convert format for functions which can be written as either torch.Tensor.xxx(...) or torch.xxx(torch.Tensor, ...)
|
||||
# Example: x.reshape([2,3]) and torch.reshape(x, [2,3])
|
||||
# ***************************************************************
|
||||
'flatten': {
|
||||
'pytorch': {
|
||||
'prefix': ['torch'],
|
||||
'args_prefix': 'input, start_dim=0, end_dim=-1',
|
||||
'args': 'start_dim=0, end_dim=-1',
|
||||
},
|
||||
'jittor': {
|
||||
'prefix': 'jt',
|
||||
'module': '',
|
||||
'name': 'flatten',
|
||||
'args_prefix': 'input, start_dim=0, end_dim=-1',
|
||||
'args': 'start_dim=0, end_dim=-1'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'reshape': {
|
||||
'pytorch': {
|
||||
'prefix': ['torch'],
|
||||
'args_prefix': 'input, shape',
|
||||
'args': 'shape',
|
||||
},
|
||||
'jittor': {
|
||||
'prefix': 'jt',
|
||||
'module': '',
|
||||
'name': 'reshape',
|
||||
'args_prefix': 'input, shape',
|
||||
'args': 'shape'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
'permute': {
|
||||
'pytorch': {
|
||||
'prefix': [],
|
||||
'args_prefix': '',
|
||||
'args': '*dim',
|
||||
},
|
||||
'jittor': {
|
||||
'prefix': '',
|
||||
'module': '',
|
||||
'name': 'permute',
|
||||
'args_prefix': '',
|
||||
'args': '*dim'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
},
|
||||
# Probably unnecessary if the signature is exactly the same
|
||||
'view': {
|
||||
'pytorch': {
|
||||
'prefix': [],
|
||||
'args_prefix': '',
|
||||
'args': '*shape',
|
||||
},
|
||||
'jittor': {
|
||||
'prefix': '',
|
||||
'module': '',
|
||||
'name': 'view',
|
||||
'args_prefix': '',
|
||||
'args': '*shape'
|
||||
},
|
||||
'links': {},
|
||||
'extras': {},
|
||||
}
|
||||
}
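    # Illustrative note (not part of the original table, example values assumed):
    # in each pjmap entry, 'links' renames PyTorch arguments to their Jittor
    # counterparts and 'extras' appends fixed arguments, so a call such as
    #   nn.MaxPool2d(3, stride=2)            # PyTorch
    # would be rewritten as
    #   nn.Pool(3, stride=2, op='maximum')   # Jittor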
|
||||
|
||||
def __str__(self):
|
||||
ss = []
|
||||
def dfs(v, depth):
|
||||
s = " "*depth+f"{v.name} in:{v.input} out:{v.output}"
|
||||
if v.args is not None:
|
||||
s += f" args:{v.args}"
|
||||
ss.append(s)
|
||||
if len(v.children):
|
||||
for c in v.children:
|
||||
dfs(c, depth+1)
|
||||
ss.append(s + " end")
|
||||
dfs(self, 0)
|
||||
return "\n".join(ss)
|
||||
unsupport_ops = [
|
||||
# ***************************************************************
|
||||
# torch.nn
|
||||
# ***************************************************************
|
||||
'Parameter', 'ModuleList', 'ModuleDict', 'ParameterList', 'ParameterDict',
|
||||
'Conv1d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose3d', 'Unfold', 'Fold',
|
||||
'MaxPool1d', 'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'AvgPool1d', 'AvgPool3d', 'FractionalMaxPool2d', 'LPPool1d', 'LPPool2d', 'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool3d',
|
||||
'ReflectionPad1d', 'ReflectionPad2d', 'ReplicationPad1d', 'ReplicationPad2d', 'ReplicationPad3d', 'ZeroPad2d', 'ConstantPad1d', 'ConstantPad2d', 'ConstantPad3d', 'ELU', 'Hardshrink', 'Hardtanh', 'LogSigmoid', 'MultiheadAttention',
|
||||
'PReLU', 'RReLU', 'SELU', 'CELU', 'GELU', 'Softplus', 'Softshrink', 'Softsign', 'Tanhshrink', 'Threshold', 'Softmin', 'Softmax2d', 'LogSoftmax', 'AdaptiveLogSoftmaxWithLoss', 'BatchNorm1d', 'BatchNorm3d', 'GroupNorm', 'SyncBatchNorm', 'InstanceNorm1d', 'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'LocalResponseNorm', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell', 'Transformer', 'TransformerEncoder', 'TransformerDecoder', 'TransformerEncoderLayer', 'TransformerDecoderLayer', 'Identity', 'Bilinear', 'Dropout3d', 'AlphaDropout', 'Embedding', 'EmbeddingBag', 'CosineSimilarity', 'PairwiseDistance', 'L1Loss', 'MSELoss', 'CTCLoss', 'NLLLoss', 'PoissonNLLLoss', 'KLDivLoss', 'BCELoss', 'BCEWithLogitsLoss', 'MarginRankingLoss', 'HingeEmbeddingLoss', 'MultiLabelMarginLoss', 'SmoothL1Loss', 'SoftMarginLoss', 'MultiLabelSoftMarginLoss', 'CosineEmbeddingLoss', 'MultiMarginLoss', 'TripletMarginLoss', 'PixelShuffle', 'Upsample', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'DataParallel', 'DistributedDataParallel', 'clip_grad_norm_', 'clip_grad_value_', 'parameters_to_vector', 'vector_to_parameters', 'BasePruningMethod', 'PruningContainer', 'Identity', 'RandomUnstructured', 'L1Unstructured', 'RandomStructured', 'LnStructured', 'CustomFromMask', 'identity', 'random_unstructured', 'l1_unstructured', 'random_structured', 'ln_structured', 'global_unstructured', 'custom_from_mask', 'remove', 'is_pruned', 'weight_norm', 'remove_weight_norm', 'spectral_norm', 'remove_spectral_norm', 'PackedSequence', 'pack_padded_sequence', 'pad_packed_sequence', 'pad_sequence', 'pack_sequence'
|
||||
]
|
||||
|
||||
def to_jt(self):
|
||||
defs = []
|
||||
template = {
|
||||
"add": "{0} + {1}",
|
||||
"mul": "{0} * {1}",
|
||||
"getitem": "{0}[{1}]",
|
||||
"gt": "{0} > {1}",
|
||||
}
|
||||
def dfs(v):
|
||||
if len(v.children)==0:
|
||||
return
|
||||
code = []
|
||||
code.append(f"def {v.name.split('.')[0]}({','.join(map(str,v.input))}):")
|
||||
for c in v.children:
|
||||
# parse the argument into jittor code
|
||||
# code.append(f" # {c.args}")
|
||||
if c.name == "BatchNorm2d.forward":
|
||||
bn = c.args["self"]
|
||||
code.append(f" {c.output[0]} = jt.nn.batch_norm({c.input[0]}, is_train={bn.training}, eps={bn.eps}, momentum={bn.momentum})")
|
||||
continue
|
||||
if c.name == "ReLU.forward":
|
||||
code.append(f" {c.output[0]} = jt.nn.relu({c.input[0]})")
|
||||
continue
|
||||
if c.name == "MaxPool2d.forward":
|
||||
po = c.args["self"]
|
||||
code.append(f" {c.output[0]} = jt.nn.pool({c.input[0]}, size={po.kernel_size}, op='maximum', padding={po.padding}, stride={po.stride})")
|
||||
continue
|
||||
if c.name == "Conv2d.forward":
|
||||
mod = c.args["self"]
|
||||
code.append(f" # {mod}")
|
||||
assert mod.kernel_size[0] == mod.kernel_size[1]
|
||||
assert mod.padding[0] == mod.padding[1]
|
||||
assert mod.stride[0] == mod.stride[1]
|
||||
assert mod.bias == False
|
||||
code.append(f" {c.output[0]} = nn.conv({c.output[0]}, {mod.in_channels}, {mod.out_channels}, {mod.kernel_size[0]}, {mod.padding[0]}, {mod.stride[0]})")
|
||||
continue
|
||||
if c.name.startswith("inj"):
|
||||
if c.name.endswith("__init__"):
|
||||
code.append(f" {c.args[0]} = jt.array({c.args[1]})")
|
||||
else:
|
||||
assert c.name.startswith("inj_torch_Tensor___") and \
|
||||
c.name.endswith("__")
|
||||
name = c.name[19:-2]
|
||||
if name in template:
|
||||
code.append(f" {c.output[0]} = {template[name].format(*c.args)}")
|
||||
else:
|
||||
code.append(f" {c.output[0]} = __{name}__({', '.join(map(str,c.args))})")
|
||||
else:
|
||||
dfs(c)
|
||||
out = ""
|
||||
if len(c.output):
|
||||
out = f"{','.join(map(str, c.output))} = "
|
||||
code.append(f" {out}{c.name.split('.')[0]}({','.join(map(str,c.input))})")
|
||||
if len(v.output):
|
||||
code.append(f" return {','.join(map(str, v.output))}")
|
||||
defs.extend(code)
|
||||
dfs(self)
|
||||
return "\n".join(defs)
|
||||
support_ops = {}
|
||||
for key in pjmap.keys():
|
||||
module = pjmap[key]['jittor']['module']
|
||||
name = pjmap[key]['jittor']['name']
|
||||
if module == 'nn':
|
||||
support_ops[key] = name
|
||||
|
||||
class TNode:
|
||||
def __init__(self, s, v):
|
||||
self.s = s
|
||||
self.v = v
|
||||
def __str__(self):
|
||||
return self.s
|
||||
def __repr__(self):
|
||||
return self.s
|
||||
def raise_unsupport(name):
|
||||
raise RuntimeError(f'{name} is not supported in Jittor yet. We would appreciate it if you could provide an implementation of {name} and submit a pull request at https://github.com/Jittor/jittor.')
|
||||
|
||||
trace_depth = 0
|
||||
stack = []
|
||||
g_vars = {}
|
||||
g_var_id = 0
|
||||
g_func_names = []
|
||||
call_tree = CallTree(None, "root")
|
||||
|
||||
def push_stack(name=None, input=[]):
|
||||
global trace_depth, call_tree
|
||||
trace_depth += 1
|
||||
if name is not None:
|
||||
# Do not re-record functional calls
|
||||
if len(stack) and (
|
||||
stack[-1][1].startswith("functional.") or
|
||||
stack[-1][1].startswith("inj_")
|
||||
):
|
||||
return
|
||||
call_tree = CallTree(call_tree, name)
|
||||
call_tree.input = input
|
||||
stack.append((trace_depth, name))
|
||||
return call_tree
|
||||
return None
|
||||
def replace(a):
|
||||
if hasattr(a, "attr") and a.attr in unsupport_ops:
|
||||
raise_unsupport(a.attr)
|
||||
|
||||
def pop_stack(output=[]):
|
||||
global trace_depth, call_tree
|
||||
if len(stack) and stack[-1][0] == trace_depth:
|
||||
stack.pop()
|
||||
call_tree.output = output
|
||||
call_tree = call_tree.parent
|
||||
trace_depth -= 1
|
||||
if hasattr(a, "id") and a.id in unsupport_ops:
|
||||
raise_unsupport(a.id)
|
||||
|
||||
def trace_calls(frame, event, arg):
|
||||
def dfs(obj, func):
|
||||
if isinstance(obj, list):
|
||||
for i,v in enumerate(obj):
|
||||
dfs(v, func)
|
||||
if isinstance(v, torch.Tensor):
|
||||
obj[i] = g_vars[id(v)]
|
||||
elif isinstance(obj, dict):
|
||||
for k,v in obj.items():
|
||||
if isinstance(v, tuple):
|
||||
v = list(v)
|
||||
obj[k] = v
|
||||
dfs(v, func)
|
||||
if isinstance(v, torch.Tensor):
|
||||
obj[k] = g_vars[id(v)]
|
||||
elif isinstance(obj, torch.Tensor):
|
||||
func(obj)
|
||||
global g_var_id
|
||||
if event.endswith('call'):
|
||||
co = frame.f_code
|
||||
func_name = co.co_name
|
||||
func_line_no = frame.f_lineno
|
||||
func_filename = co.co_filename
|
||||
args = "???"
|
||||
t_values = []
|
||||
if event == "c_call":
|
||||
func_name = arg.__name__
|
||||
else:
|
||||
args = list(frame.f_locals.keys())
|
||||
if "self" in frame.f_locals:
|
||||
func_name = type(frame.f_locals["self"]).__name__ + "." + func_name
|
||||
|
||||
val = {k:frame.f_locals[k] for k in args}
|
||||
def func(v):
|
||||
global g_var_id
|
||||
if id(v) not in g_vars:
|
||||
if func_name.endswith("__init__"):
|
||||
g_vars[id(v)] = TNode("array_"+str(g_var_id), v)
|
||||
else:
|
||||
g_vars[id(v)] = TNode("input_"+str(g_var_id), v)
|
||||
g_var_id += 1
|
||||
t_values.append(g_vars[id(v)])
|
||||
dfs(val, func)
|
||||
if hasattr(a, "attr"):
|
||||
if a.attr in support_ops.keys(): a.attr = support_ops[a.attr]
|
||||
|
||||
# get arguments you want
|
||||
if func_name.endswith(".forward"):
|
||||
ct = push_stack(func_name, t_values)
|
||||
ct.args = val
|
||||
elif func_filename.endswith("functional.py"): # TODO: not stable
|
||||
push_stack("functional."+func_name, t_values)
|
||||
elif func_name.startswith("inj_"):
|
||||
ct = push_stack(func_name, t_values)
|
||||
ct.args = val["a"]
|
||||
elif func_name in g_func_names:
|
||||
push_stack(func_name, t_values)
|
||||
if hasattr(a, "id"):
|
||||
if a.id in support_ops.keys(): a.id = support_ops[a.id]
|
||||
|
||||
import_flag = []
|
||||
def convert(code):
|
||||
a = ast.parse(code)
|
||||
dfs(a)
|
||||
a.body.insert(0, ast.parse('import jittor as jt').body[0])
|
||||
if 'init' not in import_flag:
|
||||
a.body.insert(1, ast.parse('from jittor import init').body[0])
|
||||
if 'nn' not in import_flag:
|
||||
a.body.insert(2, ast.parse('from jittor import nn').body[0])
|
||||
return astunparse.unparse(a)
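# A minimal usage sketch (hypothetical snippet, not from the original source):
# convert() takes PyTorch model source as a string and returns the Jittor
# translation, e.g.
#
#   src = '''
#   import torch.nn as nn
#   class Net(nn.Module):
#       def forward(self, x):
#           return nn.ReLU()(x)
#   '''
#   print(convert(src))   # adds jittor imports, renames forward -> execute, maps nn.* ops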
|
||||
|
||||
def convert_(prefix, func_name, ags, kws):
|
||||
info = pjmap[func_name]
|
||||
p_prefix = info['pytorch']['prefix'] if 'prefix' in info['pytorch'].keys() else None
|
||||
if p_prefix is not None and prefix in p_prefix:
|
||||
p_ags = info['pytorch']['args_prefix']
|
||||
j_ags = info['jittor']['args_prefix']
|
||||
else:
|
||||
p_ags = info['pytorch']['args']
|
||||
j_ags = info['jittor']['args']
|
||||
j_prefix = info['jittor']['prefix'] if 'prefix' in info['jittor'].keys() else None
|
||||
j_module = info['jittor']['module']
|
||||
j_name = info['jittor']['name']
|
||||
links = info['links']
|
||||
extras = info['extras']
|
||||
jj_ags = []
|
||||
jj_kws = {}
|
||||
pp_ags = []
|
||||
pp_kws = {}
|
||||
if j_ags == '' and p_ags == '':
|
||||
# no args in Pytorch and Jittor.
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}()"
|
||||
else:
|
||||
push_stack()
|
||||
jt.LOG.vvvv("----"*trace_depth+f"call: {func_name}({args}){t_values} # {func_filename}:{func_line_no}")
|
||||
elif event.endswith('return'):
|
||||
ret = []
|
||||
if event == "c_return":
|
||||
jt.LOG.vvvv("----"*trace_depth+f"return {arg.__name__}: ???")
|
||||
else:
|
||||
co = frame.f_code
|
||||
func_name = co.co_name
|
||||
def func(arg):
|
||||
global g_var_id
|
||||
if id(arg) not in g_vars:
|
||||
node = TNode(f"out_{g_var_id}", arg)
|
||||
g_vars[id(arg)] = node
|
||||
else:
|
||||
node = g_vars[id(arg)]
|
||||
ret.append(node)
|
||||
g_var_id += 1
|
||||
dfs(arg, func)
|
||||
if "self" in frame.f_locals:
|
||||
func_name = type(frame.f_locals["self"]).__name__ + "." + func_name
|
||||
jt.LOG.vvvv("----"*trace_depth+f"return {func_name}: {ret}")
|
||||
pop_stack(ret)
|
||||
return trace_calls
|
||||
|
||||
@contextlib.contextmanager
|
||||
def trace_scope(func_names=[]):
|
||||
global g_func_names
|
||||
g_func_names = func_names
|
||||
with func_injection():
|
||||
try:
|
||||
global trace_depth, g_var_id
|
||||
sys.settrace(trace_calls)
|
||||
trace_depth = 1
|
||||
stack.clear()
|
||||
g_vars.clear()
|
||||
call_tree.children.clear()
|
||||
|
||||
g_var_id = 0
|
||||
yield
|
||||
finally:
|
||||
sys.settrace(None)
|
||||
jt.LOG.v("="*20)
|
||||
jt.LOG.v(call_tree)
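# Illustrative usage (assumed names, not part of the original file): trace_scope
# is a context manager, so a typical call pattern would be
#
#   with trace_scope():
#       out = torch_model(torch_input)   # calls are recorded into call_tree
#
# after which call_tree.to_jt() can emit the equivalent Jittor code.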
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def func_injection():
|
||||
names = [
|
||||
"torch.Tensor.__init__",
|
||||
"torch.Tensor.__add__",
|
||||
"torch.Tensor.__mul__",
|
||||
"torch.Tensor.__sub__",
|
||||
"torch.Tensor.__truediv__",
|
||||
"torch.Tensor.__floordiv__",
|
||||
"torch.Tensor.__getitem__",
|
||||
# "torch.Tensor.__setitem__",
|
||||
"torch.Tensor.__pow__",
|
||||
"torch.Tensor.__mod__",
|
||||
"torch.Tensor.__lt__",
|
||||
"torch.Tensor.__le__",
|
||||
"torch.Tensor.__gt__",
|
||||
"torch.Tensor.__ge__",
|
||||
"torch.Tensor.__eq__",
|
||||
"torch.Tensor.__ne__",
|
||||
"torch.Tensor.__lshift__",
|
||||
"torch.Tensor.__rshift__",
|
||||
"torch.Tensor.__and__",
|
||||
"torch.Tensor.__or__",
|
||||
"torch.Tensor.__xor__",
|
||||
"torch.Tensor.__abs__",
|
||||
"torch.Tensor.__neg__",
|
||||
]
|
||||
try:
|
||||
global inject_prevs
|
||||
inject_prevs = []
|
||||
for name in names:
|
||||
inject_prevs.append(eval(name))
|
||||
for i, name in enumerate(names):
|
||||
new_name = "inj_" + name.replace(".", "_")
|
||||
if name.endswith("__getitem__"):
|
||||
exec(f"def {new_name}(*a): return torch._C._TensorBase.__getitem__(a[0], a[1] if isinstance(a[1], tuple) else (a[1],))")
|
||||
elif name.endswith("__init__"):
|
||||
exec(f"def {new_name}(*a, **b): return None")
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}()"
|
||||
else:
|
||||
exec(f"def {new_name}(*a, **b): return inject_prevs[{i}](*a, **b)")
|
||||
jt.LOG.v("inject", new_name)
|
||||
exec(f"{name} = {new_name}")
|
||||
yield
|
||||
finally:
|
||||
for i, name in enumerate(names):
|
||||
prev = inject_prevs[i]
|
||||
exec(f"{name} = prev")
|
||||
torch.Tensor.__getitem__ = \
|
||||
lambda s, a: torch._C._TensorBase.__getitem__(s, a if isinstance(a, tuple) else (a,))
|
||||
return f"{prefix}.{j_name}()"
|
||||
else:
|
||||
j_ags = j_ags.replace(' ','').split(',')
|
||||
for j_ag in j_ags:
|
||||
if '=' in j_ag:
|
||||
k,v = j_ag.split('=')
|
||||
jj_kws[k] = v
|
||||
else:
|
||||
jj_ags.append(j_ag)
|
||||
p_ags = p_ags.replace(' ','').split(',')
|
||||
for p_ag in p_ags:
|
||||
if '=' in p_ag:
|
||||
k,v = p_ag.split('=')
|
||||
pp_kws[k] = v
|
||||
else:
|
||||
pp_ags.append(p_ag)
|
||||
if len(jj_ags) == 0 and len(pp_ags) != 0:
|
||||
raise AttributeError(f"{func_name} in Jittor has no Attribute {pp_ags[0]}")
|
||||
if len(pp_ags) > len(ags) + len(kws):
|
||||
raise RuntimeError(f'{len(pp_ags) + len(list(pp_kws.keys()))} args are needed in the Pytorch {func_name} function, but you only provided {len(ags) + len(kws)}')
|
||||
ags_ = []
|
||||
for i in range(len(pp_ags)):
|
||||
if i < len(ags):
|
||||
if '*' in pp_ags[i]:
|
||||
ags_.append('(' + ', '.join(ags[i:]) + ')')
|
||||
ags = ags_
|
||||
break
|
||||
else:
|
||||
ags_.append(ags[i])
|
||||
else:
|
||||
break
|
||||
if len(pp_ags) + len(list(pp_kws.keys())) < len(ags) + len(kws):
|
||||
raise RuntimeError(f'There are only {len(pp_ags) + len(list(pp_kws.keys()))} args in the Pytorch {func_name} function, but you provided {len(ags) + len(kws)}')
|
||||
j_ags_flag = np.zeros(len(jj_ags))
|
||||
j_ags_values = {}
|
||||
j_kws_values = {}
|
||||
for i,ag in enumerate(ags):
|
||||
if len(pp_ags) == 0:
|
||||
ag_name = list(pp_kws.keys())[i]
|
||||
elif i < len(pp_ags):
|
||||
ag_name = pp_ags[i]
|
||||
elif i >= len(pp_ags) and (i-len(pp_ags)) < len(list(pp_kws.keys())):
|
||||
ag_name = list(pp_kws.keys())[i-len(pp_ags)]
|
||||
else:
|
||||
raise RuntimeError(f'The number of args of {func_name} does not match')
|
||||
if ag_name in links.keys():
|
||||
ag_name = links[ag_name]
|
||||
if ag_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(ag_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(ag_name))] = ag
|
||||
elif ag_name in jj_kws.keys():
|
||||
j_kws_values[ag_name] = ag
|
||||
else:
|
||||
raise AttributeError(f'{func_name} in Jittor has no Attribute {ag_name}')
|
||||
for i,kw in enumerate(kws):
|
||||
kw_name, kw_value = kw.split('=')
|
||||
if kw_name in links.keys():
|
||||
kw_name = links[kw_name]
|
||||
if kw_name in jj_ags:
|
||||
j_ags_flag[jj_ags.index(kw_name)] = 1
|
||||
j_ags_values[str(jj_ags.index(kw_name))] = kw_value
|
||||
elif kw_name in jj_kws.keys():
|
||||
j_kws_values[kw_name] = kw_value
|
||||
else:
|
||||
raise AttributeError(f'{func_name} in Jittor has no Attribute {kw_name}')
|
||||
len_jj_ags = len(jj_ags) if len(jj_ags) == 0 or jj_ags[0] != '' else 0
|
||||
if j_ags_flag.sum() < len_jj_ags:
|
||||
missing_args = []
|
||||
for i in range(len(jj_ags)):
|
||||
if j_ags_flag[i] == 0:
|
||||
missing_args.append(jj_ags[i])
|
||||
raise AttributeError(f"the needed args of {func_name} in Jittor is {', '.join(jj_ags)}, so you need to give value of {', '.join(missing_args)}.")
|
||||
if extras:
|
||||
for k in extras.keys():
|
||||
if k in jj_ags:
|
||||
j_ags_values[str(jj_ags.index(k))] = extras[k]
|
||||
elif k in jj_kws.keys():
|
||||
j_kws_values[k] = extras[k]
|
||||
else:
|
||||
raise AttributeError(f"there is not attribute named {k} in Jittor {func_name}, you should delete it in {func_name} extras.")
|
||||
j_ags_ = [j_ags_values[str(i)] for i in range(len(list(j_ags_values.keys())))]
|
||||
j_kws_ = [key + "=" + j_kws_values[key] for key in j_kws_values.keys()]
|
||||
j_func = f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
if p_prefix is None:
|
||||
return f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
if prefix in p_prefix:
|
||||
return f"{j_prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
else:
|
||||
return f"{prefix}.{j_name}({', '.join(j_ags_+j_kws_)})"
|
||||
return j_func
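# For illustration only (values assumed, derived from the pjmap table above):
#   convert_('nn', 'MaxPool2d', ['3'], ['stride=2'])
# would return the string "nn.Pool(3, stride=2, op='maximum')".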
|
||||
|
||||
def dfs(a):
|
||||
if isinstance(a, ast.Import):
|
||||
if 'torch' in astunparse.unparse(a) and 'init' in astunparse.unparse(a):
|
||||
import_flag.append('init')
|
||||
return ast.parse('from jittor import init').body[0]
|
||||
if 'torch' in astunparse.unparse(a) and 'nn' in astunparse.unparse(a):
|
||||
import_flag.append('nn')
|
||||
return ast.parse('from jittor import nn').body[0]
|
||||
if a.names[0].name == 'torch':
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.ImportFrom):
|
||||
if 'torch' in a.module:
|
||||
return 'delete'
|
||||
elif isinstance(a, ast.Call):
|
||||
for idx, ag in enumerate(a.args):
|
||||
ret = dfs(ag)
|
||||
if ret is not None:
|
||||
a.args[idx] = ret
|
||||
for idx, kw in enumerate(a.keywords):
|
||||
ret = dfs(kw)
|
||||
if ret is not None:
|
||||
a.keywords[idx] = ret
|
||||
func = astunparse.unparse(a.func).strip('\n').split('.')
|
||||
prefix = '.'.join(func[0:-1])
|
||||
func_name = func[-1]
|
||||
if func_name in unsupport_ops:
|
||||
raise_unsupport(func_name)
|
||||
if func_name in pjmap.keys():
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
kws = [astunparse.unparse(kw).strip('\n') for kw in a.keywords]
|
||||
ret = convert_(prefix, func_name, ags, kws)
|
||||
return ast.parse(ret).body[0].value
|
||||
if ".load_state_dict" in astunparse.unparse(a.func):
|
||||
a.func.attr = 'load_parameters'
|
||||
if astunparse.unparse(a.func).strip('\n').endswith(".size"):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
if len(ags) != 0:
|
||||
con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']'
|
||||
else:
|
||||
con = astunparse.unparse(a.func).replace('size', 'shape')
|
||||
return ast.parse(con).body[0].value
|
||||
elif isinstance(a, ast.Expr): pass
|
||||
elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name): replace(a)
|
||||
elif isinstance(a, ast.FunctionDef):
|
||||
if a.name == 'forward': a.name = 'execute'
|
||||
if hasattr(a, '__dict__'):
|
||||
for k in a.__dict__.keys():
|
||||
if isinstance(a.__dict__[k], list):
|
||||
delete_flag = []
|
||||
for i,a_ in enumerate(a.__dict__[k]):
|
||||
ret = dfs(a_)
|
||||
if ret == 'delete':
|
||||
delete_flag.append(True)
|
||||
continue
|
||||
if ret is not None:
|
||||
a.__dict__[k][i] = ret
|
||||
delete_flag.append(False)
|
||||
tmp = [a_ for i,a_ in enumerate(a.__dict__[k]) if delete_flag[i] == False]
|
||||
a.__dict__[k] = tmp
|
||||
else:
|
||||
ret = dfs(a.__dict__[k])
|
||||
if ret is not None:
|
||||
a.__dict__[k] = ret
|
||||
|
|
|
@ -1,117 +0,0 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
# This file is subject to the terms and conditions defined in
|
||||
# file 'LICENSE.txt', which is part of this source code package.
|
||||
# ***************************************************************
|
||||
import ast, astunparse
|
||||
|
||||
def convert(code):
|
||||
a = ast.parse(code)
|
||||
a.body.insert(0, ast.parse('import jittor as jt').body[0])
|
||||
a.body.insert(1, ast.parse('from jittor import init').body[0])
|
||||
dfs(a)
|
||||
return astunparse.unparse(a)
|
||||
|
||||
def replace(a):
|
||||
if hasattr(a, "attr"):
|
||||
if a.attr == "Conv2d": a.attr = "Conv"
|
||||
if a.attr == "BatchNorm2d": a.attr = "BatchNorm"
|
||||
if a.attr == "ReLU": a.attr = "Relu"
|
||||
if a.attr == "AvgPool2d": a.attr = "Pool"
|
||||
if a.attr == "MaxPool2d": a.attr = "Pool"
|
||||
if hasattr(a, "id"):
|
||||
if a.id == "Conv2d": a.id = "Conv"
|
||||
if a.id == "BatchNorm2d": a.id = "BatchNorm"
|
||||
if a.id == "ReLU": a.id = "Relu"
|
||||
if a.id == "AvgPool2d": a.id = "Pool"
|
||||
if a.id == "MaxPool2d": a.id = "Pool"
|
||||
|
||||
def dfs(a):
|
||||
if isinstance(a, ast.Import):
|
||||
if a.names[0].name == 'torch.nn' and a.names[0].asname == 'nn':
|
||||
a.names[0].name = 'jittor.nn'
|
||||
a.names[0].asname = 'nn'
|
||||
elif isinstance(a, ast.ImportFrom):
|
||||
if a.module == 'torch':
|
||||
a.module = a.module.replace('torch', 'jittor')
|
||||
return a
|
||||
elif isinstance(a, ast.Call):
|
||||
for idx, ag in enumerate(a.args):
|
||||
ret = dfs(ag)
|
||||
if ret is not None:
|
||||
a.args[idx] = ret
|
||||
for idx, kw in enumerate(a.keywords):
|
||||
ret = dfs(kw)
|
||||
if ret is not None:
|
||||
a.keywords[idx] = ret
|
||||
if ".load_state_dict" in astunparse.unparse(a.func):
|
||||
a.func.attr = 'load_parameters'
|
||||
if astunparse.unparse(a.func).startswith("torch.Tensor"):
|
||||
a.func.value.id = 'jt'
|
||||
a.func.attr = 'array'
|
||||
if ".cat" in astunparse.unparse(a.func):
|
||||
if len(a.args) == 1:
|
||||
dim = a.keywords[0].value.n
|
||||
else:
|
||||
dim = a.args[1].n
|
||||
if isinstance(a.args[0], ast.List):
|
||||
objs = [elt.id for elt in a.args[0].elts]
|
||||
con = 'jt.contrib.concat([' + ','.join(objs) + '], dim=' + str(dim) + ')'
|
||||
else:
|
||||
con = 'jt.contrib.concat(' + a.args[0].id + ', dim=' + str(dim) + ')'
|
||||
return ast.parse(con).body[0].value
|
||||
if "view" in astunparse.unparse(a.func):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
con = 'jt.reshape(' + a.func.value.id + ', [' + ','.join(ags) + '])'
|
||||
return ast.parse(con).body[0].value
|
||||
if "permute" in astunparse.unparse(a.func):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.func.value.args]
|
||||
con = 'jt.transpose(' + a.func.value.func.value.id + ', [' + ','.join(ags) + '])'
|
||||
return ast.parse(con).body[0].value
|
||||
if astunparse.unparse(a.func).strip('\n').endswith(".size"):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']'
|
||||
return ast.parse(con).body[0].value
|
||||
if astunparse.unparse(a.func).startswith("F."):
|
||||
a.func.value.id = "nn"
|
||||
return a
|
||||
if "kaiming_normal_" in astunparse.unparse(a.func):
|
||||
ag = astunparse.unparse(a.args[0]).strip('\n')
|
||||
kws = {}
|
||||
for kw in a.keywords:
|
||||
tmp = astunparse.unparse(kw).split('=')
|
||||
kws[tmp[0]] = tmp[1].strip('\n')
|
||||
con = 'init.relu_invariant_gauss_(' + ag + ', mode=' + kws['mode'] + ')'
|
||||
return ast.parse(con).body[0].value
|
||||
if "constant_" in astunparse.unparse(a.func):
|
||||
ags = [astunparse.unparse(ag).strip('\n') for ag in a.args]
|
||||
con = 'init.constant_(' + ','.join(ags) + ')'
|
||||
return ast.parse(con).body[0].value
|
||||
if "ReLU" in astunparse.unparse(a.func):
|
||||
a.args.clear()
|
||||
a.keywords.clear()
|
||||
elif "Conv2d" in astunparse.unparse(a.func):
|
||||
pass
|
||||
elif "AvgPool2d" in astunparse.unparse(a.func):
|
||||
a.keywords.append(ast.keyword(arg='op', value=ast.Str(s='mean')))
|
||||
elif "MaxPool2d" in astunparse.unparse(a.func):
|
||||
a.keywords.append(ast.keyword(arg='op', value=ast.Str(s='maximum')))
|
||||
for kw in a.keywords:
|
||||
if kw.arg in ['return_indices', 'groups']:
|
||||
kw.value = ast.NameConstant(value=None)
|
||||
elif isinstance(a, ast.Expr): pass
|
||||
elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name): replace(a)
|
||||
elif isinstance(a, ast.FunctionDef):
|
||||
if a.name == 'forward': a.name = 'execute'
|
||||
if hasattr(a, '__dict__'):
|
||||
for k in a.__dict__.keys():
|
||||
if isinstance(a.__dict__[k], list):
|
||||
for i,a_ in enumerate(a.__dict__[k]):
|
||||
ret = dfs(a_)
|
||||
if ret is not None:
|
||||
a.__dict__[k][i] = ret
|
||||
|
||||
else:
|
||||
ret = dfs(a.__dict__[k])
|
||||
if ret is not None:
|
||||
a.__dict__[k] = ret
|
|
@ -17,7 +17,8 @@ from ctypes import cdll
|
|||
|
||||
class LogWarper:
|
||||
def __init__(self):
|
||||
pass
|
||||
self.log_silent = int(os.environ.get("log_silent", "0"))
|
||||
self.log_v = int(os.environ.get("log_v", "0"))
|
||||
|
||||
def log_capture_start(self):
|
||||
cc.log_capture_start()
|
||||
|
@ -39,6 +40,8 @@ class LogWarper:
|
|||
if cc and hasattr(cc, "log"):
|
||||
cc.log(fileline, level, verbose, msg)
|
||||
else:
|
||||
if self.log_silent or verbose > self.log_v:
|
||||
return
|
||||
time = datetime.datetime.now().strftime("%m%d %H:%M:%S.%f")
|
||||
tid = threading.get_ident()%100
|
||||
v = f" v{verbose}" if verbose else ""
|
||||
|
@ -100,8 +103,10 @@ def try_import_jit_utils_core(silent=None):
|
|||
|
||||
def run_cmd(cmd, cwd=None, err_msg=None, print_error=True):
|
||||
LOG.v(f"Run cmd: {cmd}")
|
||||
if cwd: cmd = f"cd {cwd} && {cmd}"
|
||||
r = sp.run(cmd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT)
|
||||
if cwd:
|
||||
r = sp.run(cmd, cwd=cwd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT)
|
||||
else:
|
||||
r = sp.run(cmd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT)
|
||||
s = r.stdout.decode('utf8')
|
||||
if r.returncode != 0:
|
||||
if print_error:
|
||||
|
@ -150,12 +155,13 @@ def find_cache_path():
|
|||
cache_name = os.environ["cache_name"]
|
||||
else:
|
||||
# try to get branch name from git
|
||||
r = sp.run("git branch", cwd=os.path.dirname(__file__), stdout=sp.PIPE,
|
||||
r = sp.run(["git","branch"], cwd=os.path.dirname(__file__), stdout=sp.PIPE,
|
||||
stderr=sp.PIPE)
|
||||
assert r.returncode == 0
|
||||
bs = r.stdout.decode()
|
||||
bs = r.stdout.decode().splitlines()
|
||||
for b in bs:
|
||||
if b.startswith("* "): break
|
||||
|
||||
cache_name = b[2:]
|
||||
for c in " (){}": cache_name = cache_name.replace(c, "_")
|
||||
except:
|
||||
|
@ -163,17 +169,24 @@ def find_cache_path():
|
|||
for name in cache_name.split("/"):
|
||||
dirs.insert(-1, name)
|
||||
os.environ["cache_name"] = cache_name
|
||||
LOG.v("cache_name", cache_name)
|
||||
for d in dirs:
|
||||
path = os.path.join(path, d)
|
||||
if not os.path.isdir(path):
|
||||
os.mkdir(path)
|
||||
try:
|
||||
os.mkdir(path)
|
||||
except:
|
||||
pass
|
||||
assert os.path.isdir(path)
|
||||
if path not in sys.path:
|
||||
sys.path.append(path)
|
||||
return path
|
||||
|
||||
def get_version(output):
|
||||
version = run_cmd(output+" --version")
|
||||
if output.endswith("mpicc"):
|
||||
version = run_cmd(output+" --showme:version")
|
||||
else:
|
||||
version = run_cmd(output+" --version")
|
||||
v = re.findall("[0-9]+\\.[0-9]+\\.[0-9]+", version)
|
||||
if len(v) == 0:
|
||||
v = re.findall("[0-9]+\\.[0-9]+", version)
|
||||
|
|
|
@ -19,7 +19,7 @@ fi
|
|||
|
||||
if [ "$with_clang" = "1" ]; then
|
||||
sudo apt install wget lsb-release software-properties-common -y
|
||||
wget -O - https://apt.llvm.org/llvm.sh > /tmp/llvm.sh
|
||||
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
|
||||
sudo bash /tmp/llvm.sh 8
|
||||
sudo apt-get install libc++-8-dev libc++abi-8-dev -y
|
||||
sudo apt-get install libomp-8-dev -y
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
#!/bin/bash
|
||||
################################################################################
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https://llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
################################################################################
|
||||
#
|
||||
# This script will install the llvm toolchain on the different
|
||||
# Debian and Ubuntu versions
|
||||
|
||||
set -eux
|
||||
|
||||
# read optional command line argument
|
||||
LLVM_VERSION=8
|
||||
if [ "$#" -eq 1 ]; then
|
||||
LLVM_VERSION=$1
|
||||
fi
|
||||
|
||||
DISTRO=$(lsb_release -is)
|
||||
VERSION=$(lsb_release -sr)
|
||||
DIST_VERSION="${DISTRO}_${VERSION}"
|
||||
|
||||
if [[ $EUID -ne 0 ]]; then
|
||||
echo "This script must be run as root!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
declare -A LLVM_VERSION_PATTERNS
|
||||
LLVM_VERSION_PATTERNS[8]="-8"
|
||||
LLVM_VERSION_PATTERNS[9]="-9"
|
||||
LLVM_VERSION_PATTERNS[10]=""
|
||||
|
||||
if [ ! ${LLVM_VERSION_PATTERNS[$LLVM_VERSION]+_} ]; then
|
||||
echo "This script does not support LLVM version $LLVM_VERSION"
|
||||
exit 3
|
||||
fi
|
||||
|
||||
LLVM_VERSION_STRING=${LLVM_VERSION_PATTERNS[$LLVM_VERSION]}
|
||||
|
||||
# find the right repository name for the distro and version
|
||||
case "$DIST_VERSION" in
|
||||
Debian_9* ) REPO_NAME="deb http://apt.llvm.org/stretch/ llvm-toolchain-stretch$LLVM_VERSION_STRING main" ;;
|
||||
Debian_10* ) REPO_NAME="deb http://apt.llvm.org/buster/ llvm-toolchain-buster$LLVM_VERSION_STRING main" ;;
|
||||
Debian_unstable ) REPO_NAME="deb http://apt.llvm.org/unstable/ llvm-toolchain$LLVM_VERSION_STRING main" ;;
|
||||
Debian_testing ) REPO_NAME="deb http://apt.llvm.org/unstable/ llvm-toolchain$LLVM_VERSION_STRING main" ;;
|
||||
Ubuntu_16.04 ) REPO_NAME="deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial$LLVM_VERSION_STRING main" ;;
|
||||
Ubuntu_18.04 ) REPO_NAME="deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic$LLVM_VERSION_STRING main" ;;
|
||||
Ubuntu_18.10 ) REPO_NAME="deb http://apt.llvm.org/cosmic/ llvm-toolchain-cosmic$LLVM_VERSION_STRING main" ;;
|
||||
Ubuntu_19.04 ) REPO_NAME="deb http://apt.llvm.org/disco/ llvm-toolchain-disco$LLVM_VERSION_STRING main" ;;
|
||||
* )
|
||||
echo "Distribution '$DISTRO' in version '$VERSION' is not supported by this script (${DIST_VERSION})."
|
||||
exit 2
|
||||
esac
|
||||
|
||||
|
||||
cat /etc/apt/sources.list
|
||||
# install everything
|
||||
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
|
||||
add-apt-repository "${REPO_NAME}"
|
||||
cat /etc/apt/sources.list
|
||||
apt-get update
|
||||
apt-get install -y clang-$LLVM_VERSION lldb-$LLVM_VERSION lld-$LLVM_VERSION clangd-$LLVM_VERSION
|
2
setup.py
|
@ -21,7 +21,7 @@ with open(os.path.join(path, "README.src.md")) as fh:
|
|||
|
||||
setuptools.setup(
|
||||
name='jittor',
|
||||
version='1.0.0',
|
||||
version='1.0.1',
|
||||
# scripts=[],
|
||||
author="Jittor Group",
|
||||
author_email="ran.donglang@gmail.com",
|
||||
|
|
|
@ -54,17 +54,22 @@ struct EventQueue {
|
|||
static void worker_caller();
|
||||
|
||||
void run_sync(Func func) {
|
||||
// send work to the worker and do other work ourselves
|
||||
std::unique_lock<std::mutex> l(mtx);
|
||||
this->func = func;
|
||||
run_sync_done = false;
|
||||
// send func to worker
|
||||
worker.run(worker_caller);
|
||||
while (1) {
|
||||
// check for our own pending work or the worker's status
|
||||
cv.wait(l);
|
||||
list<Func> ts = move(tasks);
|
||||
l.unlock();
|
||||
// run our own pending work
|
||||
for (auto func : ts)
|
||||
func();
|
||||
l.lock();
|
||||
// worker is finished
|
||||
if (run_sync_done)
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ Init() {
|
|||
checkCudaErrors(cudaStreamDestroy(stream));
|
||||
checkCudaErrors(cudaEventDestroy(event));
|
||||
}
|
||||
} init;
|
||||
};
|
||||
|
||||
}
|
||||
using namespace fetcher_local;
|
||||
|
@ -59,6 +59,9 @@ using namespace fetcher_local;
|
|||
#endif
|
||||
|
||||
void fetch(const vector<VarHolder*>& vh, FetchFunc&& func) {
|
||||
#ifdef HAS_CUDA
|
||||
static Init init;
|
||||
#endif
|
||||
sync(vh);
|
||||
vector<Allocation> allocations(vh.size());
|
||||
vector<ArrayArgs> arrays(vh.size());
|
||||
|
|
27
src/init.cc
|
@ -3,6 +3,10 @@
|
|||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
#endif
|
||||
#include <random>
|
||||
|
||||
#include "init.h"
|
||||
|
@ -10,16 +14,39 @@
|
|||
|
||||
namespace jittor {
|
||||
|
||||
DEFINE_FLAG(vector<int>, cuda_archs, {}, "Cuda arch");
|
||||
|
||||
unique_ptr<std::default_random_engine> eng;
|
||||
|
||||
vector<set_seed_callback> callbacks;
|
||||
int current_seed;
|
||||
|
||||
static void init_cuda_devices() {
|
||||
#ifdef HAS_CUDA
|
||||
int count;
|
||||
cudaGetDeviceCount(&count);
|
||||
for (int i=0; i<count; i++) {
|
||||
cudaDeviceProp devProp;
|
||||
cudaGetDeviceProperties(&devProp, i);
|
||||
int number = devProp.major * 10 + devProp.minor;
|
||||
int found = 0;
|
||||
for (auto v : cuda_archs)
|
||||
if (v==number) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
if (!found) cuda_archs.push_back(number);
|
||||
}
|
||||
LOGi << "Found cuda archs:" << cuda_archs;
|
||||
#endif
|
||||
}
|
||||
|
||||
void init() {
|
||||
// init default_random_engine
|
||||
set_seed(time(0));
|
||||
// init fused op
|
||||
op_registe({"fused","",""});
|
||||
init_cuda_devices();
|
||||
}
|
||||
|
||||
void set_seed(int seed) {
|
||||
|
|
|
@ -44,7 +44,7 @@ jit_op_entry_t load_jit_lib(string name, string symbol_name="jit_entry") {
|
|||
}
|
||||
|
||||
void run_cmd(string cmd, string cwd="") {
|
||||
if (cwd.size()) cmd = "cd "+cwd + " && " + cmd;
|
||||
if (cwd.size()) cmd = "cd '"+cwd + "' && " + cmd;
|
||||
LOGvvv << "Run cmd:" << cmd;
|
||||
system_with_check(cmd.c_str());
|
||||
}
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. Authors:
|
||||
// Wenyang Zhou <576825820@qq.com>
|
||||
// Dun Liang <randonlang@gmail.com>
|
||||
// All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "lock.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
static int lock_fd = -1;
|
||||
|
||||
void set_lock_path(string path) {
|
||||
lock_fd = open(path.c_str(), O_RDWR);
|
||||
ASSERT(lock_fd >= 0);
|
||||
LOGv << "OPEN LOCK path:" << path << "Pid:" << getpid();
|
||||
}
|
||||
|
||||
void lock() {
|
||||
ASSERT(lock_fd >= 0);
|
||||
struct flock lock = {
|
||||
.l_type = F_WRLCK,
|
||||
.l_whence = SEEK_SET,
|
||||
.l_start = 0,
|
||||
.l_len = 0
|
||||
};
|
||||
ASSERT(fcntl(lock_fd, F_SETLKW, &lock) == 0);
|
||||
LOGvv << "LOCK Pid:" << getpid();
|
||||
}
|
||||
|
||||
void unlock() {
|
||||
ASSERT(lock_fd >= 0);
|
||||
struct flock lock = {
|
||||
.l_type = F_UNLCK,
|
||||
.l_whence = SEEK_SET,
|
||||
.l_start = 0,
|
||||
.l_len = 0
|
||||
};
|
||||
ASSERT(fcntl(lock_fd, F_SETLKW, &lock) == 0);
|
||||
LOGvv << "UNLOCK Pid:" << getpid();
|
||||
}
|
||||
|
||||
} // jittor
|
|
@ -0,0 +1,26 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. Authors:
|
||||
// Wenyang Zhou <576825820@qq.com>
|
||||
// Dun Liang <randonlang@gmail.com>
|
||||
// All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#pragma once
|
||||
#include "common.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
// @pyjt(set_lock_path)
|
||||
void set_lock_path(string path);
|
||||
|
||||
void lock();
|
||||
|
||||
void unlock();
|
||||
|
||||
struct lock_guard {
|
||||
inline lock_guard() { lock(); }
|
||||
inline ~lock_guard() { unlock(); }
|
||||
};
|
||||
|
||||
} // jittor
|
|
@ -61,6 +61,18 @@ static unordered_set<string> unary_ops = {
|
|||
"floor",
|
||||
"ceil",
|
||||
"cast",
|
||||
"sin",
|
||||
"asin",
|
||||
"sinh",
|
||||
"asinh",
|
||||
"tan",
|
||||
"atan",
|
||||
"tanh",
|
||||
"atanh",
|
||||
"cos",
|
||||
"acos",
|
||||
"cosh",
|
||||
"acosh",
|
||||
};
|
||||
|
||||
static unordered_set<string> unary_float_ops = {
|
||||
|
@ -135,6 +147,7 @@ static void init_ns() {
|
|||
#define INIT_NS(T) func(#T, ns_##T);
|
||||
FOR_ALL_NS(INIT_NS);
|
||||
ASSERT(NanoString::__ns_to_string.size()<=(1<<NanoString::_index_nbits));
|
||||
NanoString::__string_to_ns["sum"] = ns_add;
|
||||
LOGvv << "init __string_to_ns" << NanoString::__string_to_ns;
|
||||
LOGvv << "init __ns_to_string" << NanoString::__ns_to_string;
|
||||
}
|
||||
|
|
|
@ -63,6 +63,19 @@ namespace jittor {
|
|||
m(floor) \
|
||||
m(ceil) \
|
||||
m(cast) \
|
||||
\
|
||||
m(sin) \
|
||||
m(asin) \
|
||||
m(sinh) \
|
||||
m(asinh) \
|
||||
m(tan) \
|
||||
m(atan) \
|
||||
m(tanh) \
|
||||
m(atanh) \
|
||||
m(cos) \
|
||||
m(acos) \
|
||||
m(cosh) \
|
||||
m(acosh) \
|
||||
|
||||
struct NanoString;
|
||||
#define DECLEAR_NS(T) extern NanoString ns_##T;
|
||||
|
@ -115,13 +128,14 @@ struct NanoString {
|
|||
inline ns_t is_unary() const { return get(_type, _type_nbits)==_unary; }
|
||||
|
||||
inline NanoString() {}
|
||||
inline NanoString(const NanoString& other) : data(other.data) {}
|
||||
// @pyjt(__init__)
|
||||
inline NanoString(const char* s) {
|
||||
auto iter = __string_to_ns.find(s);
|
||||
ASSERT(iter != __string_to_ns.end()) << s;
|
||||
data = iter->second.data;
|
||||
}
|
||||
// @pyjt(__init__)
|
||||
inline NanoString(const NanoString& other) : data(other.data) {}
|
||||
inline NanoString(const string& s) : NanoString(s.c_str()) {}
|
||||
// @pyjt(__repr__)
|
||||
inline const char* to_cstring() const
|
||||
|
|
|
@ -24,4 +24,6 @@ bool endswith(const string& a, const string& b);
|
|||
// max_split: maximum split number (inclusive)
|
||||
vector<string> split(const string& s, const string& sep, int max_split=0);
|
||||
|
||||
string strip(const string& s);
|
||||
|
||||
} // jittor
|
12
src/node.h
|
@ -7,6 +7,7 @@
|
|||
#include "common.h"
|
||||
#include "misc/nano_string.h"
|
||||
#include "misc/nano_vector.h"
|
||||
#include "pybind/py_var_tracer.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -105,8 +106,15 @@ struct Node {
|
|||
list<output_t> _outputs;
|
||||
|
||||
#ifdef NODE_MEMCHECK
|
||||
Node();
|
||||
virtual ~Node();
|
||||
inline Node() {
|
||||
lived_nodes[(void*)this] = lived_nodes.size()+1;
|
||||
registe_node_trace(this);
|
||||
}
|
||||
|
||||
inline virtual ~Node() {
|
||||
lived_nodes.erase((void*)this);
|
||||
unregiste_node_trace(this);
|
||||
}
|
||||
#else
|
||||
inline Node() {};
|
||||
inline virtual ~Node() {};
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#include "misc/str_utils.h"
|
||||
#include "ops/op_register.h"
|
||||
#include "ops/array_op.h"
|
||||
#include "lock.h"
|
||||
#include "opt/expr.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -103,48 +105,6 @@ int OpCompiler::total_member_count() {
|
|||
return member_count;
|
||||
}
|
||||
|
||||
#define FOR_ALL_UOPS(m) \
|
||||
m(!,3) m(~,3)
|
||||
#define FOR_ALL_BOPS(m) \
|
||||
m(*,5) m(/,5) m(%,5) \
|
||||
m(+,6) m(-,6) \
|
||||
m(<<,7) m(>>,7) \
|
||||
m(<,9) m(<=,9) m(>,9) m(>=,9) \
|
||||
m(!=,10) m(==,10) \
|
||||
m(&,11) \
|
||||
m(^,12) \
|
||||
m(|,13) \
|
||||
m(&&,14) \
|
||||
m(||,15)
|
||||
|
||||
#define FOR_ALL_OPS(m) FOR_ALL_UOPS(m) FOR_ALL_BOPS(m)
|
||||
|
||||
inline bool is_unary_op(const string& op) {
|
||||
#define _u(o, _) if (op == #o) return true;
|
||||
FOR_ALL_UOPS(_u);
|
||||
return false;
|
||||
}
|
||||
|
||||
inline int precedence(const string& op) {
|
||||
#define _prior(o, p) if (op == #o) return p;
|
||||
FOR_ALL_OPS(_prior);
|
||||
return 20;
|
||||
}
|
||||
|
||||
inline bool check_precedence(const string& op1, const string& op2) {
|
||||
if (op1 == op2 && is_unary_op(op1)) return false;
|
||||
return precedence(op1) <= precedence(op2);
|
||||
}
|
||||
|
||||
inline int64_t calc_op(int64_t a, int64_t b, const string& op) {
|
||||
#define _calc_b(o, _) if (op == #o) return a o b;
|
||||
FOR_ALL_BOPS(_calc_b);
|
||||
#define _calc_u(o, _) if (op == #o) return o b;
|
||||
FOR_ALL_UOPS(_calc_u);
|
||||
ASSERT(0) << "Unrecognized op " << op;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t OpCompiler::eval(const string& expr, const unordered_map<string,string>& vars) {
|
||||
if (expr.find("@") != string::npos) {
|
||||
string new_expr;
|
||||
|
@ -165,15 +125,36 @@ int64_t OpCompiler::eval(const string& expr, const unordered_map<string,string>&
|
|||
presum++;
|
||||
k++;
|
||||
}
|
||||
ASSERT(presum==0) << "Jit error: braces are not matched.";
|
||||
CHECK(presum==0) << "Jit error: braces are not matched.";
|
||||
new_expr += S(eval(expr.substr(j+1, k-j-2), vars));
|
||||
i = k-1;
|
||||
continue;
|
||||
} else {
|
||||
if (expr[j] == '@') {
|
||||
// syntax @@
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// syntax: @x
|
||||
ASSERT(isvar(expr[j]));
|
||||
CHECK(isvar(expr[j])) << expr[j] << "is not var";
|
||||
size_t k=j+1;
|
||||
while (k<expr.size() && isvar(expr[k])) k++;
|
||||
if (k<expr.size() && expr[k]=='(') {
|
||||
// syntax @xx(...)
|
||||
// ij k l
|
||||
size_t l=k+1;
|
||||
int presum = 1;
|
||||
while (l<expr.size() && presum) {
|
||||
if (expr[l] == ')')
|
||||
presum--;
|
||||
else if (expr[l] == '(')
|
||||
presum++;
|
||||
l++;
|
||||
}
|
||||
new_expr += precompile(vars, expr.substr(i, l-i));
|
||||
i = l-1;
|
||||
continue;
|
||||
}
|
||||
string var = expr.substr(j, k-j);
|
||||
auto iter = vars.find(var);
|
||||
ASSERT(iter!=vars.end()) << "Jit var " << var << " not found." << vars;
|
||||
|
@ -184,68 +165,18 @@ int64_t OpCompiler::eval(const string& expr, const unordered_map<string,string>&
|
|||
}
|
||||
return eval(new_expr, vars);
|
||||
}
|
||||
vector<int64> values = {0};
|
||||
vector<string> ops;
|
||||
auto pop_values_and_calc_op = [&]() {
|
||||
CHECK(ops.size());
|
||||
auto op = ops.back();
|
||||
ops.pop_back();
|
||||
CHECK(values.size());
|
||||
auto val2 = values.back();
|
||||
values.pop_back();
|
||||
auto val1 = val2;
|
||||
if (!is_unary_op(op)) {
|
||||
CHECK(values.size());
|
||||
val1 = values.back();
|
||||
values.pop_back();
|
||||
auto e = expr::make(expr);
|
||||
e->dfs([&](expr::Expr* s) {
|
||||
if (s->is_sym()) {
|
||||
auto iter = vars.find(s->str);
|
||||
ASSERT(iter!=vars.end()) << "Jit var " << s->str << " not found.";
|
||||
auto e = expr::make(iter->second);
|
||||
s->swap(e.get());
|
||||
}
|
||||
values.push_back(calc_op(val1, val2, op));
|
||||
};
|
||||
for (size_t i=0; i<expr.size(); i++) {
|
||||
if (expr[i] == ' ')
|
||||
continue;
|
||||
if (expr[i] == '(')
|
||||
ops.push_back(string()+expr[i]);
|
||||
else if (isdigit(expr[i])) {
|
||||
int64_t val = 0;
|
||||
while (i<expr.length() && isdigit(expr[i])) {
|
||||
val = val*10 + (expr[i]-'0');
|
||||
i++;
|
||||
}
|
||||
i--;
|
||||
values.push_back(val);
|
||||
} else if (isvar(expr[i])) {
|
||||
auto j=i+1;
|
||||
while (j<expr.size() && isvar(expr[j])) j++;
|
||||
auto var_name = expr.substr(i,j-i);
|
||||
auto iter = vars.find(var_name);
|
||||
ASSERT(iter!=vars.end()) << "Jit var " << var_name << " not found.";
|
||||
try {
|
||||
values.push_back(std::stoll(iter->second));
|
||||
} catch (...) {
|
||||
ASSERT(0) << "'" << iter->second << "' is not integer, expr " << expr;
|
||||
}
|
||||
i = j-1;
|
||||
} else if (expr[i] == ')') {
|
||||
while (ops.size() && ops.back() != "(")
|
||||
pop_values_and_calc_op();
|
||||
ops.pop_back();
|
||||
} else {
|
||||
auto j=i+1;
|
||||
while (j<expr.size() && expr[j] != ' ' &&
|
||||
expr[j] != '!' && expr[j] != '~' &&
|
||||
!isdigit(expr[j]) && !isvar(expr[j]) &&
|
||||
expr[j] != '(' && expr[j] != ')') j++;
|
||||
auto op = expr.substr(i, j-i);
|
||||
while (ops.size() && check_precedence(ops.back(), op))
|
||||
pop_values_and_calc_op();
|
||||
ops.push_back(op);
|
||||
i = j-1;
|
||||
}
|
||||
}
|
||||
while (ops.size())
|
||||
pop_values_and_calc_op();
|
||||
return values.back();
|
||||
});
|
||||
e = e->eval();
|
||||
ASSERT(e->is(expr::_int));
|
||||
return e->as_int();
|
||||
}
|
||||
|
||||
void load_macros(const string& src, unordered_map<string,string>& macros) {
|
||||
|
@ -274,7 +205,9 @@ void load_macros(const string& src, unordered_map<string,string>& macros) {
|
|||
auto r=k;
|
||||
while (r<l && src[r] != '(') r++;
|
||||
auto body = q>p ? src.substr(p,q-p) : "";
|
||||
body = (r<l?src.substr(r,l-r):"()") + body;
|
||||
auto args = "<"+ (r+1<l?src.substr(r+1,l-r-2):"") + ">";
|
||||
// header <args>body
|
||||
body = args + body;
|
||||
auto header = src.substr(k,r-k);
|
||||
LOGvvvv << "header:" << header << "body:" << body;
|
||||
macros[header] = body;
|
||||
|
@ -285,9 +218,13 @@ void load_macros(const string& src, unordered_map<string,string>& macros) {
|
|||
|
||||
void expand_macro(const string& macro, const vector<string>& args, string& new_src) {
|
||||
LOGvvvv << "expand_macro" << macro << "args:" << args;
|
||||
auto i = macro.find(")");
|
||||
if (macro.size() == 0 || macro[0] != '<') {
|
||||
new_src += macro;
|
||||
return;
|
||||
}
|
||||
auto i = macro.find(">");
|
||||
ASSERT(i != string::npos);
|
||||
// (a1, a2, ...)body
|
||||
// <a1, a2, ...>body
|
||||
// j k i
|
||||
unordered_map<string, int> args_map;
|
||||
for (uint j=1, l=0; j<i; l++) {
|
||||
|
@ -447,7 +384,7 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
presum++;
|
||||
k++;
|
||||
}
|
||||
ASSERT(presum==0) << "Jit error: braces are not matched.";
|
||||
CHECK(presum==0) << "Jit error: braces are not matched.";
|
||||
new_src += S(OpCompiler::eval(src.substr(j+1, k-j-2), defs));
|
||||
i = k-1;
|
||||
continue;
|
||||
|
@ -463,7 +400,7 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
presum++;
|
||||
k++;
|
||||
}
|
||||
ASSERT(presum==0) << "Jit error: braces are not matched.";
|
||||
CHECK(presum==0) << "Jit error: braces are not matched.";
|
||||
new_src += precompile(defs, src.substr(j+1, k-j-2), macros);
|
||||
i = k-1;
|
||||
continue;
|
||||
|
@ -488,7 +425,7 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
comma.push_back(l);
|
||||
l++;
|
||||
}
|
||||
ASSERT(presum==0) << "Jit error: braces are not matched.";
|
||||
CHECK(presum==0) << "Jit error: braces are not matched.";
|
||||
comma.push_back(l-1);
|
||||
for (uint i=0; i+1<comma.size(); i++)
|
||||
args.push_back(src.substr(comma[i]+1, comma[i+1]-comma[i]-1));
|
||||
|
@ -587,27 +524,89 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
i = l-1;
|
||||
continue;
|
||||
} else
|
||||
if (expr == "strcmp") {
|
||||
// syntax: @strcmp(s1,s2)
|
||||
// ij k l
|
||||
CHECK(args.size()==2u)
|
||||
<< "Jit error: strcmp wrong arguments.";
|
||||
auto s1 = precompile(defs, args[0], macros);
|
||||
auto s2 = precompile(defs, args[1], macros);
|
||||
if (s1<s2) new_src += "-1"; else
|
||||
if (s1==s2) new_src += "0"; else
|
||||
new_src += "1";
|
||||
i = l-1;
|
||||
continue;
|
||||
} else
|
||||
if (expr == "alias") {
|
||||
// syntax: @alias(s1,s2)
|
||||
// ij k l
|
||||
|
||||
// alias(a,b)
|
||||
// a->b
|
||||
// a_type->b_type
|
||||
// a_dim -> b_dim
|
||||
// for i in a_dim:
|
||||
// a_shapei -> b_shapei
|
||||
// a_stridei -> b_stridei
|
||||
CHECK(args.size()==2u)
|
||||
<< "Jit error: alias wrong arguments.";
|
||||
auto key = strip(precompile(defs, args[0], macros));
|
||||
auto value = strip(precompile(defs, args[1], macros));
|
||||
CHECK(defs.count(value+"_dim")) << '"' >> value >> '"' << "not exsit";
|
||||
int dim = std::stoi(defs.at(value+"_dim"));
|
||||
vector<string> keys = {"", "p", "dim", "type"};
|
||||
for (int i=0; i<dim; i++) {
|
||||
keys.push_back("stride"+S(i));
|
||||
keys.push_back("shape"+S(i));
|
||||
}
|
||||
new_src += '\n';
|
||||
for (auto& s : keys) {
|
||||
string from = value+"_"+s;
|
||||
string to = key+"_"+s;
|
||||
if (!s.size()) {
|
||||
from = value;
|
||||
to = key;
|
||||
}
|
||||
if (defs.count(from))
|
||||
from = defs.at(from);
|
||||
else if (macros.count(from))
|
||||
from = macros.at(from);
|
||||
defs[to] = from;
|
||||
macros[to] = from;
|
||||
new_src += "#define "+to+" "+from+"\n";
|
||||
}
|
||||
i = l-1;
|
||||
continue;
|
||||
} else
|
||||
if (args.size()) {
|
||||
// syntax: @e0(i0,i1,...,in) -> e0p[i0*e0stride0+i1*e0stride1+...]
|
||||
// syntax: @e0(i0,i1,...,in) -> e0_p[i0*e0_stride0+i1*e0_stride1+...]
|
||||
ASSERT(expr.size());
|
||||
|
||||
int nid=(int)expr.size();
|
||||
while (nid && isdigit(expr[nid-1])) nid--;
|
||||
// xyz123 ---> prefix: xyz; suffix: 123
|
||||
string prefix = expr.substr(0, nid);
|
||||
string suffix = expr.substr(nid);
|
||||
string up_prefix = prefix;
|
||||
for (auto& c : up_prefix)
|
||||
if (c>='a' && c<='z') c = c-'a'+'A';
|
||||
string dim = up_prefix + "DIM" + suffix;
|
||||
if (prefix == "e") prefix = "extras";
|
||||
ASSERT(defs.count(dim)) << dim;
|
||||
ASSERTop(defs.at(dim),==,S(args.size()));
|
||||
expr = prefix + suffix; // e0 ->extras0
|
||||
string dim;
|
||||
if (expr == "x" && defs.count("XDIM")) {
|
||||
dim = "XDIM";
|
||||
prefix = "x";
|
||||
} else
|
||||
if (prefix == "e") {
|
||||
// TODO: unify interface
|
||||
prefix = "extras" + suffix;
|
||||
dim = "EDIM" + suffix;
|
||||
} else {
|
||||
prefix = expr+"_";
|
||||
dim = prefix + "dim";
|
||||
}
|
||||
CHECK(macros.count(dim)) << expr << "does not exist" << macros;
|
||||
CHECKop(macros.at(dim),==,S(args.size())) << expr << "dimension not matched";
|
||||
std::stringstream ss;
|
||||
ss << expr << "p[";
|
||||
ss << prefix << "p[";
|
||||
for (uint ii=0; ii<args.size(); ii++) {
|
||||
string arg = precompile(defs, args[ii], macros);
|
||||
if (ii) ss << "+";
|
||||
ss << '(' << arg << ")*" << expr << "stride" << ii;
|
||||
ss << '(' << arg << ")*" << prefix << "stride" << ii;
|
||||
}
|
||||
ss << ']';
|
||||
new_src += ss.str();
|
||||
|
@ -629,10 +628,10 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
} else
|
||||
new_src += src[i];
|
||||
} catch (std::exception& e) {
|
||||
uint il = i, ir = i;
|
||||
while (il && src[il] != '\n') il--;
|
||||
while (ir<src.size() && src[ir] != '\n') ir++;
|
||||
string this_line = src.substr(il+1, ir-il-1);
|
||||
int il = i, ir = i;
|
||||
while (il>0 && src[il-1] != '\n') il--;
|
||||
while (ir+1<src.size() && src[ir+1] != '\n') ir++;
|
||||
string this_line = src.substr(il, ir-il+1);
|
||||
LOGf << e.what() >> "\nJit compiler error:\n" >> this_line;
|
||||
}
|
||||
}
|
||||
|
@ -640,7 +639,7 @@ string precompile(unordered_map<string,string> defs, string src, unordered_map<s
|
|||
}
|
||||
|
||||
string OpCompiler::precompile(const unordered_map<string,string>& defs, const string& src) {
|
||||
unordered_map<string, string> macros;
|
||||
unordered_map<string, string> macros = defs;
|
||||
return jittor::precompile(defs, src, macros);
|
||||
}
|
||||
|
||||
|
@ -661,6 +660,10 @@ string OpCompiler::get_jit_src(Op* op) {
|
|||
string after_include_src = "";
|
||||
auto jit_define = op->get_jit_define();
|
||||
for (auto &t : jit_define) {
|
||||
// don't add CODE in define
|
||||
// this allowed comment exsit in CODE
|
||||
if (t.first == "CODE" || t.first == "HEADER")
|
||||
continue;
|
||||
string src = "#define " + t.first + " ";
|
||||
for (char c : t.second) {
|
||||
if (c=='\n') src += '\\';
|
||||
|
@ -672,7 +675,7 @@ string OpCompiler::get_jit_src(Op* op) {
|
|||
else
|
||||
after_include_src += src;
|
||||
}
|
||||
ASSERT(file_exist(src_path));
|
||||
ASSERT(file_exist(src_path)) << src_path;
|
||||
LOGvvv << "Read from" << src_path;
|
||||
string src = read_all(src_path);
|
||||
ASSERT(src.size()) << "Source read failed:" << src_path;
|
||||
|
@ -859,7 +862,7 @@ string OpCompiler::__get_fused_src(
|
|||
presum++;
|
||||
k++;
|
||||
}
|
||||
ASSERT(presum==0) << "Jit error: braces are not matched.";
|
||||
CHECK(presum==0) << "Jit error: braces are not matched.";
|
||||
for (;j < k-2; j++) {
|
||||
if (isvar(src[j])) {
|
||||
uint l=j;
|
||||
|
@ -945,6 +948,7 @@ jit_op_entry_t OpCompiler::compile(const string& jit_key, const string& src) {
|
|||
}
|
||||
|
||||
jit_op_entry_t OpCompiler::do_compile(Op* op) {
|
||||
jittor::lock_guard lg;
|
||||
OpCompiler oc(op);
|
||||
string* src = &oc.src;
|
||||
string src_after_passes;
|
||||
|
@ -954,8 +958,8 @@ jit_op_entry_t OpCompiler::do_compile(Op* op) {
|
|||
src_after_passes = tm.tune();
|
||||
src = &src_after_passes;
|
||||
}
|
||||
return oc.compile(op->get_jit_key(), *src);
|
||||
auto ret = oc.compile(op->get_jit_key(), *src);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -13,6 +13,8 @@
|
|||
namespace jittor {
|
||||
|
||||
#ifndef JIT
|
||||
static auto make_array = get_op_info("array")
|
||||
.get_constructor<VarPtr, const void*, NanoVector, NanoString>();
|
||||
static auto make_broadcast_to = get_op_info("broadcast_to")
|
||||
.get_constructor<VarPtr, Var*, Var*, NanoVector>();
|
||||
static auto make_binary = get_op_info("binary")
|
||||
|
@ -122,7 +124,9 @@ VarPtr BinaryOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
|||
if (v_index == 0) {
|
||||
// dout * y * x^(y-1)
|
||||
auto d = make_binary(dout, y, ns_multiply);
|
||||
auto ones = make_number(1, dout);
|
||||
// auto ones = make_number(1, dout);
|
||||
int number = 1;
|
||||
auto ones = make_array(&number, 1, ns_int32);
|
||||
auto y_1 = make_binary(y, ones, ns_subtract);
|
||||
auto x_y_1 = make_binary(x, y_1, ns_pow);
|
||||
return make_binary(d, x_y_1, ns_multiply);
|
||||
|
|
|
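The branch above implements the power rule for the input `x`: with `out = x^y`, the gradient is `dout * y * x^(y-1)`; the change only swaps the constant `1` from a `number` op to an int32 `array` op. A quick numpy finite-difference check of the rule itself (independent of jittor):
```
import numpy as np

x, y, eps = 1.7, 3.0, 1e-6
numeric = ((x + eps) ** y - (x - eps) ** y) / (2 * eps)
analytic = y * x ** (y - 1)   # the rule built with make_binary(...) above
assert abs(numeric - analytic) < 1e-4
```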
@ -16,50 +16,100 @@ namespace jittor {
|
|||
static auto make_code = get_op_info("code")
|
||||
.get_constructor<VarPtr, NanoVector, NanoString, vector<Var*>&&, string&&, vector<string>&&, string&&, string&&, vector<string>&&, string&&>();
|
||||
|
||||
static inline void check_vary_shape(NanoVector v) {
|
||||
ASSERT(v.size()) << "Vary shape should not be zero dimension";
|
||||
for (int i=0; i<v.size(); i++)
|
||||
ASSERT((i == 0) ^ (v[i] >= 0))
|
||||
<< "Vary shape should only occur in the first dimension:" << v;
|
||||
}
|
||||
|
||||
CodeOp::CodeOp(NanoVector shape, NanoString dtype, vector<Var*>&& inputs,
|
||||
string&& cpu_src, vector<string>&& cpu_grad_src, string&& cpu_header,
|
||||
string&& cuda_src, vector<string>&& cuda_grad_src, string&& cuda_header)
|
||||
: in(inputs), cpu_src(move(cpu_src)), cpu_grad_src(move(cpu_grad_src)), cpu_header(move(cpu_header)),
|
||||
: _inputs(inputs), cpu_src(move(cpu_src)), cpu_grad_src(move(cpu_grad_src)), cpu_header(move(cpu_header)),
|
||||
cuda_src(move(cuda_src)), cuda_grad_src(move(cuda_grad_src)), cuda_header(move(cuda_header))
|
||||
{
|
||||
flags.set(NodeFlags::_cpu, !!this->cpu_src.size());
|
||||
flags.set(NodeFlags::_cuda, !!this->cuda_src.size());
|
||||
out = create_output(shape, dtype);
|
||||
ASSERTop(inputs.size(),<=,10);
|
||||
_outputs.push_back(create_output(shape, dtype));
|
||||
CHECKop(_inputs.size(),<=,10);
|
||||
|
||||
if (_outputs[0]->num < 0) {
|
||||
flags.set(NodeFlags::_vary_shape);
|
||||
check_vary_shape(_outputs[0]->shape);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CodeOp::CodeOp(
|
||||
vector<NanoVector>&& shapes, vector<NanoString>&& dtypes, vector<Var*>&& inputs,
|
||||
string&& cpu_src, vector<string>&& cpu_grad_src, string&& cpu_header,
|
||||
string&& cuda_src, vector<string>&& cuda_grad_src, string&& cuda_header)
|
||||
: _inputs(inputs), cpu_src(move(cpu_src)), cpu_grad_src(move(cpu_grad_src)), cpu_header(move(cpu_header)),
|
||||
cuda_src(move(cuda_src)), cuda_grad_src(move(cuda_grad_src)), cuda_header(move(cuda_header))
|
||||
{
|
||||
flags.set(NodeFlags::_cpu, !!this->cpu_src.size());
|
||||
flags.set(NodeFlags::_cuda, !!this->cuda_src.size());
|
||||
CHECKop(shapes.size(),==,dtypes.size()) << "Number of outputs' shapes and dtypes should be the same";
|
||||
_outputs.resize(shapes.size());
|
||||
CHECKop(_inputs.size(),<=,10);
|
||||
CHECKop(_outputs.size(),<=,10);
|
||||
CHECKop(_outputs.size(),>,0);
|
||||
for (int i=0; i<shapes.size(); i++) {
|
||||
_outputs[i] = create_output(shapes[i], dtypes[i]);
|
||||
if (_outputs[i]->num < 0) {
|
||||
flags.set(NodeFlags::_vary_shape);
|
||||
check_vary_shape(_outputs[i]->shape);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
VarPtr CodeOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
||||
// No gradient for the extra inputs
|
||||
string cpu_src = v_index < cpu_grad_src.size() ? cpu_grad_src[v_index] : "";
|
||||
string cuda_src = v_index < cuda_grad_src.size() ? cuda_grad_src[v_index] : "";
|
||||
if (!cuda_src.size() && !cpu_src.size()) return nullptr;
|
||||
auto inputs = clone(in);
|
||||
inputs.push_back(out);
|
||||
auto inputs = clone(_inputs);
|
||||
// TODO: remove unused deps
|
||||
// dout -> dout
|
||||
std::stringstream new_alias;
|
||||
new_alias << "\n@alias(dout,in" << inputs.size() << ")\n";
|
||||
inputs.push_back(dout);
|
||||
// _outputs[i] -> poutj
|
||||
for (int i=0; i<_outputs.size(); i++) {
|
||||
new_alias << "\n@alias(pout" << i << ",in" << inputs.size() << ")\n";
|
||||
if (_outputs[i] == out)
|
||||
new_alias << "\n@alias(pout,in" << inputs.size() << ")\n";
|
||||
inputs.push_back(_outputs[i]);
|
||||
}
|
||||
auto alias = new_alias.str();
|
||||
return make_code(
|
||||
in[v_index]->shape,
|
||||
in[v_index]->dtype(),
|
||||
_inputs[v_index]->shape,
|
||||
_inputs[v_index]->dtype(),
|
||||
move(inputs),
|
||||
move(cpu_src), {}, clone(cpu_header),
|
||||
move(cuda_src), {}, clone(cuda_header)
|
||||
move(cpu_src), {}, alias+cpu_header,
|
||||
move(cuda_src), {}, alias+cuda_header
|
||||
);
|
||||
}
|
||||
|
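`CodeOp::grad` appends the forward inputs, `dout`, and each primal output to the gradient op's inputs, and adds `@alias(...)` lines to the header so the gradient source can refer to them as `dout`, `pout0`, `pout`, and so on. A hedged usage sketch (assuming jittor is importable and `jt.grad` is available as in the framework's other examples; the kernel bodies are illustrative only):
```
import jittor as jt

a = jt.random([10])
# forward computes exp; the backward body reuses the saved primal output via @pout
b = jt.code(a.shape, a.dtype, [a],
    cpu_header='#include <cmath>',
    cpu_src='''
        for (int i=0; i<in0_shape0; i++)
            @out(i) = std::exp(@in0(i));
    ''',
    cpu_grad_src=['''
        for (int i=0; i<in0_shape0; i++)
            @out(i) = @dout(i) * @pout(i);   // d/dx exp(x) = exp(x) = pout
    '''])
da = jt.grad(b.sum(), a)   # assumed API, matching jittor's usual autograd usage
```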
||||
void CodeOp::jit_prepare() {
|
||||
add_jit_define("Tout", out->dtype());
|
||||
add_jit_define("OUTDIM", JK::hex1(out->shape.size()));
|
||||
if (in.size()>=2) {
|
||||
auto pout = in.rbegin()[1];
|
||||
auto dout = in.rbegin()[0];
|
||||
add_jit_define("Tpout", pout->dtype());
|
||||
add_jit_define("POUTDIM", JK::hex1(pout->shape.size()));
|
||||
add_jit_define("Tdout", dout->dtype());
|
||||
add_jit_define("DOUTDIM", JK::hex1(dout->shape.size()));
|
||||
|
||||
// forward: in0 in1 in2 -> out0 out1
|
||||
// backward: in0 in1 in2 in3(pout0) in4(pout1)
|
||||
add_jit_define("IN_SIZE", JK::hex1(_inputs.size()));
|
||||
for (uint i=0; i<_inputs.size(); i++) {
|
||||
jk << JK::key << "in" << JK::hex1(i) << "_dim" <<
|
||||
JK::val << JK::hex1(_inputs[i]->shape.size()) << JK::end;
|
||||
jk << JK::key << "in" << JK::hex1(i) << "_type" <<
|
||||
JK::val << _inputs[i]->dtype() << JK::end;
|
||||
}
|
||||
add_jit_define("INSIZE", JK::hex1(in.size()));
|
||||
for (uint i=0; i<in.size(); i++) {
|
||||
add_jit_define("INDIM", JK::hex1(i), JK::hex1(in[i]->shape.size()));
|
||||
add_jit_define("Tin", JK::hex1(i), in[i]->dtype());
|
||||
add_jit_define("OUT_SIZE", JK::hex1(_outputs.size()));
|
||||
for (uint i=0; i<_outputs.size(); i++) {
|
||||
jk << JK::key << "out" << JK::hex1(i) << "_dim" <<
|
||||
JK::val << JK::hex1(_outputs[i]->shape.size()) << JK::end;
|
||||
jk << JK::key << "out" << JK::hex1(i) << "_type" <<
|
||||
JK::val << _outputs[i]->dtype() << JK::end;
|
||||
}
|
||||
if (flags.get(NodeFlags::_cuda)) {
|
||||
jk << JK::key << "HEADER" << JK::val << cuda_header;
|
||||
|
@ -90,7 +140,8 @@ void CodeOp::jit_prepare() {
|
|||
jk << JK::end;
|
||||
} else {
|
||||
add_jit_define("HEADER", cpu_header);
|
||||
add_jit_define("CODE", cpu_src);
|
||||
jk << JK::key << "CODE" << JK::val;
|
||||
jk << cpu_src << JK::end;
|
||||
ASSERT(cpu_src.size());
|
||||
}
|
||||
}
|
||||
|
@ -101,49 +152,47 @@ void CodeOp::jit_prepare() {
|
|||
|
||||
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||
|
||||
@for(i, 0, INSIZE,
|
||||
@define(in@i@@stride@{INDIM@i-1},1)
|
||||
@for(i, 0, IN_SIZE,
|
||||
@define(in@i@@_stride@{in@i@@_dim-1},1)
|
||||
)
|
||||
@for(i, 0, OUT_SIZE,
|
||||
@define(out@i@@_stride@{out@i@@_dim-1},1)
|
||||
)
|
||||
@define(outstride@{OUTDIM-1},1)
|
||||
@if(INSIZE>=2,
|
||||
@define(poutstride@{POUTDIM-1},1)
|
||||
@define(doutstride@{DOUTDIM-1},1)
|
||||
,)
|
||||
|
||||
@define(ARGS_DEF,
|
||||
@for(i, 0, INSIZE, @(
|
||||
Tin@i* __restrict__ in@i@@p,
|
||||
@for(j, 0, INDIM@i, @(index_t in@i@@shape@j,))
|
||||
@for(i, 0, IN_SIZE, @(
|
||||
in@i@@_type* __restrict__ in@i@@_p,
|
||||
@for(j, 0, in@i@@_dim, @(index_t in@i@@_shape@j,))
|
||||
))
|
||||
@for(i, 0, OUTDIM, @(index_t outshape@i,))
|
||||
Tout* __restrict__ outp
|
||||
@for(i, 0, OUT_SIZE, @(
|
||||
out@i@@_type* __restrict__ out@i@@_p,
|
||||
@for(j, 0, out@i@@_dim, @(index_t out@i@@_shape@j,))
|
||||
))
|
||||
int __tmp
|
||||
)
|
||||
|
||||
@define(ARGS,
|
||||
@for(i, 0, INSIZE, @(
|
||||
in@i@@p,
|
||||
@for(j, 0, INDIM@i, @(in@i@@shape@j,))
|
||||
@for(i, 0, IN_SIZE, @(
|
||||
in@i@@_p,
|
||||
@for(j, 0, in@i@@_dim, @(in@i@@_shape@j,))
|
||||
))
|
||||
@for(i, 0, OUTDIM, @(outshape@i,))
|
||||
outp
|
||||
@for(i, 0, OUT_SIZE, @(
|
||||
out@i@@_p,
|
||||
@for(j, 0, out@i@@_dim, @(out@i@@_shape@j,))
|
||||
))
|
||||
0
|
||||
)
|
||||
|
||||
@define(PRECALC,
|
||||
@for(i, 0, INSIZE,
|
||||
@for(j, INDIM@i-2, -1, -1, auto in@i@@stride@j = in@i@@stride@{j+1} * in@i@@shape@{j+1};)
|
||||
@for(i, 0, IN_SIZE,
|
||||
@for(j, in@i@@_dim-2, -1, -1, auto in@i@@_stride@j = in@i@@_stride@{j+1} * in@i@@_shape@{j+1};)
|
||||
)
|
||||
@for(i, 0, OUT_SIZE,
|
||||
@for(j, out@i@@_dim-2, -1, -1, auto out@i@@_stride@j = out@i@@_stride@{j+1} * out@i@@_shape@{j+1};)
|
||||
)
|
||||
@for(i, OUTDIM-2, -1, -1, auto outstride@i = outstride@{i+1} * outshape@{i+1};)
|
||||
@if(INSIZE>=2,
|
||||
auto* __restrict__ poutp = in@{INSIZE-2}@@p;
|
||||
@for(i, 0, POUTDIM, index_t poutshape@i = in@{INSIZE-2}@@shape@i;)
|
||||
@for(i, POUTDIM-2, -1, -1, auto poutstride@i = in@{INSIZE-2}@@stride@i;)
|
||||
|
||||
auto* __restrict__ doutp = in@{INSIZE-1}@@p;
|
||||
@for(i, 0, DOUTDIM, index_t doutshape@i = in@{INSIZE-1}@@shape@i;)
|
||||
@for(i, DOUTDIM-2, -1, -1, auto doutstride@i = in@{INSIZE-1}@@stride@i;)
|
||||
,)
|
||||
)
|
||||
|
||||
@alias(out, out0)
|
||||
|
||||
@HEADER
|
||||
|
||||
|
@ -151,14 +200,17 @@ namespace jittor {
|
|||
|
||||
void CodeOp::jit_run() {
|
||||
// define inputs
|
||||
@for(i, 0, INSIZE,
|
||||
auto in@i = in[@i];
|
||||
auto* __restrict__ in@i@@p = in[@i]->ptr<Tin@i>();
|
||||
@for(j, 0, INDIM@i, index_t in@i@@shape@j = in[@i]->shape[@j];)
|
||||
@for(i, 0, IN_SIZE,
|
||||
auto in@i = _inputs[@i];
|
||||
auto* __restrict__ in@i@@_p = _inputs[@i]->ptr<in@i@@_type>();
|
||||
@for(j, 0, in@i@@_dim, index_t in@i@@_shape@j = _inputs[@i]->shape[@j];)
|
||||
)
|
||||
// define outputs
|
||||
@for(i, 0, OUT_SIZE,
|
||||
auto out@i = _outputs[@i];
|
||||
auto* __restrict__ out@i@@_p = _outputs[@i]->ptr<out@i@@_type>();
|
||||
@for(j, 0, out@i@@_dim, index_t out@i@@_shape@j = _outputs[@i]->shape[@j];)
|
||||
)
|
||||
// define out
|
||||
auto* __restrict__ outp = out->ptr<Tout>();
|
||||
@for(i, 0, OUTDIM, index_t outshape@i = out->shape[@i];)
|
||||
|
||||
@PRECALC
|
||||
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
namespace jittor {
|
||||
|
||||
struct CodeOp : Op {
|
||||
vector<Var*> in;
|
||||
Var* out;
|
||||
vector<Var*> _inputs;
|
||||
vector<Var*> _outputs;
|
||||
string cpu_src;
|
||||
vector<string> cpu_grad_src;
|
||||
string cpu_header;
|
||||
|
@ -29,16 +29,19 @@ struct CodeOp : Op {
|
|||
@param[in] inputs A list of input jittor Vars
|
||||
|
||||
@param[in] cpu_src cpu source code string, built-in values:
|
||||
* in{x}, in{x}shape{y}, in{x}stride{y}, Tin{x}, in{x}p, @in0(...)
|
||||
* out, outshape{y}, outstride{y}, Tout, outp, @out(...)
|
||||
* in{x}, in{x}_shape{y}, in{x}_stride{y}, in{x}_type, in{x}_p, @in0(...)
|
||||
* out{x}, out{x}_shape{y}, out{x}_stride{y}, out{x}_type, out{x}_p, @out0(...)
|
||||
* out, out_shape{y}, out_stride{y}, out_type, out_p, @out(...)
|
||||
|
||||
@param[in] cpu_grad_src A list of string,
|
||||
cpu source code string for gradient, representing the gradient
|
||||
for each input; built-in values:
|
||||
* in{x}, in{x}shape{y}, in{x}stride{y}, Tin{x}, in{x}p, @in0(...)
|
||||
* out, outshape{y}, outstride{y}, Tout, outp, @out(...)
|
||||
* pout, poutshape{y}, poutstride{y}, Tpout, poutp, @pout(...)
|
||||
* dout, doutshape{y}, doutstride{y}, Tdout, doutp, @dout(...)
|
||||
* in{x}, in{x}_shape{y}, in{x}_stride{y}, in{x}_type, in{x}_p, @in0(...)
|
||||
* out{x}, out{x}_shape{y}, out{x}_stride{y}, out{x}_type, out{x}_p, @out0(...)
|
||||
* out, out_shape{y}, out_stride{y}, out_type, out_p, @out(...)
|
||||
* pout{x}, pout{x}_shape{y}, pout{x}_stride{y}, pout{x}_type, pout{x}_p, @pout{x}(...)
|
||||
* pout, pout_shape{y}, pout_stride{y}, pout_type, pout_p, @pout(...)
|
||||
* dout, dout_shape{y}, dout_stride{y}, dout_type, dout_p, @dout(...)
|
||||
|
||||
@param[in] cpu_header cpu header code string.
|
||||
|
||||
|
@ -47,25 +50,96 @@ struct CodeOp : Op {
|
|||
@param[in] cuda_grad_src A list of string.
|
||||
|
||||
@param[in] cuda_header cuda header code string.
|
||||
|
||||
|
||||
----------------
|
||||
|
||||
Example
|
||||
Example-1:
|
||||
|
||||
```
|
||||
a = jt.random([10])
|
||||
b = jt.code(a.shape, a.dtype, [a],
|
||||
b = jt.code(a.shape, "float32", [a],
|
||||
cpu_src='''
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @in0(i)*@in0(i)*2;
|
||||
''',
|
||||
cpu_grad_src = ['''
|
||||
for (int i=0; i<in0shape0; i++)
|
||||
for (int i=0; i<in0_shape0; i++)
|
||||
@out(i) = @dout(i)*@in0(i)*4;
|
||||
'''])
|
||||
```
|
||||
|
||||
Example2(CUDA):
|
||||
Example-2:
|
||||
```
|
||||
a = jt.array([3,2,1])
|
||||
b = jt.code(a.shape, a.dtype, [a],
|
||||
cpu_header="""
|
||||
#include <algorithm>
|
||||
@alias(a, in0)
|
||||
@alias(b, out)
|
||||
"""",
|
||||
cpu_src="""
|
||||
for (int i=0; i<a_shape0; i++)
|
||||
@b(i) = @a(i);
|
||||
std::sort(&@b(0), &@b(in0_shape0));
|
||||
"""
|
||||
)
|
||||
assert (b.data==[1,2,3]).all()
|
||||
```
|
||||
|
||||
Example-3:
|
||||
This example shows how to return multiple outputs from a code op.
|
||||
```
|
||||
a = jt.array([3,2,1])
|
||||
b,c = jt.code([(1,), (1,)], [a.dtype, a.dtype], [a],
|
||||
cpu_header="""
|
||||
#include <iostream>
|
||||
using namespace std;
|
||||
""",
|
||||
cpu_src="""
|
||||
@alias(a, in0)
|
||||
@alias(b, out0)
|
||||
@alias(c, out1)
|
||||
@b(0) = @c(0) = @a(0);
|
||||
for (int i=0; i<a_shape0; i++) {
|
||||
@b(0) = std::min(@b(0), @a(i));
|
||||
@c(0) = std::max(@c(0), @a(i));
|
||||
}
|
||||
cout << "min:" << @b(0) << " max:" << @c(0) << endl;
|
||||
"""
|
||||
)
|
||||
assert b.data == 1, b
|
||||
assert c.data == 3, c
|
||||
```
|
||||
|
||||
Example-4:
|
||||
This example shows how to use dynamic shapes of jittor variables.
|
||||
```
|
||||
a = jt.array([5,-4,3,-2,1])
|
||||
|
||||
# negative shape means the max size of the varying dimension
|
||||
b,c = jt.code([(-5,), (-5,)], [a.dtype, a.dtype], [a],
|
||||
cpu_src="""
|
||||
@alias(a, in0)
|
||||
@alias(b, out0)
|
||||
@alias(c, out1)
|
||||
int num_b=0, num_c=0;
|
||||
for (int i=0; i<a_shape0; i++) {
|
||||
if (@a(i)>0)
|
||||
@b(num_b++) = @a(i);
|
||||
else
|
||||
@c(num_c++) = @a(i);
|
||||
}
|
||||
b->set_shape({num_b});
|
||||
c->set_shape({num_c});
|
||||
"""
|
||||
)
|
||||
assert (b.data == [5,3,1]).all()
|
||||
assert (c.data == [-4,-2]).all()
|
||||
```
|
||||
|
||||
|
||||
CUDA Example-1:
|
||||
This example shows how to use CUDA in a code op.
|
||||
```
|
||||
a = jt.random([100000])
|
||||
b = jt.random([100000])
|
||||
|
@ -75,33 +149,34 @@ struct CodeOp : Op {
|
|||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @in0(i)*@in1(i);
|
||||
}
|
||||
kernel1<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel1<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
''',
|
||||
cuda_grad_src = ['''
|
||||
__global__ static void kernel2(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @dout(i)*@in1(i);
|
||||
}
|
||||
kernel2<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel2<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
''', '''
|
||||
__global__ static void kernel3(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (; i<in0shape0; i+=stride)
|
||||
for (; i<in0_shape0; i+=stride)
|
||||
@out(i) = @dout(i)*@in0(i);
|
||||
}
|
||||
kernel3<<<(in0shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
kernel3<<<(in0_shape0-1)/1024+1, 1024>>>(@ARGS);
|
||||
'''])
|
||||
```
|
||||
|
||||
Example3(CUDA):
|
||||
CUDA Example-2:
|
||||
This example shows how to use multi-dimensional data with CUDA.
|
||||
```
|
||||
a = jt.random((100,100))
|
||||
b = jt.random((100,100))
|
||||
|
@ -109,8 +184,8 @@ struct CodeOp : Op {
|
|||
cuda_src='''
|
||||
__global__ static void kernel1(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @in0(i,j)*@in1(i,j);
|
||||
}
|
||||
kernel1<<<32, 32>>>(@ARGS);
|
||||
|
@ -118,16 +193,16 @@ struct CodeOp : Op {
|
|||
cuda_grad_src = ['''
|
||||
__global__ static void kernel2(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @dout(i,j)*@in1(i,j);
|
||||
}
|
||||
kernel2<<<32, 32>>>(@ARGS);
|
||||
''', '''
|
||||
__global__ static void kernel3(@ARGS_DEF) {
|
||||
@PRECALC
|
||||
for (int i=blockIdx.x; i<in0shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0shape1; j+=blockDim.x)
|
||||
for (int i=blockIdx.x; i<in0_shape0; i+=gridDim.x)
|
||||
for (int j=threadIdx.x; j<in0_shape1; j+=blockDim.x)
|
||||
@out(i,j) = @dout(i,j)*@in0(i,j);
|
||||
}
|
||||
kernel3<<<32, 32>>>(@ARGS);
|
||||
|
@ -136,6 +211,9 @@ struct CodeOp : Op {
|
|||
*/
|
||||
CodeOp(NanoVector shape, NanoString dtype, vector<Var*>&& inputs={}, string&& cpu_src="", vector<string>&& cpu_grad_src={}, string&& cpu_header="", string&& cuda_src="", vector<string>&& cuda_grad_src={}, string&& cuda_header="");
|
||||
|
||||
// @attrs(multiple_outputs)
|
||||
CodeOp(vector<NanoVector>&& shapes, vector<NanoString>&& dtypes, vector<Var*>&& inputs={}, string&& cpu_src="", vector<string>&& cpu_grad_src={}, string&& cpu_header="", string&& cuda_src="", vector<string>&& cuda_grad_src={}, string&& cuda_header="");
|
||||
|
||||
const char* name() const override { return "code"; }
|
||||
VarPtr grad(Var* out, Var* dout, Var* v, int v_index) override;
|
||||
DECLARE_jit_run;
|
||||
|
|
|
@ -49,6 +49,24 @@ static unordered_set<string> unary_ops = {
|
|||
"round",
|
||||
"floor",
|
||||
"ceil",
|
||||
"sin",
|
||||
// @pybind(asin, arcsin)
|
||||
"asin",
|
||||
"sinh",
|
||||
// @pybind(asinh, arcsinh)
|
||||
"asinh",
|
||||
"tan",
|
||||
// @pybind(atan, arctan)
|
||||
"atan",
|
||||
"tanh",
|
||||
// @pybind(atanh, arctanh)
|
||||
"atanh",
|
||||
"cos",
|
||||
// @pybind(acos, arccos)
|
||||
"acos",
|
||||
"cosh",
|
||||
// @pybind(acosh, arccosh)
|
||||
"acosh",
|
||||
};
|
||||
|
||||
UnaryOp::UnaryOp(Var* x, NanoString op) : x(x) {
|
||||
|
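The `@pybind(asin, arcsin)`-style annotations above register each inverse function under both its C-style and numpy-style name. Assuming these bindings are exposed on the `jt` module like the other unary ops, both spellings should refer to the same operator:
```
import jittor as jt
import numpy as np

x = jt.array(np.linspace(-0.9, 0.9, 5).astype("float32"))
# both names are assumed to map to the same "asin" unary op registered above
assert np.allclose(jt.asin(x).data, jt.arcsin(x).data)
```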
@ -92,6 +110,79 @@ VarPtr UnaryOp::grad(Var* out, Var* dout, Var* v, int v_index) {
|
|||
auto twoy = make_binary(two, y, ns_multiply);
|
||||
return make_binary(dout, twoy, ns_divide);
|
||||
}
|
||||
// dsin(x) = cos(x)
|
||||
if (ns == ns_sin)
|
||||
return make_binary(dout, make_unary(x, ns_cos), ns_multiply);
|
||||
// dcos(x) = -sin(x)
|
||||
if (ns == ns_cos)
|
||||
return make_binary(dout, make_unary(make_unary(x, ns_sin), ns_negative), ns_multiply);
|
||||
// dtan(x) = 1/cos^2(x)
|
||||
if (ns == ns_tan) {
|
||||
auto one = make_number(1, x);
|
||||
auto cosx = make_unary(x, ns_cos);
|
||||
auto cos2x = make_binary(cosx, cosx, ns_multiply);
|
||||
return make_binary(dout, cos2x, ns_divide);
|
||||
}
|
||||
// dasin(x) = 1/sqrt(1-x^2)
|
||||
if (ns == ns_asin) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(one, x2, ns_subtract);
|
||||
x2 = make_unary(x2, ns_sqrt);
|
||||
return make_binary(dout, x2, ns_divide);
|
||||
}
|
||||
// dacos(x) = -1/sqrt(1-x^2)
|
||||
if (ns == ns_acos) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(one, x2, ns_subtract);
|
||||
x2 = make_unary(x2, ns_sqrt);
|
||||
return make_unary(make_binary(dout, x2, ns_divide), ns_negative);
|
||||
}
|
||||
// datan(x) = 1/(x^2+1)
|
||||
if (ns == ns_atan) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(one, x2, ns_add);
|
||||
return make_binary(dout, x2, ns_divide);
|
||||
}
|
||||
|
||||
// dsinh(x) = cosh(x)
|
||||
if (ns == ns_sinh)
|
||||
return make_binary(dout, make_unary(x, ns_cosh), ns_multiply);
|
||||
// dcosh(x) = sinh(x)
|
||||
if (ns == ns_cosh)
|
||||
return make_binary(dout, make_unary(x, ns_sinh), ns_multiply);
|
||||
// dtanh(x) = 1/cosh^2(x)
|
||||
if (ns == ns_tanh) {
|
||||
auto cosx = make_unary(x, ns_cosh);
|
||||
auto cos2x = make_binary(cosx, cosx, ns_multiply);
|
||||
return make_binary(dout, cos2x, ns_divide);
|
||||
}
|
||||
|
||||
// dasinh(x) = 1/sqrt(x^2+1)
|
||||
if (ns == ns_asinh) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(x2, one, ns_add);
|
||||
x2 = make_unary(x2, ns_sqrt);
|
||||
return make_binary(dout, x2, ns_divide);
|
||||
}
|
||||
// dacosh(x) = 1/sqrt(x^2-1)
|
||||
if (ns == ns_acosh) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(x2, one, ns_subtract);
|
||||
x2 = make_unary(x2, ns_sqrt);
|
||||
return make_binary(dout, x2, ns_divide);
|
||||
}
|
||||
// datanh(x) = 1/(1-x^2)
|
||||
if (ns == ns_atanh) {
|
||||
auto one = make_number(1, x);
|
||||
auto x2 = make_binary(x, x, ns_multiply);
|
||||
x2 = make_binary(one, x2, ns_subtract);
|
||||
return make_binary(dout, x2, ns_divide);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
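The derivative formulas in the comments above can be sanity-checked numerically without jittor; a small numpy finite-difference test for a few of them:
```
import numpy as np

def num_grad(f, x, eps=1e-6):
    return (f(x + eps) - f(x - eps)) / (2 * eps)

x = np.array([0.3, -0.2, 0.5])
checks = {
    "sin":   (np.sin,     np.cos(x)),                 # dsin(x) = cos(x)
    "tan":   (np.tan,     1 / np.cos(x) ** 2),        # dtan(x) = 1/cos^2(x)
    "asin":  (np.arcsin,  1 / np.sqrt(1 - x ** 2)),   # dasin(x) = 1/sqrt(1-x^2)
    "atanh": (np.arctanh, 1 / (1 - x ** 2)),          # datanh(x) = 1/(1-x^2)
}
for name, (f, analytic) in checks.items():
    assert np.allclose(num_grad(f, x), analytic, atol=1e-4), name
```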
@ -11,13 +11,30 @@ namespace jittor {
|
|||
#define bitwise_not(T,x) (~(x))
|
||||
#define negative(T,x) (-(x))
|
||||
#ifdef JIT_cuda
|
||||
// TODO: add float64 version
|
||||
#define abs(T,x) ::abs(x)
|
||||
#define log(T,x) ::log((T)(x))
|
||||
#define exp(T,x) ::exp((T)(x))
|
||||
#define sqrt(T,x) ::sqrt((T)(x))
|
||||
#define log(T,x) ::logf((T)(x))
|
||||
#define exp(T,x) ::expf((T)(x))
|
||||
#define sqrt(T,x) ::sqrtf((T)(x))
|
||||
#define round(T,x) ((T) ::roundf((x)))
|
||||
#define floor(T,x) ((T) ::floorf((x)))
|
||||
#define ceil(T,x) ((T) ::ceilf((x)))
|
||||
|
||||
#define sin(T,x) ((T) ::sinf((x)))
|
||||
#define asin(T,x) ((T) ::asinf((x)))
|
||||
#define sinh(T,x) ((T) ::sinhf((x)))
|
||||
#define asinh(T,x) ((T) ::asinhf((x)))
|
||||
|
||||
#define cos(T,x) ((T) ::cosf((x)))
|
||||
#define acos(T,x) ((T) ::acosf((x)))
|
||||
#define cosh(T,x) ((T) ::coshf((x)))
|
||||
#define acosh(T,x) ((T) ::acoshf((x)))
|
||||
|
||||
#define tan(T,x) ((T) ::tanf((x)))
|
||||
#define atan(T,x) ((T) ::atanf((x)))
|
||||
#define tanh(T,x) ((T) ::tanhf((x)))
|
||||
#define atanh(T,x) ((T) ::atanhf((x)))
|
||||
|
||||
#else
|
||||
#define abs(T,x) std::abs(x)
|
||||
#define log(T,x) std::log((T)(x))
|
||||
|
@ -26,7 +43,24 @@ namespace jittor {
|
|||
#define round(T,x) ((T)std::round((x)))
|
||||
#define floor(T,x) ((T)std::floor((x)))
|
||||
#define ceil(T,x) ((T)std::ceil((x)))
|
||||
|
||||
#define sin(T,x) ((T) std::sin((x)))
|
||||
#define asin(T,x) ((T) std::asin((x)))
|
||||
#define sinh(T,x) ((T) std::sinh((x)))
|
||||
#define asinh(T,x) ((T) std::asinh((x)))
|
||||
|
||||
#define cos(T,x) ((T) std::cos((x)))
|
||||
#define acos(T,x) ((T) std::acos((x)))
|
||||
#define cosh(T,x) ((T) std::cosh((x)))
|
||||
#define acosh(T,x) ((T) std::acosh((x)))
|
||||
|
||||
#define tan(T,x) ((T) std::tan((x)))
|
||||
#define atan(T,x) ((T) std::atan((x)))
|
||||
#define tanh(T,x) ((T) std::tanh((x)))
|
||||
#define atanh(T,x) ((T) std::atanh((x)))
|
||||
|
||||
#endif
|
||||
|
||||
#define cast(T,x) ((T)(x))
|
||||
|
||||
} // jittor
|
|
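Note that the CUDA branch above maps `log`, `exp`, `sqrt` and the trigonometric functions to the single-precision intrinsics (`::expf`, `::sinf`, ...), in line with the `TODO: add float64 version` comment, so float64 inputs on that path are effectively evaluated at float32 precision. A plain numpy illustration of the size of that effect (no jittor or CUDA required):
```
import numpy as np

x = np.float64(10.123456789)
exact  = np.exp(x)                            # double-precision reference
single = np.float64(np.exp(np.float32(x)))    # what a float-only intrinsic yields
print(abs(single - exact) / exact)            # roughly 1e-7 relative error vs ~1e-16
```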
@ -57,6 +57,14 @@ vector<string> split(const string& s, const string& sep, int max_split) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
string strip(const string& s) {
|
||||
int i=0;
|
||||
while (i<s.size() && (s[i]==' ' || s[i]=='\t' || s[i]=='\n')) i++;
|
||||
int j = s.size();
|
||||
while (j>i && (s[j-1]==' ' || s[j-1]=='\t' || s[j-1]=='\n')) j--;
|
||||
return s.substr(i,j-i);
|
||||
}
|
||||
|
||||
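For reference, the intended behaviour of `strip` (only spaces, tabs and newlines removed from both ends, matching the boundary handling above) can be expressed in one line of Python:
```
def strip(s: str) -> str:
    # same whitespace set as the C++ helper: space, tab, newline
    return s.strip(" \t\n")

assert strip("  \thello world \n") == "hello world"
assert strip(" \n ") == ""
```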
void KernelIR::del_scope() {
|
||||
if (father && (type=="define" || type=="func")) {
|
||||
father->scope[attrs["lvalue"]].remove(this);
|
||||
|
|
|
@ -267,10 +267,12 @@ void ConvTuner::forwardTune(FusedOp* fop) {
|
|||
LOGvvvv << "Expr not match" << src_h << expr_h;
|
||||
continue;
|
||||
}
|
||||
LOGvvvv << "H Expr matched" << src_h << expr_h;
|
||||
if (!rh[0]->is(expr::_number) || !rh[1]->is(expr::_number) || !rh[2]->is(expr::_number)) return;
|
||||
auto src_w = expr::make(riop1->indexes[xw]);
|
||||
if (!expr::match(src_w.get(), expr_w.get(), {"stride", "padding", "dilation"}, {"i"+S(zw), "i"+S(zww)}, rw))
|
||||
return;
|
||||
LOGvvvv << "W Expr matched" << src_w << expr_w;
|
||||
if (!rw[0]->is(expr::_number) || !rw[1]->is(expr::_number) || !rw[2]->is(expr::_number)) return;
|
||||
int stride_h = rh[0]->as_int();
|
||||
int padding_h = -rh[1]->as_int();
|
||||
|
@ -285,7 +287,10 @@ void ConvTuner::forwardTune(FusedOp* fop) {
|
|||
continue;
|
||||
}
|
||||
LOGvvvv << "get stride padding and dilation" << stride_h << padding_h << dilation_h;
|
||||
|
||||
if (xformat == "bacd" && dilation_h != 1) {
|
||||
LOGvvvv << "mkl does not support bacd dilation, continue";
|
||||
continue;
|
||||
}
|
||||
int stride = stride_h;
|
||||
int padding = padding_h;
|
||||
int dilation = dilation_h;
|
||||
|
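The expression matching above recovers the convolution hyper-parameters from reindex indexes of the form `h_in = h_out*stride + kh*dilation - padding` (hence `padding_h = -rh[1]->as_int()`, since the constant term carries a minus sign). A tiny Python illustration of that index relation, with assumed parameter values:
```
def conv_input_index(h_out, kh, stride=2, padding=1, dilation=1):
    # h_in = h_out*stride + kh*dilation - padding
    return h_out * stride + kh * dilation - padding

# output row 3 with a 3-tap kernel reads input rows 5, 6, 7 under these settings
print([conv_input_index(3, kh) for kh in range(3)])
```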
@ -395,6 +400,7 @@ void ConvTuner::backwardTune(FusedOp* fop) {
|
|||
x = riop1->x;
|
||||
y = riop2->x;
|
||||
bo++;
|
||||
LOGvvvv << "backward_w get stride padding and dilation" << stride << padding << dilation;
|
||||
} else if (op->name_ex() == "reindex_reduce.add") {
|
||||
auto rop = (ReindexReduceOp*)op;
|
||||
if (!(rop->y->input() && rop->y->input()->name_ex()=="binary.multiply" && rop->x->input()->tflag==op->tflag))
|
||||
|
@ -470,6 +476,7 @@ void ConvTuner::backwardTune(FusedOp* fop) {
|
|||
w = riop1->x;
|
||||
y = riop2->x;
|
||||
bo+=2;
|
||||
LOGvvvv << "backward_x get stride padding and dilation" << stride << padding << dilation;
|
||||
}
|
||||
|
||||
// TODO: CUDA only support nchw(abcd)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "pybind/py_var_tracer.h"
|
||||
#include "misc/str_utils.h"
|
||||
#include "op.h"
|
||||
#include "var.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
using namespace pybind11::literals;
|
||||
|
|