first commit

张仪 2024-08-21 22:15:12 +08:00
commit b4244090ae
775 changed files with 134703 additions and 0 deletions

29
.dockerignore Normal file

@@ -0,0 +1,29 @@
Dockerfile
**/publish.py
my
.git
.refresh
__pycache__
.ipynb_checkpoints/
.vscode/
__res/
perf.data
perf.data.old
*.swp
*.ipynb
*.pdf
*.zip
*.tgz
test.py
extern/mkl/mkldnn_lnx*/*
data/
build/
venv/
*.md
!*.src.md
!README.md
!README.cn.md
python/jittor.egg-info
dist/
!doc/source/*
__data__

57
.github/workflows/main.yml vendored Normal file

@@ -0,0 +1,57 @@
# This is a basic workflow to help you get started with Actions
name: CI
# Controls when the action will run. Triggers the workflow on push or pull request
# events but only for the master branch
on: [ push ]
# push:
# branches: [ master ]
# pull_request:
# branches: [ master ]
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  test_clang_8_cuda_10:
    # The type of runner that the job will run on
    runs-on: self-hosted
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2
      - name: test
        run: |
          export cache_name=github_${GITHUB_REF##*/}
          export cc_path="clang++-8"
          export cc_flags=" -g "
          export log_sync=0
          export log_v=0
          export PYTHONIOENCODING=utf8
          export PYTHONPATH=`pwd`/python
          export nvcc_path=/usr/local/cuda/bin/nvcc
          python3.7 -c "import jittor"
          python3.7 -m jittor.test -v
  test_gcc:
    # The type of runner that the job will run on
    runs-on: self-hosted
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2
      - name: test
        run: |
          export cache_name=github_${GITHUB_REF##*/}
          export cc_path="g++"
          export cc_flags=" -g "
          export log_sync=0
          export log_v=0
          export PYTHONIOENCODING=utf8
          export PYTHONPATH=`pwd`/python
          export nvcc_path=
          python3.7 -c "import jittor"
          python3.7 -m jittor.test -v

30
.gitignore vendored Normal file

@@ -0,0 +1,30 @@
my
.refresh
.DS_Store
__pycache__
.ipynb_checkpoints/
.vscode/
__res/
perf.data
perf.data.old
*.swp
*.ipynb
*.pdf
*.zip
*.tgz
*.obj
test.py
extern/mkl/mkldnn_lnx*/*
data/
build/
venv/
*.md
!*.src.md
!README.md
!README.cn.md
!CHANGELOG.md
python/jittor.egg-info
dist/
!doc/source/*
core
__data__

46
.gitlab-ci.yml Normal file

@@ -0,0 +1,46 @@
test_clang_8_cuda_10:
  tags:
    - clang
    - cuda
  script:
    - export cache_name=$CI_COMMIT_REF_NAME
    - export cc_path="clang-8"
    - export cc_flags=" -g "
    - export log_sync=0
    - export log_v=0
    - export PYTHONIOENCODING=utf8
    - export PYTHONPATH=`pwd`/python
    - export nvcc_path=/usr/local/cuda/bin/nvcc
    - python3.7 -c "import jittor"
    - python3.7 -m jittor.test -v
# test_icc_19:
#   tags:
#     - icc
#   script:
#     - export cache_name=$CI_COMMIT_REF_NAME
#     - export cc_path="/opt/intel/system_studio_2019/bin/icc"
#     - export cc_flags=" -g "
#     - export log_sync=0
#     - export log_v=0
#     - export PYTHONIOENCODING=utf8
#     - export PYTHONPATH=`pwd`/python
#     - export LD_LIBRARY_PATH="/opt/intel/system_studio_2019/compilers_and_libraries/linux/lib/intel64"
#     - python3.7 -c "import jittor"
#     - python3.7 -m jittor.test -v
test_g++:
  tags:
    - gcc
  script:
    - export cache_name=$CI_COMMIT_REF_NAME
    - export cc_path="g++"
    - export cc_flags=" -g "
    - export log_sync=0
    - export log_v=0
    - export PYTHONIOENCODING=utf8
    - export PYTHONPATH=`pwd`/python
    - export nvcc_path=
    - python3.7 -c "import jittor"
    - python3.7 -m jittor.test -v

75
CHANGELOG.md Normal file

@@ -0,0 +1,75 @@
# CHANGELOG
### Jittor 1.1.5.5
* Added the numpy code operator: custom operators can now be written directly with numpy. Example:
```python
import jittor as jt
def forward_code(np, data):
    a = data["inputs"][0]
    b = data["outputs"][0]
    np.add(a, a, out=b)

def backward_code(np, data):
    dout = data["dout"]
    out = data["outputs"][0]
    np.copyto(out, dout*2.0)

a = jt.random((5,1))
b = jt.numpy_code(
    a.shape,
    a.dtype,
    [a],
    forward_code,
    [backward_code],
)
```
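A hedged sanity check of this operator (our addition, continuing the snippet above; it assumes `Var.sum` and `jt.grad` behave as in the Function example below): the forward computes `b = a + a`, so the gradient with respect to `a` should be 2 everywhere.
```python
print(b.data)            # equals 2 * a.data
da = jt.grad(b.sum(), a)
print(da.data)           # expected: all elements are 2.0
```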
* Added the Function module, which lets users define custom backward (gradient) computation. Example:
```python
import jittor as jt
from jittor import Function
class MyFunc(Function):
    def execute(self, x, y):
        self.x = x
        self.y = y
        return x*y, x/y

    def grad(self, grad0, grad1):
        return grad0 * self.y, grad1 * self.x

a = jt.array(3.0)
b = jt.array(4.0)
func = MyFunc()
c, d = func(a, b)
da, db = jt.grad(c+d*3, [a, b])
assert da.data == 4
assert db.data == 9
```
* Added the no_grad scope: all variables created inside it stop gradients:
```python
import jittor as jt
with jt.no_grad():
    ...
```
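A minimal sketch of the effect (our example, not from the changelog; it assumes `jt.grad` returns a zero gradient for variables created under no_grad):
```python
import jittor as jt
x = jt.array(3.0)
with jt.no_grad():
    y = x * x        # y is created inside no_grad, so it stops gradients
dx = jt.grad(y, x)
print(dx.data)       # expected: 0.0, since no graph was recorded
```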
* Added bmm (batch matrix multiply) support:
```python
import jittor as jt
from jittor import nn
batch, n, m, k = 100, 5, 6, 7
a = jt.random((batch, n, m))
b = jt.random((batch, m, k))
c = nn.bmm(a, b)
```
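To make the semantics concrete, a self-contained, hedged check (our addition) comparing `nn.bmm` against per-slice numpy matmul:
```python
import numpy as np
import jittor as jt
from jittor import nn

batch, n, m, k = 100, 5, 6, 7
a = jt.random((batch, n, m))
b = jt.random((batch, m, k))
c = nn.bmm(a, b)
# bmm multiplies each batch slice independently: c[i] = a[i] @ b[i]
ref = np.stack([a.data[i] @ b.data[i] for i in range(batch)])
assert np.allclose(c.data, ref, atol=1e-4)
```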
* Fixed unsqueeze

50
Dockerfile Normal file

@@ -0,0 +1,50 @@
# docker build commands
ARG FROM_IMAGE=ubuntu:18.04
FROM ${FROM_IMAGE}
RUN apt update && apt install ca-certificates -y
# change tsinghua mirror
RUN echo \
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list
RUN apt update && apt install wget \
python3.7 python3.7-dev \
g++ build-essential openssh-server -y
WORKDIR /usr/src/jittor
RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7
ENV PYTHONIOENCODING utf8
# change tsinghua mirror
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3 install \
numpy \
tqdm \
pillow \
astunparse \
notebook
RUN pip3 install matplotlib
RUN apt install openmpi-bin openmpi-common libopenmpi-dev -y
RUN pip3 install jittor --timeout 100 && python3.7 -m jittor.test.test_example
RUN pip3 uninstall jittor -y
COPY . .
RUN pip3 install . --timeout 100
RUN python3.7 -m jittor.test.test_example
CMD python3.7 -m jittor.notebook --allow-root --ip=0.0.0.0
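A typical way to build and run this image (the tag `jittor:latest` is our choice, not part of the repository):
```bash
docker build -t jittor:latest .
docker run -it -p 8888:8888 jittor:latest   # serves the notebook defined by CMD
```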

203
LICENSE.txt Normal file

@@ -0,0 +1,203 @@
Copyright (c) 2023 Jittor. All Rights Reserved
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2023 Jittor. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

5
MANIFEST.in Normal file

@@ -0,0 +1,5 @@
exclude __data__
exclude __pycache__
prune **/__data__/
prune **/__pycache__
global-exclude *.pyc

422
README.cn.md Normal file

@@ -0,0 +1,422 @@
# Jittor: a Just-in-time (JIT) deep learning framework
![Jittor Logo](https://cg.cs.tsinghua.edu.cn/jittor/favicon_package_v0/JittorLogo_Final1220.svg)
[Quickstart](#quickstart) | [Install](#install) | [Tutorial](#tutorial) | [English](./README.md)
Jittor is a high-performance deep learning framework based on JIT compiling and meta-operators. The whole framework and meta-operators are compiled just-in-time. A powerful op compiler and tuner are integrated into Jittor, allowing it to generate specialized high-performance code for your model. Jittor also contains a rich collection of high-performance model libraries, covering image recognition, detection, segmentation, generation, differentiable rendering, geometric learning, reinforcement learning, and more.
The front-end language is Python, using a module design and dynamic graph execution, currently the most popular interface design among deep learning frameworks. The back-end is implemented in high-performance languages such as CUDA and C++.
Related links:
* [Jittor website](https://cg.cs.tsinghua.edu.cn/jittor/)
* [Jittor tutorials](https://cg.cs.tsinghua.edu.cn/jittor/tutorial/)
* [Jittor models](https://cg.cs.tsinghua.edu.cn/jittor/resources/)
* [Jittor documents](https://cg.cs.tsinghua.edu.cn/jittor/assets/docs/index.html)
* [Github](https://github.com/jittor/jittor), [GitLink](https://www.gitlink.org.cn/jittor/jittor), [Gitee](https://gitee.com/jittor/jittor)
* [Jittor forum](https://discuss.jittor.org/)
* [Awesome Jittor list](https://github.com/Jittor/jittor/blob/master/AWESOME-JITTOR-LIST.md)
* IM: QQ Group(761222083)
The following example shows how to model a two-layer neural network step by step and train it from scratch in a few lines of Python code.
```python
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np
class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)
        self.relu = nn.Relu()
        self.layer2 = nn.Linear(10, 1)

    def execute(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

def get_data(n):  # generate random data for training test
    for i in range(n):
        x = np.random.rand(batch_size, 1)
        y = x*x
        yield jt.float32(x), jt.float32(y)

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i, (x, y) in enumerate(get_data(n)):
    pred_y = model(x)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")
```
## Contents
* [Quickstart](#quickstart)
* [Install](#install)
* [Tutorial](#tutorial)
* [Contributing](#contributing)
* [The Team](#the-team)
* [License](#license)
## Quickstart
We provide some jupyter notebooks to help you quickly get started with Jittor.
- [Example: Model definition and training][1]
- [Basics: Op, Var][2]
- [Meta-operator: Implement your own convolution with Meta-operator][3]
## Install
Jittor environment requirements:
| OS | CPU | Python | Compiler | (Optional) GPU platform |
|--------------------------------------------------------|-------------------------------------|--------|--------------|---------------------------------------------|
| Linux<br>(Ubuntu, CentOS, Arch, <br>UOS, KylinOS, ...) | x86 <br>x86_64 <br>ARM <br>loongson | >= 3.7 | g++ >=5.4 | Nvidia CUDA >= 10.0, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-tar) <br> or [AMD ROCm](https://docs.amd.com/) >= 4.0 <br> or [Hygon DCU DTK](https://tycloud.hpccube.com/doc/1.0.6/11277/general-handbook/software-tutorial/jittor.html) >= 22.04 |
| macOS <br>(>= 10.14 Mojave) | intel<br>Apple Silicon | >= 3.7 | clang >= 8.0 | - |
| Windows 10 & 11 | x86_64 | [>= 3.8](https://www.python.org/downloads/windows/) | - | Nvidia CUDA >= 10.2 [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#install-windows) |
Jittor offers three installation methods: pip, docker, and manual.
## Pip install
The commands below are for Ubuntu. If you are using another Linux distribution (e.g. CentOS), please install the dependencies first (Python >= 3.7, g++ >= 5.4) or use the **docker install** instead. If you already have a compiler and a matching Python version, we strongly recommend this method
(if GitHub is not accessible, Jittor can be downloaded from the Jittor homepage):
```bash
sudo apt install python3.7-dev libomp-dev
python3.7 -m pip install jittor
# or install from github(latest version)
# python3.7 -m pip install git+https://github.com/Jittor/jittor.git
python3.7 -m jittor.test.test_example
```
If the test passes, congratulations, the installation is complete.
Jittor automatically searches the PATH for a suitable compiler. If you want to specify a compiler manually, use the environment variables `cc_path` and `nvcc_path` (optional).
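For example (the values below are placeholders; adjust them to your system):
```bash
export cc_path="g++"
export nvcc_path="/usr/local/cuda/bin/nvcc"
python3.7 -m jittor.test.test_example
```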
### macOS install
On macOS, please install the additional dependencies with [homebrew](https://brew.sh).
```bash
brew install libomp
```
Then you can install jittor through pip and test that it runs successfully.
```bash
python3.7 -m pip install jittor
python3.7 -m jittor.test.test_example
```
Currently jittor only supports CPU computation on macOS.
### Windows install
On Windows, please prepare Python >= 3.8; install instructions are listed below (conda needs extra steps):
```bash
# check your python version(>=3.8)
python --version
python -m pip install jittor
# if conda is used
conda install pywin32
```
On Windows, jittor will automatically detect your GPU and install the matching CUDA. Please make sure your NVIDIA driver supports CUDA 10.2 or above. You can also manually let Jittor install CUDA with the following command:
```bash
python -m jittor_utils.install_cuda
```
## Docker install
We provide a Docker installation method to save you from configuring the environment. It works as follows:
```
# CPU only(Linux)
docker run -it --network host jittor/jittor
# CPU and CUDA(Linux)
docker run -it --network host --gpus all jittor/jittor-cuda
# CPU only(Mac and Windows)
docker run -it -p 8888:8888 jittor/jittor
```
For a detailed Docker tutorial, see [Install Jittor via Docker on Windows/Mac/Linux](https://cg.cs.tsinghua.edu.cn/jittor/tutorial/2020-5-15-00-00-docker/).
## Manual install
We will show how to install Jittor on Ubuntu 16.04 step by step. Other Linux distributions may use similar commands.
### Step 1: Choose your back-end compiler
```bash
# g++
sudo apt install g++ build-essential libomp-dev
# OR clang++-8
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```
### Step 2: Install Python and python-dev
Jittor needs Python >= 3.7.
```bash
sudo apt install python3.7 python3.7-dev
```
### Step 3: Run Jittor
The whole framework is compiled just-in-time. Let's install jittor via pip:
```bash
git clone https://github.com/Jittor/jittor.git
sudo pip3.7 install ./jittor
export cc_path="clang++-8"
# if other compiler is used, change cc_path
# export cc_path="g++"
# export cc_path="icc"
# run a simple test
python3.7 -m jittor.test.test_example
```
If the test passes, your Jittor is ready.
### Optional Step 4: Enable CUDA
Using CUDA in Jittor is very simple: just set the environment variable `nvcc_path`.
```bash
# replace this var with your nvcc location
export nvcc_path="/usr/local/cuda/bin/nvcc"
# run a simple cuda test
python3.7 -m jittor.test.test_cuda
```
If the test passes, you can enable CUDA in Jittor by setting the `use_cuda` flag.
```python
import jittor as jt
jt.flags.use_cuda = 1
```
### Optional Step 5: Test Resnet18 training
To check the integrity of Jittor, you can run the Resnet18 training test. Note: this test requires 6 GB of GPU RAM.
```bash
python3.7 -m jittor.test.test_resnet
```
If these tests fail, please report the bug to us; contributions to Jittor are very welcome ^_^
## Tutorial
In the tutorial section, we will briefly explain the basic concepts of Jittor.
To train your model with Jittor, there are two main concepts you need to know:
* Var: basic data type of jittor
* Operations: Jittor's operations are similar to numpy's
### Var
First, let's get started with Var. Var is the basic data type of jittor. For efficiency, computation in Jittor is asynchronous; if you want to access the data, `Var.data` can be used for synchronous data access.
```python
import jittor as jt
a = jt.float32([1,2,3])
print (a)
print (a.data)
# Output: float32[3,]
# Output: [ 1. 2. 3.]
```
We can also give a variable a name.
```python
a.name('a')
print(a.name())
# Output: a
```
### Operations
Jittor's operations are similar to numpy's. Let's try some operations. We create Var `a` and `b` via the operation `jt.float32`, and multiply them. Printing those variables shows they have the same shape and dtype.
```python
import jittor as jt
a = jt.float32([1,2,3])
b = jt.float32([4,5,6])
c = a*b
print(a,b,c)
print(type(a), type(b), type(c))
# Output: float32[3,] float32[3,] float32[3,]
# Output: <class 'jittor_core.Var'> <class 'jittor_core.Var'> <class 'jittor_core.Var'>
```
Besides that, all operators of the form `jt.xxx(Var, ...)` have an alias `Var.xxx(...)`. For example:
```python
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
If you want to know all the operations Jittor supports, run `help(jt.ops)`. All operations found under `jt.ops.xxx` can also be used via the alias `jt.xxx`.
```python
help(jt.ops)
# Output:
# abs(x: core.Var) -> core.Var
# add(x: core.Var, y: core.Var) -> core.Var
# array(data: array) -> core.Var
# binary(x: core.Var, y: core.Var, op: str) -> core.Var
# ......
```
### More
If you want to know more about Jittor, please check out the notebooks below:
* Quickstart
    * [Example: Model definition and training][1]
    * [Basics: Op, Var][2]
    * [Meta-operator: Implement your own convolution with Meta-operator][3]
* Advanced
    * [Custom Op: write your operator with C++ and CUDA and JIT compile it][4]
    * [Profiler: Profiling your model][5]
    * Jtune: Tool for performance tuning
[1]: python/jittor/notebook/example.src.md "example"
[2]: python/jittor/notebook/basics.src.md "basics"
[3]: python/jittor/notebook/meta_op.src.md "meta_op"
[4]: python/jittor/notebook/custom_op.src.md "custom_op"
[5]: python/jittor/notebook/profiler.src.md "profiler"
These notebooks can be run on your own computer with `python3.7 -m jittor.notebook`.
## Contributing
Jittor is still young. It may contain bugs and issues. Please report them in our bug tracking system. Contributions are welcome. Besides, if you have any ideas about Jittor, please let us know.
You can help Jittor in the following ways:
* Cite Jittor in your paper
* Recommend Jittor to your friends
* Contribute code
* Contribute tutorials and documentation
* File an issue
* Answer jittor-related questions
* Star the repository
* Keep an eye on jittor
* ......
## Contact Us
Website: http://cg.cs.tsinghua.edu.cn/jittor/
Email: jittor@qq.com
File an issue: https://github.com/Jittor/jittor/issues
QQ Group: 761222083
## The Team
Jittor is currently maintained by the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, please join us!
## Citation
```
@article{hu2020jittor,
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
journal={Science China Information Sciences},
volume={63},
number={222103},
pages={1--21},
year={2020}
}
```
## License
Jittor is Apache 2.0 licensed, as found in the LICENSE.txt file.

416
README.md Normal file

@@ -0,0 +1,416 @@
# Jittor: a Just-in-time (JIT) deep learning framework
![Jittor Logo](https://cg.cs.tsinghua.edu.cn/jittor/favicon_package_v0/JittorLogo_Final1220.svg)
[Quickstart](#quickstart) | [Install](#install) | [Tutorial](#tutorial) | [简体中文](./README.cn.md)
Jittor is a high-performance deep learning framework based on JIT compiling and meta-operators. The whole framework and meta-operators are compiled just-in-time. A powerful op compiler and tuner are integrated into Jittor, allowing it to generate specialized high-performance code for your model. Jittor also contains a rich collection of high-performance model libraries, covering image recognition, detection, segmentation, generation, differentiable rendering, geometric learning, reinforcement learning, and more.
The front-end language is Python, using a module design and dynamic graph execution, currently the most popular interface design among deep learning frameworks. The back-end is implemented in high-performance languages such as CUDA and C++.
Related Links:
* [Jittor Website](https://cg.cs.tsinghua.edu.cn/jittor/)
* [Jittor Tutorials](https://cg.cs.tsinghua.edu.cn/jittor/tutorial/)
* [Jittor Models](https://cg.cs.tsinghua.edu.cn/jittor/resources/)
* [Jittor Documents](https://cg.cs.tsinghua.edu.cn/jittor/assets/docs/index.html)
* [Github](https://github.com/jittor/jittor), [GitLink](https://www.gitlink.org.cn/jittor/jittor), [Gitee](https://gitee.com/jittor/jittor)
* [Jittor Forum](https://discuss.jittor.org/)
* [Awesome Jittor List](https://github.com/Jittor/jittor/blob/master/AWESOME-JITTOR-LIST.md)
* IM: QQ Group(761222083)
The following example shows how to model a two-layer neural network step by step and train it from scratch in a few lines of Python code.
```python
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np
class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)
        self.relu = nn.Relu()
        self.layer2 = nn.Linear(10, 1)

    def execute(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

def get_data(n):  # generate random data for training test
    for i in range(n):
        x = np.random.rand(batch_size, 1)
        y = x*x
        yield jt.float32(x), jt.float32(y)

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i, (x, y) in enumerate(get_data(n)):
    pred_y = model(x)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")
```
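As a quick follow-up (our addition, continuing the example above), the trained model can be evaluated on a fresh input; since the target function is y = x*x, the prediction for 0.5 should approach 0.25:
```python
test_x = jt.float32([[0.5]])
print(model(test_x).data)   # expected to be close to [[0.25]] after training
```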
## Contents
* [Quickstart](#quickstart)
* [Install](#install)
* [Tutorial](#tutorial)
* [Contributing](#contributing)
* [The Team](#the-team)
* [License](#license)
## Quickstart
We provide some jupyter notebooks to help you quickly get started with Jittor.
- [Example: Model definition and training][1]
- [Basics: Op, Var][2]
- [Meta-operator: Implement your own convolution with Meta-operator][3]
## Install
Jittor environment requirements:
| OS | CPU | Python | Compiler | (Optional) GPU platform |
|--------------------------------------------------------|-------------------------------------|--------|--------------|---------------------------------------------|
| Linux<br>(Ubuntu, CentOS, Arch, <br>UOS, KylinOS, ...) | x86 <br>x86_64 <br>ARM <br>loongson | >= 3.7 | g++ >=5.4 | Nvidia CUDA >= 10.0, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-tar) <br> or [AMD ROCm](https://docs.amd.com/) >= 4.0 <br> or [Hygon DCU DTK](https://tycloud.hpccube.com/doc/1.0.6/11277/general-handbook/software-tutorial/jittor.html) >= 22.04 |
| macOS <br>(>= 10.14 Mojave) | intel<br>Apple Silicon | >= 3.7 | clang >= 8.0 | - |
| Windows 10 & 11 | x86_64 | [>= 3.8](https://www.python.org/downloads/windows/) | - | Nvidia CUDA >= 10.2 [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#install-windows) |
Jittor offers three ways to install: pip, docker, or manual.
## Pip install
```bash
sudo apt install python3.7-dev libomp-dev
python3.7 -m pip install jittor
# or install from github(latest version)
# python3.7 -m pip install git+https://github.com/Jittor/jittor.git
python3.7 -m jittor.test.test_example
```
### macOS install
Please first install additional dependencies with [homebrew](https://brew.sh).
```bash
brew install libomp
```
Then you can install jittor through pip and run the example.
```bash
python3.7 -m pip install jittor
python3.7 -m jittor.test.test_example
```
Currently jittor only supports CPU on macOS.
### Windows install
```bash
# check your python version(>=3.8)
python --version
python -m pip install jittor
# if conda is used
conda install pywin32
```
On Windows, jittor will automatically detect and install CUDA; please make sure your NVIDIA driver supports CUDA 10.2 or above, or manually let jittor install CUDA for you:
```bash
python -m jittor_utils.install_cuda
```
## Docker Install
We provide a Docker installation method to save you from configuring the environment. The Docker installation method is as follows:
```
# CPU only(Linux)
docker run -it --network host jittor/jittor
# CPU and CUDA(Linux)
docker run -it --network host --gpus all jittor/jittor-cuda
# CPU only(Mac and Windows)
docker run -it -p 8888:8888 jittor/jittor
```
## Manual install
We will show how to install Jittor on Ubuntu 16.04 step by step. Other Linux distributions may use similar commands.
### Step 1: Choose your back-end compiler
```bash
# g++
sudo apt install g++ build-essential libomp-dev
# OR clang++-8
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```
### Step 2: Install Python and python-dev
Jittor needs Python >= 3.7.
```bash
sudo apt install python3.7 python3.7-dev
```
### Step 3: Run Jittor
The whole framework is compiled just-in-time. Let's install jittor via pip:
```bash
git clone https://github.com/Jittor/jittor.git
sudo pip3.7 install ./jittor
export cc_path="clang++-8"
# if other compiler is used, change cc_path
# export cc_path="g++"
# export cc_path="icc"
# run a simple test
python3.7 -m jittor.test.test_example
```
If the test passes, your Jittor is ready.
### Optional Step 4: Enable CUDA
Using CUDA in Jittor is very simple: just set the environment variable `nvcc_path`.
```bash
# replace this var with your nvcc location
export nvcc_path="/usr/local/cuda/bin/nvcc"
# run a simple cuda test
python3.7 -m jittor.test.test_cuda
```
If the test passes, you can enable CUDA in Jittor by setting the `use_cuda` flag.
```python
import jittor as jt
jt.flags.use_cuda = 1
```
### Optional Step 5: Test Resnet18 training
To check the integrity of Jittor, you can run the Resnet18 training test. Note: this test requires 6 GB of GPU RAM.
```bash
python3.7 -m jittor.test.test_resnet
```
If these tests fail, please report the bug to us, and feel free to contribute ^_^
## Tutorial
In the tutorial section, we will briefly explain the basic concepts of Jittor.
To train your model with Jittor, there are two main concepts you need to know:
* Var: basic data type of jittor
* Operations: Jittor's operations are similar to numpy's
### Var
First, let's get started with Var. Var is the basic data type of jittor. For efficiency, computation in Jittor is asynchronous; if you want to access the data, `Var.data` can be used for synchronous data access.
```python
import jittor as jt
a = jt.float32([1,2,3])
print (a)
print (a.data)
# Output: float32[3,]
# Output: [ 1. 2. 3.]
```
We can also give a variable a name.
```python
a.name('a')
print(a.name())
# Output: a
```
### Operations
Jittor's operations are similar to numpy's. Let's try some operations. We create Var `a` and `b` via the operation `jt.float32`, and multiply them. Printing those variables shows they have the same shape and dtype.
```python
import jittor as jt
a = jt.float32([1,2,3])
b = jt.float32([4,5,6])
c = a*b
print(a,b,c)
print(type(a), type(b), type(c))
# Output: float32[3,] float32[3,] float32[3,]
# Output: <class 'jittor_core.Var'> <class 'jittor_core.Var'> <class 'jittor_core.Var'>
```
Besides that, all operators of the form `jt.xxx(Var, ...)` have an alias `Var.xxx(...)`. For example:
```python
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
If you want to know all the operations Jittor supports, try `help(jt.ops)`. All operations found under `jt.ops.xxx` can be used via the alias `jt.xxx`.
```python
help(jt.ops)
# Output:
# abs(x: core.Var) -> core.Var
# add(x: core.Var, y: core.Var) -> core.Var
# array(data: array) -> core.Var
# binary(x: core.Var, y: core.Var, op: str) -> core.Var
# ......
```
### More
If you want to know more about Jittor, please check out the notebooks below:
* Quickstart
    - [Example: Model definition and training][1]
    - [Basics: Op, Var][2]
    - [Meta-operator: Implement your own convolution with Meta-operator][3]
* Advanced
    - [Custom Op: write your operator with C++ and CUDA and JIT compile it][4]
    - [Profiler: Profiling your model][5]
    - Jtune: Tool for performance tuning
[1]: python/jittor/notebook/example.src.md "example"
[2]: python/jittor/notebook/basics.src.md "basics"
[3]: python/jittor/notebook/meta_op.src.md "meta_op"
[4]: python/jittor/notebook/custom_op.src.md "custom_op"
[5]: python/jittor/notebook/profiler.src.md "profiler"
Those notebooks can be run on your own computer with `python3.7 -m jittor.notebook`.
## Contributing
Jittor is still young. It may contain bugs and issues. Please report them in our bug tracking system. Contributions are welcome. Besides, if you have any ideas about Jittor, please let us know.
You can help Jittor in the following ways:
* Cite Jittor in your paper
* Recommend Jittor to your friends
* Contribute code
* Contribute tutorials and documentation
* File an issue
* Answer jittor-related questions
* Star the repository
* Keep an eye on jittor
* ......
## Contact Us
Website: http://cg.cs.tsinghua.edu.cn/jittor/
Email: jittor@qq.com
File an issue: https://github.com/Jittor/jittor/issues
QQ Group: 836860279
<img src="https://github.com/Jittor/jittor/assets/62846124/8dd830bd-b31c-4e4f-9a78-5fd7a3409145" width="200"/>
## The Team
Jittor is currently maintained by the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, please join us!
## Citation
```
@article{hu2020jittor,
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
journal={Science China Information Sciences},
volume={63},
number={222103},
pages={1--21},
year={2020}
}
```
## License
Jittor is Apache 2.0 licensed, as found in the LICENSE.txt file.

524
README.src.md Normal file

@@ -0,0 +1,524 @@
# Jittor: a Just-in-time (JIT) deep learning framework
![Jittor Logo](https://cg.cs.tsinghua.edu.cn/jittor/favicon_package_v0/JittorLogo_Final1220.svg)
[Quickstart](#quickstart) | [Install](#install) | [Tutorial](#tutorial) | [Chinese](./README.cn.md)
Jittor is a high-performance deep learning framework based on JIT compiling and meta-operators. The whole framework and meta-operators are compiled just-in-time. A powerful op compiler and tuner are integrated into Jittor, allowing it to generate specialized high-performance code for your model. Jittor also contains a rich collection of high-performance model libraries, covering image recognition, detection, segmentation, generation, differentiable rendering, geometric learning, reinforcement learning, and more.
The front-end language is Python, using a module design and dynamic graph execution, currently the most popular interface design among deep learning frameworks. The back-end is implemented in high-performance languages such as CUDA and C++.
Related Links:
* [Jittor Website](https://cg.cs.tsinghua.edu.cn/jittor/)
* [Jittor Tutorials](https://cg.cs.tsinghua.edu.cn/jittor/tutorial/)
* [Jittor Models](https://cg.cs.tsinghua.edu.cn/jittor/resources/)
* [Jittor Documents](https://cg.cs.tsinghua.edu.cn/jittor/assets/docs/index.html)
* [Github](https://github.com/jittor/jittor), [GitLink](https://www.gitlink.org.cn/jittor/jittor), [Gitee](https://gitee.com/jittor/jittor)
* [Jittor Forum](https://discuss.jittor.org/)
* [Awesome Jittor List](https://github.com/Jittor/jittor/blob/master/AWESOME-JITTOR-LIST.md)
* IM: QQ Group(761222083)
The following example shows how to model a two-layer neural network step by step and train it from scratch in a few lines of Python code.
```python
import jittor as jt
from jittor import Module
from jittor import nn
import numpy as np
class Model(Module):
    def __init__(self):
        self.layer1 = nn.Linear(1, 10)
        self.relu = nn.Relu()
        self.layer2 = nn.Linear(10, 1)

    def execute(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

def get_data(n):  # generate random data for training test
    for i in range(n):
        x = np.random.rand(batch_size, 1)
        y = x*x
        yield jt.float32(x), jt.float32(y)

learning_rate = 0.1
batch_size = 50
n = 1000

model = Model()
optim = nn.SGD(model.parameters(), learning_rate)

for i, (x, y) in enumerate(get_data(n)):
    pred_y = model(x)
    dy = pred_y - y
    loss = dy * dy
    loss_mean = loss.mean()
    optim.step(loss_mean)
    print(f"step {i}, loss = {loss_mean.data.sum()}")
```
## Contents
* [Quickstart](#quickstart)
* [Install](#install)
* [Tutorial](#tutorial)
* [Contributing](#contributing)
* [The Team](#the-team)
* [License](#license)
## Quickstart
We provide some jupyter notebooks to help you quickly get started with Jittor.
- [Example: Model definition and training][1]
- [Basics: Op, Var][2]
- [Meta-operator: Implement your own convolution with Meta-operator][3]
## Install
Jittor environment requirements:
| OS | CPU | Python | Compiler | (Optional) GPU platform |
|--------------------------------------------------------|-------------------------------------|--------|--------------|---------------------------------------------|
| Linux<br>(Ubuntu, CentOS, Arch, <br>UOS, KylinOS, ...) | x86 <br>x86_64 <br>ARM <br>loongson | >= 3.7 | g++ >=5.4 | Nvidia CUDA >= 10.0, [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-tar) <br> or [AMD ROCm](https://docs.amd.com/) >= 4.0 <br> or [Hygon DCU DTK](https://tycloud.hpccube.com/doc/1.0.6/11277/general-handbook/software-tutorial/jittor.html) >= 22.04 |
| macOS <br>(>= 10.14 Mojave) | intel<br>Apple Silicon | >= 3.7 | clang >= 8.0 | - |
| Windows 10 & 11 | x86_64 | [>= 3.8](https://www.python.org/downloads/windows/) | - | Nvidia CUDA >= 10.2 [cuDNN](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#install-windows) |
Jittor offers three ways to install: pip, docker, or manual.
## Pip install
The commands below are for Ubuntu. If you are using another Linux distribution (e.g. CentOS), please install the dependencies first (Python >= 3.7, g++ >= 5.4) or use the **docker install** instead. If you already have a compiler and a matching Python version, we strongly recommend this method
(if GitHub is not accessible, Jittor can be downloaded from the Jittor homepage):
```bash
sudo apt install python3.7-dev libomp-dev
python3.7 -m pip install jittor
# or install from github(latest version)
# python3.7 -m pip install git+https://github.com/Jittor/jittor.git
python3.7 -m jittor.test.test_example
```
If the test passes, congratulations, the installation is complete.
Jittor automatically searches the PATH for a suitable compiler. If you want to specify a compiler manually, use the environment variables `cc_path` and `nvcc_path` (optional).
### macOS install
Please first install the additional dependencies with [homebrew](https://brew.sh).
```bash
brew install libomp
```
Then you can install jittor through pip and run the example.
```bash
python3.7 -m pip install jittor
python3.7 -m jittor.test.test_example
```
Currently jittor only supports CPU on macOS.
### Windows install
Windows users should prepare Python >= 3.8; install instructions are listed below (conda needs extra steps):
```bash
# check your python version(>=3.8)
python --version
python -m pip install jittor
# if conda is used
conda install pywin32
```
On Windows, jittor will automatically detect your GPU and install the matching CUDA. Please make sure your NVIDIA driver supports CUDA 10.2 or above, or manually let jittor install CUDA for you:
```bash
python -m jittor_utils.install_cuda
```
## Docker Install
We provide a Docker installation method to save you from configuring the environment. It works as follows:
```
# CPU only(Linux)
docker run -it --network host jittor/jittor
# CPU and CUDA(Linux)
docker run -it --network host --gpus all jittor/jittor-cuda
# CPU only(Mac and Windows)
docker run -it -p 8888:8888 jittor/jittor
```
For a detailed Docker tutorial, see [Install Jittor via Docker on Windows/Mac/Linux](https://cg.cs.tsinghua.edu.cn/jittor/tutorial/2020-5-15-00-00-docker/).
## Manual install
We will show how to install Jittor on Ubuntu 16.04 step by step. Other Linux distributions may use similar commands.
### Step 1: Choose your back-end compiler
```bash
# g++
sudo apt install g++ build-essential libomp-dev
# OR clang++-8
wget -O - https://raw.githubusercontent.com/Jittor/jittor/master/script/install_llvm.sh > /tmp/llvm.sh
bash /tmp/llvm.sh 8
```
### Step 2: Install Python and python-dev
Jittor needs Python >= 3.7.
```bash
sudo apt install python3.7 python3.7-dev
```
### Step 3: Run Jittor
The whole framework is compiled just-in-time. Let's install jittor via pip:
```bash
git clone https://github.com/Jittor/jittor.git
sudo pip3.7 install ./jittor
export cc_path="clang++-8"
# if other compiler is used, change cc_path
# export cc_path="g++"
# export cc_path="icc"
# run a simple test
python3.7 -m jittor.test.test_example
```
If the test passes, your Jittor is ready.
### Optional Step 4: Enable CUDA
Using CUDA in Jittor is very simple: just set the environment variable `nvcc_path`.
```bash
# replace this var with your nvcc location
export nvcc_path="/usr/local/cuda/bin/nvcc"
# run a simple cuda test
python3.7 -m jittor.test.test_cuda
```
If the test passes, you can enable CUDA in Jittor by setting the `use_cuda` flag.
```python
import jittor as jt
jt.flags.use_cuda = 1
```
### Optional Step 5: Test Resnet18 training
To check the integrity of Jittor, you can run the Resnet18 training test. Note: this test requires 6 GB of GPU RAM.
```bash
python3.7 -m jittor.test.test_resnet
```
If these tests fail, please report the bug to us, and feel free to contribute ^_^
## Tutorial
In the tutorial section, we will briefly explain the basic concepts of Jittor.
To train your model with Jittor, there are two main concepts you need to know:
* Var: basic data type of jittor
* Operations: Jittor's operations are similar to numpy's
### Var
First, let's get started with Var. Var is the basic data type of jittor. Computation in Jittor is asynchronous for efficiency; if you want to access the data, `Var.data` can be used for synchronous data access.
```python
import jittor as jt
a = jt.float32([1,2,3])
print (a)
print (a.data)
# Output: float32[3,]
# Output: [ 1. 2. 3.]
```
We can also give a variable a name.
```python
a.name('a')
print(a.name())
# Output: a
```
### Operations
Jittor's operations are similar to numpy's. Let's try some operations. We create Var `a` and `b` via the operation `jt.float32`, and multiply them. Printing those variables shows they have the same shape and dtype.
```python
import jittor as jt
a = jt.float32([1,2,3])
b = jt.float32([4,5,6])
c = a*b
print(a,b,c)
print(type(a), type(b), type(c))
# Output: float32[3,] float32[3,] float32[3,]
# Output: <class 'jittor_core.Var'> <class 'jittor_core.Var'> <class 'jittor_core.Var'>
```
Besides that, all operators of the form `jt.xxx(Var, ...)` have an alias `Var.xxx(...)`. For example:
```python
c.max() # alias of jt.max(c)
c.add(a) # alias of jt.add(c, a)
c.min(keepdims=True) # alias of jt.min(c, keepdims=True)
```
If you want to know all the operations Jittor supports, try `help(jt.ops)`. All operations found under `jt.ops.xxx` can be used via the alias `jt.xxx`.
```python
help(jt.ops)
# Output:
# abs(x: core.Var) -> core.Var
# add(x: core.Var, y: core.Var) -> core.Var
# array(data: array) -> core.Var
# binary(x: core.Var, y: core.Var, op: str) -> core.Var
# ......
```
### More
If you want to know more about Jittor, please check out the notebooks below:
* Quickstart
    - [Example: Model definition and training][1]
    - [Basics: Op, Var][2]
    - [Meta-operator: Implement your own convolution with Meta-operator][3]
* Advanced
    - [Custom Op: write your operator with C++ and CUDA and JIT compile it][4]
    - [Profiler: Profiling your model][5]
    - Jtune: Tool for performance tuning
[1]: python/jittor/notebook/example.src.md "example"
[2]: python/jittor/notebook/basics.src.md "basics"
[3]: python/jittor/notebook/meta_op.src.md "meta_op"
[4]: python/jittor/notebook/custom_op.src.md "custom_op"
[5]: python/jittor/notebook/profiler.src.md "profiler"
Those notebooks can be run on your own computer with `python3.7 -m jittor.notebook`.
## Contributing
Jittor is still young. It may contain bugs and issues. Please report them in our bug tracking system. Contributions are welcome. Besides, if you have any ideas about Jittor, please let us know.
You can help Jittor in the following ways:
* Cite Jittor in your paper
* Recommend Jittor to your friends
* Contribute code
* Contribute tutorials and documentation
* File an issue
* Answer jittor-related questions
* Star the repository
* Keep an eye on jittor
* ......
## Contact Us
Website: http://cg.cs.tsinghua.edu.cn/jittor/
Email: jittor@qq.com
File an issue: https://github.com/Jittor/jittor/issues
QQ Group: 761222083
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/news/2020-12-8-21-19-1_2_2/fig4.png" width="200"/>
## The Team
Jittor is currently maintained by the [Tsinghua CSCG Group](https://cg.cs.tsinghua.edu.cn/). If you are also interested in Jittor and want to improve it, please join us!
## Citation
```
@article{hu2020jittor,
title={Jittor: a novel deep learning framework with meta-operators and unified graph execution},
author={Hu, Shi-Min and Liang, Dun and Yang, Guo-Ye and Yang, Guo-Wei and Zhou, Wen-Yang},
journal={Science China Information Sciences},
volume={63},
number={222103},
pages={1--21},
year={2020}
}
```
## License
Jittor is Apache 2.0 licensed, as found in the LICENSE.txt file.

20
doc/Makefile Normal file

@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
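Typical usage of this Makefile, assuming sphinx-build is installed:
```bash
make help   # list the available Sphinx targets
make html   # build the HTML documentation into build/
```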

18
doc/build_doc.sh Executable file

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# sudo python3.7 -m pip install \
# recommonmark \
# sphinx sphinx-autobuild sphinx_rtd_theme \
# sphinx-autobuild \
# --timeout 100
bpath=$(readlink -f "${BASH_SOURCE[0]}")
bpath=$(dirname "${bpath}")
jittor_path=$(readlink -f "${bpath}/..")
echo "[doc path] $bpath"
echo "[jittor path] $jittor_path"
export PYTHONPATH=$jittor_path/python
cd $bpath
sphinx-autobuild -b html source build -H 0.0.0.0 -p 8890

BIN
doc/logo.png Normal file (binary, 30 KiB; not shown)


@@ -0,0 +1,176 @@
Benchmarking Jittor and comparing with other frameworks
=====================
The code below uses ResNet-50 as an example to demonstrate the correct way to benchmark Jittor:
```python
import time
import jittor as jt
from jittor.models import resnet50
jt.flags.use_cuda = jt.has_cuda

warmup = 10
rerun = 100
batch_size = 8
data = jt.random((batch_size, 3, 224, 224))
model = resnet50()
model.eval()

# Warm up jittor so the timing below is accurate
jt.sync_all(True)
for i in range(warmup):
    pred = model(data)
    # sync sends the computation graph to the device
    pred.sync()
# sync_all(True) sends the computation graph to the device and synchronizes.
# Only after jt.sync_all(True) has run is the computation really finished,
# so it must be called both before and after the timed forward passes.
jt.sync_all(True)

# Start timing
start = time.time()
for i in range(rerun):
    pred = model(data)
    pred.sync()
jt.sync_all(True)
end = time.time()
print("Jittor FPS:", (rerun*batch_size)/(end-start))
```
In this code we define several parameters: `batch_size`, `warmup`, and `rerun`. `batch_size` is the batch size, `warmup` is the number of warm-up iterations, and `rerun` is the number of timed iterations; the final output is the FPS. The keys to benchmarking Jittor correctly are the warm-up part and the synchronization part: warm-up ensures the measured time is stable and excludes compilation time, while synchronization ensures the computation has actually finished, because jittor is an asynchronous framework and only a synchronization operation guarantees completion.
The result of running the code above (RTX Titan, batch size 8):
```
Compiling Operators(8/8) used: 7.35s eta: 0s
Compiling Operators(13/13) used: 8.36s eta: 0s
Jittor FPS: 908.9853866375396
```
We can test PyTorch's performance with similar code:
```python
import time
import torch
from torchvision.models import resnet50
warmup = 10
rerun = 100
batch_size = 8
data = torch.randn((batch_size, 3, 224, 224)).cuda()
model = resnet50()
model.cuda()
model.eval()

# Warm up pytorch so the timing below is accurate
torch.cuda.synchronize()
for i in range(warmup):
    pred = model(data)
# synchronize makes sure PyTorch's computation has finished
torch.cuda.synchronize()

# Start timing
start = time.time()
for i in range(rerun):
    pred = model(data)
torch.cuda.synchronize()
end = time.time()
print("PyTorch FPS:", (rerun*batch_size)/(end-start))
```
The result (RTX Titan, batch size 8):
```
PyTorch FPS: 807.4806873965665
```
We can also merge the two snippets and compare the consistency of their results:
```python
import time
import jittor as jt
from jittor.models import resnet50
jt.flags.use_cuda = jt.has_cuda

warmup = 100
rerun = 1000
batch_size = 8
data = jt.random((batch_size, 3, 224, 224))
model = resnet50()
model.eval()

# Warm up jittor so the timing below is accurate
jt.sync_all(True)
for i in range(warmup):
    pred = model(data)
    # sync sends the computation graph to the device
    pred.sync()
# sync_all(True) sends the computation graph to the device and synchronizes,
# so it must be called both before and after the timed forward passes.
jt.sync_all(True)

# Start timing
start = time.time()
for i in range(rerun):
    pred = model(data)
    pred.sync()
jt.sync_all(True)
end = time.time()
print("Jittor FPS:", (rerun*batch_size)/(end-start))

# Export jittor data and parameters to numpy and torch formats
jittor_data = pred.numpy()
jittor_param = model.state_dict(to="torch")

import numpy as np
import torch
from torchvision.models import resnet50

data = torch.Tensor(data.numpy()).cuda()
model = resnet50()
# Load the jittor parameters
model.load_state_dict(jittor_param)
model.cuda()
model.eval()

# Warm up pytorch so the timing below is accurate
torch.cuda.synchronize()
for i in range(warmup):
    pred = model(data)
# synchronize makes sure PyTorch's computation has finished
torch.cuda.synchronize()

# Start timing
start = time.time()
for i in range(rerun):
    pred = model(data)
torch.cuda.synchronize()
end = time.time()
print("PyTorch FPS:", (rerun*batch_size)/(end-start))

pytorch_data = pred.detach().cpu().numpy()
err = np.mean(np.abs(pytorch_data - jittor_data))
print("mean error:", err)
```
The output of the merged code:
```
Jittor FPS: 908.9853866375396
PyTorch FPS: 807.4806873965665
mean error: 1e-5
```
The mean error is 1e-5, which is within the acceptable range. The key points for correct benchmarking and comparison are:
1. Warm up thoroughly, to exclude the framework's preparation time.
2. Run many iterations, so the measured time is stable.
3. Add synchronization statements, so the measured time is accurate.
4. Make sure GPU memory is sufficient: when it is not, jittor falls back to unified memory, which costs performance; keep a close eye on the output of `nvidia-smi`.
5. Keep the compared models identical and check that the outputs agree.
If you have any questions about the benchmark results or need performance tuning, feel free to contact the Jittor development team.
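The checklist above can be packaged into a small helper; a minimal sketch (the function name `benchmark` is ours, not a Jittor API):
```python
import time
import jittor as jt
from jittor.models import resnet50

def benchmark(model, data, warmup=10, rerun=100):
    # warm up so compilation time is excluded from the measurement
    jt.sync_all(True)
    for _ in range(warmup):
        model(data).sync()
    jt.sync_all(True)
    # timed, synchronized runs
    start = time.time()
    for _ in range(rerun):
        model(data).sync()
    jt.sync_all(True)
    return rerun * data.shape[0] / (time.time() - start)

model = resnet50()
model.eval()
print("FPS:", benchmark(model, jt.random((8, 3, 224, 224))))
```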


@@ -0,0 +1,75 @@
Optimizing GPU and CPU memory in Jittor
=====================
You can reduce memory consumption in two main ways:
1. Optimize the variables that consume the most memory.
2. Use Jittor's automatic swapping, which moves variables between GPU memory, CPU memory, and disk, lowering the requirements for running and deploying models.
## Optimizing the variables that consume the most memory
You can use jittor's memory profiler to find the code that consumes the most memory, and optimize that code specifically. Usage:
```
net = jt.models.resnet18()
with jt.flag_scope(trace_py_var=3, profile_memory_enable=1):
    imgs = jt.randn((1,3,224,224))
    net(imgs).sync()
jt.get_max_memory_treemap()
```
The output looks like:
```
|
├─./python/jittor/test/test_memory_profiler.py:100(test_sample)
| [19.03 MB; 29.67%]
| ./python/jittor/test/test_memory_profiler.py:100
| |
| └─./python/jittor/__init__.py:730(__call__)
| [19.03 MB; 29.67%]
| ./python/jittor/__init__.py:730
| |
| └─./python/jittor/models/resnet.py:152(execute)
| [19.03 MB; 29.67%]
| ./python/jittor/models/resnet.py:152
| |
| ├─./python/jittor/models/resnet.py:142(_forward_impl)
| | [6.13 MB; 9.55%]
| | ./python/jittor/models/resnet.py:142
| | |
```
## Using automatic swapping
This technique keeps Jittor running at a reasonable speed even when GPU or CPU memory is insufficient.
To save memory, please install Jittor version 1.3.7.5 or later and add the following environment variables:
```bash
export JT_SAVE_MEM=1
# limit cpu memory usage to at most 16 GB
export cpu_mem_limit=16000000000
# limit device memory (gpu, tpu, etc.) to at most 8 GB
export device_mem_limit=8000000000
# Windows users: use powershell instead
# $env:JT_SAVE_MEM="1"
# $env:cpu_mem_limit="16000000000"
# $env:device_mem_limit="8000000000"
```
You are free to choose the cpu and device memory limits; if you do not want to limit a resource, set its value to `-1`.
```bash
# do not limit cpu memory
export cpu_mem_limit=-1
# do not limit device memory (gpu, tpu, etc.)
export device_mem_limit=-1
# Windows users: use powershell instead
# $env:JT_SAVE_MEM="1"
# $env:cpu_mem_limit="-1"
# $env:device_mem_limit="-1"
```
To clean up the disk swap files, run:
```bash
python3 -m jittor_utils.clean_cache swap
```


@@ -0,0 +1,90 @@
Jittor debugging tips
=====================
This document collects debugging methods and tips for several failure modes.
## NaN and Inf values
During training, numerical instability may produce NaN or Inf values. To help you locate the code responsible, set the following environment variables:
```bash
export JT_CHECK_NAN=1
export trace_py_var=3
```
The environment variable `JT_CHECK_NAN=1` makes jittor raise an error and stop as soon as an operator outputs an abnormal floating-point value; `trace_py_var=3` reports the Python source line of the corresponding operator (3 is the highest verbosity level).
Note that enabling these two features slows jittor down significantly and triggers recompilation; do not enable this mode in training or production environments, and do not leave it on for long periods.
## Inaccurate error locations
By default, Jittor uses lazy execution for speed: operators are created and executed asynchronously, which can make error messages point to the wrong place. You can disable lazy execution and switch to eager execution with the following environment variable:
```bash
export lazy_execution=0
```
or turn it off from Python via a flag:
```python
jt.flags.lazy_execution=0
```
## Out of memory
When Jittor cannot run because of memory-related problems, it reports its memory usage to you. Out-of-memory failures come in two forms:
1. The model is too large, and the program crashes within a single iteration.
2. Memory usage keeps growing over many iterations until it is finally exhausted.
**For the first case**, you may need to reduce the model or data size, or use [multi-GPU training](jittor.mpi). In addition, you can force Jittor to reclaim memory inside each iteration:
```python
for ...:
    ...
    jt.sync_all()
    jt.gc()
```
If you are using CUDA and convolutions, the temporary workspace consumed by convolution may also be too large. In that case you can disable cuDNN's workspace allocation by inserting the following at the very beginning of your program:
```python
jt.cudnn.set_max_workspace_ratio(0.0)
```
**For the second case**, there may be a memory leak. Check whether some global variable is never released, or whether a global variable's gradient is not stopped, causing the computation graph to grow without bound. To check, insert the following debugging code inside each iteration:
```python
for ...:
    ...
    jt.sync_all()
    jt.display_memory_info()
```
Jittor will print the memory consumption, the size of the computation graph (`lived_var, lived_op`), and the number of variables held by the user (`hold_var`). If the graph keeps growing, check your code, or contact us by opening a GitHub issue with the error log and a script that reproduces the problem.
## Segmentation faults
If Jittor hits a segmentation fault, we recommend contacting us via a GitHub issue with the error log and a reproduction script. You can also diagnose the program and the framework with the following environment variables:
```bash
export debug=1
export gdb_attach=1
```
The environment variable `debug=1` enables Jittor's debug mode, which is much slower but preserves debugging information; `gdb_attach=1` automatically attaches gdb to the main Jittor process so that you can single-step through it. For gdb usage, see the [GDB Cheat Sheet](https://darkdust.net/files/GDB%20Cheat%20Sheet.pdf).
## Managing the Jittor cache
Jittor creates a cache under the `~/.cache/jittor` directory. It may contain the core (kernel), the CUDA compiler, CUDA libraries, datasets, pretrained weights, and so on. In some situations the cache can become stale, e.g. after a system or driver update, and you may need to clear it manually:
```
python3 -m jittor_utils.clean_cache all
```
The command above clears all of Jittor's caches. If you do not want to clear everything, see the command-line help:
```
python3 -m jittor_utils.clean_cache help
```

1
doc/source/README.cn.md Normal file
View File

@ -0,0 +1 @@
../../README.cn.md

101
doc/source/conf.py Normal file
View File

@ -0,0 +1,101 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
jittor_path = os.path.abspath('../../python')
print(f"[jittor_path] {jittor_path}")
sys.path.insert(0, jittor_path)
import jittor
# -- Project information -----------------------------------------------------
project = 'Jittor'
copyright = '2020, Jittor'
author = 'Jittor'
# The full version, including alpha/beta/rc tags
release = jittor.__version__
# fix AttributeError for "typing.get_type_hints(jt.Var)"
jittor.Var.__module__ = "jittor_core"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'zh_CN'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
# 'recommonmark',
'myst_parser',
'sphinx.ext.autodoc',
# Auto-generate section labels.
'sphinx.ext.autosectionlabel',
'sphinx.ext.viewcode',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
import sphinx_rtd_theme
html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
source_suffix = {
'.rst': 'restructuredtext',
'.txt': 'markdown',
'.md': 'markdown',
}
import recommonmark
from recommonmark.transform import AutoStructify
# At the bottom of conf.py
def setup(app):
app.add_config_value('recommonmark_config', {
# 'url_resolver': lambda url: github_doc_root + url,
'auto_toc_tree_section': 'Contents',
}, True)
app.add_transform(AutoStructify)
# Prefix document path to section labels, otherwise autogenerated labels would look like 'heading'
# rather than 'path/to/file:heading'
autosectionlabel_prefix_document = True

60
doc/source/index.rst Normal file
View File

@ -0,0 +1,60 @@
.. Jittor documentation master file, created by
sphinx-quickstart on Mon May 18 23:05:53 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to the Jittor documentation
====================================
.. toctree::
   :maxdepth: 2
   :caption: Contents:

   README.cn.md

.. toctree::
   :maxdepth: 2
   :caption: Module API:

   jittor
   jittor.nn
   jittor.models
   jittor.optim
   jittor.init
   jittor.contrib
   jittor.dataset
   jittor.transform
   jittor.mpi
   jittor.linalg
   jittor.console
   jittor.distributions
   jittor.attention
   jittor.loss3d

.. toctree::
   :maxdepth: 2
   :caption: Jittor model zoo:

   JDet
   segmentation-jittor
   InstanceSegmentation-jittor
   gan-jittor
   PointCloudLib
   jrender

.. toctree::
   :maxdepth: 1
   :caption: Miscellaneous:

   Jittor调试技巧
   Jittor性能测试与对比方法
   Jittor显存以及内存优化方法
   Tutorials <https://cg.cs.tsinghua.edu.cn/jittor/tutorial/>
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -0,0 +1,10 @@
jittor.attention
=====================
This is the API documentation for Jittor's attention module. You can access it via `from jittor import attention`.
```eval_rst
.. automodule:: jittor.attention
:members:
:undoc-members:
```
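A minimal self-attention sketch, matching the `MultiheadAttention` implementation shipped in `python/jittor/attention.py` (input layout `(seq_len, batch, embed_dim)`):
```python
import jittor as jt
from jittor.attention import MultiheadAttention

attn = MultiheadAttention(embed_dim=64, num_heads=4, self_attention=True)
x = jt.random((10, 2, 64))   # (seq_len, batch, embed_dim)
out, weights = attn(x)       # out: (10, 2, 64); weights averaged over heads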

View File

@ -0,0 +1,237 @@
jittor.console
=====================
This is the API documentation for the Jittor console. The console is aimed at C/C++ users who want to use Jittor from C++: it optimizes data transfer between C++ arrays and the Jittor kernel and avoids extra Python overhead, providing a high-performance C++ interface to Jittor.
It requires Jittor version 1.2.2.17 or later and a compiler with C++17 support.
## Quick tutorial
We provide a complete example that you can compile and run with a few commands:
```bash
# generate the C++ example source file
python3.7 -m jittor_utils.config --cxx-example > example.cc
# compile the example with g++ (requires -std=c++17 support)
g++ example.cc $(python3.7 -m jittor_utils.config --include-flags --libs-flags --cxx-flags) -o example
# run the example
./example
```
The output should look like:
```bash
hello jt console
1
hello
1 2 3 4
jt.Var([[-1 5 4]
[ 3 2 1]], dtype=int32)
2 3
1 25 16
9 4 1
pred.shape 2 1000
```
用户可以打开 example.cc, 修改成所需的应用,接下来我们会为大家讲解 example.cc 中的细节。
打开example.cc, 我们可以看到如下代码:
```cpp
#include <pyjt/pyjt_console.h>
#include <iostream>
using namespace std;
int main() {
    ...
}
```
Here we include the header required for the console, `pyjt/pyjt_console.h`.
Next, a Jittor console is instantiated, and Python's print is used to output "hello jt console":
```cpp
jittor::Console console;
// run python code in console
console.run("print('hello jt console', flush=True)");
```
Output:
```
hello jt console
```
Note that we pass the flush keyword to Python's print so that the Python output stream stays in step with the C++ output stream and the two do not get interleaved incorrectly.
Next we call `console.set<T>(name, data)` and `console.get<T>(name)` to store an int variable a in the console and read it back out.
```cpp
// set a python value: a = 1
console.set<int>("a", 1);
// get a python value
cout << console.get<int>("a") << endl;
```
Output:
```
1
```
In the same way, we also set a `string` and a `vector<int>`, as shown below:
```cpp
// set a python string
console.set<string>("b", "hello");
cout << console.get<string>("b") << endl;
// set a python array
vector<int> x{1,2,3,4};
console.set("x", x);
auto x2 = console.get<std::vector<int>>("x");
for (auto a : x2) cout << a << " "; cout << endl;
```
Output:
```
hello
1 2 3 4
```
We can also store Jittor variables in the console. Here we use several new interfaces:
1. `jittor::array<T, NDIM>(shape, data)`: creates a Jittor array with element type `T`, `NDIM` dimensions, and shape `shape` (the length of shape must equal `NDIM`); the last argument is the initial data, which can be a vector or a pointer.
2. `console.set_array(name, arr)`: stores the Jittor array in the console under the name `name`.
3. `console.get_array<T, NDIM>(name)`: retrieves a Jittor array with element type `T` and `NDIM` dimensions from the console; the type and dimension count must match the variable in the console, otherwise an exception is thrown.
4. `arr(i,j)`: reads or writes an element of a Jittor array.
5. `arr.shape[i]`: gets the size of dimension i of a Jittor array.
In the code below, we first create a 2x3 matrix, modify one of its values, store it into the Python console, and then read it back and print it:
```cpp
// set and get a jittor array
jittor::array<int, 2> arr2({2,3}, {6,5,4,3,2,1});
arr2(0,0) = -1;
console.set_array("arr2", arr2);
console.run("print(arr2, flush=True); arr3 = arr2**2;");
auto arr3 = console.get_array<int, 2>("arr3");
cout << arr3.shape[0] << ' ' << arr3.shape[1] << endl;
for (int i=0; i<arr3.shape[0]; i++) {
    for (int j=0; j<arr3.shape[1]; j++)
        cout << arr3(i,j) << ' ';
    cout << endl;
}
```
The output is:
```
jt.Var([[-1 5 4]
[ 3 2 1]], dtype=int32)
2 3
1 25 16
9 4 1
```
Finally, we demonstrate importing `resnet` from `jittor.models` and fetching the result from the console.
```cpp
jittor::array<float, 4> input({2, 3, 224, 224});
memset(input.data.get(), 0, input.nbyte());
console.set_array("input", input);
console.run(R"(
import jittor as jt
from jittor.models import resnet
model = resnet.resnet18()
pred = model(input)
)");
auto pred = console.get_array<float, 2>("pred");
cout << "pred.shape " << pred.shape[0] << ' ' << pred.shape[1] << endl;
```
We print the shape of the retrieved variable; the result is:
```
pred.shape 2 1000
```
## jittor array interface overview
`jittor::array` is the array type used to exchange data between C++ and the Jittor console. It is defined as follows:
```cpp
// T: element type, N: number of dimensions
template<class T, int N>
struct array {
    // shape of the N dimensions
    int64 shape[N];
    // data pointer
    unique_ptr<T[]> data;
    // whether the element type is floating point
    bool is_float();
    // whether the element type is unsigned
    bool is_unsigned();
    // total number of elements, i.e. the product of the entries of shape
    int64 size();
    // total size in bytes
    int64 nbyte();
    // string representation of the data type
    string dtype();
    // number of dimensions, same as N
    int ndim();
    // constructor: shape gives the shape, the data is left uninitialized
    array(const vector<int64>& shape);
    // constructor: shape gives the shape, the data is copied from the pointer data
    array(const vector<int64>& shape, const T* data);
    // constructor: shape gives the shape, the data is copied from the vector data
    array(const vector<int64>& shape, const vector<T>& data);
    // element access
    T& operator()(...);
};
```
## Console interface overview
The console interface has three parts: setting variables, retrieving variables, and running scripts.
```cpp
struct Console {
    // run a piece of Python code
    void run(const string& src);
    // set a variable named s to the value data
    template<class T>
    void set(const string& s, const T& data);
    // get the variable named s
    template<class T>
    T get(const string& s);
    // set a jittor array variable named s
    template<class T, int N>
    void set_array(const string& s, const array<T,N>& data);
    // get a jittor array with element type T and N dimensions; the type and
    // dimension count must match the variable in the console, otherwise an
    // exception is thrown
    template<class T, int N>
    array<T,N> get_array(const string& s);
};
```
`get` and `set` support the common C++ types:
1. int, uint, int64, uint64, float, double
2. string
3. vector
4. map, unordered_map

View File

@ -0,0 +1,10 @@
jittor.contrib
=====================
This is the API documentation for Jittor's contributed-code module. The code in this module may not be fully mature yet; it will keep being improved in future development iterations. You can access it via `from jittor import contrib`.
```eval_rst
.. automodule:: jittor.contrib
:members:
:undoc-members:
```

View File

@ -0,0 +1,11 @@
jittor.dataset
=====================
This is the API documentation for Jittor's dataset module. You can access it via `from jittor import dataset`.
```eval_rst
.. automodule:: jittor.dataset
:imported-members:
:members:
:undoc-members:
```
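A minimal custom-dataset sketch (`set_attrs` configures the total length, batch size, and shuffling; names follow the standard `jittor.dataset.Dataset` interface):
```python
import numpy as np
from jittor.dataset import Dataset

class ToyData(Dataset):
    def __init__(self):
        super().__init__()
        self.set_attrs(total_len=100, batch_size=8, shuffle=True)

    def __getitem__(self, k):
        # one sample: a feature and its label
        return np.float32([k]), np.float32([2 * k])

for x, y in ToyData():
    print(x.shape, y.shape)   # batched automatically: (8, 1) (8, 1)
    break
```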

View File

@ -0,0 +1,10 @@
jittor.distributions
=====================
This is the API documentation for Jittor's probability-distribution module. You can access it via `from jittor import distributions`.
```eval_rst
.. automodule:: jittor.distributions
:members:
:undoc-members:
```
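A small sketch, assuming the module exposes a `Normal` distribution with `sample` and `log_prob` in the usual style (the exact constructor and signatures are in the generated listing above):
```python
from jittor.distributions import Normal

dist = Normal(0.0, 1.0)   # mean 0, std 1 (constructor layout is an assumption)
x = dist.sample()         # draw a sample
print(dist.log_prob(x))   # log-density at the sample
```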

10
doc/source/jittor.init.md Normal file
View File

@ -0,0 +1,10 @@
jittor.init
=====================
This is the API documentation for Jittor's parameter-initialization module. You can access it via `from jittor import init`.
```eval_rst
.. automodule:: jittor.init
:members:
:undoc-members:
```
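A short sketch using `init.constant`, mirroring its usage in the synchronized batch-norm example in the MPI documentation:
```python
from jittor import init

weight = init.constant((4,), "float32", 1.0)   # length-4 vector of ones
bias = init.constant((4,), "float32", 0.0)     # length-4 vector of zeros
print(weight.shape, bias.shape)
```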

View File

@ -0,0 +1,57 @@
jittor.linalg
=====================
This is the API documentation for Jittor's linear-algebra functions. You can access the module via `from jittor import linalg`.
## Overview of the basic functions
#### Basic linear-algebra APIs
- linalg.inv(a)
  computes the inverse of a.
- linalg.pinv(a)
  computes the generalized (Moore-Penrose) pseudo-inverse of a; a is not required to be invertible.
- linalg.slogdet(a)
  computes the slogdet of a, returning the log-determinant together with its sign.
- linalg.det(a)
  computes the determinant of a.
- linalg.solve(a,b)
  solves the linear system Ax=b.
#### Decomposition APIs
- linalg.cholesky(a)
  computes the Cholesky decomposition of a.
- linalg.qr(a)
  computes the QR decomposition of a.
- linalg.svd
  computes the singular value decomposition of a.
#### Eigenvalue APIs
- linalg.eig(a)
  computes the eigenvalues and eigenvectors of a.
- linalg.eigh(a)
  computes the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
The currently supported linalg APIs are:
```eval_rst
.. automodule:: jittor.linalg
:members:
:undoc-members:
```
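A quick sketch exercising a few of the routines above (`a` is symmetric positive definite, so every call is well-defined):
```python
import jittor as jt

a = jt.array([[4.0, 2.0], [2.0, 3.0]])
b = jt.array([1.0, 2.0])
print(jt.linalg.inv(a))        # matrix inverse
print(jt.linalg.det(a))        # determinant: 8.0
print(jt.linalg.solve(a, b))   # x such that a @ x = b
u, s, v = jt.linalg.svd(a)     # singular value decomposition
```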

View File

@ -0,0 +1,10 @@
jittor.loss3d
=====================
This is the API documentation for Jittor's 3D loss-function module. You can access it via `from jittor import loss3d`.
```eval_rst
.. automodule:: jittor.loss3d
:members: chamfer_loss, ChamferLoss, earth_mover_distance, EarthMoverDistance
:undoc-members:
```
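A hedged sketch of the Chamfer loss on random point clouds (a `(batch, n_points, 3)` layout is assumed; see the generated signatures above for the exact options):
```python
import jittor as jt
from jittor.loss3d import chamfer_loss

p1 = jt.random((8, 1024, 3))   # batch of point clouds, (B, N, 3) layout assumed
p2 = jt.random((8, 1024, 3))
print(chamfer_loss(p1, p2))
```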

54
doc/source/jittor.md Normal file
View File

@ -0,0 +1,54 @@
jittor
=====================
## jittor
This is the API documentation for the main Jittor module. You can access it via `import jittor`.
```eval_rst
.. automodule:: jittor
:members:
:undoc-members:
```
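A few lines showing the basics of the main module (array creation, elementwise operators, and conversion back to numpy):
```python
import jittor as jt

x = jt.array([1.0, 2.0, 3.0])
y = (x * 2 + 1).sqrt()   # elementwise ops build a lazy graph
print(y.numpy())         # evaluation happens when the data is fetched
```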
## jittor.core
Below are Jittor's core APIs, which can be accessed directly as `jittor.core.XXX` or `jittor.XXX`.
```eval_rst
.. automodule:: jittor_core
:imported-members:
:members:
:undoc-members:
```
## jittor.ops
This is the API documentation for Jittor's basic operator module. These APIs can be accessed directly as `jittor.ops.XXX` or `jittor.XXX`.
```eval_rst
.. automodule:: jittor_core.ops
:members:
:undoc-members:
```
## jittor.Var
This is the API documentation for Jittor's basic variable class. These APIs can be accessed directly as `my_jittor_var.XXX`.
```eval_rst
.. automodule:: jittor_core.Var
:members:
:undoc-members:
```
## jittor.Misc
This is the API documentation for Jittor's miscellaneous operators. These APIs can be accessed directly as `jittor.misc.XXX` or `jittor.XXX`.
```eval_rst
.. automodule:: jittor.misc
:members:
:undoc-members:
```

View File

@ -0,0 +1,14 @@
jittor.models
=====================
This is the API documentation for Jittor's backbone-network (models) module. You can access it via `from jittor import models`.
```eval_rst
.. automodule:: jittor.models
:members:
:imported-members:
:undoc-members:
:exclude-members: ResNet,ShuffleNetV2,SqueezeNet,VGG
```
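Loading a backbone and running one forward pass, mirroring the resnet usage in the memory-profiling document above:
```python
import jittor as jt
from jittor import models

net = models.resnet18()
x = jt.randn((1, 3, 224, 224))
y = net(x)
print(y.shape)   # [1, 1000]
```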

215
doc/source/jittor.mpi.md Normal file
View File

@ -0,0 +1,215 @@
jittor.mpi
=====================
Jittor's distributed training is based on MPI (Message Passing Interface). This document explains how to do multi-GPU and distributed training with Jittor MPI.
## Installing Jittor MPI
Jittor depends on `OpenMPI`, which can be installed with the following command:
```bash
sudo apt install openmpi-bin openmpi-common libopenmpi-dev
```
You can also build and install it yourself by following the [OpenMPI documentation](https://www.open-mpi.org/faq/?category=building#easy-build).
Jittor automatically checks whether `mpicc` is present in the environment. If `mpicc` is detected successfully, Jittor prints the following message:
```
[i 0502 14:09:55.758481 24 __init__.py:203] Found mpicc(1.10.2) at /usr/bin/mpicc
```
If Jittor does not find MPI in the environment, you can tell it where mpicc is by setting an environment variable: `export mpicc_path=/you/mpicc/path`
Once `OpenMPI` is installed, no code changes are required: simply change the launch command, and Jittor will automatically run in a data-parallel fashion.
```bash
# single-GPU training
python3.7 -m jittor.test.test_resnet
# distributed multi-GPU training
mpirun -np 4 python3.7 -m jittor.test.test_resnet
# multi-GPU training on specific GPUs
CUDA_VISIBLE_DEVICES="2,3" mpirun -np 2 python3.7 -m jittor.test.test_resnet
```
This convenience is backed by Jittor's distributed operators, and the supported MPI operators are further accelerated by an NCCL backend. All of Jittor's distributed algorithms are developed in the Python frontend, which makes them far more flexible and much easier to develop.
## Adapting single-GPU code to multiple GPUs
When launched with `mpirun`, the following modules detect the MPI environment and switch to their multi-GPU versions automatically:
* jittor.optimizer: synchronizes gradients automatically
* jittor.nn.BatchNorm*: synchronizes the batch-norm statistics
* jittor.dataset: parallelizes data loading automatically
During MPI distributed training, Jittor's internal Dataset class distributes the data across nodes automatically. Note that the batch size configured on the Dataset is **the sum of the batch sizes of all nodes**, i.e. the total batch size, not the batch size received by a single node.
In most cases, single-GPU training code can be run distributed on multiple GPUs with `mpirun` as-is. The following situations, however, require code changes:
1. Writing to disk (saving models, saving curves)
2. Aggregating global statistics (e.g. the global accuracy on the validation set)
### Writing to disk
For the first case, suppose your original code looks like this:
```python
for i, (images, labels) in enumerate(dataset):
    output = model(images)
    loss = nn.cross_entropy_loss(output, labels)
    acc1 = accuracy(output, labels)
    SGD.step(loss)
    loss_data = loss.data
    writer.add_scalar("Train/loss")
```
The modified code:
```python
for i, (images, labels) in enumerate(dataset):
    output = model(images)
    loss = nn.cross_entropy_loss(output, labels)
    acc1 = accuracy(output, labels)
    SGD.step(loss)
    loss_data = loss.data
    if jt.rank == 0:
        writer.add_scalar("Train/loss")
```
Here we use jt.rank so that only the first process is allowed to write the loss. This code also works on a single GPU, where jt.rank is 0. Note that the code inside the `if jt.rank == 0` block must not call any Jittor API: doing so is very likely to make the API calls inconsistent across processes and cause a **deadlock**!
### Aggregating global statistics
There are two ways to aggregate global statistics. The first is to use the provided MPI ops, as in the following validation code:
```python
def val(epoch):
    global min_error
    model.eval()
    correct_nums = 0
    for i, (images, labels) in enumerate(valdataset):
        output = model(images)
        correct_nums += top1error(output, labels)
        correct_nums.sync()
    top1_error = (valdataset.total_len - correct_nums.data[0]) / valdataset.total_len
    if top1_error < min_error:
        print("[*] Best model is updated ...")
        model.save('model_best.pkl')
```
The modified version:
```python
def val(epoch):
    global min_error
    model.eval()
    correct_nums = 0
    for i, (images, labels) in enumerate(valdataset):
        output = model(images)
        correct_nums += top1error(output, labels)
        correct_nums.sync()
    if jt.in_mpi:
        correct_nums = correct_nums.mpi_all_reduce()
    top1_error = (valdataset.total_len - correct_nums.data[0]) / valdataset.total_len
    if jt.rank == 0 and top1_error < min_error:
        print("[*] Best model is updated ...")
        model.save('model_best.pkl')
```
Notice that we first use `mpi_all_reduce` to aggregate the number of correct predictions across the GPUs (mpi_all_reduce sums the results of all MPI processes), and the model is only updated when `jt.rank == 0`.
The second way is to use `@jt.single_process_scope()`: the decorated function is executed by a single process only, so no multi-GPU handling is needed.
```python
@jt.single_process_scope()
def val(epoch):
    ......
```
## MPI interface
Below is the Jittor MPI API reference.
The currently exposed MPI interfaces are:
* `jt.in_mpi`: when Jittor is not running inside MPI, `jt.mpi == False`; you can use this to test whether you are in an MPI environment.
* `jt.world_size`: the total number of processes; 1 when MPI is not in use.
* `jt.rank`: the index of the current process, ranging from `0` to `jt.world_size-1`; 0 when MPI is not in use.
* `jt.mpi`: Jittor's MPI module.
* `jt.Module.mpi_param_broadcast(root=0)`: broadcast the module's parameters from the root node to all other nodes.
* `jt.mpi.mpi_reduce(x, op='add', root=0)`: reduce the variable x from all nodes to the root node with operator op. If op is 'add' or 'sum', the variables are summed; if op is 'mean', the mean is taken.
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/tutorial/2020-5-2-16-44-distributed/mpi_reduce.png">
* `jt.mpi.mpi_broadcast(x, root=0)`: broadcast the variable x from the root node to all nodes.
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/tutorial/2020-5-2-16-44-distributed/mpi_broadcast.png">
* `jt.mpi.mpi_all_reduce(x, op='add')`: reduce the variable x across all nodes and broadcast the result back to every node. If op is 'add' or 'sum', the variables are summed; if op is 'mean', the mean is taken.
<img src="https://cg.cs.tsinghua.edu.cn/jittor/images/tutorial/2020-5-2-16-44-distributed/mpi_all_reduce.png">
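A minimal runnable sketch of `mpi_all_reduce` (launch with e.g. `mpirun -np 2 python3.7 demo.py`, where `demo.py` is a hypothetical file containing this code):
```python
import jittor as jt

# each process contributes its rank; after all_reduce every process
# holds the sum of all ranks
x = jt.array([float(jt.rank)])
if jt.in_mpi:
    x = x.mpi_all_reduce("add")
print(jt.rank, x.data)
```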
```eval_rst
.. automodule:: jittor_mpi_core
:members:
:undoc-members:
.. automodule:: jittor_mpi_core.ops
:members:
:undoc-members:
```
## Example: distributed synchronized batch normalization with MPI
The code below implements a distributed synchronized batch-normalization layer with Jittor. Starting from the ordinary batch-norm layer, adding just three lines makes batch norm distributed:
```python
# synchronize the mean and variance across all nodes via all_reduce
if self.sync and jt.mpi:
    xmean = xmean.mpi_all_reduce("mean")
    x2mean = x2mean.mpi_all_reduce("mean")
```
> Note: Jittor already ships a synchronized batch-normalization layer internally; you do not need to implement it yourself.
The complete code of the distributed synchronized batch-normalization layer:
```python
class BatchNorm(Module):
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True, sync=True):
        assert affine == None
        self.sync = sync
        self.num_features = num_features
        self.is_train = is_train
        self.eps = eps
        self.momentum = momentum
        self.weight = init.constant((num_features,), "float32", 1.0)
        self.bias = init.constant((num_features,), "float32", 0.0)
        self.running_mean = init.constant((num_features,), "float32", 0.0).stop_grad()
        self.running_var = init.constant((num_features,), "float32", 1.0).stop_grad()

    def execute(self, x):
        if self.is_train:
            xmean = jt.mean(x, dims=[0,2,3], keepdims=1)
            x2mean = jt.mean(x*x, dims=[0,2,3], keepdims=1)
            # synchronize the mean and variance across all nodes via all_reduce
            if self.sync and jt.mpi:
                xmean = xmean.mpi_all_reduce("mean")
                x2mean = x2mean.mpi_all_reduce("mean")
            xvar = x2mean-xmean*xmean
            norm_x = (x-xmean)/jt.sqrt(xvar+self.eps)
            self.running_mean += (xmean.sum([0,2,3])-self.running_mean)*self.momentum
            self.running_var += (xvar.sum([0,2,3])-self.running_var)*self.momentum
        else:
            running_mean = self.running_mean.broadcast(x, [0,2,3])
            running_var = self.running_var.broadcast(x, [0,2,3])
            norm_x = (x-running_mean)/jt.sqrt(running_var+self.eps)
        w = self.weight.broadcast(x, [0,2,3])
        b = self.bias.broadcast(x, [0,2,3])
        return norm_x * w + b
```

24
doc/source/jittor.nn.md Normal file
View File

@ -0,0 +1,24 @@
jittor.nn
=====================
This is the API documentation for Jittor's neural-network module. You can access it via `from jittor import nn`.
```eval_rst
.. automodule:: jittor.nn
:members:
:undoc-members:
.. automodule:: jittor.nn
:imported-members:
:members: Pool, pool, AdaptiveAvgPool2d, Pool3d, AdaptiveMaxPool2d, AdaptiveAvgPool3d, AdaptiveMaxPool2d, pool3d, AvgPool2d, AvgPool3d, avg_pool2d, MaxPool2d, MaxPool3d, max_pool2d, max_pool3d, MaxUnpool2d, MaxUnpool3d
:undoc-members:
.. autoclass:: jittor.nn.ReLU
:members:
.. autoclass:: jittor.nn.ReLU6
:members:
.. autoclass:: jittor.nn.LeakyReLU
:members:
.. autoclass:: jittor.nn.Softmax
:members:
```
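A tiny sketch combining a couple of the listed building blocks:
```python
import jittor as jt
from jittor import nn

x = jt.random((1, 3, 32, 32))
pool = nn.Pool(2, stride=2, op="maximum")   # 2x2 max pooling
relu = nn.ReLU()
y = relu(pool(x))
print(y.shape)   # [1, 3, 16, 16]
```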

View File

@ -0,0 +1,18 @@
jittor.optim
=====================
This is the API documentation for Jittor's optimizer module. You can access it via `from jittor import optim`.
```eval_rst
.. automodule:: jittor.optim
:members:
:undoc-members:
```
Below is the API documentation for Jittor's learning-rate scheduling module, which must be used together with an optimizer. You can access it via `from jittor import lr_scheduler`.
```eval_rst
.. automodule:: jittor.lr_scheduler
:members:
:undoc-members:
```
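A minimal optimizer-plus-scheduler loop (assuming `StepLR` is among the available schedulers listed above; note that Jittor optimizers take the loss directly in `step`):
```python
import jittor as jt
from jittor import nn, optim, lr_scheduler

model = nn.Linear(10, 1)
opt = optim.SGD(model.parameters(), lr=0.1)
sched = lr_scheduler.StepLR(opt, step_size=10, gamma=0.5)   # halve the lr every 10 epochs

for epoch in range(30):
    x = jt.random((4, 10))
    loss = ((model(x) - 1) ** 2).mean()
    opt.step(loss)   # backward pass and parameter update in one call
    sched.step()
```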

View File

@ -0,0 +1,10 @@
jittor.transform
=====================
This is the API documentation for Jittor's data-transform module. You can access it via `from jittor import transform`.
```eval_rst
.. automodule:: jittor.transform
:members:
:undoc-members:
```
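A small sketch composing common transforms (assuming torchvision-style members such as `Compose`, `Resize`, and `ToTensor`; the authoritative list is generated above):
```python
from PIL import Image
from jittor import transform

tfm = transform.Compose([
    transform.Resize(224),
    transform.ToTensor(),   # PIL image -> float array scaled to [0, 1]
])
img = Image.new("RGB", (256, 256))
x = tfm(img)
print(x.shape)   # (3, 224, 224)
```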

12
doc/source/todo.md Normal file
View File

@ -0,0 +1,12 @@
TODO
=====================
## Documentation
* Standardize the documentation syntax
* Add tutorial links to the documentation
* MPI interface documentation
* Automatic documentation updates
* Link from the home page to the documentation
* Documentation for the model zoo (GAN, segmentation, detection, ...)
* Complete the documentation, adding usage examples to the important classes

2179
python/jittor/__init__.py Normal file

File diff suppressed because it is too large Load Diff

7995
python/jittor/__init__.pyi Normal file

File diff suppressed because it is too large Load Diff

176
python/jittor/attention.py Normal file
View File

@ -0,0 +1,176 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Guowei Yang <471184555@qq.com>
# Dun Liang <randonlang@gmail.com>.
#
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
from jittor import init, Module, nn
import numpy as np
import math

class MultiheadAttention(Module):
    def __init__(
        self,
        embed_dim,
        num_heads,
        kdim=None,
        vdim=None,
        dropout=0.0,
        bias=True,
        add_bias_kv=False,
        add_zero_attn=False,
        self_attention=False,
        encoder_decoder_attention=False,
        q_noise=0.0,
        qn_block_size=8,
    ):
        super().__init__()
        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
        self.num_heads = num_heads
        assert dropout==0, "TODO: dropout>0"
        self.head_dim = embed_dim // num_heads
        assert (self.head_dim * num_heads == self.embed_dim), "embed_dim must be divisible by num_heads"
        self.scaling = self.head_dim ** -0.5
        self.self_attention = self_attention
        self.encoder_decoder_attention = encoder_decoder_attention
        assert not self.self_attention or self.qkv_same_dim, ("Self-attention requires query, key and " "value to be of the same size")
        #TODO: quant_noise
        self.k_proj = nn.Linear(self.kdim, embed_dim, bias=bias)
        self.v_proj = nn.Linear(self.vdim, embed_dim, bias=bias)
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        assert not add_bias_kv, "TODO: add_bias_kv=True"
        self.bias_k = self.bias_v = None
        self.add_zero_attn = add_zero_attn
        self.reset_parameters()
        self.onnx_trace = False
        self.tpu = False

    def reset_parameters(self):
        if self.qkv_same_dim:
            # Empirically observed the convergence to be much better with
            # the scaled initialization
            init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2))
            init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2))
            init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2))
        else:
            init.xavier_uniform_(self.k_proj.weight)
            init.xavier_uniform_(self.v_proj.weight)
            init.xavier_uniform_(self.q_proj.weight)
        # init.xavier_uniform_(self.out_proj.weight)
        if self.out_proj.bias is not None:
            init.constant_(self.out_proj.bias, 0.)
        if self.bias_k is not None:
            init.xavier_normal_(self.bias_k)
        if self.bias_v is not None:
            init.xavier_normal_(self.bias_v)

    def execute(
        self,
        query,
        key = None,
        value = None,
        key_padding_mask = None,
        incremental_state = None,
        need_weights = True,
        static_kv = False,
        attn_mask = None,
        before_softmax = False,
        need_head_weights = False,
    ):
        if need_head_weights:
            need_weights = True
        tgt_len, bsz, embed_dim = query.shape
        assert embed_dim == self.embed_dim
        assert list(query.shape) == [tgt_len, bsz, embed_dim]
        assert incremental_state is None, "TODO: incremental_state is not None"
        saved_state = None
        if self.self_attention:
            q = self.q_proj(query)
            k = self.k_proj(query)
            v = self.v_proj(query)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                k = self.k_proj(key)
                v = self.v_proj(key)
        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q = q*self.scaling
        assert self.bias_k is None, "TODO: self.bias_k is not None:"
        q = q.view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(1, 0, 2)
        if k is not None:
            k = k.view(-1, bsz * self.num_heads, self.head_dim).transpose(1, 0, 2)
        if v is not None:
            v = v.view(-1, bsz * self.num_heads, self.head_dim).transpose(1, 0, 2)
        assert saved_state is None, "TODO: saved_state is not None"
        assert k is not None
        src_len = k.shape[1]
        assert key_padding_mask is None, "TODO: key_padding_mask is not None"
        assert not self.add_zero_attn, "TODO: self.add_zero_attn=True"
        attn_weights = nn.bmm(q, k.transpose(0, 2, 1))
        assert list(attn_weights.shape) == [bsz * self.num_heads, tgt_len, src_len]
        assert attn_mask is None, "TODO: attn_mask is not None"
        assert key_padding_mask is None, "TODO: key_padding_mask is not None"
        if before_softmax:
            return attn_weights, v
        attn_weights_float = nn.softmax(attn_weights, dim=-1)
        attn_weights = attn_weights_float.type_as(attn_weights)
        assert v is not None
        attn = nn.bmm(attn_weights, v)
        assert list(attn.shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.shape[1] == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.view(tgt_len, bsz, embed_dim)
        else:
            attn = attn.transpose(1, 0, 2).view(tgt_len, bsz, embed_dim)
        attn = self.out_proj(attn)
        attn_weights = None
        if need_weights:
            attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0, 2, 3)
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(dims=[0])
        return attn, attn_weights

View File

@ -0,0 +1,3 @@
from .ccl_2d import ccl_2d
from .ccl_3d import ccl_3d
from .ccl_link import ccl_link

177
python/jittor/ccl/ccl_2d.py Normal file
View File

@ -0,0 +1,177 @@
import jittor as jt
def ccl_2d(data_2d):
'''
2D connected component labelling, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm
Args:
[in]param data_2d: binary two-dimensional vector
type data_2d: jittor array
Returns:
[out]result: labeled two-dimensional vector
Example:
>>> import jittor as jt
>>> jt.flags.use_cuda = 1
>>> import cv2
>>> import numpy as np
>>> img = cv2.imread('testImg.png', 0)
>>> a = img.mean()
>>> img[img <= a] = 0
>>> img[img > a] = 1
>>> img = jt.Var(img)
>>> result = ccl_2d(img)
>>> print(jt.unique(result, return_counts=True, return_inverse=True)[0], jt.unique(result, return_counts=True, return_inverse=True)[2])
>>> cv2.imwrite('testImg_result.png', result.numpy().astype(np.uint8) * 50)
'''
data_2d = data_2d.astype(jt.uint32)
cY = data_2d.shape[0]
cX = data_2d.shape[1]
data_2d_copy = data_2d.clone()
changed = jt.ones([1], dtype=jt.uint32)
data_2d = data_2d.reshape(cX * cY)
result = jt.code(data_2d.shape,
data_2d.dtype, [data_2d, changed],
cuda_header='''
@alias(g_image, in0)
@alias(g_labels, out)
''',
cuda_src=r'''
__global__ void init_labels(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
@g_labels(iy*cX + ix) = iy*cX + ix;
}
__device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
// Resolve Label
unsigned int next = @g_labels(label);
// Follow chain
while(label != next) {
// Move to next
label = next;
next = @g_labels(label);
}
// Return label
return label;
}
__global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int id = ((blockIdx.y * blockDim.y) + threadIdx.y) * cX +
((blockIdx.x * blockDim.x) + threadIdx.x);
// Check Thread Range
if(id < cX*cY) {
// Resolve Label
@g_labels(id) = find_root(@ARGS, @g_labels(id));
}
}
__global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
// Check Thread Range
if((ix < cX) && (iy < cY)) {
// Get image and label values
const unsigned char cyx = @g_image( iy*cX + ix);
// Get neighbour labels
const unsigned int lym1x = (iy > 0) ? @g_labels((iy-1)*cX + ix) : 0;
const unsigned int lyxm1 = (ix > 0) ? @g_labels(iy *cX + ix-1) : 0;
const unsigned int lyx = @g_labels(iy *cX + ix);
const unsigned int lyxp1 = (ix < cX-1) ? @g_labels(iy *cX + ix+1) : 0;
const unsigned int lyp1x = (iy < cY-1) ? @g_labels((iy+1)*cX + ix) : 0;
const unsigned int lym1xm1 = (iy > 0 && ix > 0 ) ? @g_labels((iy-1)*cX + ix-1) : 0;
const unsigned int lym1xp1 = (iy > 0 && ix < cX-1) ? @g_labels((iy-1)*cX + ix+1) : 0;
const unsigned int lyp1xm1 = (iy < cY-1 && ix > 0 ) ? @g_labels((iy+1)*cX + ix-1) : 0;
const unsigned int lyp1xp1 = (iy < cY-1 && ix < cX-1) ? @g_labels((iy+1)*cX + ix+1) : 0;
const bool nym1x = (iy > 0) ? (cyx == (@g_image((iy-1)*cX + ix))) : false;
const bool nyxm1 = (ix > 0) ? (cyx == (@g_image(iy *cX + ix-1))) : false;
const bool nyxp1 = (ix < cX-1) ? (cyx == (@g_image(iy *cX + ix+1))) : false;
const bool nyp1x = (iy > cY-1) ? (cyx == (@g_image((iy+1)*cX + ix))) : false;
const bool nym1xm1 = (iy > 0 && ix > 0 ) ? (cyx == (@g_image((iy-1)*cX + ix-1))) : false;
const bool nym1xp1 = (iy > 0 && ix < cX-1) ? (cyx == (@g_image((iy-1)*cX + ix+1))) : false;
const bool nyp1xm1 = (iy < cY-1 && ix > 0 ) ? (cyx == (@g_image((iy+1)*cX + ix-1))) : false;
const bool nyp1xp1 = (iy < cY-1 && ix < cX-1) ? (cyx == (@g_image((iy+1)*cX + ix+1))) : false;
// Lowest label
unsigned int label = lyx;
// Find lowest neighbouring label
label = ((nym1x) && (lym1x < label)) ? lym1x : label;
label = ((nyxm1) && (lyxm1 < label)) ? lyxm1 : label;
label = ((nyxp1) && (lyxp1 < label)) ? lyxp1 : label;
label = ((nyp1x) && (lyp1x < label)) ? lyp1x : label;
label = ((nym1xm1) && (lym1xm1 < label)) ? lym1xm1 : label;
label = ((nym1xp1) && (lym1xp1 < label)) ? lym1xp1 : label;
label = ((nyp1xm1) && (lyp1xm1 < label)) ? lyp1xm1 : label;
label = ((nyp1xp1) && (lyp1xp1 < label)) ? lyp1xp1 : label;
// If labels are different, resolve them
if(label < lyx) {
// Update label
// Nonatomic write may overwrite another label but on average seems to give faster results
@g_labels(lyx) = label;
// Record the change
@in1(0) = 1;
}
}
}
''' + f'''
dim3 block(32, 32);
const int cX= {cX};
const int cY= {cY};''' + '''
dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y));
dim3 resolve_block(32, 32);
dim3 resolve_grid(ceil(cX/(float)resolve_block.x), ceil(cY/(float)resolve_block.y));
// Initialise labels
init_labels <<< grid, block >>>(@ARGS, cX, cY);
// Resolve the labels
resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
// Changed Flag
int32 changed = 1;
// While labels have changed
while(changed) {
// Copy changed to device
cudaMemsetAsync(in1_p, 0, 4);
// Label image
label_equivalence <<< grid, block >>>(@ARGS, cX, cY);
// Copy changed flag to host
cudaMemcpy(&changed, in1_p, sizeof(int32), cudaMemcpyDeviceToHost);
// Resolve the labels
resolve_labels <<< resolve_grid, resolve_block>>>(@ARGS, cX, cY);
}
''')
result = result.reshape((cY, cX)) * data_2d_copy
value = jt.unique(result)
value = value[value != 0]
map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
map_result[value] = jt.index(value.shape)[0] + 1
result = map_result[result]
return result

196
python/jittor/ccl/ccl_3d.py Normal file
View File

@ -0,0 +1,196 @@
import jittor as jt
def ccl_3d(data_3d):
'''
3D connected component labelling, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm
Args:
[in]param data_3d: binary three-dimensional vector
type data_3d: jittor array
Returns:
[out]result : labeled three-dimensional vector
Example:
>>> import jittor as jt
>>> jt.flags.use_cuda = 1
>>> data_3d = jt.zeros((10, 11, 12), dtype=jt.uint32)
>>> data_3d[2:4, :, :] = 1
>>> data_3d[5:7, :, :] = 1
>>> result = ccl_3d(data_3d)
>>> print(result[:, 0, 0])
>>> print(
jt.unique(result, return_counts=True, return_inverse=True)[0],
jt.unique(result, return_counts=True, return_inverse=True)[2])
'''
data_3d = data_3d.astype(jt.uint32)
cX = data_3d.shape[0]
cY = data_3d.shape[1]
cZ = data_3d.shape[2]
changed = jt.ones([1], dtype=jt.uint32)
data_3d_copy = data_3d.copy()
data_3d = data_3d.reshape(cX * cY * cZ)
result = jt.code(data_3d.shape,
data_3d.dtype, [data_3d, changed],
cuda_header='''
@alias(g_image, in0)
@alias(g_labels, out)
''',
cuda_src=r'''
__global__ void init_labels(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
const unsigned int iz = (blockIdx.z * blockDim.z) + threadIdx.z;
if((ix < cX) && (iy < cY) && (iz < cZ)) {
const unsigned char pzyx = @g_image(iz*pY + iy*pX + ix);
// Neighbour Connections
const bool nzm1yx = (iz > 0) ? (pzyx == @g_image((iz-1)*pY + iy *pX + ix )) : false;
const bool nzym1x = (iy > 0) ? (pzyx == @g_image( iz *pY + (iy-1)*pX + ix )) : false;
const bool nzyxm1 = (ix > 0) ? (pzyx == @g_image( iz *pY + iy *pX + ix-1)) : false;
// Label
unsigned int label;
// Initialise Label
label = (nzyxm1) ? ( iz*pY + iy*pX + ix-1) : (iz*pY + iy*pX + ix);
label = (nzym1x) ? ( iz*pY + (iy-1)*pX + ix) : label;
label = (nzm1yx) ? ((iz-1)*pY + iy*pX + ix) : label;
// Write to Global Memory
@g_labels(iz*pY + iy*pX + ix) = label;
}
}
__device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
// Resolve Label
unsigned int next = @g_labels(label);
// Follow chain
while(label != next) {
// Move to next
label = next;
next = @g_labels(label);
}
// Return label
return label;
}
__global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
@PRECALC
// Calculate index
const unsigned int id = ((blockIdx.z * blockDim.z) + threadIdx.z) * pY +
((blockIdx.y * blockDim.y) + threadIdx.y) * pX +
((blockIdx.x * blockDim.x) + threadIdx.x);
// Check Thread Range
if(id < cX*cY*cZ) {
// Resolve Label
@g_labels(id) = find_root(@ARGS, @g_labels(id));
}
}
__global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
const unsigned int iz = (blockIdx.z * blockDim.z) + threadIdx.z;
// Check Thread Range
if((ix < cX) && (iy < cY) && (iz < cZ)) {
// Get image and label values
const unsigned char pzyx = @g_image(iz*pY + iy*pX + ix);
// Neighbouring indexes
const unsigned int xm1 = ix-1;
const unsigned int xp1 = ix+1;
const unsigned int ym1 = iy-1;
const unsigned int yp1 = iy+1;
const unsigned int zm1 = iz-1;
const unsigned int zp1 = iz+1;
// Get neighbour labels
const unsigned int lzm1yx = (iz > 0) ? @g_labels(zm1*pY + iy*pX + ix) : 0;
const unsigned int lzym1x = (iy > 0) ? @g_labels( iz*pY + ym1*pX + ix) : 0;
const unsigned int lzyxm1 = (ix > 0) ? @g_labels( iz*pY + iy*pX + xm1) : 0;
const unsigned int lzyx = @g_labels( iz*pY + iy*pX + ix);
const unsigned int lzyxp1 = (ix < cX-1) ? @g_labels( iz*pY + iy*pX + xp1) : 0;
const unsigned int lzyp1x = (iy < cY-1) ? @g_labels( iz*pY + yp1*pX + ix) : 0;
const unsigned int lzp1yx = (iz < cZ-1) ? @g_labels(zp1*pY + iy*pX + ix) : 0;
const bool nzm1yx = (iz > 0) ? (pzyx == @g_image(zm1*pY + iy*pX + ix)) : false;
const bool nzym1x = (iy > 0) ? (pzyx == @g_image( iz*pY + ym1*pX + ix)) : false;
const bool nzyxm1 = (ix > 0) ? (pzyx == @g_image( iz*pY + iy*pX + xm1)) : false;
const bool nzyxp1 = (ix < cX-1) ? (pzyx == @g_image( iz*pY + iy*pX + xp1)) : false;
const bool nzyp1x = (iy < cY-1) ? (pzyx == @g_image( iz*pY + yp1*pX + ix)) : false;
const bool nzp1yx = (iz < cZ-1) ? (pzyx == @g_image(zp1*pY + iy*pX + ix)) : false;
// Lowest label
unsigned int label = lzyx;
// Find lowest neighbouring label
label = ((nzm1yx) && (lzm1yx < label)) ? lzm1yx : label;
label = ((nzym1x) && (lzym1x < label)) ? lzym1x : label;
label = ((nzyxm1) && (lzyxm1 < label)) ? lzyxm1 : label;
label = ((nzyxp1) && (lzyxp1 < label)) ? lzyxp1 : label;
label = ((nzyp1x) && (lzyp1x < label)) ? lzyp1x : label;
label = ((nzp1yx) && (lzp1yx < label)) ? lzp1yx : label;
// If labels are different, resolve them
if(label < lzyx) {
// Update label
// Nonatomic write may overwrite another label but on average seems to give faster results
@g_labels(lzyx) = label;
// Record the change
@in1(0) = 1;
}
}
}
''' + f'''
dim3 block(32, 4, 4);
const int cX= {cX};
const int cY= {cY};
const int cZ= {cZ};
const int pX= cX;
const int pY= cX*cY;''' + '''
dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y), ceil(cZ/(float)block.z));
// Initialise labels
init_labels <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
// Resolve the labels
resolve_labels <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
// Changed Flag
int32 changed = 1;
// While labels have changed
while(changed) {
// Copy changed to device
cudaMemsetAsync(in1_p, 0, 4);
// Label image
label_equivalence <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
// Copy changed flag to host
cudaMemcpy(&changed, in1_p, sizeof(int32), cudaMemcpyDeviceToHost);
// Resolve the labels
resolve_labels <<< grid, block>>>(@ARGS, cX, cY, cZ, pX, pY);
}
''')
result = result.reshape((cX, cY, cZ)) * data_3d_copy
value = jt.unique(result)
value = value[value != 0]
map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
map_result[value] = jt.index(value.shape)[0] + 1
result = map_result[result]
return result

View File

@ -0,0 +1,195 @@
import jittor as jt
def ccl_link(score_map, link_map, result_comp_area_thresh=6):
"""
Find components in score map and link them with link map, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm.
Args:
[in]param score_map: binary two-dimensional vector
type score_map: jittor array
[in]param link_map: two-dimensional vector with 8 channels
type link_map: jittor array
[in]param result_comp_area_thresh: threshold of component area
type result_comp_area_thresh: int
Returns:
[out]result: labeled two-dimensional vector
Example:
>>> import jittor as jt
>>> jt.flags.use_cuda = 1
>>> import cv2
>>> import numpy as np
>>> score_map = jt.Var(np.load("score_map.npy"))
>>> link_map = jt.Var(np.load("link_map.npy"))
>>> score_map = score_map >= 0.5
>>> link_map = link_map >= 0.8
>>> for i in range(8):
>>> link_map[:, :, i] = link_map[:, :, i] & score_map
>>> result = ccl_link(score_map, link_map)
>>> cv2.imwrite('pixellink.png', result.numpy().astype(np.uint8) * 50)
"""
score_map = score_map.astype(jt.uint32)
link_map = link_map.astype(jt.uint32)
cY = score_map.shape[0]
cX = score_map.shape[1]
changed = jt.ones([1], dtype=jt.uint32)
score_map = score_map.reshape(cX * cY)
result = jt.code(score_map.shape,
score_map.dtype, [score_map, link_map, changed],
cuda_header='''
@alias(score_map, in0)
@alias(link_map, in1)
@alias(g_labels, out)
''',
cuda_src=r'''
__global__ void init_labels(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
@g_labels(iy*cX + ix) = iy*cX + ix;
}
__device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
// Resolve Label
unsigned int next = @g_labels(label);
// Follow chain
while(label != next) {
// Move to next
label = next;
next = @g_labels(label);
}
// Return label
return label;
}
__global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int id = ((blockIdx.y * blockDim.y) + threadIdx.y) * cX +
((blockIdx.x * blockDim.x) + threadIdx.x);
// Check Thread Range
if(id < cX*cY) {
// Resolve Label
@g_labels(id) = find_root(@ARGS, @g_labels(id));
}
}
__global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY) {
@PRECALC
// Calculate index
const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
// Check Thread Range
if((ix < cX) && (iy < cY)) {
// Get image and label values
const unsigned char cyx = @score_map( iy*cX + ix);
// Get neighbour labels
const unsigned int lym1x = (iy > 0) ? @g_labels((iy-1)*cX + ix) : 0;
const unsigned int lyxm1 = (ix > 0) ? @g_labels(iy *cX + ix-1) : 0;
const unsigned int lyx = @g_labels(iy *cX + ix);
const unsigned int lyxp1 = (ix < cX-1) ? @g_labels(iy *cX + ix+1) : 0;
const unsigned int lyp1x = (iy < cY-1) ? @g_labels((iy+1)*cX + ix) : 0;
const unsigned int lym1xm1 = (iy > 0 && ix > 0 ) ? @g_labels((iy-1)*cX + ix-1) : 0;
const unsigned int lym1xp1 = (iy > 0 && ix < cX-1) ? @g_labels((iy-1)*cX + ix+1) : 0;
const unsigned int lyp1xm1 = (iy < cY-1 && ix > 0 ) ? @g_labels((iy+1)*cX + ix-1) : 0;
const unsigned int lyp1xp1 = (iy < cY-1 && ix < cX-1) ? @g_labels((iy+1)*cX + ix+1) : 0;
bool nym1x, nyxm1, nyxp1, nyp1x, nym1xm1, nym1xp1, nyp1xm1, nyp1xp1;
if(cyx) {
nym1x = (iy > 0) ? ((cyx == (@score_map((iy-1)*cX + ix))) && (@link_map(iy, ix, 6) || @link_map(iy-1, ix, 7))) : false; // up
nyxm1 = (ix > 0) ? ((cyx == (@score_map(iy *cX + ix-1))) && (@link_map(iy, ix, 0) || @link_map(iy-1, ix-1, 3))) : false; // left
nyxp1 = (ix < cX-1) ? ((cyx == (@score_map(iy *cX + ix+1))) && (@link_map(iy, ix, 3) || @link_map(iy, ix+1, 0))) : false; // right
nyp1x = (iy > cY-1) ? ((cyx == (@score_map((iy+1)*cX + ix))) && (@link_map(iy, ix, 7) || @link_map(iy+1, ix, 6))) : false; // down
nym1xm1 = (iy > 0 && ix > 0 ) ? ((cyx == (@score_map((iy-1)*cX + ix-1))) && (@link_map(iy, ix, 2) || @link_map(iy-1, ix-1, 4))) : false; // up-left
nym1xp1 = (iy > 0 && ix < cX-1) ? ((cyx == (@score_map((iy-1)*cX + ix+1))) && (@link_map(iy, ix, 5) || @link_map(iy-1, ix+1, 1))) : false; // up-right
nyp1xm1 = (iy < cY-1 && ix > 0 ) ? ((cyx == (@score_map((iy+1)*cX + ix-1))) && (@link_map(iy, ix, 1) || @link_map(iy+1, ix-1, 5))) : false; // down-left
nyp1xp1 = (iy < cY-1 && ix < cX-1) ? ((cyx == (@score_map((iy+1)*cX + ix+1))) && (@link_map(iy, ix, 4) || @link_map(iy+1, ix+1, 2))) : false; // down-right
}
else {
nym1x = (iy > 0) ? (cyx == (@score_map((iy-1)*cX + ix))) : false; // up
nyxm1 = (ix > 0) ? (cyx == (@score_map(iy *cX + ix-1))) : false; // left
nyxp1 = (ix < cX-1) ? (cyx == (@score_map(iy *cX + ix+1))) : false; // right
nyp1x = (iy > cY-1) ? (cyx == (@score_map((iy+1)*cX + ix))) : false; // down
nym1xm1 = (iy > 0 && ix > 0 ) ? (cyx == (@score_map((iy-1)*cX + ix-1))) : false; // up-left
nym1xp1 = (iy > 0 && ix < cX-1) ? (cyx == (@score_map((iy-1)*cX + ix+1))) : false; // up-right
nyp1xm1 = (iy < cY-1 && ix > 0 ) ? (cyx == (@score_map((iy+1)*cX + ix-1))) : false; // down-left
nyp1xp1 = (iy < cY-1 && ix < cX-1) ? (cyx == (@score_map((iy+1)*cX + ix+1))) : false; // down-right
}
// Lowest label
unsigned int label = lyx;
// Find lowest neighbouring label
label = ((nym1x) && (lym1x < label)) ? lym1x : label;
label = ((nyxm1) && (lyxm1 < label)) ? lyxm1 : label;
label = ((nyxp1) && (lyxp1 < label)) ? lyxp1 : label;
label = ((nyp1x) && (lyp1x < label)) ? lyp1x : label;
label = ((nym1xm1) && (lym1xm1 < label)) ? lym1xm1 : label;
label = ((nym1xp1) && (lym1xp1 < label)) ? lym1xp1 : label;
label = ((nyp1xm1) && (lyp1xm1 < label)) ? lyp1xm1 : label;
label = ((nyp1xp1) && (lyp1xp1 < label)) ? lyp1xp1 : label;
// If labels are different, resolve them
if(label < lyx) {
// Update label
// Nonatomic write may overwrite another label but on average seems to give faster results
@g_labels(lyx) = label;
// Record the change
@in2(0) = 1;
}
}
}
''' + f'''
dim3 block(32, 32);
const int cX= {cX};
const int cY= {cY};''' + '''
dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y));
dim3 resolve_block(32, 32);
dim3 resolve_grid(ceil(cX/(float)resolve_block.x), ceil(cY/(float)resolve_block.y));
// Initialise labels
init_labels <<< grid, block >>>(@ARGS, cX, cY);
// Resolve the labels
resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
// Changed Flag
int32 changed = 1;
// While labels have changed
while(changed) {
// Copy changed to device
cudaMemsetAsync(in2_p, 0, 4);
// Label image
label_equivalence <<< grid, block >>>(@ARGS, cX, cY);
// Copy changed flag to host
cudaMemcpy(&changed, in2_p, sizeof(int32), cudaMemcpyDeviceToHost);
// Resolve the labels
resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
}
''')
result = result.reshape((cY, cX))
value, _, cnt = jt.unique(result, return_inverse=True, return_counts=True)
value = (cnt > result_comp_area_thresh) * value
value = value[value != 0]
map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
map_result[value] = jt.index(value.shape)[0] + 1
result = map_result[result]
return result

View File

@ -0,0 +1,430 @@
# import os
# os.environ["FIX_TORCH_ERROR"] = "0"
# import jittor as jt
# from jittor import *
# from typing import Tuple
# org_int = int = type(1)
# org_float = float = type(1.0)
# org_bool = bool = type(True)
# import jtorch.compiler
# import jtorch_core
# from jtorch_core import *
# device.__reduce__ = lambda self: (device, (self.type,))
# device.__module__ = "jtorch"
# jt.jittor_core.device = device
# def handle_dtype(args, kw, dtype):
# def convert(x):
# if isinstance(x, jt.Var):
# return x.cast(dtype)
# return x
# if dtype is not None:
# if args is not None:
# if isinstance(args, (tuple,list)):
# args = [ convert(a) for a in args ]
# else:
# args = convert(x)
# if kw is not None:
# kw = { k:convert(v) for k,v in kw.items() }
# return args, kw
# def get_args_names(func):
# import inspect
# spec = inspect.getfullargspec(func)
# return spec[0] + spec[4]
# def wrapper(func):
# has_dtype = False
# if hasattr(func, "__code__"):
# has_dtype = "dtype" in get_args_names(func)
# def inner(*args, **kw):
# requires_grad = None
# dtype = None
# if "requires_grad" in kw:
# requires_grad = kw["requires_grad"]
# del kw["requires_grad"]
# if not has_dtype and "dtype" in kw:
# dtype = kw["dtype"]
# del kw["dtype"]
# if "device" in kw:
# del kw["device"]
# if 'pin_memory' in kw:
# del kw['pin_memory']
# args, kw = handle_dtype(args, kw, dtype)
# ret = func(*args, **kw)
# if isinstance(ret, jt.Var):
# if requires_grad is not None:
# ret.requires_grad = requires_grad
# if dtype is not None:
# ret.astype(dtype)
# return ret
# return inner
# import inspect
# _wrapper_keys = set(["shape", "start", "size"])
# _wrapper_keys.add("x")
# for k,v in list(globals().items()):
# if callable(v) and not isinstance(v, type):
# try:
# spec = inspect.getfullargspec(v)
# args_name = spec[0]
# if len(args_name) and args_name[0] in _wrapper_keys:
# globals()[k] = wrapper(v)
# elif spec.varargs in _wrapper_keys:
# globals()[k] = wrapper(v)
# except:
# pass
# def empty(*size, dtype=jt.float32, device=None, requires_grad=False):
# if len(size) == 1 and not isinstance(size[0], org_int):
# size = size[0]
# return jt.empty(size, dtype)
# Tensor = Var
# Tensor.backward = lambda x: jtorch_core.backward(x)
# Tensor.grad = property(grad_get, grad_set, grad_del)
# Tensor.retains_grad = property(retain_grad_get, retain_grad_set)
# def retain_grad(x:Tensor, value:bool=True):
# x.retains_grad = value
# return value
# Tensor.retain_grad = retain_grad
# Tensor.dim = lambda self: self.ndim
# Tensor.ndimension = lambda self: self.ndim
# Tensor.nelement = lambda self: self.numel()
# Tensor.cuda = lambda self: self
# def device_get(x:Tensor):
# return device("cpu") if not jt.has_cuda or not jt.flags.use_cuda else device("cuda")
# Tensor.device = property(device_get)
# def argmax(x: Var, dim=None, keepdim: bool = False):
# return jt.argmax(x, dim, keepdim)[0]
# Tensor.argmax = argmax
# def tensor_type(x: Var, dtype=None, **kwargs):
# if dtype:
# return x.astype(dtype)
# else:
# return x.dtype
# Tensor.type = tensor_type
# def is_floating_point(x: Var):
# return "float" in str(x.dtype)
# Tensor.is_floating_point = is_floating_point
# from . import autograd
# from .autograd import *
# def tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False):
# if isinstance(data,list):
# data_list = []
# check = True
# for p in data:
# if isinstance(p, Tensor) and p.numel()==1:
# data_list.append(p.item())
# elif isinstance(p, (org_int,org_float)):
# data_list.append(p)
# else:
# check = False
# break
# if check:
# data = data_list
# return wrapper(array)(data, dtype=dtype, device=device, requires_grad=requires_grad, pin_memory=pin_memory)
# # tensor = wrapper(array)
# from_numpy = wrapper(array)
# strided = None
# def mod_zero_grad(self):
# for p in self.parameters():
# p.grad = None
# Module.zero_grad = mod_zero_grad
# class ModuleMisc:
# def parameters(self):
# return iter(super().parameters())
# def load_state_dict(self, state_dict, strict=False):
# return super().load_state_dict(state_dict)
# def to(self, device=None,dtype=None):
# ''' do nothing but return its self'''
# return self
# def register_parameter(self,name,data):
# self.name = data
# def buffers(self):
# for _, buf in self.named_buffers():
# yield buf
# def make_module(cls):
# class TMod(ModuleMisc, cls):
# def __init__(self, *args, **kw):
# dtype = None
# if "dtype" in kw:
# dtype = kw["dtype"]
# del kw["dtype"]
# self._dtype = dtype
# with jt.flag_scope(th_mode=0):
# if "device" in kw:
# del kw["device"]
# super().__init__(*args, **kw)
# for k,v in self.__dict__.items():
# if not k.startswith("_") and isinstance(v, Var) \
# and v.requires_grad:
# v.retain_grad()
# if dtype is not None and isinstance(v, Var):
# v.assign(v.cast(dtype))
# def __call__(self, *args, **kw):
# args, kw = handle_dtype(args, kw, self._dtype)
# # if forward is override by user, call forward
# if self.__class__.forward is not TMod.forward:
# return self.forward(*args, **kw)
# return self.execute(*args, **kw)
# def forward(self, *args, **kw):
# args, kw = handle_dtype(args, kw, self._dtype)
# return self.execute(*args, **kw)
# @property
# def training(self):
# if not hasattr(self, "is_train"):
# self.is_train = True
# return self.is_train
# @training.setter
# def training(self, value):
# self.is_train = value
# TMod.__name__ = cls.__name__
# return TMod
# import jtorch.cuda
# import jtorch.nn
# from jtorch.nn import Module, Parameter
# import jtorch.optim
# from jtorch.utils.dtype import Dtype, get_string_dtype
# def frombuffer(buffer: bytearray,
# *,
# dtype: Dtype,
# count: int = -1,
# offset: int = 0,
# requires_grad: bool = True) -> Tensor:
# dtype = get_string_dtype(dtype)
# tensor = jt.array(np.frombuffer(buffer, dtype, count=count, offset=offset))
# if requires_grad and tensor.dtype.is_float():
# tensor.requires_grad = True
# return tensor
# def conflict_wrapper(origin_func, new_func):
# def wrapper(*args, **kw):
# if jt.flags.th_mode:
# return new_func(*args, **kw)
# else:
# return origin_func(*args, **kw)
# return wrapper
# def min(*args, **kw):
# dim = None
# if len(args) >= 2 and isinstance(args[1], org_int):
# dim = args[1]
# elif "dim" in kw and isinstance(kw["dim"], org_int):
# dim = kw["dim"]
# if dim is not None:
# k, v = jt.argmin(*args, **kw)
# return v, k
# elif len(args) == 2 and isinstance(args[1], jt.Var):
# return jt.minimum(args[0], args[1])
# else:
# return jt.min(*args, **kw)
# Tensor.min = conflict_wrapper(jt.min, min)
# def max(*args, **kw):
# dim = None
# if "dim" in kw:
# x = kw["dim"]
# if len(args) >= 2 and isinstance(args[1], org_int):
# dim = args[1]
# elif "dim" in kw and isinstance(kw["dim"], org_int):
# dim = kw["dim"]
# if dim is not None:
# k, v = jt.argmax(*args, **kw)
# return v, k
# elif len(args) == 2 and isinstance(args[1], jt.Var):
# return jt.maximum(args[0], args[1])
# else:
# return jt.max(*args, **kw)
# Tensor.max = conflict_wrapper(jt.max, max)
# def argsort(*args, **kw):
# k, v = jt.argsort(*args, **kw)
# return k
# Tensor.argsort = conflict_wrapper(jt.argsort, argsort)
# LongTensor = jt.int64
# FloatTensor = jt.float
# HalfTensor = jt.float16
# BoolTensor = jt.bool
# IntTensor = jt.int32
# class JDType:
# def __init__(self, func, str):
# self.func = func
# self.str = str
# self.__name__ = str.split(".")[-1]
# def __call__(self, *args, **kw):
# return self.func(*args, **kw)
# def __str__(self):
# return self.str
# def is_floating_point(self):
# return "float" in str(self.str)
# int8 = JDType(jt.int8, "torch.int8")
# int16 = JDType(jt.int16, "torch.int16")
# int = int32 = JDType(jt.int32, "torch.int32")
# long = int64 = JDType(jt.int64, "torch.int64")
# half = float16 = JDType(jt.float16, "torch.float16")
# float = float32 = JDType(jt.float32, "torch.float32")
# double = float64 = JDType(jt.float64, "torch.float64")
# bfloat16 = "bfloat16" # TODO
# complex64 = "complex64" # TODO
# complex128 = "complex128" # TODO
# def get_JDtype(dtype):
# if dtype=='float32' or dtype == jt.float32:
# return float32
# elif dtype=='float64' or dtype == jt.float64:
# return float64
# elif dtype=='float16' or dtype == jt.float16:
# return float16
# elif dtype=='int32' or dtype == jt.int32:
# return int32
# elif dtype=='int64' or dtype == jt.int64:
# return int64
# elif dtype=='int16' or dtype == jt.int16:
# return int16
# elif dtype=='int8' or dtype == jt.int8:
# return int8
# else:
# raise Exception("dtype {} not supported".format(dtype))
# def load(path,**kwargs):
# def _to_jittor(data):
# if isinstance(data,dict):
# return {k:_to_jittor(d) for k,d in data.items()}
# if isinstance(data,list):
# return [_to_jittor(d) for d in data]
# if isinstance(data,np.ndarray):
# return jt.array(data)
# return data
# data = jt.load(path)
# return _to_jittor(data)
# def is_tensor(x):
# return isinstance(x, Tensor)
# manual_seed = jt.set_global_seed
# jt.flags.amp_level = 3
# Size = jt.NanoVector
# class Generator:
# def __init__(self,*args,**kw) -> None:
# self.seed = None
# def manual_seed(self,seed):
# self.seed = seed
# from . import fx
# _default_type = "float32"
# def get_default_dtype():
# return _default_type
# def set_default_dtype(dtype):
# global _default_type
# _default_type = dtype
# dtype = JDType
# def div(x,y,rounding_mode="floor"):
# assert rounding_mode == "floor"
# z = (x / y)
# if rounding_mode == "floor":
# z = z.floor()
# if x.dtype == "int32" and (isinstance(y,org_int) or y.dtype == "int32"):
# z = z.int32()
# return z
# def randn(*args,**kw):
# wrap_randn = wrapper(jt.randn)
# generator = kw.get('generator',None)
# kw.pop('generator',None)
# if 'layout' in kw:
# del kw['layout']
# if generator is not None and generator.seed is not None:
# jt.set_global_seed(generator.seed)
# return wrap_randn(*args,**kw)
# def rand(*args,**kw):
# print("rand")
# wrap_rand = wrapper(jt.rand)
# generator = kw.get('generator',None)
# kw.pop('generator',None)
# if 'layout' in kw:
# del kw['layout']
# if generator is not None and generator.seed is not None:
# jt.set_global_seed(generator.seed)
# return wrap_rand(*args,**kw)
# def set_default_tensor_type(t: type or str):
# if isinstance(t, str):
# info = t.split(".")
# if len(info) == 3 and info[1] == 'cuda':
# jt.flags.use_cuda = 1
# #TODO: type
# def clamp(x, min=None, max=None):
# return jt.clamp(x, min, max)
# def to(x,*args,**kw):
# device = None
# if len(args) == 1:
# device = args[0]
# if isinstance(device, jt.NanoString) or callable(device):
# return jt.to(x,*args,**kw)
# if 'cpu' in str(device):
# args = []
# device = kw.get("device",None)
# if 'cpu' in str(device):
# kw.pop('device',None)
# print("to cpu")
# # print(kw)
# return jt.to(x,*args,**kw)
# Tensor.to = conflict_wrapper(jt.to, to)
# mm = wrapper(jt.matmul)
# def _data_get(x):
# return x
# def _data_set(x, value):
# x.assign(value)
# Tensor.data = property(_data_get, _data_set)
# Tensor.layout = None

View File

@ -0,0 +1,134 @@
import jittor as jt
from jittor import Var
from collections.abc import Sequence, Mapping
Variable = Var
class FunctionContext:
def save_for_backward(self, *args):
self.saved_tensors = args
class Function:
''' Function Module for customized backward operations
Example 1 (Function can have multiple input and multiple output, and user
can store value for backward computation)::
import jtorch
from jtorch import Function
class MyFunc(Function):
@staticmethod
def forward(self, x, y):
self.x = x
self.y = y
return x*y, x/y
@staticmethod
def backward(self, grad0, grad1):
return grad0 * self.y, grad1 * self.x
a = jtorch.array(3.0)
a.requires_grad = True
b = jtorch.array(4.0)
b.requires_grad = True
func = MyFunc.apply
c,d = func(a, b)
(c+d*3).backward()
assert a.grad.data == 4
assert b.grad.data == 9
Example 2(Function can return None for no gradiant, and gradiant
can also be None)::
import jtorch
from jtorch import Function
class MyFunc(Function):
@staticmethod
def forward(self, x, y):
self.x = x
self.y = y
return x*y, x/y
@staticmethod
def backward(self, grad0, grad1):
assert grad1 is None
return grad0 * self.y, None
a = jt.array(3.0)
a.requires_grad = True
b = jt.array(4.0)
b.requires_grad = True
func = MyFunc.apply
c,d = func(a, b)
d.stop_grad()
da, db = jt.grad(c+d*3, [a, b])
assert da.data == 4
assert db.data == 0
'''
def __call__(self, *args):
backup = args
args = list(args)
taped_inputs = []
taped_outputs = []
input_mask = [-1] * len(args)
for i,v in enumerate(args):
if isinstance(v, Var):
if v.is_stop_grad():
# -2 in input_mask means this input is stop_grad
input_mask[i] = -2
continue
v = v.tape()
input_mask[i] = len(taped_inputs)
args[i] = v
taped_inputs.append(v)
ctx = FunctionContext()
ori_res = self.forward(ctx, *args)
# ori_res = self.execute(*args)
if not isinstance(ori_res, Sequence):
res = [ori_res]
else:
res = list(ori_res)
output_mask = [-1] * len(res)
for i,v in enumerate(res):
if isinstance(v, Var):
v = v.tape()
output_mask[i] = len(taped_outputs)
res[i] = v
taped_outputs.append(v)
ctx.input_mask = input_mask
ctx.output_mask = output_mask
# tape outputs and inputs together so that
# backward treats them as one operator
jt.tape_together(taped_inputs, taped_outputs,
lambda *args: self._grad(ctx, self, *args))
if isinstance(ori_res, Sequence):
return res
else:
return res[0]
@staticmethod
def _grad(ctx, func, *args):
new_args = ( (args[i] if i>=0 else None) for i in ctx.output_mask )
ret = func.backward(ctx, *new_args)
if not isinstance(ret, Sequence):
ret = (ret,)
new_ret = []
for i, r in enumerate(ret):
j = ctx.input_mask[i]
if j<0:
# -2 in input_mask means this input is stop_grad
assert r is None or j==-2, f"{type(func)}'s {i}-th returned grad should be None, "\
"because the input value is not a jittor variable."
else:
new_ret.append(r)
return new_ret
def dfs(self, parents, k, callback, callback_leave=None):
pass
@classmethod
def apply(cls, *args, **kw):
func = cls()
return func(*args, **kw)

View File

@ -0,0 +1,39 @@
import jittor as jt
import jittor_utils
import glob
import os
from jittor import pyjt_compiler
import sys
from jittor_utils import lock
jtorch_path = os.path.dirname(__file__)
cache_path = os.path.join(jt.compiler.cache_path, "jtorch")
# os.makedirs(cache_path, exist_ok=True)
os.makedirs(os.path.join(cache_path, "gen"), exist_ok=True)
with lock.lock_scope():
pyjt_gen_src = pyjt_compiler.compile(cache_path, jtorch_path)
ext_args = 'c[cu]' if jt.has_cuda else 'cc'
files = glob.glob(jtorch_path+"/src/**/*."+ext_args, recursive=True)
files += pyjt_gen_src
cc_flags = " -I\""+os.path.join(jtorch_path, "src")+"\" "
if os.environ.get("use_data_o", "1") == "1":
files += glob.glob(jtorch_path+"/src/**/*.o", recursive=True)
files = [f for f in files if "__data__" not in f]
with lock.lock_scope():
jt.compiler.compile(
jt.compiler.cc_path,
jt.compiler.cc_flags+jt.compiler.opt_flags+ cc_flags,
files,
"jtorch_core"+jt.compiler.extension_suffix,
obj_dirname="jtorch_objs")
with jittor_utils.import_scope(jt.compiler.import_flags):
import jtorch_core as core
jt.flags.th_mode = 1
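# --- Hedged usage note (not part of the original build script) ---
# The use_data_o environment variable read above gates linking of the prebuilt
# *.o files; setting it before the first import should force a from-source build:
#   import os
#   os.environ["use_data_o"] = "0"  # must be set before importing jtorch
#   import jtorch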

View File

@ -0,0 +1,64 @@
import jittor as jt
import jtorch
def is_available():
return jt.has_cuda
def device_count():
return int(jt.has_cuda)
def set_device(device=None):
pass
def get_rng_state(device=None):
pass
def current_device():
return jtorch.device("cuda")
def mem_get_info(i):
return ("75GB",)
class Generator:
def __init__(self):
pass
def set_state(self, state):
self.state = state
default_generators = [Generator()]
_lazy_call = lambda func: func()
device = None
LongTensor = jt.int64
FloatTensor = jt.float
HalfTensor = jt.float16
BoolTensor = jt.bool
manual_seed = jt.set_global_seed
manual_seed_all = jt.set_global_seed
def synchronize():
jt.sync_all(True)
class Event:
pass
class Stream:
pass
from typing import Any
from .gradscaler import GradScaler
class autocast:
def __init__(self,**kwargs):
pass
def __enter__(self,):
pass
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any):
pass
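# --- Hedged usage sketch, assuming jtorch exposes this module as torch.cuda ---
#   if is_available():          # jt.has_cuda
#       manual_seed(0)          # jt.set_global_seed
#       synchronize()           # jt.sync_all(True)
#   with autocast():            # no-op context manager in this port
#       pass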

View File

@ -0,0 +1,53 @@
import datetime
from enum import Enum
import jittor as jt
def is_initialized():
return True
def get_rank(group=None):
return 0
def get_world_size(group=None):
return 1
def get_backend(group=None):
return "nccl"
def new_group(ranks=None, timeout=datetime.timedelta(seconds=1800), backend=None, pg_options=None):
return 1
def barrier():
pass
def is_available():
return True
def is_built():
return True
class ReduceOp:
SUM = 0
class GroupMember:
WORLD = 0
class ProcessGroup:
pass
class Join:
pass
dist_backend = Enum("dist_backend", ("GLOO", "MPI", "NCCL"))
_backend = dist_backend.NCCL
def is_mpi_available():
return jt.in_mpi
def DistributedDataParallel(model, *args, **kw):
return model
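# --- Hedged usage sketch: single-process stand-ins for torch.distributed ---
#   model = DistributedDataParallel(model)      # returns the model unchanged
#   if is_initialized() and get_rank() == 0:    # always True / 0 here
#       print("world size:", get_world_size())  # always 1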

View File

@ -0,0 +1,15 @@
import jittor as jt
class RelaxedBernoulli:
def __init__(self, temperature, probs=None, logits=None):
self.temperature = temperature
self.probs = probs
if logits is None and probs is not None:
# rsample() works on logits, so derive them when only probs is given
logits = jt.log(probs) - jt.log(1 - probs)
self.logits = logits
def rsample(self):
noise = jt.rand_like(self.logits)
eps = 1e-20
noise = jt.clamp(noise, eps, 1.0 - eps)
logit_noise = jt.log(noise) - jt.log(1 - noise)
sample = (self.logits + logit_noise) / self.temperature
return jt.sigmoid(sample)
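# --- Hedged usage sketch ---
# rsample() draws a Gumbel-sigmoid ("Concrete") relaxation of a Bernoulli,
# differentiable w.r.t. the logits:
#   dist = RelaxedBernoulli(temperature=0.5, logits=jt.array([0.0, 2.0]))
#   s = dist.rsample()   # values strictly inside (0, 1)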

View File

@ -0,0 +1,5 @@
#TODO: Implement FFT and IFFT
fftn = None
fftshift = None
ifftn = None
ifftshift = None

View File

@ -0,0 +1,2 @@
class Proxy:
pass

View File

@ -0,0 +1,519 @@
from collections import defaultdict, abc
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, cast
import inspect
import warnings
import jittor as jt
# import torch
class OptState(Enum):
READY = 0
UNSCALED = 1
STEPPED = 2
def _refresh_per_optimizer_state():
return {"stage": OptState.READY, "found_inf_per_device": {}}
class GradScaler:
_scale: Optional[jt.Var]
_growth_tracker: Optional[jt.Var]
_per_optimizer_states: Dict[int, Dict[str, Any]]
"""
An instance ``scaler`` of :class:`GradScaler` helps perform the steps of gradient scaling
conveniently.
* ``scaler.scale(loss)`` multiplies a given loss by ``scaler``'s current scale factor.
* ``scaler.step(optimizer)`` safely unscales gradients and calls ``optimizer.step()``.
* ``scaler.update()`` updates ``scaler``'s scale factor.
Example::
# Creates a GradScaler once at the beginning of training.
scaler = GradScaler()
for epoch in epochs:
for input, target in data:
optimizer.zero_grad()
output = model(input)
loss = loss_fn(output, target)
# Scales loss. Calls backward() on scaled loss to create scaled gradients.
scaler.scale(loss).backward()
# scaler.step() first unscales gradients of the optimizer's params.
# If gradients don't contain infs/NaNs, optimizer.step() is then called,
# otherwise, optimizer.step() is skipped.
scaler.step(optimizer)
# Updates the scale for next iteration.
scaler.update()
See the :ref:`Automatic Mixed Precision examples<amp-examples>` for usage
(along with autocasting) in more complex cases like gradient clipping, gradient accumulation, gradient penalty,
and multiple losses/optimizers.
``scaler`` dynamically estimates the scale factor each iteration. To minimize gradient underflow,
a large scale factor should be used. However, ``float16`` values can "overflow" (become inf or NaN) if
the scale factor is too large. Therefore, the optimal scale factor is the largest factor that can be used
without incurring inf or NaN gradient values.
``scaler`` approximates the optimal scale factor over time by checking the gradients for infs and NaNs during every
``scaler.step(optimizer)`` (or optional separate ``scaler.unscale_(optimizer)``, see :meth:`unscale_`).
* If infs/NaNs are found, ``scaler.step(optimizer)`` skips the underlying ``optimizer.step()`` (so the params
themselves remain uncorrupted) and ``update()`` multiplies the scale by ``backoff_factor``.
* If no infs/NaNs are found, ``scaler.step(optimizer)`` runs the underlying ``optimizer.step()`` as usual.
If ``growth_interval`` unskipped iterations occur consecutively, ``update()`` multiplies the scale by
``growth_factor``.
The scale factor often causes infs/NaNs to appear in gradients for the first few iterations as its
value calibrates. ``scaler.step`` will skip the underlying ``optimizer.step()`` for these
iterations. After that, step skipping should occur rarely (once every few hundred or thousand iterations).
Args:
init_scale (float, optional, default=2.**16): Initial scale factor.
growth_factor (float, optional, default=2.0): Factor by which the scale is multiplied during
:meth:`update` if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
backoff_factor (float, optional, default=0.5): Factor by which the scale is multiplied during
:meth:`update` if inf/NaN gradients occur in an iteration.
growth_interval (int, optional, default=2000): Number of consecutive iterations without inf/NaN gradients
that must occur for the scale to be multiplied by ``growth_factor``.
enabled (bool, optional): If ``False``, disables gradient scaling. :meth:`step` simply
invokes the underlying ``optimizer.step()``, and other methods become no-ops.
Default: ``True``
"""
def __init__(self,
init_scale=2.**16,
growth_factor=2.0,
backoff_factor=0.5,
growth_interval=2000,
enabled=True):
self._enabled = enabled
if self._enabled:
assert growth_factor > 1.0, "The growth factor must be > 1.0."
assert backoff_factor < 1.0, "The backoff factor must be < 1.0."
self._init_scale = init_scale
# self._scale will be lazily initialized during the first call to scale()
self._scale = None
self._growth_factor = growth_factor
self._backoff_factor = backoff_factor
self._growth_interval = growth_interval
self._init_growth_tracker = 0
# self._growth_tracker will be lazily initialized during the first call to scale()
self._growth_tracker = None
self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
def _check_scale_growth_tracker(self, funcname) -> Tuple[jt.Var, jt.Var]:
fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."
assert self._scale is not None, "Attempted {} but _scale is None. ".format(funcname) + fix
assert self._growth_tracker is not None, "Attempted {} but _growth_tracker is None. ".format(funcname) + fix
return (self._scale, self._growth_tracker)
def _lazy_init_scale_growth_tracker(self):
assert self._growth_tracker is None, "_growth_tracker initialized before _scale"
self._scale = self._init_scale
self._growth_tracker = self._init_growth_tracker
def scale(self, outputs):
"""
Multiplies ('scales') a tensor or list of tensors by the scale factor.
Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled, outputs are returned
unmodified.
Args:
outputs (Tensor or iterable of Tensors): Outputs to scale.
"""
if not self._enabled:
return outputs
# Short-circuit for the common case.
if isinstance(outputs, jt.Var):
assert jt.flags.use_cuda == 1
if self._scale is None:
self._lazy_init_scale_growth_tracker()
assert self._scale is not None
return outputs * self._scale
def apply_scale(val):
if isinstance(val, jt.Var):
assert jt.flags.use_cuda == 1
if self._scale is None:
self._lazy_init_scale_growth_tracker()
assert self._scale is not None
return val * self._scale
elif isinstance(val, abc.Iterable):
iterable = map(apply_scale, val)
if isinstance(val, (list, tuple)):
return type(val)(iterable)
else:
return iterable
else:
raise ValueError("outputs must be a Tensor or an iterable of Tensors")
return apply_scale(outputs)
def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16):
with jt.no_grad():
optimizer.pre_step()
for group in optimizer.param_groups:
for to_unscale in group["grads"]:
if to_unscale is None or isinstance(to_unscale,(int,float)):
continue
if (not allow_fp16) and str(to_unscale.dtype) == "float16":
raise ValueError("Attempting to unscale FP16 gradients.")
if not (to_unscale.isinf().any()):
if inv_scale != 1.0:
to_unscale.update(to_unscale*inv_scale)
else:
found_inf = 1.0
return found_inf
def unscale_(self, optimizer):
"""
Divides ("unscales") the optimizer's gradient tensors by the scale factor.
:meth:`unscale_` is optional, serving cases where you need to
:ref:`modify or inspect gradients<working-with-unscaled-gradients>`
between the backward pass(es) and :meth:`step`.
If :meth:`unscale_` is not called explicitly, gradients will be unscaled automatically during :meth:`step`.
Simple example, using :meth:`unscale_` to enable clipping of unscaled gradients::
...
scaler.scale(loss).backward()
scaler.unscale_(optimizer)
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
scaler.step(optimizer)
scaler.update()
Args:
optimizer (torch.optim.Optimizer): Optimizer that owns the gradients to be unscaled.
.. note::
:meth:`unscale_` does not incur a CPU-GPU sync.
.. warning::
:meth:`unscale_` should only be called once per optimizer per :meth:`step` call,
and only after all gradients for that optimizer's assigned parameters have been accumulated.
Calling :meth:`unscale_` twice for a given optimizer between each :meth:`step` triggers a RuntimeError.
.. warning::
:meth:`unscale_` may unscale sparse gradients out of place, replacing the ``.grad`` attribute.
"""
if not self._enabled:
return
self._check_scale_growth_tracker("unscale_")
optimizer_state = self._per_optimizer_states[id(optimizer)]
if hasattr(optimizer,"get_find_inf"):
return
# FP32 division can be imprecise for certain compile options, so we carry out the reciprocal in FP64.
assert self._scale is not None
inv_scale = 1.0 / self._scale
found_inf = 0.0
optimizer_state["found_inf_per_device"] = self._unscale_grads_(optimizer, inv_scale, found_inf, False)
def step(self, optimizer, *args, **kwargs):
"""
:meth:`step` carries out the following two operations:
1. Internally invokes ``unscale_(optimizer)`` (unless :meth:`unscale_` was explicitly called for ``optimizer``
earlier in the iteration). As part of the :meth:`unscale_`, gradients are checked for infs/NaNs.
2. If no inf/NaN gradients are found, invokes ``optimizer.step()`` using the unscaled
gradients. Otherwise, ``optimizer.step()`` is skipped to avoid corrupting the params.
``*args`` and ``**kwargs`` are forwarded to ``optimizer.step()``.
Returns the return value of ``optimizer.step(*args, **kwargs)``.
Args:
optimizer (torch.optim.Optimizer): Optimizer that applies the gradients.
args: Any arguments.
kwargs: Any keyword arguments.
.. warning::
Closure use is not currently supported.
"""
if (not self._enabled):
return optimizer.step(*args, **kwargs)
if "closure" in kwargs:
raise RuntimeError("Closure use is not currently supported if GradScaler is enabled.")
self._check_scale_growth_tracker("step")
optimizer_state = self._per_optimizer_states[id(optimizer)]
retval = None
if (hasattr(optimizer, "_step_supports_amp_scaling") and optimizer._step_supports_amp_scaling):
# This optimizer has customized scale-handling logic, so we can call optimizer.step() directly.
# The contract with custom optimizers is that their step() should accept an additional,
# optional grad_scaler kwarg. We append self to the kwargs so the custom optimizer has full information:
# it can query its own state, invoke unscale_ on itself, etc
# The contract above is being deprecated to avoid introducing `grad_scaler: GradScaler` argument
# to `Optimizer.step`. The new behavior is going to add two Tensor attributes of `grad_scale`
# and `found_inf` to the passed optimizer so that the optimizer can utilize those
# to skip the parameter updates or unscale gradients before updating parameters in
# the fused kernel, e.g. `FusedAdamMathFunctor`.
# In this behavior, `GradScaler._check_inf_per_device` is called if `OptState.READY`,
# while the method is expected to be called by users side, i.e. their optimizers.
kwargs_ = kwargs
has_grad_scaler_kwarg = "grad_scaler" in inspect.signature(optimizer.step).parameters
if has_grad_scaler_kwarg:
warnings.warn(
"GradScaler is going to stop passing itself as a keyword argument to the passed "
"optimizer. In the near future GradScaler registers `grad_scale: Tensor` and "
"`found_inf: Tensor` to the passed optimizer and let the optimizer use them directly.",
FutureWarning)
kwargs_.update({"grad_scaler": self})
else:
if optimizer_state["stage"] is OptState.READY:
self._check_inf_per_device(optimizer)
scaler = self._get_scale_async()
# found_inf_per_device is a plain float in this port, not a per-device dict
found_inf = optimizer_state["found_inf_per_device"]
optimizer.grad_scale = None if optimizer_state["stage"] == OptState.UNSCALED else scaler
optimizer.found_inf = found_inf
retval = optimizer.step(*args, **kwargs_)
optimizer_state["stage"] = OptState.STEPPED
if not has_grad_scaler_kwarg:
del optimizer.grad_scale
del optimizer.found_inf
return retval
if hasattr(optimizer,"get_find_inf"):
optimizer.set_grad_scale(self._scale)
optimizer.step()
optimizer_state["found_inf_per_device"] = optimizer.get_find_inf()
return
retval = None
if not optimizer_state["found_inf_per_device"]:
retval = optimizer.step(*args, **kwargs)
else:
optimizer.post_step()
return retval
def update(self, new_scale=None):
"""
Updates the scale factor.
If any optimizer steps were skipped the scale is multiplied by ``backoff_factor``
to reduce it. If ``growth_interval`` unskipped iterations occurred consecutively,
the scale is multiplied by ``growth_factor`` to increase it.
Passing ``new_scale`` sets the new scale value manually. (``new_scale`` is not
used directly, it's used to fill GradScaler's internal scale tensor. So if
``new_scale`` was a tensor, later in-place changes to that tensor will not further
affect the scale GradScaler uses internally.)
Args:
new_scale (float or :class:`torch.cuda.FloatTensor`, optional, default=None): New scale factor.
.. warning::
:meth:`update` should only be called at the end of the iteration, after ``scaler.step(optimizer)`` has
been invoked for all optimizers used this iteration.
"""
if not self._enabled:
return
_scale, _growth_tracker = self._check_scale_growth_tracker("update")
if new_scale is not None:
# Accept a new user-defined scale.
if isinstance(new_scale, float):
# _scale is a plain Python float in this port, so assign rather than fill_
self._scale = new_scale
else:
reason = "new_scale should be a float or a 1-element jt.Var with requires_grad=False."
assert isinstance(new_scale, jt.Var), reason
assert new_scale.numel() == 1, reason
assert new_scale.requires_grad is False, reason
self._scale = float(new_scale.item())
else:
# Consume shared inf/nan data collected from optimizers to update the scale.
# If all found_inf tensors are on the same device as self._scale, this operation is asynchronous.
found_infs = [state["found_inf_per_device"]
for state in self._per_optimizer_states.values()
]
assert len(found_infs) > 0, "No inf checks were recorded prior to update."
found_inf_combined = found_infs[0]
if len(found_infs) > 1:
for i in range(1, len(found_infs)):
found_inf_combined += found_infs[i]
current_scale = _scale
if found_inf_combined:
current_scale *= self._backoff_factor
_growth_tracker = 0
else:
successful = _growth_tracker + 1
if successful == self._growth_interval:
new_scale = current_scale * self._growth_factor
if new_scale < 1e9:
current_scale = new_scale
_growth_tracker = 0
else:
_growth_tracker = successful
self._scale, self._growth_tracker = current_scale, _growth_tracker
# To prepare for next iteration, clear the data collected from optimizers this iteration.
self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
def _get_scale_async(self):
return self._scale
def get_scale(self):
"""
Returns a Python float containing the current scale, or 1.0 if scaling is disabled.
.. warning::
:meth:`get_scale` incurs a CPU-GPU sync.
"""
if self._enabled:
return self._init_scale if self._scale is None else self._get_scale_async()
else:
return 1.0
def get_growth_factor(self):
r"""
Returns a Python float containing the scale growth factor.
"""
return self._growth_factor
def set_growth_factor(self, new_factor):
r"""
Args:
new_scale (float): Value to use as the new scale growth factor.
"""
self._growth_factor = new_factor
def get_backoff_factor(self):
r"""
Returns a Python float containing the scale backoff factor.
"""
return self._backoff_factor
def set_backoff_factor(self, new_factor):
r"""
Args:
new_scale (float): Value to use as the new scale backoff factor.
"""
self._backoff_factor = new_factor
def get_growth_interval(self):
r"""
Returns a Python int containing the growth interval.
"""
return self._growth_interval
def set_growth_interval(self, new_interval):
r"""
Args:
new_interval (int): Value to use as the new growth interval.
"""
self._growth_interval = new_interval
def _get_growth_tracker(self):
if self._enabled:
return self._init_growth_tracker if self._growth_tracker is None else int(self._growth_tracker)
else:
return 0
def is_enabled(self):
r"""
Returns a bool indicating whether this instance is enabled.
"""
return self._enabled
def state_dict(self):
r"""
Returns the state of the scaler as a :class:`dict`. It contains five entries:
* ``"scale"`` - a Python float containing the current scale
* ``"growth_factor"`` - a Python float containing the current growth factor
* ``"backoff_factor"`` - a Python float containing the current backoff factor
* ``"growth_interval"`` - a Python int containing the current growth interval
* ``"_growth_tracker"`` - a Python int containing the number of recent consecutive unskipped steps.
If this instance is not enabled, returns an empty dict.
.. note::
If you wish to checkpoint the scaler's state after a particular iteration, :meth:`state_dict`
should be called after :meth:`update`.
"""
return {"scale": self.get_scale(),
"growth_factor": self._growth_factor,
"backoff_factor": self._backoff_factor,
"growth_interval": self._growth_interval,
"_growth_tracker": self._get_growth_tracker()} if self._enabled else {}
def load_state_dict(self, state_dict):
r"""
Loads the scaler state. If this instance is disabled, :meth:`load_state_dict` is a no-op.
Args:
state_dict(dict): scaler state. Should be an object returned from a call to :meth:`state_dict`.
"""
if not self._enabled:
return
if len(state_dict) == 0:
raise RuntimeError("The source state dict is empty, possibly because it was saved "
"from a disabled instance of GradScaler.")
self._init_scale = state_dict["scale"]
if self._scale is not None:
self._scale = float(state_dict["scale"])
self._growth_factor = state_dict["growth_factor"]
self._backoff_factor = state_dict["backoff_factor"]
self._growth_interval = state_dict["growth_interval"]
self._init_growth_tracker = state_dict["_growth_tracker"]
if self._growth_tracker is not None:
self._growth_tracker = int(state_dict["_growth_tracker"])
def __getstate__(self):
state = self.__dict__.copy()
if self._enabled:
assert len(self._per_optimizer_states) == 0, "A GradScaler instance may only be pickled at the beginning "\
"of an iteration, or at the end after scaler.update()."
# Pickling _scale and _growth_tracker Tensors directly triggers
# "warnings.warn("pickle support for Storage will be removed in 1.5..."
# so instead, we set the unpickled instance up to reinitialize them lazily.
state['_init_scale'] = self.get_scale()
state['_init_growth_tracker'] = self._get_growth_tracker()
state['_scale'] = None
state['_growth_tracker'] = None
return state
def __setstate__(self, state):
self.__dict__.update(state)
def _check_inf_per_device(self, optimizer):
_scale, _ = self._check_scale_growth_tracker("_check_inf_per_device")
dummy_inv_scale = 1.0
found_inf = 0.0
self._per_optimizer_states[id(optimizer)]["found_inf_per_device"] = \
self._unscale_grads_(optimizer, dummy_inv_scale, found_inf, True)
return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]
def _found_inf_per_device(self, optimizer):
return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]

View File

@ -0,0 +1,556 @@
from collections import defaultdict, abc
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, cast
import inspect
import warnings
import jittor as jt
# import torch
__all__ = ["OptState", "GradScaler"]
# Defines default_factory for GradScaler's _per_optimizer_states defaultdict,
# as well as associated "enum" values. Prefers defining these at top level because
# - Lambdas can't be pickled, so we don't want to supply a lambda as the factory.
# - Defining READY, UNSCALED, STEPPED and _refresh_per_optimizer_state within GradScaler
# causes a circular reference, which we'd rather avoid.
class OptState(Enum):
READY = 0
UNSCALED = 1
STEPPED = 2
def _refresh_per_optimizer_state():
return {"stage": OptState.READY, "found_inf_per_device": {}}
class GradScaler:
_scale: Optional[jt.Var]
_growth_tracker: Optional[jt.Var]
_per_optimizer_states: Dict[int, Dict[str, Any]]
"""
An instance ``scaler`` of :class:`GradScaler` helps perform the steps of gradient scaling
conveniently.
* ``scaler.scale(loss)`` multiplies a given loss by ``scaler``'s current scale factor.
* ``scaler.step(optimizer)`` safely unscales gradients and calls ``optimizer.step()``.
* ``scaler.update()`` updates ``scaler``'s scale factor.
Example::
# Creates a GradScaler once at the beginning of training.
scaler = GradScaler()
for epoch in epochs:
for input, target in data:
optimizer.zero_grad()
output = model(input)
loss = loss_fn(output, target)
# Scales loss. Calls backward() on scaled loss to create scaled gradients.
scaler.scale(loss).backward()
# scaler.step() first unscales gradients of the optimizer's params.
# If gradients don't contain infs/NaNs, optimizer.step() is then called,
# otherwise, optimizer.step() is skipped.
scaler.step(optimizer)
# Updates the scale for next iteration.
scaler.update()
See the :ref:`Automatic Mixed Precision examples<amp-examples>` for usage
(along with autocasting) in more complex cases like gradient clipping, gradient accumulation, gradient penalty,
and multiple losses/optimizers.
``scaler`` dynamically estimates the scale factor each iteration. To minimize gradient underflow,
a large scale factor should be used. However, ``float16`` values can "overflow" (become inf or NaN) if
the scale factor is too large. Therefore, the optimal scale factor is the largest factor that can be used
without incurring inf or NaN gradient values.
``scaler`` approximates the optimal scale factor over time by checking the gradients for infs and NaNs during every
``scaler.step(optimizer)`` (or optional separate ``scaler.unscale_(optimizer)``, see :meth:`unscale_`).
* If infs/NaNs are found, ``scaler.step(optimizer)`` skips the underlying ``optimizer.step()`` (so the params
themselves remain uncorrupted) and ``update()`` multiplies the scale by ``backoff_factor``.
* If no infs/NaNs are found, ``scaler.step(optimizer)`` runs the underlying ``optimizer.step()`` as usual.
If ``growth_interval`` unskipped iterations occur consecutively, ``update()`` multiplies the scale by
``growth_factor``.
The scale factor often causes infs/NaNs to appear in gradients for the first few iterations as its
value calibrates. ``scaler.step`` will skip the underlying ``optimizer.step()`` for these
iterations. After that, step skipping should occur rarely (once every few hundred or thousand iterations).
Args:
init_scale (float, optional, default=2.**16): Initial scale factor.
growth_factor (float, optional, default=2.0): Factor by which the scale is multiplied during
:meth:`update` if no inf/NaN gradients occur for ``growth_interval`` consecutive iterations.
backoff_factor (float, optional, default=0.5): Factor by which the scale is multiplied during
:meth:`update` if inf/NaN gradients occur in an iteration.
growth_interval (int, optional, default=2000): Number of consecutive iterations without inf/NaN gradients
that must occur for the scale to be multiplied by ``growth_factor``.
enabled (bool, optional): If ``False``, disables gradient scaling. :meth:`step` simply
invokes the underlying ``optimizer.step()``, and other methods become no-ops.
Default: ``True``
"""
def __init__(self,
init_scale=2.**16,
growth_factor=2.0,
backoff_factor=0.5,
growth_interval=2000,
enabled=True):
self._enabled = enabled
if self._enabled:
assert growth_factor > 1.0, "The growth factor must be > 1.0."
assert backoff_factor < 1.0, "The backoff factor must be < 1.0."
self._init_scale = init_scale
# self._scale will be lazily initialized during the first call to scale()
self._scale = None
self._growth_factor = growth_factor
self._backoff_factor = backoff_factor
self._growth_interval = growth_interval
self._init_growth_tracker = 0
# self._growth_tracker will be lazily initialized during the first call to scale()
self._growth_tracker = None
self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
def _check_scale_growth_tracker(self, funcname) -> Tuple[jt.Var, jt.Var]:
fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."
assert self._scale is not None, "Attempted {} but _scale is None. ".format(funcname) + fix
assert self._growth_tracker is not None, "Attempted {} but _growth_tracker is None. ".format(funcname) + fix
return (self._scale, self._growth_tracker)
def _lazy_init_scale_growth_tracker(self):
assert self._growth_tracker is None, "_growth_tracker initialized before _scale"
self._scale = self._init_scale
self._growth_tracker = self._init_growth_tracker
def scale(self, outputs):
"""
Multiplies ('scales') a tensor or list of tensors by the scale factor.
Returns scaled outputs. If this instance of :class:`GradScaler` is not enabled, outputs are returned
unmodified.
Args:
outputs (Tensor or iterable of Tensors): Outputs to scale.
"""
print("scale")
if not self._enabled:
return outputs
# Short-circuit for the common case.
if isinstance(outputs, jt.Var):
assert jt.flags.use_cuda == 1
if self._scale is None:
self._lazy_init_scale_growth_tracker()
assert self._scale is not None
return outputs * self._scale
def apply_scale(val):
if isinstance(val, jt.Var):
assert jt.flags.use_cuda == 1
if self._scale is None:
self._lazy_init_scale_growth_tracker()
assert self._scale is not None
return val * self._scale
elif isinstance(val, abc.Iterable):
iterable = map(apply_scale, val)
if isinstance(val, (list, tuple)):
return type(val)(iterable)
else:
return iterable
else:
raise ValueError("outputs must be a Tensor or an iterable of Tensors")
return apply_scale(outputs)
def _unscale_grads_(self, optimizer, inv_scale, found_inf, allow_fp16):
# To set up _amp_foreach_non_finite_check_and_unscale_, split grads by device and dtype.
# There could be hundreds of grads, so we'd like to iterate through them just once.
# However, we don't know their devices or dtypes in advance.
# https://stackoverflow.com/questions/5029934/defaultdict-of-defaultdict
# Google says mypy struggles with defaultdicts type annotations.
with jt.no_grad():
optimizer.pre_step()
for group in optimizer.param_groups:
for to_unscale in group["grads"]:
if to_unscale is None or isinstance(to_unscale,(int,float)):
continue
if (not allow_fp16) and str(to_unscale.dtype) == "float16":
raise ValueError("Attempting to unscale FP16 gradients.")
if not (to_unscale.isinf().any()):
if inv_scale != 1.0:
to_unscale.update(to_unscale*inv_scale)
else:
found_inf = 1.0
return found_inf
def unscale_(self, optimizer):
"""
Divides ("unscales") the optimizer's gradient tensors by the scale factor.
:meth:`unscale_` is optional, serving cases where you need to
:ref:`modify or inspect gradients<working-with-unscaled-gradients>`
between the backward pass(es) and :meth:`step`.
If :meth:`unscale_` is not called explicitly, gradients will be unscaled automatically during :meth:`step`.
Simple example, using :meth:`unscale_` to enable clipping of unscaled gradients::
...
scaler.scale(loss).backward()
scaler.unscale_(optimizer)
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
scaler.step(optimizer)
scaler.update()
Args:
optimizer (torch.optim.Optimizer): Optimizer that owns the gradients to be unscaled.
.. note::
:meth:`unscale_` does not incur a CPU-GPU sync.
.. warning::
:meth:`unscale_` should only be called once per optimizer per :meth:`step` call,
and only after all gradients for that optimizer's assigned parameters have been accumulated.
Calling :meth:`unscale_` twice for a given optimizer between each :meth:`step` triggers a RuntimeError.
.. warning::
:meth:`unscale_` may unscale sparse gradients out of place, replacing the ``.grad`` attribute.
"""
if not self._enabled:
return
self._check_scale_growth_tracker("unscale_")
optimizer_state = self._per_optimizer_states[id(optimizer)]
if optimizer_state["stage"] is OptState.UNSCALED:
raise RuntimeError("unscale_() has already been called on this optimizer since the last update().")
elif optimizer_state["stage"] is OptState.STEPPED:
raise RuntimeError("unscale_() is being called after step().")
# FP32 division can be imprecise for certain compile options, so we carry out the reciprocal in FP64.
assert self._scale is not None
inv_scale = 1.0 / self._scale
found_inf = 0.0
optimizer_state["found_inf_per_device"] = self._unscale_grads_(optimizer, inv_scale, found_inf, False)
optimizer_state["stage"] = OptState.UNSCALED
def _maybe_opt_step(self, optimizer, optimizer_state, *args, **kwargs):
retval = None
if not optimizer_state["found_inf_per_device"]:
retval = optimizer.step(*args, **kwargs)
else:
optimizer.post_step()
return retval
def step(self, optimizer, *args, **kwargs):
"""
:meth:`step` carries out the following two operations:
1. Internally invokes ``unscale_(optimizer)`` (unless :meth:`unscale_` was explicitly called for ``optimizer``
earlier in the iteration). As part of the :meth:`unscale_`, gradients are checked for infs/NaNs.
2. If no inf/NaN gradients are found, invokes ``optimizer.step()`` using the unscaled
gradients. Otherwise, ``optimizer.step()`` is skipped to avoid corrupting the params.
``*args`` and ``**kwargs`` are forwarded to ``optimizer.step()``.
Returns the return value of ``optimizer.step(*args, **kwargs)``.
Args:
optimizer (torch.optim.Optimizer): Optimizer that applies the gradients.
args: Any arguments.
kwargs: Any keyword arguments.
.. warning::
Closure use is not currently supported.
"""
if (not self._enabled):
return optimizer.step(*args, **kwargs)
if "closure" in kwargs:
raise RuntimeError("Closure use is not currently supported if GradScaler is enabled.")
self._check_scale_growth_tracker("step")
optimizer_state = self._per_optimizer_states[id(optimizer)]
if optimizer_state["stage"] is OptState.STEPPED:
raise RuntimeError("step() has already been called since the last update().")
retval = None
if (hasattr(optimizer, "_step_supports_amp_scaling") and optimizer._step_supports_amp_scaling):
# This optimizer has customized scale-handling logic, so we can call optimizer.step() directly.
# The contract with custom optimizers is that their step() should accept an additional,
# optional grad_scaler kwarg. We append self to the kwargs so the custom optimizer has full information:
# it can query its own state, invoke unscale_ on itself, etc
# The contract above is being deprecated to avoid introducing `grad_scaler: GradScaler` argument
# to `Optimizer.step`. The new behavior is going to add two Tensor attributes of `grad_scale`
# and `found_inf` to the passed optimizer so that the optimizer can utilize those
# to skip the parameter updates or unscale gradients before updating parameters in
# the fused kernel, e.g. `FusedAdamMathFunctor`.
# In this behavior, `GradScaler._check_inf_per_device` is called if `OptState.READY`,
# while the method is expected to be called by users side, i.e. their optimizers.
kwargs_ = kwargs
has_grad_scaler_kwarg = "grad_scaler" in inspect.signature(optimizer.step).parameters
if has_grad_scaler_kwarg:
warnings.warn(
"GradScaler is going to stop passing itself as a keyword argument to the passed "
"optimizer. In the near future GradScaler registers `grad_scale: Tensor` and "
"`found_inf: Tensor` to the passed optimizer and let the optimizer use them directly.",
FutureWarning)
kwargs_.update({"grad_scaler": self})
else:
if optimizer_state["stage"] is OptState.READY:
self._check_inf_per_device(optimizer)
scaler = self._get_scale_async()
# found_inf_per_device is a plain float in this port, not a per-device dict
found_inf = optimizer_state["found_inf_per_device"]
optimizer.grad_scale = None if optimizer_state["stage"] == OptState.UNSCALED else scaler
optimizer.found_inf = found_inf
retval = optimizer.step(*args, **kwargs_)
optimizer_state["stage"] = OptState.STEPPED
if not has_grad_scaler_kwarg:
del optimizer.grad_scale
del optimizer.found_inf
return retval
if optimizer_state["stage"] is OptState.READY:
self.unscale_(optimizer)
assert "found_inf_per_device" in optimizer_state, "No inf checks were recorded for this optimizer."
retval = self._maybe_opt_step(optimizer, optimizer_state, *args, **kwargs)
optimizer_state["stage"] = OptState.STEPPED
return retval
def update(self, new_scale=None):
"""
Updates the scale factor.
If any optimizer steps were skipped the scale is multiplied by ``backoff_factor``
to reduce it. If ``growth_interval`` unskipped iterations occurred consecutively,
the scale is multiplied by ``growth_factor`` to increase it.
Passing ``new_scale`` sets the new scale value manually. (``new_scale`` is not
used directly, it's used to fill GradScaler's internal scale tensor. So if
``new_scale`` was a tensor, later in-place changes to that tensor will not further
affect the scale GradScaler uses internally.)
Args:
new_scale (float or :class:`torch.cuda.FloatTensor`, optional, default=None): New scale factor.
.. warning::
:meth:`update` should only be called at the end of the iteration, after ``scaler.step(optimizer)`` has
been invoked for all optimizers used this iteration.
"""
if not self._enabled:
return
_scale, _growth_tracker = self._check_scale_growth_tracker("update")
if new_scale is not None:
# Accept a new user-defined scale.
if isinstance(new_scale, float):
# _scale is a plain Python float in this port, so assign rather than fill_
self._scale = new_scale
else:
reason = "new_scale should be a float or a 1-element jt.Var with requires_grad=False."
assert isinstance(new_scale, jt.Var), reason
assert new_scale.numel() == 1, reason
assert new_scale.requires_grad is False, reason
self._scale = float(new_scale.item())
else:
# Consume shared inf/nan data collected from optimizers to update the scale.
# If all found_inf tensors are on the same device as self._scale, this operation is asynchronous.
found_infs = [state["found_inf_per_device"]
for state in self._per_optimizer_states.values()
]
assert len(found_infs) > 0, "No inf checks were recorded prior to update."
found_inf_combined = found_infs[0]
if len(found_infs) > 1:
for i in range(1, len(found_infs)):
found_inf_combined += found_infs[i]
current_scale = _scale
if found_inf_combined:
current_scale *= self._backoff_factor
_growth_tracker = 0
else:
successful = _growth_tracker + 1
if successful == self._growth_interval:
new_scale = current_scale * self._growth_factor
if new_scale < 1e9:
current_scale = new_scale
_growth_tracker = 0
else:
_growth_tracker = successful
self._scale, self._growth_tracker = current_scale, _growth_tracker
# To prepare for next iteration, clear the data collected from optimizers this iteration.
self._per_optimizer_states = defaultdict(_refresh_per_optimizer_state)
def _get_scale_async(self):
return self._scale
def get_scale(self):
"""
Returns a Python float containing the current scale, or 1.0 if scaling is disabled.
.. warning::
:meth:`get_scale` incurs a CPU-GPU sync.
"""
if self._enabled:
return self._init_scale if self._scale is None else self._get_scale_async()
else:
return 1.0
def get_growth_factor(self):
r"""
Returns a Python float containing the scale growth factor.
"""
return self._growth_factor
def set_growth_factor(self, new_factor):
r"""
Args:
new_scale (float): Value to use as the new scale growth factor.
"""
self._growth_factor = new_factor
def get_backoff_factor(self):
r"""
Returns a Python float containing the scale backoff factor.
"""
return self._backoff_factor
def set_backoff_factor(self, new_factor):
r"""
Args:
new_scale (float): Value to use as the new scale backoff factor.
"""
self._backoff_factor = new_factor
def get_growth_interval(self):
r"""
Returns a Python int containing the growth interval.
"""
return self._growth_interval
def set_growth_interval(self, new_interval):
r"""
Args:
new_interval (int): Value to use as the new growth interval.
"""
self._growth_interval = new_interval
def _get_growth_tracker(self):
if self._enabled:
return self._init_growth_tracker if self._growth_tracker is None else int(self._growth_tracker)
else:
return 0
def is_enabled(self):
r"""
Returns a bool indicating whether this instance is enabled.
"""
return self._enabled
def state_dict(self):
r"""
Returns the state of the scaler as a :class:`dict`. It contains five entries:
* ``"scale"`` - a Python float containing the current scale
* ``"growth_factor"`` - a Python float containing the current growth factor
* ``"backoff_factor"`` - a Python float containing the current backoff factor
* ``"growth_interval"`` - a Python int containing the current growth interval
* ``"_growth_tracker"`` - a Python int containing the number of recent consecutive unskipped steps.
If this instance is not enabled, returns an empty dict.
.. note::
If you wish to checkpoint the scaler's state after a particular iteration, :meth:`state_dict`
should be called after :meth:`update`.
"""
return {"scale": self.get_scale(),
"growth_factor": self._growth_factor,
"backoff_factor": self._backoff_factor,
"growth_interval": self._growth_interval,
"_growth_tracker": self._get_growth_tracker()} if self._enabled else {}
def load_state_dict(self, state_dict):
r"""
Loads the scaler state. If this instance is disabled, :meth:`load_state_dict` is a no-op.
Args:
state_dict(dict): scaler state. Should be an object returned from a call to :meth:`state_dict`.
"""
if not self._enabled:
return
if len(state_dict) == 0:
raise RuntimeError("The source state dict is empty, possibly because it was saved "
"from a disabled instance of GradScaler.")
self._init_scale = state_dict["scale"]
if self._scale is not None:
self._scale = float(state_dict["scale"])
self._growth_factor = state_dict["growth_factor"]
self._backoff_factor = state_dict["backoff_factor"]
self._growth_interval = state_dict["growth_interval"]
self._init_growth_tracker = state_dict["_growth_tracker"]
if self._growth_tracker is not None:
self._growth_tracker = int(state_dict["_growth_tracker"])
def __getstate__(self):
state = self.__dict__.copy()
if self._enabled:
assert len(self._per_optimizer_states) == 0, "A GradScaler instance may only be pickled at the beginning "\
"of an iteration, or at the end after scaler.update()."
# Pickling _scale and _growth_tracker Tensors directly triggers
# "warnings.warn("pickle support for Storage will be removed in 1.5..."
# so instead, we set the unpickled instance up to reinitialize them lazily.
state['_init_scale'] = self.get_scale()
state['_init_growth_tracker'] = self._get_growth_tracker()
state['_scale'] = None
state['_growth_tracker'] = None
return state
def __setstate__(self, state):
self.__dict__.update(state)
def _check_inf_per_device(self, optimizer):
_scale, _ = self._check_scale_growth_tracker("_check_inf_per_device")
dummy_inv_scale = 1.0
found_inf = 0.0
self._per_optimizer_states[id(optimizer)]["found_inf_per_device"] = \
self._unscale_grads_(optimizer, dummy_inv_scale, found_inf, True)
return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]
def _found_inf_per_device(self, optimizer):
return self._per_optimizer_states[id(optimizer)]["found_inf_per_device"]
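# --- Hedged usage sketch (checkpointing a scaler) ---
#   scaler = GradScaler()
#   state = scaler.state_dict()      # plain dict of floats/ints, safe to pickle
#   restored = GradScaler()
#   restored.load_state_dict(state)  # restores scale and growth bookkeeping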

View File

@ -0,0 +1,12 @@
import math
def _jit_set_profiling_mode(x): pass
def _jit_set_profiling_executor(x): pass
def _jit_override_can_fuse_on_cpu(x): pass
def _jit_override_can_fuse_on_gpu(x): pass
def script(func):
return func
inf = math.inf
nan = math.nan
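# --- Hedged usage sketch: script() is an identity decorator ---
#   @script
#   def f(x):
#       return x + 1
#   assert f(1) == 2   # runs eagerly; no TorchScript compilation happens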

View File

@ -0,0 +1,281 @@
import jtorch
from typing import List, Optional, Tuple, Iterable, Iterator, Mapping, Any, overload, TypeVar, Dict
from typing_extensions import Self
import jittor as jt
from jtorch import make_module, Tensor, ModuleMisc, wrapper
#from . import init
from jittor import Function
import operator
import warnings
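# Re-export jt.nn: plain callables get the Tensor-compat wrapper below, and
# Module subclasses are then re-bound via make_module (the second loop wins).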
for k,v in jt.nn.__dict__.items():
if callable(v):
globals()[k] = wrapper(v)
for k,v in jt.nn.__dict__.items():
if isinstance(v, type) and issubclass(v, jt.Module):
globals()[k] = make_module(v)
from collections import OrderedDict
from collections import abc as container_abcs
class Module(ModuleMisc, jt.Module):
def __call__(self, *args, **kw):
return self.execute(*args, **kw)
def execute(self, *args, **kw):
return self.forward(*args, **kw)
def get_submodule(self, target: str):
if target == "":
return self
atoms: List[str] = target.split(".")
mod: jt.nn.Module = self
for item in atoms:
if not hasattr(mod, item):
raise AttributeError(mod._get_name() + " has no "
"attribute `" + item + "`")
mod = getattr(mod, item)
if not isinstance(mod, jt.nn.Module):
raise AttributeError("`" + item + "` is not "
"an nn.Module")
return mod
def Parameter(x:Tensor, requires_grad:bool=True) -> Tensor:
x = x.clone()
x.requires_grad = requires_grad
x.retains_grad = requires_grad
return x
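# --- Hedged usage sketch ---
#   w = Parameter(jt.randn(3, 3))                         # cloned, requires_grad=True
#   frozen = Parameter(jt.zeros(3), requires_grad=False)  # non-trainable copy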
def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False):
return jt.nn.embedding(input, weight)
def dropout(x, p=0.5, training=False):
return jt.nn.dropout(x, p, training)
class Flatten(Module):
''' Flattens the contiguous range of dimensions in a Var.
:param start_dim: the first dimension to be flattened. Defaults: 1.
:type start_dim: int
:param end_dim: the last dimension to be flattened. Defaults: -1.
:type end_dim: int
'''
def __init__(self, start_dim=1, end_dim=-1):
self.start_dim = start_dim
self.end_dim = end_dim
def forward(self, x) -> jt.Var:
return x.flatten(self.start_dim, self.end_dim)
class _IncompatibleKeys:
def __init__(self, missing_keys, unexpected_keys):
self.missing_keys = missing_keys
self.unexpected_keys = unexpected_keys
_BatchNorm = None
#from . import utils
normalize = wrapper(jt.normalize)
T = TypeVar('T', bound=Module)
class ModuleDict(Module):
_modules: Dict[str, Module] # type: ignore[assignment]
def __init__(self, modules: Optional[Mapping[str, Module]] = None) -> None:
super().__init__()
if modules is not None:
self.update(modules)
def __getitem__(self, key: str) -> Module:
return self._modules[key]
def __setitem__(self, key: str, module: Module) -> None:
self.add_module(key, module)
def __delitem__(self, key: str) -> None:
del self._modules[key]
def __len__(self) -> int:
return len(self._modules)
def __iter__(self) -> Iterator[str]:
return iter(self._modules)
def __contains__(self, key: str) -> bool:
return key in self._modules
def clear(self) -> None:
"""Remove all items from the ModuleDict."""
self._modules.clear()
def pop(self, key: str) -> Module:
r"""Remove key from the ModuleDict and return its module.
Args:
key (str): key to pop from the ModuleDict
"""
v = self[key]
del self[key]
return v
def keys(self) -> Iterable[str]:
r"""Return an iterable of the ModuleDict keys."""
return self._modules.keys()
def items(self) -> Iterable[Tuple[str, Module]]:
r"""Return an iterable of the ModuleDict key/value pairs."""
return self._modules.items()
def values(self) -> Iterable[Module]:
r"""Return an iterable of the ModuleDict values."""
return self._modules.values()
def update(self, modules: Mapping[str, Module]) -> None:
r"""Update the :class:`~torch.nn.ModuleDict` with key-value pairs from a mapping, overwriting existing keys.
.. note::
If :attr:`modules` is an ``OrderedDict``, a :class:`~torch.nn.ModuleDict`, or
an iterable of key-value pairs, the order of new elements in it is preserved.
Args:
modules (iterable): a mapping (dictionary) from string to :class:`~torch.nn.Module`,
or an iterable of key-value pairs of type (string, :class:`~torch.nn.Module`)
"""
if not isinstance(modules, container_abcs.Iterable):
raise TypeError("ModuleDict.update should be called with an "
"iterable of key/value pairs, but got " +
type(modules).__name__)
if isinstance(modules, (OrderedDict, ModuleDict, container_abcs.Mapping)):
for key, module in modules.items():
self[key] = module
else:
# modules here can be a list with two items
for j, m in enumerate(modules):
if not isinstance(m, container_abcs.Iterable):
raise TypeError("ModuleDict update sequence element "
"#" + str(j) + " should be Iterable; is" +
type(m).__name__)
if not len(m) == 2:
raise ValueError("ModuleDict update sequence element "
"#" + str(j) + " has length " + str(len(m)) +
"; 2 is required")
# modules can be Mapping (what it's typed at), or a list: [(name1, module1), (name2, module2)]
# that's too cumbersome to type correctly with overloads, so we add an ignore here
self[m[0]] = m[1] # type: ignore[assignment]
# remove forward altogether to fall back on Module's _forward_unimplemented
class ParameterList(Module):
def __init__(self, values: Optional[Iterable[Any]] = None) -> None:
super().__init__()
self._size = 0
if values is not None:
self += values
def _get_abs_string_index(self, idx):
"""Get the absolute index for the list of modules."""
idx = operator.index(idx)
if not (-len(self) <= idx < len(self)):
raise IndexError(f'index {idx} is out of range')
if idx < 0:
idx += len(self)
return str(idx)
@overload
def __getitem__(self, idx: int) -> Any:
...
@overload
def __getitem__(self: T, idx: slice) -> T:
...
def __getitem__(self, idx):
if isinstance(idx, slice):
start, stop, step = idx.indices(len(self))
out = self.__class__()
for i in range(start, stop, step):
out.append(self[i])
return out
else:
idx = self._get_abs_string_index(idx)
return getattr(self, str(idx))
def __setitem__(self, idx: int, param: Any) -> None:
# Note that all other function that add an entry to the list part of
# the ParameterList end up here. So this is the only place where we need
# to wrap things into Parameter if needed.
# Objects added via setattr() are not in the list part and thus won't
# call into this function.
idx = self._get_abs_string_index(idx)
# Parameter is a function in this port, not a type, so isinstance() cannot be
# used; wrap any Var that is not already marked as requiring grad
if isinstance(param, jt.Var) and not getattr(param, "requires_grad", False):
param = Parameter(param)
return setattr(self, str(idx), param)
def __len__(self) -> int:
return self._size
def __iter__(self) -> Iterator[Any]:
return iter(self[i] for i in range(len(self)))
def __iadd__(self, parameters: Iterable[Any]) -> Self:
return self.extend(parameters)
def __dir__(self):
keys = super().__dir__()
keys = [key for key in keys if not key.isdigit()]
return keys
def append(self, value: Any) -> 'ParameterList':
"""Append a given value at the end of the list.
Args:
value (Any): value to append
"""
new_idx = len(self)
self._size += 1
self[new_idx] = value
return self
def extend(self, values: Iterable[Any]) -> Self:
"""Append values from a Python iterable to the end of the list.
Args:
values (iterable): iterable of values to append
"""
# Tensor is an iterable but we never want to unpack it here
if not isinstance(values, container_abcs.Iterable) or isinstance(values, jt.Var):
raise TypeError("ParameterList.extend should be called with an "
"iterable, but got " + type(values).__name__)
for value in values:
self.append(value)
return self
def extra_repr(self) -> str:
child_lines = []
for k, p in enumerate(self):
if isinstance(p, jt.Var):
size_str = 'x'.join(str(size) for size in p.size())
parastr = '{} containing: [{} of size {}{}]'.format(
"Parameter" if isinstance(p, Parameter) else "Tensor",
p.dtype, size_str, "cuda" if jt.flags.use_cuda else "cpu")
child_lines.append(' (' + str(k) + '): ' + parastr)
else:
child_lines.append(' (' + str(k) + '): Object of type: ' + type(p).__name__)
tmpstr = '\n'.join(child_lines)
return tmpstr
def __call__(self, *args, **kwargs):
raise RuntimeError('ParameterList should not be called.')
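# --- Hedged usage sketch ---
#   plist = ParameterList([jt.randn(4), jt.randn(4)])
#   plist.append(jt.randn(4))
#   total = sum(p.sum() for p in plist)   # iterates the three stored parameters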

View File

@ -0,0 +1,16 @@
import jittor as jt
for k,v in jt.nn.init.__dict__.items():
if callable(v):
globals()[k] = v
normal = gauss
normal_ = gauss_
xavier_normal = xavier_gauss
xavier_normal_ = xavier_gauss_
zeros_ = zero_
jt.Var.normal_ = normal_
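# --- Hedged usage sketch, assuming jittor's gauss_/xavier_gauss_ default args ---
#   w = jt.zeros((3, 3))
#   normal_(w)           # alias of jt.nn.init.gauss_
#   xavier_normal_(w)    # alias of jt.nn.init.xavier_gauss_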

View File

@ -0,0 +1 @@
from . import rnn

View File

@ -0,0 +1,20 @@
import jittor as jt
PackedSequence = None
def pad_sequence(sequences,batch_first=False,padding_value=0.0):
max_f = max([len(s) for s in sequences])
# max_f = 512
b = len(sequences)
if batch_first:
ret = sequences[0].new_full([b,max_f,]+list(sequences[0].shape[1:]),padding_value)
for i,s in enumerate(sequences):
ret[i,:len(s)] = s
else:
ret = sequences[0].new_full([max_f,b,]+list(sequences[0].shape[1:]),padding_value)
for i,s in enumerate(sequences):
ret[:len(s),i] = s
# print(ret.shape)
# ret = ret[:,:406]
return ret
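# --- Hedged usage sketch, assuming Vars expose new_full as used above ---
#   a, b = jt.ones(3, 5), jt.ones(2, 5)
#   out = pad_sequence([a, b], batch_first=True, padding_value=0.0)
#   # out.shape == [2, 3, 5]; out[1, 2] is all zeros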

File diff suppressed because it is too large

View File

@ -0,0 +1,102 @@
#include "pyjt/py_obj_holder.h"
#include "utils/str_utils.h"
#include "jtorch_core.h"
#include "graph.h"
#include "grad.h"
#include "ops/op_register.h"
namespace jittor {
void pyjt_def_all(PyObject* m);
EXTERN_LIB void setter_use_cuda(int value);
Device::Device(const string& name, int ordinal) : name(name) {
if (startswith(name, "cpu"))
setter_use_cuda(0);
else
setter_use_cuda(1);
}
unordered_map<int64, VarPtr> grad_backup;
EXTERN_LIB void (*_var_free_hook)(Var*);
EXTERN_LIB unordered_map<int64, VarPtr>* _grad_backup_ptr;
void jtorch_var_free_hook(Var* v) {
auto iter = grad_backup.find(v->id);
if (iter != grad_backup.end()) {
grad_backup.erase(iter);
}
}
void jtorch_init() {
_var_free_hook = &jtorch_var_free_hook;
_grad_backup_ptr = &grad_backup;
}
inline static VarPtr& get_grad(Var* v) {
return grad_backup[v->id];
}
static auto make_binary = get_op_info("binary")
.get_constructor<VarPtr, Var*, Var*, NanoString>();
inline static void add_grad(VarPtr& a, VarPtr&& b) {
if (!a) a = move(b);
else {
a = make_binary(a, b, ns_add);
}
}
void grad_set(VarHolder* x, Maybe<VarHolder> v) {
if (!v) {
grad_del(x);
return;
}
grad_backup[x->var->id] = v.ptr->var;
}
Maybe<VarHolder> grad_get(VarHolder* x) {
auto iter = grad_backup.find(x->var->id);
if (iter != grad_backup.end()) {
if (!iter->second.ptr) return nullptr;
return new VarHolder(iter->second.ptr);
}
return nullptr;
}
void grad_del(VarHolder* x) {
auto iter = grad_backup.find(x->var->id);
if (iter != grad_backup.end())
grad_backup.erase(iter);
}
void backward(VarHolder* x) {
vector<Node*> gnodes({x->var});
bfs_backward(gnodes, [&](Node* node) {
if (node->is_stop_grad())
return false;
return true;
});
vector<Var*> targets;
for (auto* node : gnodes) {
if (node->is_var() && node->flags.get(NodeFlags::_th_require_grad))
targets.push_back(node->var());
}
auto grads = grad(x->var, targets);
for (int i=0; i<targets.size(); i++) {
auto& gptr = get_grad(targets[i]);
add_grad(gptr, move(grads[i]));
}
}
}
static void init_module(PyModuleDef* mdef, PyObject* m) {
jittor::jtorch_init();
mdef->m_doc = "Inner c++ core of jtorch";
jittor::pyjt_def_all(m);
}
PYJT_MODULE_INIT(jtorch_core);

View File

@ -0,0 +1,40 @@
#pragma once
#include "common.h"
#include "var_holder.h"
#include "misc/fast_shared_ptr.h"
namespace jittor {
// @pyjt(device)
// @attrs(heaptype)
struct Device {
string name;
// @pyjt(__init__)
Device(const string& name, int ordinal=0);
// @pyjt(__get__type, __str__)
inline string get_type() {return name;}
// @pyjt(__get__index)
inline int index() {return 0;}
};
// @pyjt(backward)
void backward(VarHolder* x);
// @pyjt(grad_set)
void grad_set(VarHolder* x, Maybe<VarHolder> v);
// @pyjt(grad_get)
Maybe<VarHolder> grad_get(VarHolder* x);
// @pyjt(grad_del)
void grad_del(VarHolder* x);
// @pyjt(retain_grad_set)
inline void retain_grad_set(VarHolder* x, bool v) {
x->var->flags.set(NodeFlags::_th_require_grad, v);
}
// @pyjt(retain_grad_get)
inline bool retain_grad_get(VarHolder* x) {
return x->var->flags.get(NodeFlags::_th_require_grad);
}
}
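// --- Hedged usage sketch (assumption): the @pyjt annotations above suggest
// these entry points surface in Python roughly as:
//   import jtorch_core as core
//   core.retain_grad_set(x, True)  # mark x so backward() accumulates its grad
//   core.backward(loss)            # reverse pass from loss
//   g = core.grad_get(x)           # accumulated gradient, or None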

View File

@ -0,0 +1,25 @@
import unittest
import numpy as np
import torch
import jittor as jt
class TestConflictFunc(unittest.TestCase):
def test_max(self):
a = torch.Tensor([1,4,2])
assert a.max() == 4
v, k = a.max(dim=0)
assert v==4 and k==1
def test_argsort(self):
a = torch.Tensor([1,4,2])
k = a.argsort()
assert jt.all_equal(k, [0,2,1])
with jt.flag_scope(th_mode=0):
k, v = a.argsort()
assert jt.all_equal(k, [0,2,1])
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,58 @@
import unittest
import numpy as np
import torch
class TestFunction(unittest.TestCase):
def test_example1(self):
import jtorch
from jtorch import Function
class MyFunc(Function):
@staticmethod
def forward(self, x, y):
self.x = x
self.y = y
return x*y, x/y
@staticmethod
def backward(self, grad0, grad1):
return grad0 * self.y, grad1 * self.x
a = jtorch.array(3.0)
a.requires_grad = True
b = jtorch.array(4.0)
b.requires_grad = True
func = MyFunc.apply
c,d = func(a, b)
(c+d*3).backward()
assert a.grad.data == 4
assert b.grad.data == 9
def test_example2(self):
import jtorch as jt
from jtorch import Function
class MyFunc(Function):
@staticmethod
def forward(self, x, y):
self.x = x
self.y = y
return x*y, x/y
@staticmethod
def backward(self, grad0, grad1):
assert grad1 is None
return grad0 * self.y, None
a = jt.array(3.0)
a.requires_grad = True
b = jt.array(4.0)
b.requires_grad = True
func = MyFunc.apply
c,d = func(a, b)
d.stop_grad()
da, db = jt.grad(c+d*3, [a, b])
assert da.data == 4
assert db.data == 0
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,24 @@
import unittest
import numpy as np
import torch
class TestMisc(unittest.TestCase):
def test_update_grad(self):
class Net(torch.nn.Module):
def __init__(self):
super().__init__()
self.a = torch.nn.Parameter(torch.Tensor([1.0, 2.0]))
net = Net()
assert(net.a.requires_grad)
net.load_state_dict({"a": torch.Tensor([3.0, 4.0])})
assert(net.a.requires_grad)
def test_reshape(self):
a = torch.ones(3,3)
a.requires_grad = True
b = torch.reshape(a, [9])
assert b.requires_grad == True
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,56 @@
import unittest
import numpy as np
import os
import subprocess as sp
import sys
def check_two(cmd, parser=None, checker=None):
jtorch_out = sp.getoutput(cmd)
print("=========JTORCH OUT==========")
print(jtorch_out)
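    # Clearing PYTHONPATH drops the jtorch shim from the import path, so the
    # same script runs against the real torch installation as a reference.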
torch_out = sp.getoutput("PYTHONPATH= "+cmd)
print("=========TORCH OUT==========")
print(torch_out)
if parser:
torch_out = parser(torch_out)
jtorch_out = parser(jtorch_out)
if checker:
checker(torch_out, jtorch_out)
else:
assert torch_out == jtorch_out
return jtorch_out, torch_out
jtorch_path = os.path.join(os.path.dirname(__file__), "..")
# adapted from https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
class TestTutorial(unittest.TestCase):
def test_auto_grad1(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad1.py",
parser=lambda s: np.array(s.split())[[-10,-8,-5,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-4))
def test_auto_grad2(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad2.py",
parser=lambda s: np.array(s.split())[[-10,-8,-5,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-4))
def test_auto_grad3(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad3.py",
parser=lambda s: np.array(s.split())[[-9,-7,-4,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-4))
def test_auto_grad4(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad4.py",
parser=lambda s: np.array(s.split())[[-10,-8,-5,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-4))
def test_auto_grad5(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad5_optim.py",
parser=lambda s: np.array(s.split())[[-10,-8,-5,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-2))
def test_auto_grad6(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad6_module.py",
parser=lambda s: np.array(s.split())[[-10,-8,-5,-2]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-4))
def test_auto_grad7(self):
check_two(f"{sys.executable} {jtorch_path}/tutorial/auto_grad7_dynet.py",
parser=lambda s: np.array(s.split())[[-13,-10,-7,-3]].astype(float),
checker=lambda a,b: np.testing.assert_allclose(a, b, atol=1e-2))
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,44 @@
import torch
import math
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)
learning_rate = 1e-6
for t in range(20000):
# Forward pass: compute predicted y
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# Compute and print loss
loss = (y_pred - y).pow(2).sum().item()
if t % 1000 == 999:
print(t, loss)
# Backprop to compute gradients of a, b, c, d with respect to loss
grad_y_pred = 2.0 * (y_pred - y)
grad_a = grad_y_pred.sum()
grad_b = (grad_y_pred * x).sum()
grad_c = (grad_y_pred * x ** 2).sum()
grad_d = (grad_y_pred * x ** 3).sum()
# Update weights using gradient descent
a -= learning_rate * grad_a
b -= learning_rate * grad_b
c -= learning_rate * grad_c
d -= learning_rate * grad_d
# print(t, torch.liveness_info())
# torch.sync_all()
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

View File

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
import torch
import math
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
learning_rate = 1e-6
for t in range(20000):
# Forward pass: compute predicted y using operations on Tensors.
y_pred = a + b * x + c * x ** 2 + d * x ** 3
# print(y_pred.requires_grad)
# y_pred.requires_grad = False
# Compute and print loss using operations on Tensors.
# Now loss is a Tensor of shape (1,)
# loss.item() gets the scalar value held in the loss.
loss = (y_pred - y).pow(2).sum()
    if t % 1000 == 999:
print(t, loss.item())
# Use autograd to compute the backward pass. This call will compute the
# gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
# the gradient of the loss with respect to a, b, c, d respectively.
# torch.backward(loss)
loss.backward()
# Manually update weights using gradient descent. Wrap in torch.no_grad()
# because weights have requires_grad=True, but we don't need to track this
# in autograd.
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
import torch
import math
class LegendrePolynomial3(torch.autograd.Function):
"""
We can implement our own custom autograd Functions by subclassing
torch.autograd.Function and implementing the forward and backward passes
which operate on Tensors.
"""
@staticmethod
def forward(ctx, input):
"""
In the forward pass we receive a Tensor containing the input and return
a Tensor containing the output. ctx is a context object that can be used
to stash information for backward computation. You can cache arbitrary
objects for use in the backward pass using the ctx.save_for_backward method.
"""
ctx.save_for_backward(input)
return 0.5 * (5 * input ** 3 - 3 * input)
@staticmethod
def backward(ctx, grad_output):
"""
In the backward pass we receive a Tensor containing the gradient of the loss
with respect to the output, and we need to compute the gradient of the loss
with respect to the input.
"""
input, = ctx.saved_tensors
return grad_output * 1.5 * (5 * input ** 2 - 1)
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU
# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
learning_rate = 5e-6
for t in range(2000):
# To apply our Function, we use Function.apply method. We alias this as 'P3'.
P3 = LegendrePolynomial3.apply
# Forward pass: compute predicted y using operations; we compute
# P3 using our custom autograd operation.
y_pred = a + b * P3(c + d * x)
# Compute and print loss
loss = (y_pred - y).pow(2).sum()
if t % 100 == 99:
print(t, loss.item())
# Use autograd to compute the backward pass.
loss.backward()
# Update weights using gradient descent
with torch.no_grad():
a -= learning_rate * a.grad
b -= learning_rate * b.grad
c -= learning_rate * c.grad
d -= learning_rate * d.grad
# Manually zero the gradients after updating weights
a.grad = None
b.grad = None
c.grad = None
d.grad = None
print(f'Result: y = {a.item()} + {b.item()} * P3( {c.item()} + {d.item()} x)')

View File

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# For this example, the output y is a linear function of (x, x^2, x^3), so
# we can consider it as a linear layer neural network. Let's prepare the
# tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# In the above code, x.unsqueeze(-1) has shape (2000, 1), and p has shape
# (3,), for this case, broadcasting semantics will apply to obtain a tensor
# of shape (2000, 3)
# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. The Linear Module computes output from input using a
# linear function, and holds internal Tensors for its weight and bias.
# The Flatten layer flattens the output of the linear layer to a 1D tensor,
# to match the shape of `y`.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')
# print(model[0].weight.requires_grad)
learning_rate = 1e-6
for t in range(8000):
# Forward pass: compute predicted y by passing x to the model. Module objects
# override the __call__ operator so you can call them like functions. When
# doing so you pass a Tensor of input data to the Module and it produces
# a Tensor of output data.
y_pred = model(xx)
# Compute and print loss. We pass Tensors containing the predicted and true
# values of y, and the loss function returns a Tensor containing the
# loss.
loss = loss_fn(y_pred, y)
if t % 1000 == 999:
print(t, loss.item())
# Zero the gradients before running the backward pass.
model.zero_grad()
# Backward pass: compute gradient of the loss with respect to all the learnable
# parameters of the model. Internally, the parameters of each Module are stored
# in Tensors with requires_grad=True, so this call will compute gradients for
# all learnable parameters in the model.
loss.backward()
# Update the weights using gradient descent. Each parameter is a Tensor, so
# we can access its gradients like we did before.
with torch.no_grad():
for param in model.parameters():
param -= learning_rate * param.grad
# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]
# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
import torch
import math
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Prepare the input tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# Use the nn package to define our model and loss function.
model = torch.nn.Sequential(
torch.nn.Linear(3, 1),
torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')
# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we will use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells the
# optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(8000):
# Forward pass: compute predicted y by passing x to the model.
y_pred = model(xx)
# Compute and print loss.
loss = loss_fn(y_pred, y)
if t % 1000 == 999:
print(t, loss.item())
# Before the backward pass, use the optimizer object to zero all of the
# gradients for the variables it will update (which are the learnable
# weights of the model). This is because by default, gradients are
    # accumulated in buffers (i.e., not overwritten) whenever .backward()
    # is called. Check out the docs of torch.autograd.backward for more details.
optimizer.zero_grad()
# Backward pass: compute gradient of the loss with respect to model
# parameters
loss.backward()
# Calling the step function on an Optimizer makes an update to its
# parameters
optimizer.step()
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

View File

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import torch
import math
class Polynomial3(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate four parameters and assign them as
member parameters.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
In the forward function we accept a Tensor of input data and we must return
a Tensor of output data. We can use Modules defined in the constructor as
well as arbitrary operators on Tensors.
"""
return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
def string(self):
"""
Just like any class in Python, you can also define custom method on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = Polynomial3()
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters (defined
# with torch.nn.Parameter) which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(8000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 1000 == 999:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'Result: {model.string()}')

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
import random
import torch
import math
class DynamicNet(torch.nn.Module):
def __init__(self):
"""
In the constructor we instantiate five parameters and assign them as members.
"""
super().__init__()
self.a = torch.nn.Parameter(torch.randn(()))
self.b = torch.nn.Parameter(torch.randn(()))
self.c = torch.nn.Parameter(torch.randn(()))
self.d = torch.nn.Parameter(torch.randn(()))
self.e = torch.nn.Parameter(torch.randn(()))
def forward(self, x):
"""
        For the forward pass of the model, we randomly choose a polynomial
        order of 3, 4, or 5, reusing the e parameter to compute the
        contribution of the higher-order terms.
Since each forward pass builds a dynamic computation graph, we can use normal
Python control-flow operators like loops or conditional statements when
defining the forward pass of the model.
Here we also see that it is perfectly safe to reuse the same parameter many
times when defining a computational graph.
"""
y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
for exp in range(4, random.randint(4, 6)):
y = y + self.e * x ** exp
return y
def string(self):
"""
Just like any class in Python, you can also define custom method on PyTorch modules
"""
return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
# Construct our model by instantiating the class defined above
model = DynamicNet()
# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(60000):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(x)
# Compute and print loss
loss = criterion(y_pred, y)
if t % 2000 == 1999:
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
loss.backward()
optimizer.step()
# print(torch.liveness_info())
print(f'Result: {model.string()}')

View File

@ -0,0 +1,106 @@
import torch
from torch import nn
# from jtorch.utils import DataLoader
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor(),
)
batch_size = 64
# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)
print(len(train_dataloader))
for X, y in test_dataloader:
print(f"Shape of X [N, C, H, W]: {X.shape}")
print(f"Shape of y: {y.shape} {y.dtype}")
break
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
# Define model
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10)
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to(device)
print(model)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
epochs = 5
test(test_dataloader, model, loss_fn)
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
print("Done!")

View File

@ -0,0 +1,5 @@
cpp_extension = None
_flatten_dense_tensors = None
_unflatten_dense_tensors = None
tensorboard = None

View File

@ -0,0 +1,3 @@
#TODO: Implement this
_register_pytree_node = None
_dict_flatten = None

View File

@ -0,0 +1,8 @@
detach_variable = None
def checkpoint(
*args,
**kwargs
):
pass

View File

@ -0,0 +1,137 @@
import jittor as jt
import jittor.dataset
from jittor.dataset import Dataset as JDataset
from collections import namedtuple
from typing import Any, Callable, Iterable, Optional, Sequence, Union
class Dataset:
def __getitem__(self, index):
raise NotImplementedError
class IterableDataset:
def __iter__(self):
raise NotImplementedError
class DataLoader(JDataset):
def __init__(self, dataset,
batch_size: Optional[int] = 1,
shuffle: Optional[bool] = False,
sampler = None,
batch_sampler = None,
num_workers: int = 0,
collate_fn = None,
pin_memory: bool = False,
drop_last: bool = False,
timeout: float = 0,
worker_init_fn = None,
multiprocessing_context=None,
generator=None,
*, prefetch_factor: int = 2,
persistent_workers: bool = False,
pin_memory_device: str = "") -> None:
super().__init__(batch_size=batch_size,
shuffle=shuffle,
num_workers=num_workers,
drop_last=drop_last)
unsupported_kwargs = {
"batch_sampler": batch_sampler,
"pin_memory": pin_memory,
"timeout": timeout,
"worker_init_fn": worker_init_fn,
"multiprocessing_context": multiprocessing_context,
"generator": generator,
"persistent_workers": persistent_workers,
"pin_memory_device": pin_memory_device
}
for kwarg, value in unsupported_kwargs.items():
if value:
jt.LOG.w(f"Not implemented Dataloader kwarg: {kwarg}")
self.dataset = dataset
self.collate_fn = collate_fn
self.sampler = sampler
if not isinstance(dataset, IterableDataset):
self.total_len = len(dataset)
else:
# TODO: support multiple worker for iterable dataset
assert(num_workers == 0)
def collate_batch(self, batch):
if self.collate_fn is not None:
return self.collate_fn(batch)
else:
return super().collate_batch(batch)
def __getitem__(self, i):
return self.dataset[i]
def __iter__(self):
if isinstance(self.dataset, IterableDataset):
return self.inner_iter()
else:
return super().__iter__()
def inner_iter(self):
current_batch = []
if jt.world_size > 1:
assert self.batch_size % jt.world_size == 0, \
f"IterableDataset does not support a batch size ({self.batch_size}) that is not evenly divisible by the number of processes f{jt.world_size}"
real_batch_size = int(self.batch_size / jt.world_size)
else:
real_batch_size = self.batch_size
for element in self.dataset:
current_batch.append(element)
if len(current_batch) == real_batch_size:
current_batch = self.collate_batch(current_batch)
current_batch = self.to_jittor(current_batch)
yield current_batch
current_batch = []
if not self.drop_last and len(current_batch) > 0:
current_batch = self.collate_batch(current_batch)
yield self.to_jittor(current_batch)
# def get_worker_info():
# # always return the fake worker info
# return namedtuple('WorkerInfo', 'id num_workers')(0, 1)
# class RandomSampler(jt.dataset.RandomSampler):
# def __init__(self, dataset, generator=None, **kwargs):
# super().__init__(dataset, **kwargs)
# def __iter__(self):
# if getattr(self.dataset, "support_random_access", True):
# return super().__iter__()
# else:
# self.dataset.shuffle()
# return iter(range(self.dataset.__real_len__() if hasattr(self.dataset,"__real_len__") else self.dataset.__len__()))
# class DistributedSampler(jt.dataset.Sampler):
# def __init__(self, sampler: RandomSampler):
# assert(isinstance(sampler, RandomSampler))
# self.sampler = sampler
# def set_epoch(self, epoch: int):
# ### do nothing, let jittor's inner dataset handle
# pass
# def __iter__(self):
# return self.sampler.__iter__()
# def __len__(self):
# return self.sampler.__len__()
# BatchSampler = jt.dataset.BatchSampler
# Sampler = jt.dataset.Sampler
# SequentialSampler = jt.dataset.SequentialSampler
# SubsetRandomSampler = jt.dataset.SubsetRandomSampler
# TensorDataset = Dataset
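A rough usage sketch for the wrapper above (Squares is a hypothetical dataset written only for illustration; DataLoader and IterableDataset are the classes defined in this file):

class Squares(IterableDataset):
    def __iter__(self):
        for i in range(10):
            yield i * i

# yields batches of 4, 4, and the 2 leftover elements, since drop_last=False
loader = DataLoader(Squares(), batch_size=4, drop_last=False)
for batch in loader:
    print(batch)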

View File

@ -0,0 +1,9 @@
from typing import Callable, Union
Dtype = Union[Callable, str]
def get_string_dtype(dtype):
if callable(dtype):
dtype = dtype.__name__
if not isinstance(dtype, str):
raise ValueError(f"dtype is expected to be str, python type function, or jittor type function, but got {dtype}.")
return dtype
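For illustration, the normalization this helper performs (assuming get_string_dtype is imported from this module):

print(get_string_dtype(float))      # -> "float": callables map to their __name__
print(get_string_dtype("float32"))  # -> "float32": strings pass through
# get_string_dtype(42) raises ValueError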

View File

@ -0,0 +1,34 @@
import os
import glob
import shutil
import sys
home_path = os.path.join(os.path.dirname(__file__), "..", "..", "..")
home_path = os.path.abspath(home_path)
def callback(func, path, exc_info):
print(f"remove \"{path}\" failed.")
def rmtree(path):
if os.path.isdir(path):
print(f"remove \"{path}\" recursive.")
shutil.rmtree(path, onerror=callback)
def remove_tmpfile():
dist_file = home_path+"/dist"
egg_file = glob.glob(home_path+"/**/*egg-info")
rmtree(dist_file)
for e in egg_file:
rmtree(e)
def run_cmd(cmd):
print("[CMD]", cmd)
assert os.system(cmd)==0
os.chdir(home_path)
remove_tmpfile()
run_cmd(f"{sys.executable} ./setup.py sdist")
run_cmd(f"{sys.executable} -m twine upload dist/*")
remove_tmpfile()

View File

@ -0,0 +1,46 @@
import importlib.machinery
import os
def _download_file_from_remote_location(fpath: str, url: str) -> None:
pass
def _is_remote_location_available() -> bool:
return False
def _get_extension_path(lib_name):
lib_dir = os.path.dirname(__file__)
if os.name == "nt":
# Register the main torchvision library location on the default DLL path
import ctypes
import sys
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
with_load_library_flags = hasattr(kernel32, "AddDllDirectory")
prev_error_mode = kernel32.SetErrorMode(0x0001)
if with_load_library_flags:
kernel32.AddDllDirectory.restype = ctypes.c_void_p
if sys.version_info >= (3, 8):
os.add_dll_directory(lib_dir)
elif with_load_library_flags:
res = kernel32.AddDllDirectory(lib_dir)
if res is None:
err = ctypes.WinError(ctypes.get_last_error())
err.strerror += f' Error adding "{lib_dir}" to the DLL directories.'
raise err
kernel32.SetErrorMode(prev_error_mode)
loader_details = (importlib.machinery.ExtensionFileLoader, importlib.machinery.EXTENSION_SUFFIXES)
extfinder = importlib.machinery.FileFinder(lib_dir, loader_details)
ext_specs = extfinder.find_spec(lib_name)
if ext_specs is None:
raise ImportError
return ext_specs.origin

View File

@ -0,0 +1,9 @@
from .mnist import EMNIST, FashionMNIST, KMNIST, MNIST, QMNIST
__all__ = (
"EMNIST",
"FashionMNIST",
"QMNIST",
"MNIST",
"KMNIST",
)

View File

@ -0,0 +1,558 @@
import codecs
import os
import os.path
import shutil
import string
import sys
import warnings
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.error import URLError
import numpy as np
import torch
from PIL import Image
from .utils import check_integrity, download_and_extract_archive, extract_archive, verify_str_arg
from .vision import VisionDataset
class MNIST(VisionDataset):
"""`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset.
Args:
root (string): Root directory of dataset where ``MNIST/raw/train-images-idx3-ubyte``
and ``MNIST/raw/t10k-images-idx3-ubyte`` exist.
train (bool, optional): If True, creates dataset from ``train-images-idx3-ubyte``,
otherwise from ``t10k-images-idx3-ubyte``.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
mirrors = [
"http://yann.lecun.com/exdb/mnist/",
"https://ossci-datasets.s3.amazonaws.com/mnist/",
]
resources = [
("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c"),
]
training_file = "training.pt"
test_file = "test.pt"
classes = [
"0 - zero",
"1 - one",
"2 - two",
"3 - three",
"4 - four",
"5 - five",
"6 - six",
"7 - seven",
"8 - eight",
"9 - nine",
]
@property
def train_labels(self):
warnings.warn("train_labels has been renamed targets")
return self.targets
@property
def test_labels(self):
warnings.warn("test_labels has been renamed targets")
return self.targets
@property
def train_data(self):
warnings.warn("train_data has been renamed data")
return self.data
@property
def test_data(self):
warnings.warn("test_data has been renamed data")
return self.data
def __init__(
self,
root: str,
train: bool = True,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
download: bool = False,
) -> None:
super().__init__(root, transform=transform, target_transform=target_transform)
self.train = train # training set or test set
if self._check_legacy_exist():
self.data, self.targets = self._load_legacy_data()
return
if download:
self.download()
if not self._check_exists():
raise RuntimeError("Dataset not found. You can use download=True to download it")
self.data, self.targets = self._load_data()
def _check_legacy_exist(self):
processed_folder_exists = os.path.exists(self.processed_folder)
if not processed_folder_exists:
return False
return all(
check_integrity(os.path.join(self.processed_folder, file)) for file in (self.training_file, self.test_file)
)
def _load_legacy_data(self):
# This is for BC only. We no longer cache the data in a custom binary, but simply read from the raw data
# directly.
data_file = self.training_file if self.train else self.test_file
return torch.load(os.path.join(self.processed_folder, data_file))
def _load_data(self):
image_file = f"{'train' if self.train else 't10k'}-images-idx3-ubyte"
data = read_image_file(os.path.join(self.raw_folder, image_file))
label_file = f"{'train' if self.train else 't10k'}-labels-idx1-ubyte"
targets = read_label_file(os.path.join(self.raw_folder, label_file))
return data, targets
def __getitem__(self, index: int) -> Tuple[Any, Any]:
"""
Args:
index (int): Index
Returns:
tuple: (image, target) where target is index of the target class.
"""
img, target = self.data[index], int(self.targets[index])
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = Image.fromarray(img.numpy(), mode="L")
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
target = self.target_transform(target)
return img, target
def __len__(self) -> int:
return len(self.data)
@property
def raw_folder(self) -> str:
return os.path.join(self.root, self.__class__.__name__, "raw")
@property
def processed_folder(self) -> str:
return os.path.join(self.root, self.__class__.__name__, "processed")
@property
def class_to_idx(self) -> Dict[str, int]:
return {_class: i for i, _class in enumerate(self.classes)}
def _check_exists(self) -> bool:
return all(
check_integrity(os.path.join(self.raw_folder, os.path.splitext(os.path.basename(url))[0]))
for url, _ in self.resources
)
def download(self) -> None:
"""Download the MNIST data if it doesn't exist already."""
if self._check_exists():
return
os.makedirs(self.raw_folder, exist_ok=True)
# download files
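        # Try each mirror in turn: break on the first success; the for-else
        # raises only when every mirror failed for this file.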
for filename, md5 in self.resources:
for mirror in self.mirrors:
url = f"{mirror}{filename}"
try:
print(f"Downloading {url}")
download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)
except URLError as error:
print(f"Failed to download (trying next):\n{error}")
continue
finally:
print()
break
else:
raise RuntimeError(f"Error downloading {filename}")
def extra_repr(self) -> str:
split = "Train" if self.train is True else "Test"
return f"Split: {split}"
class FashionMNIST(MNIST):
"""`Fashion-MNIST <https://github.com/zalandoresearch/fashion-mnist>`_ Dataset.
Args:
root (string): Root directory of dataset where ``FashionMNIST/raw/train-images-idx3-ubyte``
and ``FashionMNIST/raw/t10k-images-idx3-ubyte`` exist.
train (bool, optional): If True, creates dataset from ``train-images-idx3-ubyte``,
otherwise from ``t10k-images-idx3-ubyte``.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
mirrors = ["http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"]
resources = [
("train-images-idx3-ubyte.gz", "8d4fb7e6c68d591d4c3dfef9ec88bf0d"),
("train-labels-idx1-ubyte.gz", "25c81989df183df01b3e8a0aad5dffbe"),
("t10k-images-idx3-ubyte.gz", "bef4ecab320f06d8554ea6380940ec79"),
("t10k-labels-idx1-ubyte.gz", "bb300cfdad3c16e7a12a480ee83cd310"),
]
classes = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
class KMNIST(MNIST):
"""`Kuzushiji-MNIST <https://github.com/rois-codh/kmnist>`_ Dataset.
Args:
root (string): Root directory of dataset where ``KMNIST/raw/train-images-idx3-ubyte``
and ``KMNIST/raw/t10k-images-idx3-ubyte`` exist.
train (bool, optional): If True, creates dataset from ``train-images-idx3-ubyte``,
otherwise from ``t10k-images-idx3-ubyte``.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
mirrors = ["http://codh.rois.ac.jp/kmnist/dataset/kmnist/"]
resources = [
("train-images-idx3-ubyte.gz", "bdb82020997e1d708af4cf47b453dcf7"),
("train-labels-idx1-ubyte.gz", "e144d726b3acfaa3e44228e80efcd344"),
("t10k-images-idx3-ubyte.gz", "5c965bf0a639b31b8f53240b1b52f4d7"),
("t10k-labels-idx1-ubyte.gz", "7320c461ea6c1c855c0b718fb2a4b134"),
]
classes = ["o", "ki", "su", "tsu", "na", "ha", "ma", "ya", "re", "wo"]
class EMNIST(MNIST):
"""`EMNIST <https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist>`_ Dataset.
Args:
root (string): Root directory of dataset where ``EMNIST/raw/train-images-idx3-ubyte``
and ``EMNIST/raw/t10k-images-idx3-ubyte`` exist.
split (string): The dataset has 6 different splits: ``byclass``, ``bymerge``,
``balanced``, ``letters``, ``digits`` and ``mnist``. This argument specifies
which one to use.
train (bool, optional): If True, creates dataset from ``training.pt``,
otherwise from ``test.pt``.
download (bool, optional): If True, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
"""
url = "https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip"
md5 = "58c8d27c78d21e728a6bc7b3cc06412e"
splits = ("byclass", "bymerge", "balanced", "letters", "digits", "mnist")
    # Merged classes assume the same structure for both uppercase and lowercase versions
_merged_classes = {"c", "i", "j", "k", "l", "m", "o", "p", "s", "u", "v", "w", "x", "y", "z"}
_all_classes = set(string.digits + string.ascii_letters)
classes_split_dict = {
"byclass": sorted(list(_all_classes)),
"bymerge": sorted(list(_all_classes - _merged_classes)),
"balanced": sorted(list(_all_classes - _merged_classes)),
"letters": ["N/A"] + list(string.ascii_lowercase),
"digits": list(string.digits),
"mnist": list(string.digits),
}
def __init__(self, root: str, split: str, **kwargs: Any) -> None:
self.split = verify_str_arg(split, "split", self.splits)
self.training_file = self._training_file(split)
self.test_file = self._test_file(split)
super().__init__(root, **kwargs)
self.classes = self.classes_split_dict[self.split]
@staticmethod
def _training_file(split) -> str:
return f"training_{split}.pt"
@staticmethod
def _test_file(split) -> str:
return f"test_{split}.pt"
@property
def _file_prefix(self) -> str:
return f"emnist-{self.split}-{'train' if self.train else 'test'}"
@property
def images_file(self) -> str:
return os.path.join(self.raw_folder, f"{self._file_prefix}-images-idx3-ubyte")
@property
def labels_file(self) -> str:
return os.path.join(self.raw_folder, f"{self._file_prefix}-labels-idx1-ubyte")
def _load_data(self):
return read_image_file(self.images_file), read_label_file(self.labels_file)
def _check_exists(self) -> bool:
return all(check_integrity(file) for file in (self.images_file, self.labels_file))
def download(self) -> None:
"""Download the EMNIST data if it doesn't exist already."""
if self._check_exists():
return
os.makedirs(self.raw_folder, exist_ok=True)
download_and_extract_archive(self.url, download_root=self.raw_folder, md5=self.md5)
gzip_folder = os.path.join(self.raw_folder, "gzip")
for gzip_file in os.listdir(gzip_folder):
if gzip_file.endswith(".gz"):
extract_archive(os.path.join(gzip_folder, gzip_file), self.raw_folder)
shutil.rmtree(gzip_folder)
class QMNIST(MNIST):
"""`QMNIST <https://github.com/facebookresearch/qmnist>`_ Dataset.
Args:
root (string): Root directory of dataset whose ``raw``
subdir contains binary files of the datasets.
what (string,optional): Can be 'train', 'test', 'test10k',
'test50k', or 'nist' for respectively the mnist compatible
training set, the 60k qmnist testing set, the 10k qmnist
examples that match the mnist testing set, the 50k
remaining qmnist testing examples, or all the nist
digits. The default is to select 'train' or 'test'
according to the compatibility argument 'train'.
compat (bool,optional): A boolean that says whether the target
for each example is class number (for compatibility with
the MNIST dataloader) or a torch vector containing the
full qmnist information. Default=True.
download (bool, optional): If True, downloads the dataset from
the internet and puts it in root directory. If dataset is
already downloaded, it is not downloaded again.
transform (callable, optional): A function/transform that
            takes in a PIL image and returns a transformed
            version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform
that takes in the target and transforms it.
train (bool,optional,compatibility): When argument 'what' is
not specified, this boolean decides whether to load the
            training set or the testing set. Default: True.
"""
subsets = {"train": "train", "test": "test", "test10k": "test", "test50k": "test", "nist": "nist"}
resources: Dict[str, List[Tuple[str, str]]] = { # type: ignore[assignment]
"train": [
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-images-idx3-ubyte.gz",
"ed72d4157d28c017586c42bc6afe6370",
),
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-train-labels-idx2-int.gz",
"0058f8dd561b90ffdd0f734c6a30e5e4",
),
],
"test": [
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-images-idx3-ubyte.gz",
"1394631089c404de565df7b7aeaf9412",
),
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/qmnist-test-labels-idx2-int.gz",
"5b5b05890a5e13444e108efe57b788aa",
),
],
"nist": [
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-images-idx3-ubyte.xz",
"7f124b3b8ab81486c9d8c2749c17f834",
),
(
"https://raw.githubusercontent.com/facebookresearch/qmnist/master/xnist-labels-idx2-int.xz",
"5ed0e788978e45d4a8bd4b7caec3d79d",
),
],
}
classes = [
"0 - zero",
"1 - one",
"2 - two",
"3 - three",
"4 - four",
"5 - five",
"6 - six",
"7 - seven",
"8 - eight",
"9 - nine",
]
def __init__(
self, root: str, what: Optional[str] = None, compat: bool = True, train: bool = True, **kwargs: Any
) -> None:
if what is None:
what = "train" if train else "test"
self.what = verify_str_arg(what, "what", tuple(self.subsets.keys()))
self.compat = compat
self.data_file = what + ".pt"
self.training_file = self.data_file
self.test_file = self.data_file
super().__init__(root, train, **kwargs)
@property
def images_file(self) -> str:
(url, _), _ = self.resources[self.subsets[self.what]]
return os.path.join(self.raw_folder, os.path.splitext(os.path.basename(url))[0])
@property
def labels_file(self) -> str:
_, (url, _) = self.resources[self.subsets[self.what]]
return os.path.join(self.raw_folder, os.path.splitext(os.path.basename(url))[0])
def _check_exists(self) -> bool:
return all(check_integrity(file) for file in (self.images_file, self.labels_file))
def _load_data(self):
data = read_sn3_pascalvincent_tensor(self.images_file)
if data.dtype != torch.uint8:
raise TypeError(f"data should be of dtype torch.uint8 instead of {data.dtype}")
if data.ndimension() != 3:
raise ValueError("data should have 3 dimensions instead of {data.ndimension()}")
targets = read_sn3_pascalvincent_tensor(self.labels_file).long()
if targets.ndimension() != 2:
raise ValueError(f"targets should have 2 dimensions instead of {targets.ndimension()}")
if self.what == "test10k":
data = data[0:10000, :, :].clone()
targets = targets[0:10000, :].clone()
elif self.what == "test50k":
data = data[10000:, :, :].clone()
targets = targets[10000:, :].clone()
return data, targets
def download(self) -> None:
"""Download the QMNIST data if it doesn't exist already.
Note that we only download what has been asked for (argument 'what').
"""
if self._check_exists():
return
os.makedirs(self.raw_folder, exist_ok=True)
split = self.resources[self.subsets[self.what]]
for url, md5 in split:
download_and_extract_archive(url, self.raw_folder, md5=md5)
def __getitem__(self, index: int) -> Tuple[Any, Any]:
# redefined to handle the compat flag
img, target = self.data[index], self.targets[index]
img = Image.fromarray(img.numpy(), mode="L")
if self.transform is not None:
img = self.transform(img)
if self.compat:
target = int(target[0])
if self.target_transform is not None:
target = self.target_transform(target)
return img, target
def extra_repr(self) -> str:
return f"Split: {self.what}"
def get_int(b: bytes) -> int:
return int(codecs.encode(b, "hex"), 16)
SN3_PASCALVINCENT_BITSMAP = {
8: torch.uint8,
9: torch.int8,
11: torch.int16,
12: torch.int32,
13: torch.float32,
14: torch.float64,
}
TORCH_TYPE_BITS = {
torch.uint8: 8,
torch.int8: 8,
torch.int16: 16,
torch.int32: 32,
torch.float32: 32,
torch.float64: 64,
}
def read_sn3_pascalvincent_tensor(path: str, strict: bool = True) -> torch.Tensor:
"""Read a SN3 file in "Pascal Vincent" format (Lush file 'libidx/idx-io.lsh').
Argument may be a filename, compressed filename, or file object.
"""
# read
with open(path, "rb") as f:
data = f.read()
# parse
magic = get_int(data[0:4])
nd = magic % 256
ty = magic // 256
assert 1 <= nd <= 3
assert 8 <= ty <= 14
torch_type = SN3_PASCALVINCENT_BITSMAP[ty]
s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]
num_bytes_per_value = TORCH_TYPE_BITS[torch_type] // 8
# The MNIST format uses the big endian byte order. If the system uses little endian byte order by default,
# we need to reverse the bytes before we can read them with torch.frombuffer().
needs_byte_reversal = sys.byteorder == "little" and num_bytes_per_value > 1
parsed = torch.frombuffer(bytearray(data), dtype=torch_type, offset=(4 * (nd + 1)))
if needs_byte_reversal:
parsed = parsed.flip(0)
assert parsed.shape[0] == np.prod(s) or not strict
return parsed.view(*s)
def read_label_file(path: str) -> torch.Tensor:
x = read_sn3_pascalvincent_tensor(path, strict=False)
if x.dtype != torch.uint8:
raise TypeError(f"x should be of dtype torch.uint8 instead of {x.dtype}")
if x.ndimension() != 1:
raise ValueError(f"x should have 1 dimension instead of {x.ndimension()}")
return x.long()
def read_image_file(path: str) -> torch.Tensor:
x = read_sn3_pascalvincent_tensor(path, strict=False)
if x.dtype != torch.uint8:
raise TypeError(f"x should be of dtype torch.uint8 instead of {x.dtype}")
if x.ndimension() != 3:
raise ValueError(f"x should have 3 dimension instead of {x.ndimension()}")
return x
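To make the header arithmetic above concrete, a small sketch (not part of this commit) checking the MNIST image magic number:

import struct

# SN3 magic for a 3-D uint8 tensor (MNIST images): 0x00000803
magic = struct.unpack(">i", bytes([0, 0, 8, 3]))[0]
assert magic == 2051
assert magic % 256 == 3    # nd: number of dimensions
assert magic // 256 == 8   # ty: dtype code 8 -> torch.uint8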

View File

@ -0,0 +1,522 @@
import bz2
import contextlib
import gzip
import hashlib
import itertools
import lzma
import os
import os.path
import pathlib
import re
import sys
import tarfile
import urllib
import urllib.error
import urllib.request
import warnings
import zipfile
from typing import Any, Callable, Dict, IO, Iterable, Iterator, List, Optional, Tuple, TypeVar
from urllib.parse import urlparse
import numpy as np
import requests
import torch
from tqdm import tqdm
from .._internally_replaced_utils import _download_file_from_remote_location, _is_remote_location_available
USER_AGENT = "pytorch/vision"
def _save_response_content(
content: Iterator[bytes],
destination: str,
length: Optional[int] = None,
) -> None:
with open(destination, "wb") as fh, tqdm(total=length) as pbar:
for chunk in content:
# filter out keep-alive new chunks
if not chunk:
continue
fh.write(chunk)
pbar.update(len(chunk))
def _urlretrieve(url: str, filename: str, chunk_size: int = 1024 * 32) -> None:
with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response:
_save_response_content(iter(lambda: response.read(chunk_size), b""), filename, length=response.length)
def gen_bar_updater() -> Callable[[int, int, int], None]:
warnings.warn("The function `gen_bar_update` is deprecated since 0.13 and will be removed in 0.15.")
pbar = tqdm(total=None)
def bar_update(count, block_size, total_size):
if pbar.total is None and total_size:
pbar.total = total_size
progress_bytes = count * block_size
pbar.update(progress_bytes - pbar.n)
return bar_update
def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str:
# Setting the `usedforsecurity` flag does not change anything about the functionality, but indicates that we are
# not using the MD5 checksum for cryptography. This enables its usage in restricted environments like FIPS. Without
# it torchvision.datasets is unusable in these environments since we perform a MD5 check everywhere.
if sys.version_info >= (3, 9):
md5 = hashlib.md5(usedforsecurity=False)
else:
md5 = hashlib.md5()
with open(fpath, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
md5.update(chunk)
return md5.hexdigest()
def check_md5(fpath: str, md5: str, **kwargs: Any) -> bool:
return md5 == calculate_md5(fpath, **kwargs)
def check_integrity(fpath: str, md5: Optional[str] = None) -> bool:
if not os.path.isfile(fpath):
return False
if md5 is None:
return True
return check_md5(fpath, md5)
def _get_redirect_url(url: str, max_hops: int = 3) -> str:
initial_url = url
headers = {"Method": "HEAD", "User-Agent": USER_AGENT}
for _ in range(max_hops + 1):
with urllib.request.urlopen(urllib.request.Request(url, headers=headers)) as response:
if response.url == url or response.url is None:
return url
url = response.url
else:
raise RecursionError(
f"Request to {initial_url} exceeded {max_hops} redirects. The last redirect points to {url}."
)
def _get_google_drive_file_id(url: str) -> Optional[str]:
parts = urlparse(url)
if re.match(r"(drive|docs)[.]google[.]com", parts.netloc) is None:
return None
match = re.match(r"/file/d/(?P<id>[^/]*)", parts.path)
if match is None:
return None
return match.group("id")
def download_url(
url: str, root: str, filename: Optional[str] = None, md5: Optional[str] = None, max_redirect_hops: int = 3
) -> None:
"""Download a file from a url and place it in root.
Args:
url (str): URL to download file from
root (str): Directory to place downloaded file in
filename (str, optional): Name to save the file under. If None, use the basename of the URL
md5 (str, optional): MD5 checksum of the download. If None, do not check
max_redirect_hops (int, optional): Maximum number of redirect hops allowed
"""
root = os.path.expanduser(root)
if not filename:
filename = os.path.basename(url)
fpath = os.path.join(root, filename)
os.makedirs(root, exist_ok=True)
# check if file is already present locally
if check_integrity(fpath, md5):
print("Using downloaded and verified file: " + fpath)
return
if _is_remote_location_available():
_download_file_from_remote_location(fpath, url)
else:
# expand redirect chain if needed
url = _get_redirect_url(url, max_hops=max_redirect_hops)
# check if file is located on Google Drive
file_id = _get_google_drive_file_id(url)
if file_id is not None:
return download_file_from_google_drive(file_id, root, filename, md5)
# download the file
try:
print("Downloading " + url + " to " + fpath)
_urlretrieve(url, fpath)
except (urllib.error.URLError, OSError) as e: # type: ignore[attr-defined]
if url[:5] == "https":
url = url.replace("https:", "http:")
print("Failed download. Trying https -> http instead. Downloading " + url + " to " + fpath)
_urlretrieve(url, fpath)
else:
raise e
# check integrity of downloaded file
if not check_integrity(fpath, md5):
raise RuntimeError("File not found or corrupted.")
def list_dir(root: str, prefix: bool = False) -> List[str]:
"""List all directories at a given root
Args:
root (str): Path to directory whose folders need to be listed
prefix (bool, optional): If true, prepends the path to each result, otherwise
only returns the name of the directories found
"""
root = os.path.expanduser(root)
directories = [p for p in os.listdir(root) if os.path.isdir(os.path.join(root, p))]
if prefix is True:
directories = [os.path.join(root, d) for d in directories]
return directories
def list_files(root: str, suffix: str, prefix: bool = False) -> List[str]:
"""List all files ending with a suffix at a given root
Args:
root (str): Path to directory whose folders need to be listed
suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png').
It uses the Python "str.endswith" method and is passed directly
prefix (bool, optional): If true, prepends the path to each result, otherwise
only returns the name of the files found
"""
root = os.path.expanduser(root)
files = [p for p in os.listdir(root) if os.path.isfile(os.path.join(root, p)) and p.endswith(suffix)]
if prefix is True:
files = [os.path.join(root, d) for d in files]
return files
def _extract_gdrive_api_response(response, chunk_size: int = 32 * 1024) -> Tuple[bytes, Iterator[bytes]]:
content = response.iter_content(chunk_size)
first_chunk = None
# filter out keep-alive new chunks
while not first_chunk:
first_chunk = next(content)
content = itertools.chain([first_chunk], content)
try:
match = re.search("<title>Google Drive - (?P<api_response>.+?)</title>", first_chunk.decode())
api_response = match["api_response"] if match is not None else None
except UnicodeDecodeError:
api_response = None
return api_response, content
def download_file_from_google_drive(file_id: str, root: str, filename: Optional[str] = None, md5: Optional[str] = None):
"""Download a Google Drive file from and place it in root.
Args:
file_id (str): id of file to be downloaded
root (str): Directory to place downloaded file in
filename (str, optional): Name to save the file under. If None, use the id of the file.
md5 (str, optional): MD5 checksum of the download. If None, do not check
"""
# Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
root = os.path.expanduser(root)
if not filename:
filename = file_id
fpath = os.path.join(root, filename)
os.makedirs(root, exist_ok=True)
if check_integrity(fpath, md5):
print(f"Using downloaded {'and verified ' if md5 else ''}file: {fpath}")
return
url = "https://drive.google.com/uc"
params = dict(id=file_id, export="download")
with requests.Session() as session:
response = session.get(url, params=params, stream=True)
for key, value in response.cookies.items():
if key.startswith("download_warning"):
token = value
break
else:
api_response, content = _extract_gdrive_api_response(response)
token = "t" if api_response == "Virus scan warning" else None
if token is not None:
response = session.get(url, params=dict(params, confirm=token), stream=True)
api_response, content = _extract_gdrive_api_response(response)
if api_response == "Quota exceeded":
raise RuntimeError(
f"The daily quota of the file {filename} is exceeded and it "
f"can't be downloaded. This is a limitation of Google Drive "
f"and can only be overcome by trying again later."
)
_save_response_content(content, fpath)
# In case we deal with an unhandled GDrive API response, the file should be smaller than 10kB and contain only text
if os.stat(fpath).st_size < 10 * 1024:
with contextlib.suppress(UnicodeDecodeError), open(fpath) as fh:
text = fh.read()
# Regular expression to detect HTML. Copied from https://stackoverflow.com/a/70585604
if re.search(r"</?\s*[a-z-][^>]*\s*>|(&(?:[\w\d]+|#\d+|#x[a-f\d]+);)", text):
warnings.warn(
f"We detected some HTML elements in the downloaded file. "
f"This most likely means that the download triggered an unhandled API response by GDrive. "
f"Please report this to torchvision at https://github.com/pytorch/vision/issues including "
f"the response:\n\n{text}"
)
if md5 and not check_md5(fpath, md5):
raise RuntimeError(
f"The MD5 checksum of the download file {fpath} does not match the one on record."
f"Please delete the file and try again. "
f"If the issue persists, please report this to torchvision at https://github.com/pytorch/vision/issues."
)
def _extract_tar(from_path: str, to_path: str, compression: Optional[str]) -> None:
with tarfile.open(from_path, f"r:{compression[1:]}" if compression else "r") as tar:
tar.extractall(to_path)
_ZIP_COMPRESSION_MAP: Dict[str, int] = {
".bz2": zipfile.ZIP_BZIP2,
".xz": zipfile.ZIP_LZMA,
}
def _extract_zip(from_path: str, to_path: str, compression: Optional[str]) -> None:
with zipfile.ZipFile(
from_path, "r", compression=_ZIP_COMPRESSION_MAP[compression] if compression else zipfile.ZIP_STORED
) as zip:
zip.extractall(to_path)
_ARCHIVE_EXTRACTORS: Dict[str, Callable[[str, str, Optional[str]], None]] = {
".tar": _extract_tar,
".zip": _extract_zip,
}
_COMPRESSED_FILE_OPENERS: Dict[str, Callable[..., IO]] = {
".bz2": bz2.open,
".gz": gzip.open,
".xz": lzma.open,
}
_FILE_TYPE_ALIASES: Dict[str, Tuple[Optional[str], Optional[str]]] = {
".tbz": (".tar", ".bz2"),
".tbz2": (".tar", ".bz2"),
".tgz": (".tar", ".gz"),
}
def _detect_file_type(file: str) -> Tuple[str, Optional[str], Optional[str]]:
"""Detect the archive type and/or compression of a file.
Args:
file (str): the filename
Returns:
(tuple): tuple of suffix, archive type, and compression
Raises:
RuntimeError: if file has no suffix or suffix is not supported
"""
suffixes = pathlib.Path(file).suffixes
if not suffixes:
raise RuntimeError(
f"File '{file}' has no suffixes that could be used to detect the archive type and compression."
)
suffix = suffixes[-1]
# check if the suffix is a known alias
if suffix in _FILE_TYPE_ALIASES:
return (suffix, *_FILE_TYPE_ALIASES[suffix])
# check if the suffix is an archive type
if suffix in _ARCHIVE_EXTRACTORS:
return suffix, suffix, None
# check if the suffix is a compression
if suffix in _COMPRESSED_FILE_OPENERS:
# check for suffix hierarchy
if len(suffixes) > 1:
suffix2 = suffixes[-2]
# check if the suffix2 is an archive type
if suffix2 in _ARCHIVE_EXTRACTORS:
return suffix2 + suffix, suffix2, suffix
return suffix, None, suffix
valid_suffixes = sorted(set(_FILE_TYPE_ALIASES) | set(_ARCHIVE_EXTRACTORS) | set(_COMPRESSED_FILE_OPENERS))
raise RuntimeError(f"Unknown compression or archive type: '{suffix}'.\nKnown suffixes are: '{valid_suffixes}'.")
def _decompress(from_path: str, to_path: Optional[str] = None, remove_finished: bool = False) -> str:
r"""Decompress a file.
The compression is automatically detected from the file name.
Args:
from_path (str): Path to the file to be decompressed.
to_path (str): Path to the decompressed file. If omitted, ``from_path`` without compression extension is used.
remove_finished (bool): If ``True``, remove the file after the extraction.
Returns:
(str): Path to the decompressed file.
"""
suffix, archive_type, compression = _detect_file_type(from_path)
if not compression:
raise RuntimeError(f"Couldn't detect a compression from suffix {suffix}.")
if to_path is None:
to_path = from_path.replace(suffix, archive_type if archive_type is not None else "")
# We don't need to check for a missing key here, since this was already done in _detect_file_type()
compressed_file_opener = _COMPRESSED_FILE_OPENERS[compression]
with compressed_file_opener(from_path, "rb") as rfh, open(to_path, "wb") as wfh:
wfh.write(rfh.read())
if remove_finished:
os.remove(from_path)
return to_path
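# Example usage (a sketch; file names are hypothetical):
#   _decompress("sample.ppm.gz")  -> writes and returns "sample.ppm"
#   _decompress("model.tar.xz")   -> writes and returns "model.tar" (the tar itself is kept)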
def extract_archive(from_path: str, to_path: Optional[str] = None, remove_finished: bool = False) -> str:
"""Extract an archive.
The archive type and a possible compression are automatically detected from the file name. If the file is compressed
but not an archive, the call is dispatched to :func:`_decompress`.
Args:
from_path (str): Path to the file to be extracted.
to_path (str): Path to the directory the file will be extracted to. If omitted, the directory of the file is
used.
remove_finished (bool): If ``True``, remove the file after the extraction.
Returns:
(str): Path to the directory the file was extracted to.
"""
if to_path is None:
to_path = os.path.dirname(from_path)
suffix, archive_type, compression = _detect_file_type(from_path)
if not archive_type:
return _decompress(
from_path,
os.path.join(to_path, os.path.basename(from_path).replace(suffix, "")),
remove_finished=remove_finished,
)
# We don't need to check for a missing key here, since this was already done in _detect_file_type()
extractor = _ARCHIVE_EXTRACTORS[archive_type]
extractor(from_path, to_path, compression)
if remove_finished:
os.remove(from_path)
return to_path
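# Example usage (a sketch; paths are hypothetical):
#   extract_archive("/tmp/cifar-10-python.tar.gz")       # extracts into /tmp
#   extract_archive("/tmp/images.zip", to_path="/data")  # extracts into /data
#   extract_archive("/tmp/labels.csv.gz")                # no archive type -> dispatched to _decompress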
def download_and_extract_archive(
url: str,
download_root: str,
extract_root: Optional[str] = None,
filename: Optional[str] = None,
md5: Optional[str] = None,
remove_finished: bool = False,
) -> None:
download_root = os.path.expanduser(download_root)
if extract_root is None:
extract_root = download_root
if not filename:
filename = os.path.basename(url)
download_url(url, download_root, filename, md5)
archive = os.path.join(download_root, filename)
print(f"Extracting {archive} to {extract_root}")
extract_archive(archive, extract_root, remove_finished)
def iterable_to_str(iterable: Iterable) -> str:
return "'" + "', '".join([str(item) for item in iterable]) + "'"
T = TypeVar("T", str, bytes)
def verify_str_arg(
value: T,
arg: Optional[str] = None,
valid_values: Optional[Iterable[T]] = None,
custom_msg: Optional[str] = None,
) -> T:
if not isinstance(value, str):
if arg is None:
msg = "Expected type str, but got type {type}."
else:
msg = "Expected type str for argument {arg}, but got type {type}."
msg = msg.format(type=type(value), arg=arg)
raise ValueError(msg)
if valid_values is None:
return value
if value not in valid_values:
if custom_msg is not None:
msg = custom_msg
else:
msg = "Unknown value '{value}' for argument {arg}. Valid values are {{{valid_values}}}."
msg = msg.format(value=value, arg=arg, valid_values=iterable_to_str(valid_values))
raise ValueError(msg)
return value
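# Example usage (a sketch):
#   split = verify_str_arg("train", "split", ("train", "test"))   # returns "train"
#   verify_str_arg("val", "split", ("train", "test"))             # raises ValueError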
def _read_pfm(file_name: str, slice_channels: int = 2) -> np.ndarray:
"""Read file in .pfm format. Might contain either 1 or 3 channels of data.
Args:
file_name (str): Path to the file.
slice_channels (int): Number of channels to slice out of the file.
Useful for reading different data formats stored in .pfm files: Optical Flows, Stereo Disparity Maps, etc.
"""
with open(file_name, "rb") as f:
header = f.readline().rstrip()
if header not in [b"PF", b"Pf"]:
raise ValueError("Invalid PFM file")
dim_match = re.match(rb"^(\d+)\s(\d+)\s$", f.readline())
if not dim_match:
raise Exception("Malformed PFM header.")
w, h = (int(dim) for dim in dim_match.groups())
scale = float(f.readline().rstrip())
if scale < 0: # little-endian
endian = "<"
scale = -scale
else:
endian = ">" # big-endian
data = np.fromfile(f, dtype=endian + "f")
pfm_channels = 3 if header == b"PF" else 1
data = data.reshape(h, w, pfm_channels).transpose(2, 0, 1)
data = np.flip(data, axis=1) # flip on h dimension
data = data[:slice_channels, :, :]
return data.astype(np.float32)
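# Example (a sketch; the file name is hypothetical): a PFM file storing optical flow
# comes back as a float32 array of shape (slice_channels, H, W):
#   flow = _read_pfm("frame_0001.pfm")   # shape (2, H, W) with the default slice_channels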

View File

@ -0,0 +1,104 @@
import os
from typing import Any, Callable, List, Optional, Tuple
import torch
import torch.utils.data as data
from ..utils import _log_api_usage_once
class VisionDataset(data.Dataset):
"""
Base class for making datasets which are compatible with torchvision.
It is necessary to override the ``__getitem__`` and ``__len__`` methods.
Args:
root (string): Root directory of dataset.
transforms (callable, optional): A function/transform that takes in
an image and a label and returns the transformed versions of both.
transform (callable, optional): A function/transform that takes in a PIL image
and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
.. note::
:attr:`transforms` and the combination of :attr:`transform` and :attr:`target_transform` are mutually exclusive.
"""
_repr_indent = 4
def __init__(
self,
root: str,
transforms: Optional[Callable] = None,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
) -> None:
self.root = root
has_transforms = transforms is not None
has_separate_transform = transform is not None or target_transform is not None
if has_transforms and has_separate_transform:
raise ValueError("Only transforms or transform/target_transform can be passed as argument")
# for backwards-compatibility
self.transform = transform
self.target_transform = target_transform
if has_separate_transform:
transforms = StandardTransform(transform, target_transform)
self.transforms = transforms
def __getitem__(self, index: int) -> Any:
"""
Args:
index (int): Index
Returns:
(Any): Sample and meta data, optionally transformed by the respective transforms.
"""
raise NotImplementedError
def __len__(self) -> int:
raise NotImplementedError
def __repr__(self) -> str:
head = "Dataset " + self.__class__.__name__
body = [f"Number of datapoints: {self.__len__()}"]
if self.root is not None:
body.append(f"Root location: {self.root}")
body += self.extra_repr().splitlines()
if hasattr(self, "transforms") and self.transforms is not None:
body += [repr(self.transforms)]
lines = [head] + [" " * self._repr_indent + line for line in body]
return "\n".join(lines)
def _format_transform_repr(self, transform: Callable, head: str) -> List[str]:
lines = transform.__repr__().splitlines()
return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]]
def extra_repr(self) -> str:
return ""
class StandardTransform:
def __init__(self, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None) -> None:
self.transform = transform
self.target_transform = target_transform
def __call__(self, input: Any, target: Any) -> Tuple[Any, Any]:
if self.transform is not None:
input = self.transform(input)
if self.target_transform is not None:
target = self.target_transform(target)
return input, target
def _format_transform_repr(self, transform: Callable, head: str) -> List[str]:
lines = transform.__repr__().splitlines()
return [f"{head}{lines[0]}"] + ["{}{}".format(" " * len(head), line) for line in lines[1:]]
def __repr__(self) -> str:
body = [self.__class__.__name__]
if self.transform is not None:
body += self._format_transform_repr(self.transform, "Transform: ")
if self.target_transform is not None:
body += self._format_transform_repr(self.target_transform, "Target transform: ")
return "\n".join(body)

View File

@ -0,0 +1 @@
from jittor.transform import *

View File

@ -0,0 +1,582 @@
import collections
import math
import pathlib
import warnings
from itertools import repeat
from types import FunctionType
from typing import Any, BinaryIO, List, Optional, Tuple, Union
import numpy as np
import torch
from PIL import Image, ImageColor, ImageDraw, ImageFont
__all__ = [
"make_grid",
"save_image",
"draw_bounding_boxes",
"draw_segmentation_masks",
"draw_keypoints",
"flow_to_image",
]
@torch.no_grad()
def make_grid(
tensor: Union[torch.Tensor, List[torch.Tensor]],
nrow: int = 8,
padding: int = 2,
normalize: bool = False,
value_range: Optional[Tuple[int, int]] = None,
scale_each: bool = False,
pad_value: float = 0.0,
**kwargs,
) -> torch.Tensor:
"""
Make a grid of images.
Args:
tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
or a list of images all of the same size.
nrow (int, optional): Number of images displayed in each row of the grid.
The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
padding (int, optional): amount of padding. Default: ``2``.
normalize (bool, optional): If True, shift the image to the range (0, 1),
by the min and max values specified by ``value_range``. Default: ``False``.
value_range (tuple, optional): tuple (min, max) where min and max are numbers,
then these numbers are used to normalize the image. By default, min and max
are computed from the tensor.
range (tuple, optional):
.. warning::
This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``value_range``
instead.
scale_each (bool, optional): If ``True``, scale each image in the batch of
images separately rather than the (min, max) over all images. Default: ``False``.
pad_value (float, optional): Value for the padded pixels. Default: ``0``.
Returns:
grid (Tensor): the tensor containing grid of images.
"""
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(make_grid)
if not torch.is_tensor(tensor):
if isinstance(tensor, list):
for t in tensor:
if not torch.is_tensor(t):
raise TypeError(f"tensor or list of tensors expected, got a list containing {type(t)}")
else:
raise TypeError(f"tensor or list of tensors expected, got {type(tensor)}")
if "range" in kwargs.keys():
warnings.warn(
"The parameter 'range' is deprecated since 0.12 and will be removed in 0.14. "
"Please use 'value_range' instead."
)
value_range = kwargs["range"]
# if list of tensors, convert to a 4D mini-batch Tensor
if isinstance(tensor, list):
tensor = torch.stack(tensor, dim=0)
if tensor.dim() == 2: # single image H x W
tensor = tensor.unsqueeze(0)
if tensor.dim() == 3: # single image
if tensor.size(0) == 1: # if single-channel, convert to 3-channel
tensor = torch.cat((tensor, tensor, tensor), 0)
tensor = tensor.unsqueeze(0)
if tensor.dim() == 4 and tensor.size(1) == 1: # single-channel images
tensor = torch.cat((tensor, tensor, tensor), 1)
if normalize is True:
tensor = tensor.clone() # avoid modifying tensor in-place
if value_range is not None and not isinstance(value_range, tuple):
raise TypeError("value_range has to be a tuple (min, max) if specified. min and max are numbers")
def norm_ip(img, low, high):
img.clamp_(min=low, max=high)
img.sub_(low).div_(max(high - low, 1e-5))
def norm_range(t, value_range):
if value_range is not None:
norm_ip(t, value_range[0], value_range[1])
else:
norm_ip(t, float(t.min()), float(t.max()))
if scale_each is True:
for t in tensor: # loop over mini-batch dimension
norm_range(t, value_range)
else:
norm_range(tensor, value_range)
if not isinstance(tensor, torch.Tensor):
raise TypeError("tensor should be of type torch.Tensor")
if tensor.size(0) == 1:
return tensor.squeeze(0)
# make the mini-batch of images into a grid
nmaps = tensor.size(0)
xmaps = min(nrow, nmaps)
ymaps = int(math.ceil(float(nmaps) / xmaps))
height, width = int(tensor.size(2) + padding), int(tensor.size(3) + padding)
num_channels = tensor.size(1)
grid = tensor.new_full((num_channels, height * ymaps + padding, width * xmaps + padding), pad_value)
k = 0
for y in range(ymaps):
for x in range(xmaps):
if k >= nmaps:
break
# Tensor.copy_() is a valid method but seems to be missing from the stubs
# https://pytorch.org/docs/stable/tensors.html#torch.Tensor.copy_
grid.narrow(1, y * height + padding, height - padding).narrow( # type: ignore[attr-defined]
2, x * width + padding, width - padding
).copy_(tensor[k])
k = k + 1
return grid
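# Example usage (a sketch): tile a batch of 16 images into a 2-row grid.
#   batch = torch.rand(16, 3, 32, 32)
#   grid = make_grid(batch, nrow=8, padding=2, normalize=True)
#   # grid.shape == (3, 2*32 + 3*2, 8*32 + 9*2) == (3, 70, 274)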
@torch.no_grad()
def save_image(
tensor: Union[torch.Tensor, List[torch.Tensor]],
fp: Union[str, pathlib.Path, BinaryIO],
format: Optional[str] = None,
**kwargs,
) -> None:
"""
Save a given Tensor into an image file.
Args:
tensor (Tensor or list): Image to be saved. If given a mini-batch tensor,
saves the tensor as a grid of images by calling ``make_grid``.
fp (string or file object): A filename or a file object
format (Optional): If omitted, the format to use is determined from the filename extension.
If a file object was used instead of a filename, this parameter should always be used.
**kwargs: Other arguments are documented in ``make_grid``.
"""
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(save_image)
grid = make_grid(tensor, **kwargs)
# Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
im = Image.fromarray(ndarr)
im.save(fp, format=format)
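# Example usage (a sketch; the output path is hypothetical):
#   save_image(torch.rand(8, 3, 64, 64), "samples.png", nrow=4)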
@torch.no_grad()
def draw_bounding_boxes(
image: torch.Tensor,
boxes: torch.Tensor,
labels: Optional[List[str]] = None,
colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
fill: Optional[bool] = False,
width: int = 1,
font: Optional[str] = None,
font_size: Optional[int] = None,
) -> torch.Tensor:
"""
Draws bounding boxes on given image.
The values of the input image should be uint8 between 0 and 255.
If fill is True, the resulting Tensor should be saved as a PNG image.
Args:
image (Tensor): Tensor of shape (C x H x W) and dtype uint8.
boxes (Tensor): Tensor of size (N, 4) containing bounding boxes in (xmin, ymin, xmax, ymax) format. Note that
the boxes are absolute coordinates with respect to the image. In other words: `0 <= xmin < xmax < W` and
`0 <= ymin < ymax < H`.
labels (List[str]): List containing the labels of bounding boxes.
colors (color or list of colors, optional): List containing the colors
of the boxes or single color for all boxes. The color can be represented as
PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
By default, random colors are generated for boxes.
fill (bool): If `True` fills the bounding box with specified color.
width (int): Width of bounding box.
font (str): A filename containing a TrueType font. If the file is not found in this filename, the loader may
also search in other directories, such as the `fonts/` directory on Windows or `/Library/Fonts/`,
`/System/Library/Fonts/` and `~/Library/Fonts/` on macOS.
font_size (int): The requested font size in points.
Returns:
img (Tensor[C, H, W]): Image Tensor of dtype uint8 with bounding boxes plotted.
"""
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(draw_bounding_boxes)
if not isinstance(image, torch.Tensor):
raise TypeError(f"Tensor expected, got {type(image)}")
elif image.dtype != torch.uint8:
raise ValueError(f"Tensor uint8 expected, got {image.dtype}")
elif image.dim() != 3:
raise ValueError("Pass individual images, not batches")
elif image.size(0) not in {1, 3}:
raise ValueError("Only grayscale and RGB images are supported")
elif (boxes[:, 0] > boxes[:, 2]).any() or (boxes[:, 1] > boxes[:, 3]).any():
raise ValueError(
"Boxes need to be in (xmin, ymin, xmax, ymax) format. Use torchvision.ops.box_convert to convert them"
)
num_boxes = boxes.shape[0]
if num_boxes == 0:
warnings.warn("boxes doesn't contain any box. No box was drawn")
return image
if labels is None:
labels: Union[List[str], List[None]] = [None] * num_boxes # type: ignore[no-redef]
elif len(labels) != num_boxes:
raise ValueError(
f"Number of boxes ({num_boxes}) and labels ({len(labels)}) mismatch. Please specify labels for each box."
)
if colors is None:
colors = _generate_color_palette(num_boxes)
elif isinstance(colors, list):
if len(colors) < num_boxes:
raise ValueError(f"Number of colors ({len(colors)}) is less than number of boxes ({num_boxes}). ")
else: # colors specifies a single color for all boxes
colors = [colors] * num_boxes
colors = [(ImageColor.getrgb(color) if isinstance(color, str) else color) for color in colors]
if font is None:
if font_size is not None:
warnings.warn("Argument 'font_size' will be ignored since 'font' is not set.")
txt_font = ImageFont.load_default()
else:
txt_font = ImageFont.truetype(font=font, size=font_size or 10)
# Handle Grayscale images
if image.size(0) == 1:
image = torch.tile(image, (3, 1, 1))
ndarr = image.permute(1, 2, 0).cpu().numpy()
img_to_draw = Image.fromarray(ndarr)
img_boxes = boxes.to(torch.int64).tolist()
if fill:
draw = ImageDraw.Draw(img_to_draw, "RGBA")
else:
draw = ImageDraw.Draw(img_to_draw)
for bbox, color, label in zip(img_boxes, colors, labels): # type: ignore[arg-type]
if fill:
fill_color = color + (100,)
draw.rectangle(bbox, width=width, outline=color, fill=fill_color)
else:
draw.rectangle(bbox, width=width, outline=color)
if label is not None:
margin = width + 1
draw.text((bbox[0] + margin, bbox[1] + margin), label, fill=color, font=txt_font)
return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8)
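# Example usage (a sketch; all tensors are hypothetical):
#   img = torch.randint(0, 256, (3, 100, 100), dtype=torch.uint8)
#   boxes = torch.tensor([[10, 10, 50, 50], [20, 60, 70, 90]], dtype=torch.float)
#   out = draw_bounding_boxes(img, boxes, labels=["cat", "dog"], colors="red", width=2)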
@torch.no_grad()
def draw_segmentation_masks(
image: torch.Tensor,
masks: torch.Tensor,
alpha: float = 0.8,
colors: Optional[Union[List[Union[str, Tuple[int, int, int]]], str, Tuple[int, int, int]]] = None,
) -> torch.Tensor:
"""
Draws segmentation masks on given RGB image.
The values of the input image should be uint8 between 0 and 255.
Args:
image (Tensor): Tensor of shape (3, H, W) and dtype uint8.
masks (Tensor): Tensor of shape (num_masks, H, W) or (H, W) and dtype bool.
alpha (float): Float number between 0 and 1 denoting the transparency of the masks.
0 means full transparency, 1 means no transparency.
colors (color or list of colors, optional): List containing the colors
of the masks or single color for all masks. The color can be represented as
PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
By default, random colors are generated for each mask.
Returns:
img (Tensor[C, H, W]): Image Tensor, with segmentation masks drawn on top.
"""
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(draw_segmentation_masks)
if not isinstance(image, torch.Tensor):
raise TypeError(f"The image must be a tensor, got {type(image)}")
elif image.dtype != torch.uint8:
raise ValueError(f"The image dtype must be uint8, got {image.dtype}")
elif image.dim() != 3:
raise ValueError("Pass individual images, not batches")
elif image.size()[0] != 3:
raise ValueError("Pass an RGB image. Other Image formats are not supported")
if masks.ndim == 2:
masks = masks[None, :, :]
if masks.ndim != 3:
raise ValueError("masks must be of shape (H, W) or (batch_size, H, W)")
if masks.dtype != torch.bool:
raise ValueError(f"The masks must be of dtype bool. Got {masks.dtype}")
if masks.shape[-2:] != image.shape[-2:]:
raise ValueError("The image and the masks must have the same height and width")
num_masks = masks.size()[0]
if colors is not None and num_masks > len(colors):
raise ValueError(f"There are more masks ({num_masks}) than colors ({len(colors)})")
if num_masks == 0:
warnings.warn("masks doesn't contain any mask. No mask was drawn")
return image
if colors is None:
colors = _generate_color_palette(num_masks)
if not isinstance(colors, list):
colors = [colors]
if not isinstance(colors[0], (tuple, str)):
raise ValueError("colors must be a tuple or a string, or a list thereof")
if isinstance(colors[0], tuple) and len(colors[0]) != 3:
raise ValueError("It seems that you passed a tuple of colors instead of a list of colors")
out_dtype = torch.uint8
colors_ = []
for color in colors:
if isinstance(color, str):
color = ImageColor.getrgb(color)
colors_.append(torch.tensor(color, dtype=out_dtype))
img_to_draw = image.detach().clone()
# TODO: There might be a way to vectorize this
for mask, color in zip(masks, colors_):
img_to_draw[:, mask] = color[:, None]
out = image * (1 - alpha) + img_to_draw * alpha
return out.to(out_dtype)
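# Example usage (a sketch, reusing a hypothetical uint8 `img` of shape (3, 100, 100)):
#   masks = torch.zeros(2, 100, 100, dtype=torch.bool)
#   masks[0, :50] = True
#   out = draw_segmentation_masks(img, masks, alpha=0.6, colors=["blue", "green"])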
@torch.no_grad()
def draw_keypoints(
image: torch.Tensor,
keypoints: torch.Tensor,
connectivity: Optional[List[Tuple[int, int]]] = None,
colors: Optional[Union[str, Tuple[int, int, int]]] = None,
radius: int = 2,
width: int = 3,
) -> torch.Tensor:
"""
Draws Keypoints on given RGB image.
The values of the input image should be uint8 between 0 and 255.
Args:
image (Tensor): Tensor of shape (3, H, W) and dtype uint8.
keypoints (Tensor): Tensor of shape (num_instances, K, 2), giving the K keypoint locations for each of the
num_instances instances, in the format [x, y].
connectivity (List[Tuple[int, int]]): A list of tuples, where
each tuple contains a pair of keypoints to be connected.
colors (str, Tuple): The color can be represented as
PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
radius (int): Integer denoting radius of keypoint.
width (int): Integer denoting width of line connecting keypoints.
Returns:
img (Tensor[C, H, W]): Image Tensor of dtype uint8 with keypoints drawn.
"""
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(draw_keypoints)
if not isinstance(image, torch.Tensor):
raise TypeError(f"The image must be a tensor, got {type(image)}")
elif image.dtype != torch.uint8:
raise ValueError(f"The image dtype must be uint8, got {image.dtype}")
elif image.dim() != 3:
raise ValueError("Pass individual images, not batches")
elif image.size()[0] != 3:
raise ValueError("Pass an RGB image. Other Image formats are not supported")
if keypoints.ndim != 3:
raise ValueError("keypoints must be of shape (num_instances, K, 2)")
ndarr = image.permute(1, 2, 0).cpu().numpy()
img_to_draw = Image.fromarray(ndarr)
draw = ImageDraw.Draw(img_to_draw)
img_kpts = keypoints.to(torch.int64).tolist()
for inst_id, kpt_inst in enumerate(img_kpts):
for kpt_id, kpt in enumerate(kpt_inst):
x1 = kpt[0] - radius
x2 = kpt[0] + radius
y1 = kpt[1] - radius
y2 = kpt[1] + radius
draw.ellipse([x1, y1, x2, y2], fill=colors, outline=None, width=0)
if connectivity:
for connection in connectivity:
start_pt_x = kpt_inst[connection[0]][0]
start_pt_y = kpt_inst[connection[0]][1]
end_pt_x = kpt_inst[connection[1]][0]
end_pt_y = kpt_inst[connection[1]][1]
draw.line(
((start_pt_x, start_pt_y), (end_pt_x, end_pt_y)),
width=width,
)
return torch.from_numpy(np.array(img_to_draw)).permute(2, 0, 1).to(dtype=torch.uint8)
# Flow visualization code adapted from https://github.com/tomrunia/OpticalFlow_Visualization
@torch.no_grad()
def flow_to_image(flow: torch.Tensor) -> torch.Tensor:
"""
Converts a flow to an RGB image.
Args:
flow (Tensor): Flow of shape (N, 2, H, W) or (2, H, W) and dtype torch.float.
Returns:
img (Tensor): Image Tensor of dtype uint8 where each color corresponds
to a given flow direction. Shape is (N, 3, H, W) or (3, H, W) depending on the input.
"""
if flow.dtype != torch.float:
raise ValueError(f"Flow should be of dtype torch.float, got {flow.dtype}.")
orig_shape = flow.shape
if flow.ndim == 3:
flow = flow[None] # Add batch dim
if flow.ndim != 4 or flow.shape[1] != 2:
raise ValueError(f"Input flow should have shape (2, H, W) or (N, 2, H, W), got {orig_shape}.")
max_norm = torch.sum(flow**2, dim=1).sqrt().max()
epsilon = torch.finfo(flow.dtype).eps
normalized_flow = flow / (max_norm + epsilon)
img = _normalized_flow_to_image(normalized_flow)
if len(orig_shape) == 3:
img = img[0] # Remove batch dim
return img
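# Example usage (a sketch):
#   flow = torch.randn(2, 64, 64)   # a single (2, H, W) flow field
#   rgb = flow_to_image(flow)       # uint8 image of shape (3, 64, 64)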
@torch.no_grad()
def _normalized_flow_to_image(normalized_flow: torch.Tensor) -> torch.Tensor:
"""
Converts a batch of normalized flow to an RGB image.
Args:
normalized_flow (torch.Tensor): Normalized flow tensor of shape (N, 2, H, W)
Returns:
img (Tensor(N, 3, H, W)): Flow visualization image of dtype uint8.
"""
N, _, H, W = normalized_flow.shape
device = normalized_flow.device
flow_image = torch.zeros((N, 3, H, W), dtype=torch.uint8, device=device)
colorwheel = _make_colorwheel().to(device) # shape [55x3]
num_cols = colorwheel.shape[0]
norm = torch.sum(normalized_flow**2, dim=1).sqrt()
a = torch.atan2(-normalized_flow[:, 1, :, :], -normalized_flow[:, 0, :, :]) / torch.pi
fk = (a + 1) / 2 * (num_cols - 1)
k0 = torch.floor(fk).to(torch.long)
k1 = k0 + 1
k1[k1 == num_cols] = 0
f = fk - k0
for c in range(colorwheel.shape[1]):
tmp = colorwheel[:, c]
col0 = tmp[k0] / 255.0
col1 = tmp[k1] / 255.0
col = (1 - f) * col0 + f * col1
col = 1 - norm * (1 - col)
flow_image[:, c, :, :] = torch.floor(255 * col)
return flow_image
def _make_colorwheel() -> torch.Tensor:
"""
Generates a color wheel for optical flow visualization as presented in:
Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf.
Returns:
colorwheel (Tensor[55, 3]): Colorwheel Tensor.
"""
RY = 15
YG = 6
GC = 4
CB = 11
BM = 13
MR = 6
ncols = RY + YG + GC + CB + BM + MR
colorwheel = torch.zeros((ncols, 3))
col = 0
# RY
colorwheel[0:RY, 0] = 255
colorwheel[0:RY, 1] = torch.floor(255 * torch.arange(0, RY) / RY)
col = col + RY
# YG
colorwheel[col : col + YG, 0] = 255 - torch.floor(255 * torch.arange(0, YG) / YG)
colorwheel[col : col + YG, 1] = 255
col = col + YG
# GC
colorwheel[col : col + GC, 1] = 255
colorwheel[col : col + GC, 2] = torch.floor(255 * torch.arange(0, GC) / GC)
col = col + GC
# CB
colorwheel[col : col + CB, 1] = 255 - torch.floor(255 * torch.arange(CB) / CB)
colorwheel[col : col + CB, 2] = 255
col = col + CB
# BM
colorwheel[col : col + BM, 2] = 255
colorwheel[col : col + BM, 0] = torch.floor(255 * torch.arange(0, BM) / BM)
col = col + BM
# MR
colorwheel[col : col + MR, 2] = 255 - torch.floor(255 * torch.arange(MR) / MR)
colorwheel[col : col + MR, 0] = 255
return colorwheel
def _generate_color_palette(num_objects: int):
palette = torch.tensor([2**25 - 1, 2**15 - 1, 2**21 - 1])
return [tuple((i * palette) % 255) for i in range(num_objects)]
def _log_api_usage_once(obj: Any) -> None:
"""
Logs API usage (module and name) within an organization.
In a large ecosystem, it's often useful to track PyTorch and
TorchVision API usage. This API provides functionality similar to the
logging module in the Python stdlib. It can be used for debugging purposes
to log which methods are used; by default it is inactive, unless the user
manually subscribes a logger via the `SetAPIUsageLogger method <https://github.com/pytorch/pytorch/blob/eb3b9fe719b21fae13c7a7cf3253f970290a573e/c10/util/Logging.cpp#L114>`_.
Please note it is triggered only once for the same API call within a process.
It does not collect any data from open-source users since it is a no-op by default.
For more information, please refer to
* PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging;
* Logging policy: https://github.com/pytorch/vision/issues/5052;
Args:
obj (class instance or method): an object to extract info from.
"""
pass
def _make_ntuple(x: Any, n: int) -> Tuple[Any, ...]:
"""
Make an n-tuple from input x. If x is an iterable, we simply convert it to a tuple.
Otherwise, we make a tuple of length n with every element equal to x.
reference: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/utils.py#L8
Args:
x (Any): input value
n (int): length of the resulting tuple
"""
if isinstance(x, collections.abc.Iterable):
return tuple(x)
return tuple(repeat(x, n))
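# Example (a sketch):
#   _make_ntuple(3, 2)      -> (3, 3)
#   _make_ntuple((1, 2), 2) -> (1, 2)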

View File

@ -0,0 +1,715 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers: Dun Liang <randonlang@gmail.com>.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import os, sys, shutil
import platform
from .compiler import *
from jittor_utils import run_cmd, get_version, get_int_version
from jittor_utils.misc import download_url_to_local
import jittor_utils as jit_utils
def search_file(dirs, name, prefer_version=()):
if os.name == 'nt':
if name.startswith("lib"):
name = name[3:].replace(".so", "64*.dll")
for d in dirs:
fname = os.path.join(d, name)
if os.name == 'nt':
lname = os.path.join(d, name)
names = glob.glob(lname)
if len(names):
return names[0]
continue
prefer_version = tuple( str(p) for p in prefer_version )
for i in range(len(prefer_version),-1,-1):
vname = ".".join((fname,)+prefer_version[:i])
if os.path.isfile(vname):
LOG.v(f"found {vname}")
return vname
LOG.f(f"file {name} not found in {dirs}")
def install_mkl(root_folder):
# origin url is
# url = "https://github.com/intel/mkl-dnn/releases/download/v1.0.2/mkldnn_lnx_1.0.2_cpu_gomp.tgz"
import platform
url = None
if platform.system()=="Linux":
if platform.machine()=='x86_64':
filename = "dnnl_lnx_2.2.0_cpu_gomp.tgz"
md5 = "35bbbdf550a9d8ad54db798e372000f6"
elif platform.machine()=='aarch64':
filename = "dnnl_lnx_2.2.0_cpu_gomp_aarch64.tgz"
md5 = "72cf9b0b8fd6c3c786d35a9daaee22b8"
else:
raise RuntimeError(f"platform.machine()=={platform.machine()} not support yet,"
" Please contact us on https://github.com/jittor/jittor ")
elif os.name == "nt":
# url = "https://github.com/oneapi-src/oneDNN/releases/download/v2.2/dnnl_win_2.2.0_cpu_iomp.zip"
# url = "https://github.com/oneapi-src/oneDNN/releases/download/v2.2/dnnl_win_2.2.0_cpu_vcomp.zip"
filename = "dnnl_win_2.2.0_cpu_vcomp.zip"
md5 = "fa12c693b2ec07700d174e1e99d60a7e"
elif platform.system() == "Darwin":
if platform.machine() == "arm64":
filename = "dnnl_mac_2.2.0_cpu_omp_arm64.tgz"
md5 = "d8fdf56d3cf618685d22d18f08119f88"
else:
filename = "dnnl_mac_2.2.0_cpu_omp_x86_64.tgz"
md5 = "6e2f065d6a589c82081536b684768fe6"
else:
raise RuntimeError(f"platform.machine()=={platform.machine()} not support yet,"
" Please contact us on https://github.com/jittor/jittor ")
if not url:
url = "https://cg.cs.tsinghua.edu.cn/jittor/assets/" + filename
fullname = os.path.join(root_folder, filename)
dirname = os.path.join(root_folder, filename.rsplit(".",1)[0])
if not (os.path.isfile(os.path.join(dirname, "lib", "libmkldnn.so")) or
os.path.isfile(os.path.join(dirname, "bin", "dnnl.dll")) or
os.path.isfile(os.path.join(dirname, "lib", "libmkldnn.dylib"))):
LOG.i("Downloading mkl...")
download_url_to_local(url, filename, root_folder, md5)
if fullname.endswith(".zip"):
import zipfile
with zipfile.ZipFile(fullname, "r") as f:
f.extractall(root_folder)
else:
import tarfile
with tarfile.open(fullname, "r") as tar:
tar.extractall(root_folder)
if os.name == 'nt':
# this env is used to execute the example/test below
bin_path = os.path.join(dirname, "bin")
sys.path.append(bin_path)
os.environ["PATH"] = os.environ.get("PATH", "") + ";" + bin_path
cmd = f"cd /d {dirname}/examples && {cc_path} {dirname}/examples/cnn_inference_f32.cpp -I{dirname}/include -Fe: {dirname}/examples/test.exe {fix_cl_flags(cc_flags).replace('-LD', '')} {dirname}/lib/mkldnn.lib"
assert 0 == os.system(cmd)
assert 0 == os.system(f"{dirname}/examples/test")
elif platform.system() == "Darwin":
assert 0 == os.system(f"cd {dirname}/examples && "
f"{cc_path} -std=c++14 cnn_inference_f32.cpp -Ofast -lmkldnn -I ../include -L ../lib -o test && DYLD_LIBRARY_PATH=../lib/ ./test")
else:
assert 0 == os.system(f"cd {dirname}/examples && "
f"{cc_path} -std=c++14 cnn_inference_f32.cpp -Ofast -lmkldnn -I ../include -L ../lib -o test && LD_LIBRARY_PATH=../lib/ ./test")
def setup_mkl():
global mkl_ops, use_mkl
use_mkl = os.environ.get("use_mkl", "1")=="1"
mkl_ops = None
if not use_mkl: return
# pytorch's mkl conflicts with jittor's mkl,
# yielding errors such as "free: invalid size" or
# "mmap error"
# importing pytorch(>1.8) first can fix this problem
# try:
# # jt.dirty_fix_pytorch_runtime_error()
# import torch
# from torch import nn
# except:
# torch = None
mkl_include_path = os.environ.get("mkl_include_path")
mkl_lib_path = os.environ.get("mkl_lib_path")
if mkl_lib_path is None or mkl_include_path is None:
LOG.v("setup mkl...")
# mkl_path = os.path.join(cache_path, "mkl")
# mkl_path decoupled from cc_path
mkl_path = os.path.join(jit_utils.home(), ".cache", "jittor", "mkl")
make_cache_dir(mkl_path)
install_mkl(mkl_path)
mkl_home = ""
for name in os.listdir(mkl_path):
if name.startswith("dnnl") and os.path.isdir(os.path.join(mkl_path, name)):
mkl_home = os.path.join(mkl_path, name)
break
assert mkl_home!=""
mkl_include_path = os.path.join(mkl_home, "include")
mkl_lib_path = os.path.join(mkl_home, "lib")
mkl_lib_name = os.path.join(mkl_lib_path, "libmkldnn.so")
extra_flags = f" -I\"{mkl_include_path}\" -L\"{mkl_lib_path}\" -lmkldnn "
if os.name == 'nt':
mkl_lib_name = os.path.join(mkl_home, 'bin', 'dnnl.dll')
mkl_bin_path = os.path.join(mkl_home, 'bin')
extra_flags = f" -I\"{mkl_include_path}\" -L\"{mkl_lib_path}\" -L\"{mkl_bin_path}\" -ldnnl "
elif platform.system() == "Darwin":
mkl_lib_name = os.path.join(mkl_lib_path, "libmkldnn.dylib")
assert os.path.isdir(mkl_include_path)
assert os.path.isdir(mkl_lib_path)
assert os.path.isfile(mkl_lib_name)
LOG.v(f"mkl_include_path: {mkl_include_path}")
LOG.v(f"mkl_lib_path: {mkl_lib_path}")
LOG.v(f"mkl_lib_name: {mkl_lib_name}")
# We do not link manually; linking happens in custom ops
# ctypes.CDLL(mkl_lib_name, dlopen_flags)
mkl_op_dir = os.path.join(jittor_path, "extern", "mkl", "ops")
mkl_op_files = [os.path.join(mkl_op_dir, name) for name in os.listdir(mkl_op_dir)]
mkl_ops = compile_custom_ops(mkl_op_files, extra_flags=extra_flags)
LOG.vv("Get mkl_ops: "+str(dir(mkl_ops)))
def install_cub(root_folder):
url = "https://github.com/NVIDIA/cub/archive/1.11.0.tar.gz"
url = "https://codeload.github.com/NVIDIA/cub/tar.gz/1.11.0"
filename = "cub-1.11.0.tgz"
md5 = "97196a885598e40592100e1caaf3d5ea"
fullname = os.path.join(root_folder, filename)
dirname = os.path.join(root_folder, filename.replace(".tgz",""))
if not os.path.isfile(os.path.join(dirname, "examples", "device/example_device_radix_sort.cu")):
LOG.i("Downloading cub...")
download_url_to_local(url, filename, root_folder, md5)
import tarfile
with tarfile.open(fullname, "r") as tar:
tar.extractall(root_folder)
# assert 0 == os.system(f"cd {dirname}/examples && "
# f"{nvcc_path} --cudart=shared -ccbin=\"{cc_path}\" device/example_device_radix_sort.cu -O2 -I.. -std=c++14 -o test")
# if core.get_device_count():
# assert 0 == os.system(f"cd {dirname}/examples && ./test")
return dirname
def setup_cub():
global cub_home
cub_home = ""
cub_path = os.path.join(jit_utils.home(), ".cache", "jittor", "cub")
cuda_version = int(get_version(nvcc_path)[1:-1].split('.')[0])
extra_flags = ""
if cuda_version < 11:
cub_home = install_cub(cub_path)
extra_flags = f"-I{cub_home}"
cub_home += "/"
setup_cuda_lib("cub", link=False, extra_flags=extra_flags)
def setup_cuda_extern():
if not has_cuda: return
def split(a): return a.replace(";",":").split(":")
check_ld_path = split(os.environ.get("LD_LIBRARY_PATH", "")) + \
split(os.environ.get("PATH", ""))
for cp in check_ld_path:
cp = cp.lower()
if "cuda" in cp and \
"lib" in cp and \
"jtcuda" not in cp:
LOG.w(f"CUDA related path found in LD_LIBRARY_PATH or PATH, "
"This path may cause jittor found the wrong libs, "
"please unset LD_LIBRARY_PATH and remove cuda lib path in Path. \n"
"Or you can let jittor install cuda for you: `python3.x -m jittor_utils.install_cuda`")
break
LOG.vv("setup cuda extern...")
cache_path_cuda = os.path.join(cache_path, "cuda")
cuda_include = os.path.join(jittor_path, "extern", "cuda", "inc")
make_cache_dir(cache_path_cuda)
cuda_extern_src = os.path.join(jittor_path, "extern", "cuda", "src")
cuda_extern_files = [os.path.join(cuda_extern_src, name)
for name in os.listdir(cuda_extern_src)]
so_name = os.path.join(cache_path_cuda, "libcuda_extern"+so)
compile(cc_path, cc_flags+f" -I\"{cuda_include}\" ", cuda_extern_files, so_name)
link_cuda_extern = f" -L\"{cache_path_cuda}\" -llibcuda_extern "
ctypes.CDLL(so_name, dlopen_flags)
try:
setup_cub()
except Exception as e:
import traceback
line = traceback.format_exc()
LOG.w(f"CUDA found but cub is not loaded:\n{line}")
libs = ["cublas", "cudnn", "curand", "cufft"]
# in cuda 11.4, module memory consumption:
# default context: 259 MB
# cublas: 340 MB
# cudnn: 340 MB
if int(os.environ.get("conv_opt", "0")):
libs = ["cublas", "curand"]
for lib_name in libs:
try:
setup_cuda_lib(lib_name, extra_flags=link_cuda_extern)
except Exception as e:
msg = f"CUDA found but {lib_name} is not loaded:\n"
if lib_name == "cudnn":
msg += """Develop version of CUDNN not found,
please refer to CUDA offical tar file installation:
https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-tar"""
if platform.machine() in ["x86_64", "AMD64"]:
msg += f"""
or you can let jittor install cuda and cudnn for you:
>>> python3.{sys.version_info.minor} -m jittor_utils.install_cuda
"""
LOG.f(msg)
def setup_cuda_lib(lib_name, link=True, extra_flags=""):
arch_key = "x86_64"
if platform.machine() not in ["x86_64", "AMD64"]:
arch_key = "aarch64"
globals()[lib_name+"_ops"] = None
globals()[lib_name] = None
if not has_cuda: return
LOG.v(f"setup {lib_name}...")
culib_path = os.path.join(cuda_lib, f"lib{lib_name}.so")
jt_cuda_include = os.path.join(jittor_path, "extern", "cuda", "inc")
jt_culib_include = os.path.join(jittor_path, "extern", "cuda", lib_name, "inc")
link_flags = ""
if link:
extra_include_path = os.path.abspath(os.path.join(cuda_include, "..", f"targets/{arch_key}-linux/include"))
extra_lib_path = os.path.abspath(os.path.join(cuda_lib, "..", f"targets/{arch_key}-linux/lib"))
cuda_include_name = search_file([cuda_include, extra_include_path, "/usr/include"], lib_name+".h")
# cuda11 prefers cudnn 8
nvcc_version = get_int_version(nvcc_path)
if has_corex:
nvcc_version = (10,2,89)
prefer_version = ()
if nvcc_version[0] == 11:
prefer_version = ("8",)
culib_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], f"lib{lib_name}.so", prefer_version)
if lib_name == "cublas" and nvcc_version[0] >= 10:
# manually link libcublasLt.so
try:
cublas_lt_lib_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], f"libcublasLt.so", nvcc_version)
ctypes.CDLL(cublas_lt_lib_path, dlopen_flags)
except:
# some aarch64 OSes, such as UOS with an FT2000 CPU,
# ship a CUDA 10 that doesn't have libcublasLt.so
pass
if lib_name == "cudnn":
# cudnn cannot find libcudnn_cnn_train.so.8, so we manually link it.
if nvcc_version >= (11,0,0):
libs = ["libcudnn_ops_infer.so", "libcudnn_ops_train.so", "libcudnn_cnn_infer.so", "libcudnn_cnn_train.so"]
for l in libs:
ex_cudnn_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], l, prefer_version)
ctypes.CDLL(ex_cudnn_path, dlopen_flags)
# dynamic link cuda library
# ctypes.CDLL(culib_path, dlopen_flags)
# link_flags = f"-l{lib_name} -L\"{cuda_lib}\""
link_flags = f"-l{lib_name} -L\"{os.path.dirname(culib_path)}\""
# print("link_flags", link_flags, culib_path)
# find all source files
culib_src_dir = os.path.join(jittor_path, "extern", "cuda", lib_name)
culib_src_files = []
for r, _, f in os.walk(culib_src_dir):
for fname in f:
culib_src_files.append(os.path.join(r, fname))
if len(culib_src_files) == 0:
return
# compile and get operators
culib = compile_custom_ops(culib_src_files, return_module=True,
extra_flags=f" -I\"{jt_cuda_include}\" -I\"{jt_culib_include}\" {link_flags} {extra_flags} ")
culib_ops = culib.ops
globals()[lib_name+"_ops"] = culib_ops
globals()[lib_name] = culib
LOG.vv(f"Get {lib_name}_ops: "+str(dir(culib_ops)))
def _setup_fake_cuda_lib(lib_name=None, link=True, extra_flags=""):
if lib_name is None:
lib_names = ["cudnn", "cublas", "curand", "cufft", "cub", "cutt", "cutlass"]
for lib_name in lib_names:
_setup_fake_cuda_lib(lib_name, link, extra_flags)
return
arch_key = "x86_64"
if platform.machine() not in ["x86_64", "AMD64"]:
arch_key = "aarch64"
globals()[lib_name+"_ops"] = None
globals()[lib_name] = None
LOG.v(f"setup {lib_name}...")
jt_cuda_include = os.path.join(jittor_path, "extern", "cuda", "inc")
jt_culib_include = os.path.join(jittor_path, "extern", "cuda", lib_name, "inc")
# find all source files
culib_src_dir = os.path.join(jittor_path, "extern", "cuda", lib_name, "ops")
culib_src_files = []
for r, _, f in os.walk(culib_src_dir):
for fname in f:
if fname.endswith("op.cc") or fname.endswith("op.h"):
culib_src_files.append(os.path.join(r, fname))
if len(culib_src_files) == 0:
return
# compile and get operators
culib = compile_custom_ops(culib_src_files, return_module=True,
extra_flags=f" -I\"{jt_cuda_include}\" -I\"{jt_culib_include}\" {extra_flags} ")
culib_ops = culib.ops
globals()[lib_name+"_ops"] = culib_ops
globals()[lib_name] = culib
LOG.vv(f"Get {lib_name}_ops: "+str(dir(culib_ops)))
if setup_fake_cuda_lib:
_setup_fake_cuda_lib()
def install_cutt(root_folder):
# Modified from: https://github.com/ap-hynninen/cutt
url = "https://codeload.github.com/Jittor/cutt/zip/v1.2"
filename = "cutt-1.2.zip"
fullname = os.path.join(root_folder, filename)
dirname = os.path.join(root_folder, filename.replace(".zip",""))
true_md5 = "14d0fd1132c8cd657dc3cf29ce4db931"
if os.path.exists(fullname):
from jittor_utils.misc import calculate_md5
md5 = calculate_md5(fullname)
if md5 != true_md5:
os.remove(fullname)
shutil.rmtree(dirname)
CUTT_PATH = os.environ.get("CUTT_PATH", "")
if not os.path.isfile(os.path.join(cache_path, "libcutt"+so)) or CUTT_PATH:
if CUTT_PATH:
dirname = CUTT_PATH
else:
LOG.i("Downloading cutt...")
download_url_to_local(url, filename, root_folder, true_md5)
import zipfile
zf = zipfile.ZipFile(fullname)
try:
zf.extractall(path=root_folder)
except RuntimeError as e:
print(e)
raise
zf.close()
LOG.i("installing cutt...")
# -Xptxas -dlcm=ca actually does not work
arch_flag = " -Xptxas -dlcm=ca "
if len(flags.cuda_archs):
arch_flag = f" -arch=compute_{min(flags.cuda_archs)} "
arch_flag += ''.join(map(lambda x:f' -code=sm_{x} ', flags.cuda_archs))
cutt_include = f" -I\"{dirname}/include\" -I\"{dirname}/src\" "
files = glob.glob(dirname+"/src/*.c*", recursive=True)
files2 = []
for f in files:
if f.endswith("cutt_bench.cpp") or \
f.endswith("cutt_test.cpp"):
continue
files2.append(f)
cutt_flags = cc_flags+opt_flags+cutt_include
compile(cc_path, cutt_flags, files2, cache_path+"/libcutt"+so, cuda_flags=arch_flag)
return dirname
def setup_cutt():
global cutt_ops, use_cutt
if not has_cuda:
use_cutt = False
return
use_cutt = os.environ.get("use_cutt", "1")=="1"
cutt_ops = None
if not use_cutt: return
cutt_include_path = os.environ.get("cutt_include_path")
cutt_lib_path = os.environ.get("cutt_lib_path")
if cutt_lib_path is None or cutt_include_path is None:
LOG.v("setup cutt...")
# cutt_path decoupled from cc_path
cutt_path = os.path.join(jit_utils.home(), ".cache", "jittor", "cutt")
make_cache_dir(cutt_path)
install_cutt(cutt_path)
cutt_home = os.path.join(cutt_path, "cutt-1.2")
cutt_include_path = os.path.join(cutt_home, "src")
cutt_lib_path = cache_path
cutt_lib_name = os.path.join(cutt_lib_path, "libcutt"+so)
assert os.path.isdir(cutt_include_path)
assert os.path.isdir(cutt_lib_path)
assert os.path.isfile(cutt_lib_name), cutt_lib_name
LOG.v(f"cutt_include_path: {cutt_include_path}")
LOG.v(f"cutt_lib_path: {cutt_lib_path}")
LOG.v(f"cutt_lib_name: {cutt_lib_name}")
# We do not link manually; linking happens in custom ops
ctypes.CDLL(cutt_lib_name, dlopen_flags)
cutt_op_dir = os.path.join(jittor_path, "extern", "cuda", "cutt", "ops")
cutt_op_files = [os.path.join(cutt_op_dir, name) for name in os.listdir(cutt_op_dir)]
cutt_ops = compile_custom_ops(cutt_op_files,
extra_flags=f" -I\"{cutt_include_path}\" -L\"{cutt_lib_path}\" -llibcutt ")
LOG.vv("Get cutt_ops: "+str(dir(cutt_ops)))
def install_cutlass(root_folder):
# Modified from: https://github.com/NVIDIA/cutlass
url = "https://cloud.tsinghua.edu.cn/f/171e49e5825549548bc4/?dl=1"
filename = "cutlass.zip"
fullname = os.path.join(root_folder, filename)
dirname = os.path.join(root_folder, filename.replace(".zip",""))
true_md5 = "999ecb7e217e40c497bc3d0ded6643f0"
if os.path.exists(fullname):
from jittor_utils.misc import calculate_md5
md5 = calculate_md5(fullname)
if md5 != true_md5:
os.remove(fullname)
shutil.rmtree(dirname)
CUTLASS_PATH = os.environ.get("CUTLASS_PATH", "")
if not os.path.isfile(os.path.join(jit_utils.home(), ".cache/jittor/cutlass/cutlass/include/cutlass/cutlass.h")) or CUTLASS_PATH:
if CUTLASS_PATH:
dirname = CUTLASS_PATH
else:
LOG.i("Downloading cutlass...")
download_url_to_local(url, filename, root_folder, true_md5)
import zipfile
zf = zipfile.ZipFile(fullname)
try:
zf.extractall(path=root_folder)
except RuntimeError as e:
print(e)
raise
zf.close()
# LOG.i("installing cutlass...")
# # -Xptxas -dlcm=ca actually not work
# arch_flag = " -Xptxas -dlcm=ca "
# if len(flags.cuda_archs):
# arch_flag = f" -arch=compute_{min(flags.cuda_archs)} "
# arch_flag += ''.join(map(lambda x:f' -code=sm_{x} ', flags.cuda_archs))
# cutlass_include = f" -I\"{dirname}/include\" -I\"{dirname}/src\" "
# files = glob.glob(dirname+"/src/*.c*", recursive=True)
# files2 = []
# for f in files:
# if f.endswith("cutlass_bench.cpp") or \
# f.endswith("cutlass_test.cpp"):
# continue
# files2.append(f)
# cutlass_flags = cc_flags+opt_flags+cutlass_include
# compile(cc_path, cutlass_flags, files2, cache_path+"/libcutlass"+so, cuda_flags=arch_flag)
return dirname
def setup_cutlass():
global cutlass_ops, use_cutlass
if not has_cuda:
use_cutlass = False
return
use_cutlass = os.environ.get("use_cutlass", "1")=="1"
cutlass_ops = None
if not use_cutlass: return
cutlass_include_path = os.environ.get("cutlass_include_path")
if cutlass_include_path is None:
LOG.v("setup cutlass...")
# cutlass_path decoupled from cc_path
cutlass_path = os.path.join(jit_utils.home(), ".cache", "jittor", "cutlass")
make_cache_dir(cutlass_path)
install_cutlass(cutlass_path)
def install_nccl(root_folder):
url = "https://github.com/NVIDIA/nccl/archive/v2.8.4-1.tar.gz"
url = "https://codeload.github.com/NVIDIA/nccl/tar.gz/v2.8.4-1"
filename = "nccl.tgz"
fullname = os.path.join(root_folder, filename)
dirname = os.path.join(root_folder, "nccl-2.8.4-1")
true_md5 = "900666558c5bc43e0a5e84045b88a06f"
if os.path.exists(fullname):
md5 = run_cmd('md5sum '+fullname).split()[0]
if md5 != true_md5:
os.remove(fullname)
if os.path.isdir(dirname):
shutil.rmtree(dirname)
if not os.path.isfile(os.path.join(dirname, "build", "lib", "libnccl.so")):
if not os.path.isfile(os.path.join(root_folder, filename)):
LOG.i("Downloading nccl...")
download_url_to_local(url, filename, root_folder, true_md5)
if core.get_device_count() == 0:
return
if not inside_mpi():
return
import tarfile
with tarfile.open(fullname, "r") as tar:
tar.extractall(root_folder)
LOG.i("installing nccl...")
arch_flag = ""
if len(flags.cuda_archs):
arch_flag = f" -arch=compute_{min(flags.cuda_archs)} "
arch_flag += ''.join(map(lambda x:f' -code=sm_{x} ', flags.cuda_archs))
run_cmd(f"CC=\"{cc_path}\" CXX=\"{cc_path}\" make -j8 src.build CUDA_HOME='{cuda_home}' NVCC_GENCODE='{arch_flag} --cudart=shared ' ", cwd=dirname)
return dirname
def setup_nccl():
global nccl, nccl_ops, use_nccl
use_nccl = os.environ.get("use_nccl", "1")=="1"
nccl = None
nccl_ops = None
if not has_cuda or not has_mpi:
use_nccl = False
return
if not use_nccl: return
nccl_include_path = os.environ.get("nccl_include_path")
nccl_lib_path = os.environ.get("nccl_lib_path")
if nccl_lib_path is None or nccl_include_path is None:
LOG.v("setup nccl...")
# nccl_path decoupled from cc_path
nccl_path = os.path.join(jit_utils.home(), ".cache", "jittor", "nccl")
make_cache_dir(nccl_path)
nccl_home = install_nccl(nccl_path)
if nccl_home is None: return
nccl_include_path = os.path.join(nccl_home, "build", "include")
nccl_lib_path = os.path.join(nccl_home, "build", "lib")
if not inside_mpi():
return
nccl_lib_name = os.path.join(nccl_lib_path, "libnccl.so")
assert os.path.isdir(nccl_include_path)
assert os.path.isdir(nccl_lib_path)
assert os.path.isfile(nccl_lib_name), nccl_lib_name
LOG.v(f"nccl_include_path: {nccl_include_path}")
LOG.v(f"nccl_lib_path: {nccl_lib_path}")
LOG.v(f"nccl_lib_name: {nccl_lib_name}")
# We do not link manually; linking happens in custom ops
ctypes.CDLL(nccl_lib_name, dlopen_flags)
nccl_src_dir = os.path.join(jittor_path, "extern", "cuda", "nccl")
nccl_src_files = []
for r, _, f in os.walk(nccl_src_dir):
for fname in f:
nccl_src_files.append(os.path.join(r, fname))
nccl = compile_custom_ops(nccl_src_files,
extra_flags=f" -I\"{nccl_include_path}\" {mpi_compile_flags} ",
return_module=True, dlopen_flags=os.RTLD_GLOBAL | os.RTLD_NOW,
gen_name_="jittor_nccl_core")
nccl_ops = nccl.ops
LOG.vv("Get nccl_ops: "+str(dir(nccl_ops)))
def manual_link(flags):
lib_dirs = []
libs = []
for f in flags.split():
if f.startswith("-l"):
libs.append(f[2:])
elif f.startswith("-L"):
lib_dirs.append(f[2:])
LOG.v("manual_link:", flags)
LOG.v("lib_dirs:", lib_dirs)
LOG.v("libs:", libs)
for lib in libs:
for d in lib_dirs:
libname = os.path.join(d, f"lib{lib}.so")
if os.path.isfile(libname):
LOG.v("link:", libname)
ctypes.CDLL(libname, dlopen_flags)
break
def inside_mpi():
return "OMPI_COMM_WORLD_SIZE" in os.environ
def setup_mpi():
global mpi_ops, mpi, use_mpi
global mpicc_path, has_mpi
use_mpi = os.environ.get("use_mpi", "1")=="1"
mpi_ops = None
mpi = None
has_mpi = False
if not use_mpi: return
mpicc_path = env_or_try_find('mpicc_path', 'mpicc')
if mpicc_path == "":
# LOG.i("mpicc not found, distribution disabled.")
use_mpi = False
else:
use_mpi = True
has_mpi = True
if not use_mpi:
return
global mpi_compile_flags, mpi_link_flags, mpi_flags
mpi_compile_flags = run_cmd(mpicc_path+" --showme:compile")
mpi_link_flags = run_cmd(mpicc_path+" --showme:link")
mpi_flags = mpi_compile_flags + " " + mpi_link_flags
LOG.v("mpi_flags: "+mpi_flags)
# find all source files
mpi_src_dir = os.path.join(jittor_path, "extern", "mpi")
mpi_src_files = []
for r, _, f in os.walk(mpi_src_dir):
for fname in f:
mpi_src_files.append(os.path.join(r, fname))
# add mpi compile flags for nccl
mpi_compile_flags += f" -I\"{os.path.join(mpi_src_dir, 'inc')}\" "
mpi_compile_flags = mpi_compile_flags.replace("-pthread", "")
mpi_version = get_version(mpicc_path)
if mpi_version.startswith("(1.") or mpi_version.startswith("(2."):
# mpi version 1.x needs to be linked like this
manual_link(mpi_flags)
# mpi(4.x) cannot use deepbind; it needs to
# share the 'environ' symbol.
mpi = compile_custom_ops(mpi_src_files,
extra_flags=f" {mpi_flags} ", return_module=True,
dlopen_flags=os.RTLD_GLOBAL | os.RTLD_NOW, gen_name_="jittor_mpi_core")
mpi_ops = mpi.ops
LOG.vv("Get mpi: "+str(mpi.__dict__.keys()))
LOG.vv("Get mpi_ops: "+str(mpi_ops.__dict__.keys()))
def wrapper(func):
def inner(self, *args, **kw):
return func(self, *args, **kw)
inner.__doc__ = func.__doc__
return inner
for k in mpi_ops.__dict__:
if not k.startswith("mpi_"): continue
if k == "mpi_test": continue
setattr(core.Var, k, wrapper(mpi_ops.__dict__[k]))
in_mpi = inside_mpi()
FIX_TORCH_ERROR = 0
if os.name != 'nt' and not in_mpi:
FIX_TORCH_ERROR = 1
if "FIX_TORCH_ERROR" in os.environ:
FIX_TORCH_ERROR = os.environ["FIX_TORCH_ERROR"] != "0"
if FIX_TORCH_ERROR:
try:
import torch
from jittor_utils import dirty_fix_pytorch_runtime_error
dirty_fix_pytorch_runtime_error()
except:
pass
cudnn = cublas = curand = cufft = None
setup_mpi()
rank = mpi.world_rank() if in_mpi else 0
world_size = mpi.world_size() if in_mpi else 1
setup_nccl()
setup_cutt()
setup_cutlass()
# try:
setup_mkl()
# except Exception as e:
# LOG.w("MKL install failed, msg:", e)
setup_cuda_extern()
# install backend extern library
for mod in jit_utils.backends:
if mod.install_extern():
break

1431
python/jittor/compiler.py Normal file

File diff suppressed because it is too large

274
python/jittor/contrib.py Normal file
View File

@ -0,0 +1,274 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Guowei Yang <471184555@qq.com>
# Guoye Yang <498731903@qq.com>
# Dun Liang <randonlang@gmail.com>.
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
import numpy as np
from jittor import pool
from collections.abc import Sequence
def argmax_pool(x, size, stride, padding=0):
if stride<=0:
raise RuntimeError(f"stride must be > 0, but got {stride}")
return pool.pool(x, size, 'maximum', padding, stride)
def concat(arr, dim):
'''Concat Operator can concat a list of jt Var at a specific dimension.
* [in] x: input var list for concat
* [in] dim: the dimension to concat along
* [out] out: concat result
Example::
>>> jt.concat([jt.array([[1],[2]]), jt.array([[2],[2]])], dim=1)
jt.Var([[1 2]
[2 2]], dtype=int32)
'''
# TODO: low performance when concatenating lots of vars
total_dim = 0
if dim < 0: dim += len(arr[0].shape)
for a in arr:
total_dim += a.shape[dim]
cdim = 0
s = None
indexes = [ f"i{i}" for i in range(len(a.shape)) ]
for a in arr:
shape = list(a.shape)
shape[dim] = total_dim
indexes[dim] = f"i{dim}-{cdim}"
b = a.reindex(shape, indexes)
# ugly fix for preventing large fused op
if len(arr)>=100:
b.stop_fuse()
if s is None:
s = b
else:
s += b
cdim += a.shape[dim]
return s
def check(bc):
bc = np.array(bc)
if ((bc != 1) * (bc != bc.max(0))).sum() > 0:
raise Exception(f"Shape not match.")
else:
return bc.max(0)
def slice_var_index(x, slices):
if not isinstance(slices, tuple):
slices = (slices,)
if isinstance(slices[0], jt.Var):
if len(slices) == 1 and slices[0].dtype == "bool":
return slice_var_index(x, tuple(slices[0].where()))
bc = []
ml = -1
for idx, s in enumerate(slices):
if isinstance(s, jt.Var):
shape = s.shape
elif isinstance(s, np.ndarray):
shape = list(s.shape)
elif isinstance(s, list):
shape = list(np.array(s).shape)
else:
continue
if len(shape) >= ml:
ml = len(shape)
bc.append(shape)
for idx, shape in enumerate(bc):
if len(shape) < ml:
shape = (ml - len(shape)) * [1] + shape
bc[idx] = shape
if len(bc) >= 1:
bc_shape = check(bc)
ss = []
for idx, s in enumerate(slices):
if isinstance(s, np.ndarray) or isinstance(s, list):
ss.append(jt.array(s).broadcast(bc_shape.tolist()))
elif isinstance(s, jt.Var):
ss.append(s.broadcast(bc_shape.tolist()))
else:
ss.append(s)
slices = ss
out_shape = []
out_index = []
shape = x.shape
cnt_list = 0
extras_idx = []
extras = []
has_ellipse = 0
ellipse_index = 0
for s,i in zip(slices,range(len(slices))):
if isinstance(s,type(...)):
has_ellipse+=1
ellipse_index = i
if has_ellipse>1:
raise Exception(f"There are more than one ...")
elif has_ellipse==1:
slices = list(slices)
del slices[ellipse_index]
while len(slices)<len(shape):
slices.insert(ellipse_index,slice(None))
for i in range(len(shape)):
if i>=len(slices):
s = slice(None)
else:
s = slices[i]
sp = shape[i]
j = len(out_shape)
if isinstance(s, int):
if s<0: s += sp
out_index.append(str(s))
elif isinstance(s, slice):
if s == slice(None):
out_shape.append(sp)
out_index.append(f"i{j}")
continue
start = 0 if s.start is None else s.start
stop = sp if s.stop is None else s.stop
step = 1 if s.step is None else s.step
if start<0: start += sp
if stop<0: stop += sp
if stop>sp+1: stop = sp
out_shape.append(1+int(max(0, (stop-start-1)//step)))
out_index.append(f"{start}+i{j}*{step}")
elif isinstance(s, jt.Var):
if cnt_list == 0:
for idx in range(len(bc_shape)):
extras_idx.append(f"i{len(out_shape) + idx}")
out_shape += bc_shape.tolist()
out_index.append(f"@e{cnt_list}("+ ",".join(extras_idx) + ")")
cnt_list += 1
extras.append(s)
else:
raise Exception(f"Not support slice {s}")
if len(out_shape)==0:
out_shape = [1]
# Stop fuse both input and output, prevent recompile
x.stop_fuse()
return (out_shape, out_index, 0, [], extras)
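# Example (a sketch): for x of shape [4, 5],
#   slice_var_index(x, (slice(1, 3), slice(None, None, 2)))
# yields out_shape [2, 3] and out_index like ["1+i0*1", "0+i1*2"],
# which are then fed to x.reindex by the callers below.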
def _slice_var_old(x, slices):
reindex_args = slice_var_index(x, slices)
x.stop_fuse()
return x.reindex(*reindex_args).stop_fuse()
def _setitem_old(x, slices, value):
reindex_args = slice_var_index(x, slices)
reindex_reduce_args = (x.shape, reindex_args[1]) + reindex_args[3:]
xslice = x.stop_fuse().reindex(*reindex_args).stop_fuse()
value = jt.broadcast(value, xslice)
value = value.cast(x.dtype)
one = jt.broadcast(1, xslice)
if not isinstance(reindex_args[0][0], jt.Var):
reindex_args = (x.shape,) + reindex_args[1:]
mask = one.reindex_reduce("add", *reindex_reduce_args)
data = value.reindex_reduce("add", *reindex_reduce_args)
# Stop fuse both input and output, prevent recompile
out = mask.ternary(data, x).stop_fuse()
x.assign(out)
return x
# PATCH
def getitem(x, slices):
if isinstance(slices, jt.Var) and slices.dtype == "bool":
return getitem(x, slices.where())
if isinstance(slices, tuple):
ss = []
for s in slices:
if isinstance(s, jt.Var) and s.dtype == "bool":
ss.extend(s.where())
else:
ss.append(s)
slices = tuple(ss)
return x.getitem(slices)
def setitem(x, slices, value):
if isinstance(slices, jt.Var) and slices.dtype == "bool":
if slices.shape == x.shape:
if isinstance(value, (int, float)):
value = jt.array(value).broadcast(x.shape)
return x.assign(slices.ternary(value, x))
elif isinstance(value, jt.Var) and value.shape == [1,]:
value = jt.broadcast(value, x.shape)
return x.assign(slices.ternary(value, x))
slices = slices.where()
elif isinstance(slices, tuple):
ss = []
for s in slices:
if isinstance(s, jt.Var) and s.dtype == "bool":
ss.extend(s.where())
else:
ss.append(s)
slices = tuple(ss)
return x.check_cascade_setitem(x.setitem(slices, value))
jt.Var.__getitem__ = jt.Var.slice_var = getitem
jt.Var.__setitem__ = setitem
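# A minimal usage sketch of the patched indexing above (boolean masks are
# routed through where(); a full-shape boolean setitem uses ternary):
#   x = jt.array([1, 2, 3, 4])
#   y = x[x > 2]       # -> [3, 4]
#   x[x > 2] = 0       # x becomes [1, 2, 0, 0]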
def _merge_dtypes(dtypes):
dtype = dtypes[0]
for i in range(1, len(dtypes)):
dtype = jt.binary_dtype_infer("add", dtype, dtypes[i])
return dtype
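# For example, _merge_dtypes(["int32", "float32"]) returns "float32":
# dtypes are promoted pairwise, as if the arrays were added together.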
@jt.flag_scope(amp_reg=4) # _custom_flag
def concat(arr, dim=0):
    '''Concat Operator can concat a list of jt Var along a specific dimension.
    * [in] arr: list of jt Vars to concat
    * [in] dim: dimension along which to concat
* return: concat result
Example::
jt.concat([jt.array([[1],[2]]), jt.array([[2],[2]])], dim=1)
# return jt.Var([[1,2],[2,2]],dtype=int32)
'''
if not isinstance(arr, Sequence):
raise TypeError("concat arr needs to be a tuple or list")
if len(arr) == 0:
raise ValueError("need at least one array to concat")
total_dim = 0
base_dim = len(arr[0].shape)
if dim < 0: dim += base_dim
if dim < 0 or dim >= base_dim:
raise IndexError(f"Dimension out of range (expected to be in range of [{-base_dim}, {base_dim-1}], but got {dim})")
dtypes = []
for a in arr:
if len(a.shape) != base_dim:
raise RuntimeError(f"get different number of dimensions of {base_dim} and {len(a.shape)}")
for i in range(base_dim):
if i != dim and a.shape[i] != arr[0].shape[i]:
raise RuntimeError(f"Sizes of vars must match except in dimension {dim}. Expected size {arr[0].shape[i]} but got size {a.shape[i]} for dimension number {i} in the list.")
total_dim += a.shape[dim]
dtypes.append(str(a.dtype))
cdim = 0
shape = list(a.shape)
shape[dim] = total_dim
s = jt.empty(shape, dtype = _merge_dtypes(dtypes))
slices = [slice(None)]*len(a.shape)
for a in arr:
if a.shape[dim] == 0:
continue
slices[dim] = slice(cdim, cdim+a.shape[dim])
# print(slices, type(a))
s = s.setitem(tuple(slices), a)
# s = jt.setitem(s, tuple(slices), a)
cdim += a.shape[dim]
return s
cat = concat

View File

@ -0,0 +1,6 @@
from .dataset import Dataset, ImageFolder, dataset_root, TensorDataset, VarDataset, DataLoader
from .mnist import MNIST
from .cifar import CIFAR10, CIFAR100
from .voc import VOC
from .sampler import *

View File

@ -0,0 +1,189 @@
import os
from jittor_utils.misc import download_and_extract_archive, check_integrity
from PIL import Image
import sys, pickle
import numpy as np
from jittor.dataset import Dataset, dataset_root
class CIFAR10(Dataset):
"""`CIFAR10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
Args:
root (string): Root directory of dataset where directory
``cifar-10-batches-py`` exists or will be saved to if download is set to True.
train (bool, optional): If True, creates dataset from training set, otherwise
creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``
target_transform (callable, optional): A function/transform that takes in the
target and transforms it.
download (bool, optional): If true, downloads the dataset from the internet and
puts it in root directory. If dataset is already downloaded, it is not
downloaded again.
Example::
from jittor.dataset.cifar import CIFAR10
a = CIFAR10()
a.set_attrs(batch_size=16)
for imgs, labels in a:
print(imgs.shape, labels.shape)
break
"""
base_folder = 'cifar-10-batches-py'
url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
filename = "cifar-10-python.tar.gz"
tgz_md5 = 'c58f30108f718f92721af3b95e74349a'
train_list = [
['data_batch_1', 'c99cafc152244af753f735de768cd75f'],
['data_batch_2', 'd4bba439e000b95fd0a9bffe97cbabec'],
['data_batch_3', '54ebc095f3ab1f0389bbae665268c751'],
['data_batch_4', '634d18415352ddfa80567beed471001a'],
['data_batch_5', '482c414d41f54cd18b22e5b47cb7c3cb'],
]
test_list = [
['test_batch', '40351d587109b95175f43aff81a1287e'],
]
meta = {
'filename': 'batches.meta',
'key': 'label_names',
'md5': '5ff9c542aee3614f3951f8cda6e48888',
}
def __init__(self, root=dataset_root+"/cifar_data/", train=True, transform=None, target_transform=None,
download=True):
super(CIFAR10, self).__init__()
self.root = root
self.transform=transform
self.target_transform=target_transform
self.train = train # training set or test set
if download:
self.download()
if not self._check_integrity():
raise RuntimeError('Dataset not found or corrupted.' +
' You can use download=True to download it')
if self.train:
downloaded_list = self.train_list
else:
downloaded_list = self.test_list
self.data = []
self.targets = []
        # now load the pickled numpy arrays
for file_name, checksum in downloaded_list:
file_path = os.path.join(self.root, self.base_folder, file_name)
with open(file_path, 'rb') as f:
if sys.version_info[0] == 2:
entry = pickle.load(f)
else:
entry = pickle.load(f, encoding='latin1')
self.data.append(entry['data'])
if 'labels' in entry:
self.targets.extend(entry['labels'])
else:
self.targets.extend(entry['fine_labels'])
self.data = np.vstack(self.data).reshape(-1, 3, 32, 32)
self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC
self._load_meta()
def _load_meta(self):
path = os.path.join(self.root, self.base_folder, self.meta['filename'])
if not check_integrity(path, self.meta['md5']):
raise RuntimeError('Dataset metadata file not found or corrupted.' +
' You can use download=True to download it')
with open(path, 'rb') as infile:
if sys.version_info[0] == 2:
data = pickle.load(infile)
else:
data = pickle.load(infile, encoding='latin1')
self.classes = data[self.meta['key']]
self.class_to_idx = {_class: i for i, _class in enumerate(self.classes)}
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (image, target) where target is index of the target class.
"""
img, target = self.data[index], self.targets[index]
# doing this so that it is consistent with all other datasets
# to return a PIL Image
img = Image.fromarray(img)
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
target = self.target_transform(target)
return img, target
def __len__(self):
return len(self.data)
def _check_integrity(self):
root = self.root
for fentry in (self.train_list + self.test_list):
filename, md5 = fentry[0], fentry[1]
fpath = os.path.join(root, self.base_folder, filename)
if not check_integrity(fpath, md5):
return False
return True
def download(self):
if self._check_integrity():
print('Files already downloaded and verified')
return
download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
def extra_repr(self):
return "Split: {}".format("Train" if self.train is True else "Test")
class CIFAR100(CIFAR10):
"""`CIFAR100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_ Dataset.
This is a subclass of the `CIFAR10` Dataset.
Example::
from jittor.dataset.cifar import CIFAR100
a = CIFAR100()
a.set_attrs(batch_size=16)
for imgs, labels in a:
print(imgs.shape, labels.shape)
break
"""
base_folder = 'cifar-100-python'
url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
filename = "cifar-100-python.tar.gz"
tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
train_list = [
['train', '16019d7e3df5f24257cddd939b257f8d'],
]
test_list = [
['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
]
meta = {
'filename': 'meta',
'key': 'fine_label_names',
'md5': '7973b15100ade9c7d40fb424638fde48',
}

View File

@ -0,0 +1,728 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Meng-Hao Guo <guomenghao1997@gmail.com>
# Dun Liang <randonlang@gmail.com>.
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import numpy as np
from urllib import request
import gzip
import pickle
import os
from jittor.dataset.utils import get_random_list, get_order_list, collate_batch, HookTimer
from collections.abc import Sequence, Mapping
import pathlib
from PIL import Image
import multiprocessing as mp
import signal
from jittor_utils import LOG
import jittor as jt
import time
import jittor_utils as jit_utils
dataset_root = os.path.join(jit_utils.home(), ".cache", "jittor", "dataset")
mp_log_v = os.environ.get("mp_log_v", 0)
mpi = jt.mpi
img_open_hook = HookTimer(Image, "open")
CHECK_MEMORY = int(os.environ.get("CHECK_MEMORY", "0"))
if os.name == "nt":
from multiprocessing import shared_memory
class RingBuffer:
def __init__(self, size, shm=None):
for i in range(100):
if (1<<i) >= size: break
size = 1<<i
init = False
if shm is None:
init = True
shm = shared_memory.SharedMemory(create=True, size=size+1024)
rb = jt.core.RingBuffer(size, id(shm.buf), init)
self.size = size
self.shm = shm
self.rb = rb
def __reduce__(self):
return (RingBuffer, (self.size, self.shm))
def __del__(self):
del self.rb
del self.shm
def push(self, obj): self.send(obj)
def pop(self): return self.recv()
def send(self, obj): self.rb.push(obj)
def recv(self): return self.rb.pop()
def clear(self): return self.rb.clear()
def stop(self): return self.rb.stop()
def is_stop(self): return self.rb.is_stop()
def total_pop(self): return self.rb.total_pop()
def total_push(self): return self.rb.total_push()
def __repr__(self): return repr(self.rb)
def keep_numpy_array(self, keep): self.rb.keep_numpy_array(keep)
jt.RingBuffer = RingBuffer
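# A minimal usage sketch of the ring buffer (the wrapper above is used on
# Windows; on other platforms jt.RingBuffer is assumed to provide the same
# push/pop interface):
#   rb = jt.RingBuffer(1024 * 1024)
#   rb.push({"img": np.ones(3)})
#   batch = rb.pop()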
class Worker:
def __init__(self, target, args, buffer_size, keep_numpy_array=False):
self.buffer = jt.RingBuffer(buffer_size)
self.buffer.keep_numpy_array(keep_numpy_array)
self.status = mp.Array('f', 5, lock=False)
self.p = mp.Process(target=target, args=args+(self.buffer,self.status))
self.p.daemon = True
self.p.start()
class Dataset(object):
'''
Base class for reading data.
Args::
[in] batch_size(int): batch size, default 16.
[in] shuffle(bool): shuffle at each epoch, default False.
        [in] drop_last(bool): if true, drop the last incomplete batch; otherwise the last batch may be smaller than batch_size, default False.
[in] num_workers(int): number of workers for loading data.
[in] buffer_size(int): buffer size for each worker in bytes, default(512MB).
[in] keep_numpy_array(bool): return numpy array rather than jittor array, default(False).
[in] endless(bool): will this dataset yield data forever, default(False).
Example::
class YourDataset(Dataset):
def __init__(self):
super().__init__()
self.set_attrs(total_len=1024)
def __getitem__(self, k):
return k, k*k
dataset = YourDataset().set_attrs(batch_size=256, shuffle=True)
for x, y in dataset:
......
'''
def __init__(self,
batch_size = 16,
shuffle = False,
drop_last = False,
num_workers = 0,
buffer_size = 512*1024*1024,
stop_grad = True,
keep_numpy_array = False,
endless = False):
super().__init__()
if os.environ.get("DISABLE_MULTIPROCESSING", '0') == '1':
num_workers = 0
self.total_len = None
self.batch_size = batch_size
self.shuffle = shuffle
self.drop_last = drop_last
self.num_workers = num_workers
self.buffer_size = buffer_size
self.stop_grad = stop_grad
self.keep_numpy_array = keep_numpy_array
self.endless = endless
self.epoch_id = 0
self.sampler = None
self._disable_workers = False
self._shuffle_rng = np.random.default_rng(1)
self.dataset = self
def __getitem__(self, index):
raise NotImplementedError
def __batch_len__(self):
assert self.total_len >= 0
assert self.batch_size > 0
if self.drop_last:
return self.total_len // self.batch_size
return (self.total_len-1) // self.batch_size + 1
def __len__(self):
return self.__batch_len__()
def set_attrs(self, **kw):
'''
        You can set dataset attributes using the set_attrs function, including total_len, batch_size, shuffle, drop_last, num_workers and buffer_size.
Example::
dataset = YourDataset().set_attrs(batch_size=256, shuffle=True)
Attrs:
* batch_size(int): batch size, default 16.
            * total_len(int): total length.
            * shuffle(bool): shuffle at each epoch, default False.
            * drop_last(bool): if true, drop the last incomplete batch; otherwise the last batch may be smaller than batch_size, default False.
* num_workers: number of workers for loading data
* buffer_size: buffer size for each worker in bytes, default(512MB).
* stop_grad: stop grad for data, default(True).
'''
for k,v in kw.items():
assert hasattr(self, k), k
setattr(self, k, v)
self.reset()
return self
def to_jittor(self, batch):
'''
        Convert batch data (e.g. np.ndarray, int, float) to jittor arrays.
'''
if self.keep_numpy_array: return batch
if isinstance(batch, jt.Var): return batch
to_jt = lambda x: jt.array(x).stop_grad() \
if self.stop_grad else jt.array(x)
if isinstance(batch, np.ndarray):
return to_jt(batch)
if isinstance(batch, dict):
new_batch = {}
for k,v in batch.items():
new_batch[k] = self.to_jittor(v)
return new_batch
if not isinstance(batch, (list, tuple)):
return batch
new_batch = []
for a in batch:
if isinstance(a, np.ndarray):
new_batch.append(to_jt(a))
else:
new_batch.append(self.to_jittor(a))
return new_batch
def collate_batch(self, batch):
'''
Puts each data field into a tensor with outer dimension batch size.
Args::
[in] batch(list): A list of variables, such as jt.var, Image.Image, np.ndarray, int, float, str and so on.
'''
return collate_batch(batch)
def terminate(self):
'''
        Terminate the multi-process workers used for reading data.
'''
if hasattr(self, "workers"):
for w in self.workers:
w.p.terminate()
def _worker_main(self, worker_id, buffer, status):
import jittor_utils
jt.flags.use_cuda_host_allocator = 0
jittor_utils.cc.init_subprocess()
jt.jt_init_subprocess()
seed = jt.get_seed()
wseed = (seed ^ (worker_id*1167)) ^ 1234
jt.set_global_seed(wseed)
        # parallel_op_compiler is still problematic:
        # it does not work on ubuntu 16.04, but works on ubuntu 20.04.
        # It seems the static state of the parallel compiler
        # is not correctly initialized in subprocesses.
jt.flags.use_parallel_op_compiler = 0
import time
try:
gid_obj = self.gid.get_obj()
gid_lock = self.gid.get_lock()
start = time.time()
while True:
# get id
with gid_lock:
while buffer.is_stop() or self.idqueue.is_stop() or \
gid_obj.value >= self.batch_len:
self.num_idle.value += 1
self.num_idle_c.notify()
self.gidc.wait()
self.num_idle.value -= 1
cid = gid_obj.value
batch_index_list = self.index_list_numpy[
cid*self.real_batch_size:
min(self.real_len, (cid+1)*self.real_batch_size)
].copy()
gid_obj.value += 1
with self.idqueue_lock:
self.idqueue.push(worker_id)
now = time.time()
other_time = now - start
start = now
# load and transform data
batch = []
if mp_log_v:
print(f"#{worker_id} {os.getpid()} load batch", cid*self.real_batch_size, min(self.real_len, (cid+1)*self.real_batch_size))
for i in batch_index_list:
batch.append(self[i])
batch = self.collate_batch(batch)
now = time.time()
data_time = now - start
start = now
# send data to main process
if mp_log_v:
print(f"#{worker_id} {os.getpid()} send", type(batch).__name__, [ type(b).__name__ for b in batch ], buffer)
try:
buffer.send(batch)
except:
if buffer.is_stop():
continue
raise
now = time.time()
send_time = now - start
start = now
status[0], status[1], status[2], status[3], status[4] = \
other_time, data_time, send_time, \
other_time + data_time + send_time, \
img_open_hook.duration
img_open_hook.duration = 0.0
except:
import traceback
line = traceback.format_exc()
print(line)
os.kill(os.getppid(), signal.SIGINT)
exit(0)
def display_worker_status(self):
        ''' Display dataset worker status. When dataset.num_workers > 0, it displays information like below:
.. code-block:: console
progress:479/5005
batch(s): 0.302 wait(s):0.000
recv(s): 0.069 to_jittor(s):0.021
recv_raw_call: 6720.0
last 10 workers: [6, 7, 3, 0, 2, 4, 7, 5, 6, 1]
ID wait(s) load(s) send(s) total
#0 0.000 1.340 2.026 3.366 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#1 0.000 1.451 3.607 5.058 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#2 0.000 1.278 1.235 2.513 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#3 0.000 1.426 1.927 3.353 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#4 0.000 1.452 1.074 2.526 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#5 0.000 1.422 3.204 4.625 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#6 0.000 1.445 1.953 3.398 Buffer(free=0.000% l=462425368 r=462425368 size=536870912)
#7 0.000 1.582 0.507 2.090 Buffer(free=0.000% l=308283552 r=308283552 size=536870912)
Meaning of the outputs:
* progress: dataset loading progress (current/total)
* batch: batch time, exclude data loading time
* wait: time of main proc wait worker proc
* recv: time of recv batch data
* to_jittor: time of batch data to jittor variable
* recv_raw_call: total number of underlying recv_raw called
* last 10 workers: id of last 10 workers which main proc load from.
* table meaning
* ID: worker id
* wait: worker wait time
* open: worker image open time
* load: worker load time
* buffer: ring buffer status, such as how many free space, left index, right index, total size(bytes).
Example::
from jittor.dataset import Dataset
class YourDataset(Dataset):
pass
dataset = YourDataset().set_attrs(num_workers=8)
for x, y in dataset:
dataset.display_worker_status()
'''
if not hasattr(self, "workers"):
return
msg = [""]
msg.append(f"progress:{self.batch_id}/{self.batch_len}")
msg.append(f"batch(s): {self.batch_time:.3f}\twait(s):{self.wait_time:.3f}")
msg.append(f"recv(s): {self.recv_time:.3f}\tto_jittor(s):{self.to_jittor_time:.3f}")
msg.append(f"last 10 workers: {self.last_ids}")
msg.append(f"ID\twait(s)\topen(s)\tload(s)\tsend(s)\ttotal(s)")
for i in range(self.num_workers):
w = self.workers[i]
s = w.status
msg.append(f"#{i}\t{s[0]:.3f}\t{s[4]:.3f}\t{s[1]:.3f}\t{s[2]:.3f}\t{s[3]:.3f}\t{w.buffer}")
LOG.i('\n'.join(msg))
def _stop_all_workers(self):
# stop workers
for w in self.workers:
w.buffer.stop()
self.idqueue.stop()
# wait until all workers idle
if self.num_idle.value < self.num_workers:
with self.gid.get_lock():
self.gid.get_obj().value = self.batch_len
if mp_log_v:
print("idle num", self.num_idle.value)
while self.num_idle.value < self.num_workers:
self.num_idle_c.wait()
if mp_log_v:
print("idle num", self.num_idle.value)
# clean workers' buffer
for w in self.workers:
w.buffer.clear()
self.idqueue.clear()
self.gid.value = 0
def _init_workers(self, index_list):
jt.migrate_all_to_cpu()
jt.clean()
jt.gc()
self.index_list = mp.Array('i', self.real_len, lock=False)
workers = []
# get worker id
self.idqueue = jt.RingBuffer(2048)
self.idqueue_lock = mp.Lock()
# global token index
self.gid = mp.Value('i', self.batch_len)
self.gid.value = 0
# global token index condition
self.gidc = mp.Condition(self.gid.get_lock())
# number of idle workers
self.num_idle = mp.Value('i', 0, lock=False)
# number of idle workers condition
self.num_idle_c = mp.Condition(self.gid.get_lock())
self.index_list_numpy = np.ndarray(dtype='int32', shape=self.real_len, buffer=self.index_list)
self.index_list_numpy[:] = index_list
for i in range(self.num_workers):
w = Worker(target=self._worker_main, args=(i,),
buffer_size=self.buffer_size,
keep_numpy_array=self.keep_numpy_array)
workers.append(w)
self.workers = workers
def reset(self):
if not hasattr(self, "workers"):
return
self._stop_all_workers()
self.terminate()
del self.index_list
del self.idqueue
del self.idqueue_lock
del self.gid
del self.gidc
del self.num_idle
del self.num_idle_c
del self.workers
del self.index_list_numpy
def __del__(self):
if mp_log_v:
print("dataset deleted")
try:
self.terminate()
except:
pass
def __deepcopy__(self, memo=None, _nil=[]):
from copy import deepcopy
if memo is None:
memo = {}
d = id(self)
y = memo.get(d, _nil)
if y is not _nil:
return y
obj = self.__class__.__new__(self.__class__)
memo[d] = id(obj)
        exclude_key = {"index_list", "idqueue", "idqueue_lock", "gid", "gidc", "num_idle", "num_idle_c", "workers", "index_list_numpy", "dataset"}
for k,v in self.__dict__.items():
if k in exclude_key: continue
obj.__setattr__(k, deepcopy(v))
obj.dataset = obj
return obj
def __real_len__(self):
if self.total_len is None:
self.total_len = len(self)
return self.total_len
def _get_index_list(self):
if self.total_len is None:
self.total_len = len(self)
# maybe rewrite by sampler
total_len = self.total_len
if self.sampler:
index_list = list(self.sampler.__iter__())
total_len = len(index_list)
# check is not batch sampler
if len(index_list):
assert not isinstance(index_list[0], (list,tuple)), "Batch sampler not support yet."
elif self.shuffle == False:
index_list = get_order_list(self.total_len)
else:
            # use _shuffle_rng to generate a shuffle list that is
            # consistent across multiple processes
            # index_list = get_random_list(self.total_len)
index_list = self._shuffle_rng.permutation(range(self.total_len))
# scatter index_list for all mpi process
# scatter rule:
# batch 1 batch 2
# [........] [........] ...
# 00011122 00011122
# if last batch is smaller than world_size
# pad to world_size
# last batch
# [.] -> [012]
if jt.in_mpi:
world_size = mpi.world_size()
world_rank = mpi.world_rank()
index_list = np.int32(index_list)
# TODO: mpi broadcast in subprocess has bug, fix it
# mpi.broadcast(index_list, 0)
assert self.batch_size >= world_size, \
f"Batch size({self.batch_size}) is smaller than MPI world_size({world_size})"
real_batch_size = (self.batch_size-1) // world_size + 1
if real_batch_size * world_size != self.batch_size:
LOG.w("Batch size is not divisible by MPI world size, "
"The distributed version may be different from "
"the single-process version.")
fix_batch = total_len // self.batch_size
last_batch = total_len - fix_batch * self.batch_size
fix_batch_l = index_list[0:fix_batch*self.batch_size] \
.reshape(-1,self.batch_size)
fix_batch_l = fix_batch_l[
:,real_batch_size*world_rank:real_batch_size*(world_rank+1)]
real_batch_size = fix_batch_l.shape[1]
fix_batch_l = fix_batch_l.flatten()
if not self.drop_last and last_batch > 0:
last_batch_l = index_list[-last_batch:]
real_last_batch = (last_batch-1)//world_size+1
l = real_last_batch * world_rank
r = l + real_last_batch
if r > last_batch:
r = last_batch
l = r-real_last_batch
index_list = np.concatenate([fix_batch_l, last_batch_l[l:r]])
else:
index_list = fix_batch_l
self.real_len = len(index_list)
self.real_batch_size = real_batch_size
# assert total_len // self.batch_size == \
# self.real_len // self.real_batch_size, f"Number of batches({total_len // self.batch_size}!={self.real_len // self.real_batch_size}) not match, total_len: {total_len}, batch_size: {self.batch_size}, real_len: {self.real_len}, real_batch_size: {self.real_batch_size}"
# print(f"Number of batches({total_len // self.batch_size}!={self.real_len // self.real_batch_size}) not match, total_len: {total_len}, batch_size: {self.batch_size}, real_len: {self.real_len}, real_batch_size: {self.real_batch_size}")
# print("mpi dataset init ")
else:
self.real_len = len(index_list)
self.real_batch_size = self.batch_size
if self.drop_last:
self.batch_len = self.real_len // self.real_batch_size
else:
self.batch_len = (self.real_len-1) // self.real_batch_size + 1
return index_list
def _epochs(self):
if self.endless:
while True:
yield
self.epoch_id += 1
else:
yield
def __iter__(self):
if self._disable_workers:
self.num_workers = 0
index_list = self._get_index_list()
if not hasattr(self, "workers") and self.num_workers:
self._init_workers(index_list)
self.last_ids = [-1] * 10
if self.num_workers:
start = time.time()
self.batch_time = 0
gid_obj = self.gid.get_obj()
gid_lock = self.gid.get_lock()
for _ in self._epochs():
with gid_lock:
if self.num_idle.value:
self.gidc.notify_all()
for i in range(self.batch_len):
if self.num_idle.value:
with gid_lock:
if self.num_idle.value and \
gid_obj.value >= self.batch_len:
index_list = self._get_index_list()
self.index_list_numpy[:] = index_list
gid_obj.value = 0
self.gidc.notify_all()
# get which worker has this batch
worker_id = self.idqueue.pop()
now = time.time()
self.wait_time = now - start
start = now
self.last_ids[i%10] = worker_id
self.batch_id = i
w = self.workers[worker_id]
if mp_log_v:
print(f"#{worker_id} {os.getpid()} recv buffer", w.buffer)
batch = w.buffer.recv()
now = time.time()
self.recv_time = now - start
start = now
if mp_log_v:
print(f"#{worker_id} {os.getpid()} recv", type(batch).__name__, [ type(b).__name__ for b in batch ])
batch = self.to_jittor(batch)
now = time.time()
self.to_jittor_time = now - start
start = now
yield batch
now = time.time()
self.batch_time = now - start
start = now
if CHECK_MEMORY and self.batch_id % CHECK_MEMORY == 0:
jt.display_memory_info()
else:
for _ in self._epochs():
self.batch_id = 0
batch_data = []
for idx in index_list:
batch_data.append(self[int(idx)])
if len(batch_data) == self.real_batch_size:
batch_data = self.collate_batch(batch_data)
                        batch_data = self.to_jittor(batch_data)
yield batch_data
self.batch_id += 1
if CHECK_MEMORY and self.batch_id % CHECK_MEMORY == 0:
jt.display_memory_info()
batch_data = []
# depend on drop_last
if not self.drop_last and len(batch_data) > 0:
batch_data = self.collate_batch(batch_data)
batch_data = self.to_jittor(batch_data)
self.batch_id += 1
yield batch_data
def DataLoader(dataset: Dataset, *args, **kargs):
""" Simple dataloader.
Example::
train_dir = './data/celebA_train'
train_dataset = ImageFolder(train_dir)
dataloader = jt.dataset.DataLoader(train_dataset, batch_size=8)
"""
return dataset.set_attrs(*args, **kargs)
class ImageFolder(Dataset):
"""
    An image classification dataset; loads images and labels from a directory::
* root/label1/img1.png
* root/label1/img2.png
* ...
* root/label2/img1.png
* root/label2/img2.png
* ...
Args::
[in] root(string): Root directory path.
Attributes::
* classes(list): List of the class names.
* class_to_idx(dict): map from class_name to class_index.
* imgs(list): List of (image_path, class_index) tuples
Example::
train_dir = './data/celebA_train'
train_loader = ImageFolder(train_dir).set_attrs(batch_size=batch_size, shuffle=True)
for batch_idx, (x_, target) in enumerate(train_loader):
...
"""
def __init__(self, root, transform=None):
super().__init__()
self.root = root
self.transform = transform
self.classes = sorted([d.name for d in os.scandir(root) if d.is_dir()])
self.class_to_idx = {v:k for k,v in enumerate(self.classes)}
self.imgs = []
image_exts = set(('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff'))
for i, class_name in enumerate(self.classes):
class_dir = os.path.join(root, class_name)
for dname, _, fnames in sorted(os.walk(class_dir, followlinks=True)):
for fname in sorted(fnames):
if os.path.splitext(fname)[-1].lower() in image_exts:
path = os.path.join(class_dir, fname)
self.imgs.append((path, i))
LOG.i(f"Found {len(self.classes)} classes and {len(self.imgs)} images.")
self.set_attrs(total_len=len(self.imgs))
def __getitem__(self, k):
with open(self.imgs[k][0], 'rb') as f:
img = Image.open(f).convert('RGB')
if self.transform:
img = self.transform(img)
return img, self.imgs[k][1]
class VarDataset(Dataset):
""" Dataset using Var directly, TensorDataset is alias of VarDataset, Example::
import jittor as jt
from jittor.dataset import VarDataset
x = jt.array([1,2,3])
y = jt.array([4,5,6])
z = jt.array([7,8,9])
dataset = VarDataset(x, y, z)
dataset.set_attrs(batch_size=1)
for a,b,c in dataset:
print(a,b,c)
# will print
# 1,4,7
# 2,5,8
# 3,6,9
"""
def __init__(self, *args):
super().__init__()
self.args = args
self._disable_workers = True
        assert len(args), "At least one argument is required"
        l = len(args[0])
        for a in args:
            assert l == len(a), "All arguments should have the same length"
self.set_attrs(total_len=l)
def __getitem__(self, idx):
return [ a[idx] for a in self.args ]
def collate_batch(self, batch):
b = collate_batch(batch)
for i in range(len(self.args)):
x = b[i]
if jt.is_var(self.args[i]) and self.args[i].ndim == 1:
x.assign(x.squeeze(-1))
return b
TensorDataset = VarDataset

View File

@ -0,0 +1,200 @@
# ***************************************************************
# Copyright(c) 2019
# Meng-Hao Guo <guomenghao1997@gmail.com>
# Dun Liang <randonlang@gmail.com>.
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import os
import string
import numpy as np
import gzip
from PIL import Image
# our lib jittor import
from jittor.dataset.dataset import Dataset, dataset_root
from jittor_utils.misc import ensure_dir, download_url_to_local
import jittor as jt
import jittor.transform as trans
class MNIST(Dataset):
'''
Jittor's own class for loading MNIST dataset.
Args::
[in] data_root(str): your data root.
        [in] train(bool): choose the train or validation split.
[in] download(bool): Download data automatically if download is True.
[in] batch_size(int): Data batch size.
[in] shuffle(bool): Shuffle data if true.
[in] transform(jittor.transform): transform data.
Example::
from jittor.dataset.mnist import MNIST
train_loader = MNIST(train=True).set_attrs(batch_size=16, shuffle=True)
for i, (imgs, target) in enumerate(train_loader):
...
'''
def __init__(self, data_root=dataset_root+"/mnist_data/",
train=True,
download=True,
batch_size = 16,
shuffle = False,
transform=None):
        # to test nets like ResNet, images are loaded with 3 channels (RGB), since those nets expect 3 input channels
super().__init__()
self.data_root = data_root
self.is_train = train
self.transform = transform
self.batch_size = batch_size
self.shuffle = shuffle
if download == True:
self.download_url()
filesname = [
"train-images-idx3-ubyte.gz",
"t10k-images-idx3-ubyte.gz",
"train-labels-idx1-ubyte.gz",
"t10k-labels-idx1-ubyte.gz"
]
self.mnist = {}
if self.is_train:
with gzip.open(data_root + filesname[0], 'rb') as f:
self.mnist["images"] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28, 28)
with gzip.open(data_root + filesname[2], 'rb') as f:
self.mnist["labels"] = np.frombuffer(f.read(), np.uint8, offset=8)
else:
with gzip.open(data_root + filesname[1], 'rb') as f:
self.mnist["images"] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28, 28)
with gzip.open(data_root + filesname[3], 'rb') as f:
self.mnist["labels"] = np.frombuffer(f.read(), np.uint8, offset=8)
assert(self.mnist["images"].shape[0] == self.mnist["labels"].shape[0])
self.total_len = self.mnist["images"].shape[0]
# this function must be called
self.set_attrs(total_len = self.total_len)
def __getitem__(self, index):
img = Image.fromarray(self.mnist['images'][index]).convert('RGB')
if self.transform:
img = self.transform(img)
return trans.to_tensor(img), self.mnist['labels'][index]
def download_url(self):
'''
        Download the MNIST dataset. This function is called when download is True.
'''
resources = [
("https://storage.googleapis.com/cvdf-datasets/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://storage.googleapis.com/cvdf-datasets/mnist/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
("https://storage.googleapis.com/cvdf-datasets/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
("https://storage.googleapis.com/cvdf-datasets/mnist/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
]
for url, md5 in resources:
filename = url.rpartition('/')[2]
download_url_to_local(url, filename, self.data_root, md5)
class EMNIST(Dataset):
'''
Jittor's own class for loading EMNIST dataset.
Args::
[in] data_root(str): your data root.
[in] split(str): one of 'byclass', 'bymerge', 'balanced', 'letters', 'digits', 'mnist'.
        [in] train(bool): choose the train or validation split.
[in] download(bool): Download data automatically if download is True.
[in] batch_size(int): Data batch size.
[in] shuffle(bool): Shuffle data if true.
[in] transform(jittor.transform): transform data.
Example::
from jittor.dataset.mnist import EMNIST
train_loader = EMNIST(train=True).set_attrs(batch_size=16, shuffle=True)
for i, (imgs, target) in enumerate(train_loader):
...
'''
_merged_classes = {'c', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 's', 'u', 'v', 'w', 'x', 'y', 'z'}
_all_classes = set(string.digits + string.ascii_letters)
classes_split_dict = {
'byclass': sorted(list(_all_classes)),
'bymerge': sorted(list(_all_classes - _merged_classes)),
'balanced': sorted(list(_all_classes - _merged_classes)),
'letters': ['N/A'] + list(string.ascii_lowercase),
'digits': list(string.digits),
'mnist': list(string.digits),
}
def __init__(self, data_root=dataset_root+"/emnist_data/",
split='byclass',
train=True,
download=True,
batch_size = 16,
shuffle = False,
transform=None):
        # to test nets like ResNet, images are loaded with 3 channels (RGB), since those nets expect 3 input channels
super().__init__()
self.data_root = data_root
self.is_train = train
self.transform = transform
self.batch_size = batch_size
self.shuffle = shuffle
if download == True:
self.download_url()
data_root = os.path.join(data_root, "gzip")
filesname = [
f"emnist-{split}-train-images-idx3-ubyte.gz",
f"emnist-{split}-t10k-images-idx3-ubyte.gz",
f"emnist-{split}-train-labels-idx1-ubyte.gz",
f"emnist-{split}-t10k-labels-idx1-ubyte.gz"
]
for i in range(4):
filesname[i] = os.path.join(data_root, filesname[i])
self.mnist = {}
if self.is_train:
with gzip.open(filesname[0], 'rb') as f:
self.mnist["images"] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28, 28).transpose(0,2,1)
with gzip.open(filesname[2], 'rb') as f:
self.mnist["labels"] = np.frombuffer(f.read(), np.uint8, offset=8)
else:
with gzip.open(filesname[1], 'rb') as f:
self.mnist["images"] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28, 28).transpose(0,2,1)
with gzip.open(filesname[3], 'rb') as f:
self.mnist["labels"] = np.frombuffer(f.read(), np.uint8, offset=8)
assert(self.mnist["images"].shape[0] == self.mnist["labels"].shape[0])
self.total_len = self.mnist["images"].shape[0]
# this function must be called
self.set_attrs(total_len = self.total_len)
def __getitem__(self, index):
img = Image.fromarray(self.mnist['images'][index]).convert('RGB')
if self.transform:
img = self.transform(img)
return trans.to_tensor(img), self.mnist['labels'][index]
def download_url(self):
'''
        Download the EMNIST dataset. This function is called when download is True.
'''
resources = [
("https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip", "58c8d27c78d21e728a6bc7b3cc06412e"),
]
for url, md5 in resources:
filename = "emnist.zip"
download_url_to_local(url, filename, self.data_root, md5)
import zipfile
zf = zipfile.ZipFile(os.path.join(self.data_root, filename))
try:
zf.extractall(path=self.data_root)
except RuntimeError as e:
print(e)
raise
zf.close()

View File

@ -0,0 +1,126 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Hao-Yang Peng
# Dun Liang <randonlang@gmail.com>.
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
from .dataset import Dataset
import numpy as np
from PIL import Image
class Sampler():
def __init__(self, dataset):
self.dataset = dataset
# MUST set sampler here
dataset.sampler = self
def __iter__(self):
raise NotImplementedError
def __len__(self):
raise NotImplementedError
class SequentialSampler(Sampler):
def __init__(self, dataset):
# MUST set sampler here
dataset.sampler = self
self.dataset = dataset
def __iter__(self):
return iter(range(self.dataset.__real_len__() if hasattr(self.dataset,"__real_len__") else self.dataset.__len__()))
def __len__(self):
return self.dataset.__real_len__() if hasattr(self.dataset,"__real_len__") else self.dataset.__len__()
class RandomSampler(Sampler):
def __init__(self, dataset, replacement=False, num_samples=None):
# MUST set sampler here
dataset.sampler = self
self.dataset = dataset
self.rep = replacement
self._num_samples = num_samples
self._shuffle_rng = np.random.default_rng(1)
@property
def num_samples(self):
if self._num_samples is None:
return self.dataset.__real_len__() if hasattr(self.dataset,"__real_len__") else self.dataset.__len__()
return self._num_samples
def __len__(self):
return self.num_samples
def __iter__(self):
n = self.dataset.__real_len__() if hasattr(self.dataset,"__real_len__") else self.dataset.__len__()
if self.rep:
return iter(self._shuffle_rng.integers(low=0, high=n, size=(self.num_samples,), dtype=np.int64).tolist())
return iter(self._shuffle_rng.permutation(n).tolist())
class SkipFirstBatchesSampler(Sampler):
def __init__(self, sampler, num_skip_batches):
# MUST set sampler here
sampler.dataset.sampler = self
self.sampler = sampler
self.num_skip_batches = num_skip_batches
def __len__(self):
return len(self.sampler) - self.num_skip_batches
def __iter__(self):
return iter(list(iter(self.sampler))[self.num_skip_batches:])
class SubsetRandomSampler(Sampler):
def __init__(self, dataset, indice):
'''
testdataset = TestSamplerDataset()
subsetsampler = SubsetRandomSampler(testdataset, (20, 30))
for i, data in enumerate(testdataset):
            # data indices between 20 and 29
......
'''
# MUST set sampler here
dataset.sampler = self
self.dataset = dataset
self.indices = indice
dlen = dataset.__real_len__() if hasattr(dataset,"__real_len__") else dataset.__len__()
assert indice[0] >= 0 and indice[1] < dlen and indice[0] < indice[1]
def __iter__(self):
return (int(i) + self.indices[0] for i in np.random.permutation(self.indices[1] - self.indices[0]))
def __len__(self):
return self.indices[1] - self.indices[0]
class BatchSampler(Sampler):
def __init__(self, sampler, batch_size, drop_last):
self.sampler = sampler
self.batch_size = batch_size
self.drop_last = drop_last
def __iter__(self):
batch = []
for idx in self.sampler:
batch.append(idx)
if len(batch) == self.batch_size:
yield batch
batch = []
if len(batch) > 0 and not self.drop_last:
yield batch
def __len__(self):
if self.drop_last:
return len(self.sampler) // self.batch_size
else:
return (len(self.sampler) + self.batch_size - 1) // self.batch_size
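# A minimal usage sketch: wrap a sampler and iterate over index batches
# (dataset is assumed to be an existing Dataset instance):
#   sampler = SequentialSampler(dataset)
#   for batch_indices in BatchSampler(sampler, batch_size=4, drop_last=False):
#       ...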

View File

@ -0,0 +1,68 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Meng-Hao Guo <guomenghao1997@gmail.com>
# Dun Liang <randonlang@gmail.com>.
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
import numpy as np
from collections.abc import Sequence, Mapping
from PIL import Image
import time
def get_random_list(n):
return list(np.random.permutation(range(n)))
def get_order_list(n):
return [i for i in range(n)]
def collate_batch(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
real_size = len(batch)
elem = batch[0]
elem_type = type(elem)
if isinstance(elem, jt.Var):
temp_data = jt.stack([data for data in batch], 0)
return temp_data
if elem_type is np.ndarray:
temp_data = np.stack([data for data in batch], 0)
return temp_data
elif np.issubdtype(elem_type, np.integer):
return np.int32(batch)
elif isinstance(elem, int):
return np.int32(batch)
elif isinstance(elem, float):
return np.float32(batch)
elif isinstance(elem, str):
return batch
elif isinstance(elem, Mapping):
return {key: collate_batch([d[key] for d in batch]) for key in elem}
elif isinstance(elem, tuple):
transposed = zip(*batch)
return tuple(collate_batch(samples) for samples in transposed)
elif isinstance(elem, Sequence):
transposed = zip(*batch)
return [collate_batch(samples) for samples in transposed]
elif isinstance(elem, Image.Image):
temp_data = np.stack([np.array(data) for data in batch], 0)
return temp_data
else:
raise TypeError(f"Not support type <{elem_type.__name__}>")
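# For example (illustrative): collating [(img0, 0), (img1, 1)], where each
# img is an np.ndarray of shape (3, 32, 32), yields a tuple of an
# np.ndarray of shape (2, 3, 32, 32) and an np.int32 array of shape (2,).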
class HookTimer:
def __init__(self, obj, attr):
self.origin = getattr(obj, attr)
self.duration = 0.0
setattr(obj, attr, self)
def __call__(self, *args, **kw):
start = time.time()
rt = self.origin(*args, **kw)
self.duration += time.time() - start
return rt
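# A minimal usage sketch: HookTimer replaces a callable attribute and
# accumulates the time spent inside it (dataset.py uses this to time
# Image.open):
#   timer = HookTimer(Image, "open")
#   Image.open("img.png")   # hypothetical file
#   print(timer.duration)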

View File

@ -0,0 +1,70 @@
# ***************************************************************
# Copyright(c) 2019
# Meng-Hao Guo <guomenghao1997@gmail.com>
# Dun Liang <randonlang@gmail.com>.
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import numpy as np
import os
from PIL import Image
from .dataset import Dataset, dataset_root
class VOC(Dataset):
'''
Jittor's own class for loading VOC dataset.
Args::
[in] data_root(str): your data root.
[in] split(str): which split you want to use, train or val.
Attribute::
NUM_CLASSES: Number of total categories, default is 21.
Example::
from jittor.dataset.voc import VOC
train_loader = VOC(data_root='...').set_attrs(batch_size=16, shuffle=True)
for i, (imgs, target) in enumerate(train_loader):
...
'''
NUM_CLASSES = 21
def __init__(self, data_root=dataset_root+'/voc/', split='train'):
super().__init__()
        # total_len, batch_size and shuffle must be set
self.data_root = data_root
self.split = split
self.image_root = os.path.join(data_root, 'JPEGImages')
self.label_root = os.path.join(data_root, 'SegmentationClass')
self.data_list_path = os.path.join(self.data_root, 'ImageSets', 'Segmentation', self.split + '.txt')
self.image_path = []
self.label_path = []
with open(self.data_list_path, "r") as f:
lines = f.read().splitlines()
for idx, line in enumerate(lines):
_img_path = os.path.join(self.image_root, line + '.jpg')
_label_path = os.path.join(self.label_root, line + '.png')
assert os.path.isfile(_img_path)
assert os.path.isfile(_label_path)
self.image_path.append(_img_path)
self.label_path.append(_label_path)
self.set_attrs(total_len = len(self.image_path))
def __getitem__(self, index):
_img = Image.open(self.image_path[index])
_label = Image.open(self.label_path[index])
_img = _img.resize((513, 513))
_label = _label.resize((513, 513))
_img = np.array(_img)
_label = np.array(_label)
_img = _img.transpose(2,0,1)
return _img, _label

View File

@ -0,0 +1,107 @@
import jittor as jt
from jittor import nn
import numpy as np
# import pylab as pl
# Length of the latent vector
latent_dim = 100
# Number of classes
n_classes = 10
# Image size
img_size = 32
# Number of image channels
channels = 1
# Shape of the image tensor
img_shape = (channels, img_size, img_size)
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.label_emb = nn.Embedding(n_classes, n_classes)
def block(in_feat, out_feat, normalize=True):
layers = [nn.Linear(in_feat, out_feat)]
if normalize:
layers.append(nn.BatchNorm1d(out_feat, 0.8))
layers.append(nn.LeakyReLU(0.2))
return layers
self.model = nn.Sequential(
*block((latent_dim + n_classes), 128, normalize=False),
*block(128, 256),
*block(256, 512),
*block(512, 1024),
nn.Linear(1024, int(np.prod(img_shape))),
nn.Tanh())
def execute(self, noise, labels):
gen_input = jt.concat((self.label_emb(labels), noise), dim=1)
img = self.model(gen_input)
img = img.view((img.shape[0], *img_shape))
return img
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.label_embedding = nn.Embedding(n_classes, n_classes)
self.model = nn.Sequential(
nn.Linear((n_classes + int(np.prod(img_shape))), 512),
nn.LeakyReLU(0.2),
nn.Linear(512, 512),
nn.Dropout(0.4),
nn.LeakyReLU(0.2),
nn.Linear(512, 512),
nn.Dropout(0.4),
nn.LeakyReLU(0.2),
nn.Linear(512, 1))
def execute(self, img, labels):
d_in = jt.concat((img.view((img.shape[0], (- 1))), self.label_embedding(labels)), dim=1)
validity = self.model(d_in)
return validity
# Define the models
generator = Generator()
discriminator = Discriminator()
generator.eval()
discriminator.eval()
# Load pretrained parameters
generator.load('https://cg.cs.tsinghua.edu.cn/jittor/assets/build/generator_last.pkl')
discriminator.load('https://cg.cs.tsinghua.edu.cn/jittor/assets/build/discriminator_last.pkl')
def gen_img(number):
print(number, type(number))
n_row = len(number)
z = jt.array(np.random.normal(0, 1, (n_row, latent_dim))).float32().stop_grad()
labels = jt.array(np.array([int(number[num]) for num in range(n_row)])).float32().stop_grad()
gen_imgs = generator(z,labels)
gen_imgs = gen_imgs.transpose((1,2,0,3)).reshape(gen_imgs.shape[2], -1)
gen_imgs = gen_imgs[:,:,None].broadcast(gen_imgs.shape+(3,)) # .uint8()
gen_imgs = (gen_imgs - gen_imgs.min()) / (gen_imgs.max() - gen_imgs.min()) * 255
gen_imgs = gen_imgs.uint8()
# print(gen_imgs.shape, gen_imgs.max(), gen_imgs.min())
    return gen_imgs.numpy()
from PIL import Image
import pywebio as pw
# Define the digit string to generate
number = "201962517"
# gen_img(number)
Image.fromarray(gen_img(number))
# pl.imshow()
# pl.show()
# print("done")
def web_server():
    pw.pin.put_input("number", label="Enter the digits to generate (powered by Jittor)")
pw.output.put_buttons(['Gen image'],
lambda _: pw.output.put_image(Image.fromarray(gen_img(pw.pin.pin.number))))
pw.start_server(web_server, port=8123)

View File

@ -0,0 +1,325 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Guoye Yang <498731903@qq.com>
# Dun Liang <randonlang@gmail.com>.
#
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
from jittor import init
from jittor import nn
from jittor import Function
class DepthwiseConv(Function):
def __init__(self, stride=1, padding=0, dilation=1):
self.stride = stride if isinstance(stride, tuple) else (stride, stride)
self.padding = padding if isinstance(padding, tuple) else (padding, padding)
self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation)
def execute(self, x, weight):
if not jt.flags.use_cuda or not jt.compiler.is_cuda:
return nn.conv2d(x, weight, None, self.stride, self.padding, self.dilation, x.shape[1])
self.save_vars = x, weight
N,C,H,W = x.shape
o,i,Kh,Kw = weight.shape
assert(o == C)
oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1
ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1
filter_height, filter_width = Kh, Kw
self.Khw = Kh, Kw
assert oh>0 and ow>0
output = jt.code(
[N, C, oh, ow],
x.dtype,
[x, weight],
cuda_header = """
template <typename T,
int filter_height,
int filter_width,
int stride_height,
int stride_width>
__global__ void KernelDepthwiseConv(
const T *const input_data, const T *const filter_data, const int batch_size,
const int output_channels, const int output_height,
const int output_width, const int input_channels,
const int input_height, const int input_width,
const int padding_height, const int padding_width,
const int dilate_height, const int dilate_width, T *const output_data) {
  const int kWeightSize = filter_height * filter_width;
  T r_weight[kWeightSize];
const int batch = blockIdx.y;
const int c_out = blockIdx.x;
const T* weight = filter_data + c_out * filter_height * filter_width;
for (int i = 0; i < filter_height * filter_width; i++) r_weight[i] = weight[i];
for (int w_out = threadIdx.x; w_out < output_width; w_out += blockDim.x) {
for (int h_out = threadIdx.y; h_out < output_height; h_out += blockDim.y) {
const int batch = blockIdx.y;
const int c_out = blockIdx.x;
const int c_in = c_out;
T value = 0;
const int h_in_start = -padding_height + h_out * stride_height;
const int w_in_start = -padding_width + w_out * stride_width;
const int h_in_end = h_in_start + filter_height * dilate_height;
const int w_in_end = w_in_start + filter_width * dilate_width;
const int in_offset =
((batch * input_channels + c_in) * input_height) * input_width;
const int h_end = h_in_end < input_height ? h_in_end : input_height;
const int w_end = w_in_end < input_width ? w_in_end : input_width;
const int h_start = h_in_start > 0 ? h_in_start : 0;
const int w_start = w_in_start > 0 ? w_in_start : 0;
for (int h_in = h_in_start, h_f = 0; h_f < filter_height;
h_in += dilate_height, h_f++) {
for (int w_in = w_in_start, w_f = 0; w_f < filter_width;
w_in += dilate_width, w_f++) {
if (h_in >= 0 && h_in < input_height && w_in >= 0 &&
w_in < input_width) {
const int offset = in_offset + h_in * input_width + w_in;
value += r_weight[h_f * filter_width + w_f] * input_data[offset];
}
}
}
int index =
((batch * gridDim.x + c_out) * output_height + h_out) * output_width +
w_out;
output_data[index] = value;
}
}
}
""",
cuda_src=f"""
@alias(input, in0)
@alias(filter, in1)
@alias(output, out)
const int batch_size = input_shape0;
const int input_channels = input_shape1;
const int input_height = input_shape2;
const int input_width = input_shape3;
const int output_channels = output_shape1;
const int output_height = output_shape2;
const int output_width = output_shape3;
const int ksize_height = {Kh};
const int ksize_width = {Kw};
const int stride_height = {self.stride[0]};
const int stride_width = {self.stride[1]};
const int padding_height = {self.padding[0]};
const int padding_width = {self.padding[1]};
const int dilate_height = {self.dilation[0]};
const int dilate_width = {self.dilation[1]};
int thread = 512;
if (output_width > 1024 && output_width <= 2048)
thread = (output_width - 1) / 2 + 1;
else if (output_width > 512 && output_width <= 1024)
thread = output_width;
int blocks = std::min(std::max(thread / output_width, 1), output_height);
dim3 threads(std::min(output_width, thread), blocks, 1);
dim3 grid(output_channels, batch_size, 1);
KernelDepthwiseConv<
input_type, ksize_height, ksize_width,
stride_height, stride_width>
<<<grid, threads>>>(
input_p, filter_p, batch_size, output_channels, output_height,
output_width, input_channels, input_height, input_width,
padding_height, padding_width, dilate_height,
dilate_width, output_p);
"""
)
return output
def grad(self, grad):
x, weight = self.save_vars
Kh, Kw = self.Khw
return jt.code([x.shape, weight.shape], [x.dtype, weight.dtype], [x, weight, grad],
cuda_header = f"#include <{jt.compile_extern.cub_home}cub/cub.cuh>"+"""
template <typename T>
__device__ __inline__ void CudaAtomicAddWithWarp(T* sum, T value) {
typedef cub::WarpReduce<T> WarpReduce;
typename WarpReduce::TempStorage temp_storage;
value = WarpReduce(temp_storage).Sum(value);
if (cub::LaneId() == 0)
atomicAdd(sum, value);
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template <typename T,
int filter_height,
int filter_width,
int stride_height,
int stride_width>
__global__ void KernelDepthwiseConvInputGradCFilter(
const T *const input_data, const T *const output_grad_data,
const T *const filter_data, const int batch_size,
const int output_channels, const int output_height,
const int output_width, const int input_channels,
const int input_height, const int input_width,
const int padding_height, const int padding_width,
const int dilate_height, const int dilate_width,
T *const input_grad_data) {
  const int kWeightSize = filter_height * filter_width + 1;
  T r_weight[kWeightSize];
const int batch = blockIdx.y;
const int c_in = blockIdx.x;
const T* weight = filter_data + c_in * filter_height * filter_width;
for (int i = 0; i < filter_height * filter_width; i++)
r_weight[i] =
weight[filter_height * filter_width - i - 1];
for (int w_in = threadIdx.x; w_in < input_width; w_in += blockDim.x) {
for (int h_in = threadIdx.y; h_in < input_height; h_in += blockDim.y) {
const int batch = blockIdx.y;
const int c_in = blockIdx.x;
int h_out_start = h_in - (filter_height - 1) * dilate_height + padding_height;
int w_out_start = w_in - (filter_width - 1) * dilate_width + padding_width;
T value = 0;
int index =
((batch * gridDim.x + c_in) * input_height + h_in) * input_width +
w_in;
for (int h_out = h_out_start, h_f = 0; h_f < filter_height;
h_out += dilate_height, h_f++) {
for (int w_out = w_out_start, w_f = 0; w_f < filter_width;
w_out += dilate_width, w_f++) {
int s_h_out = h_out / stride_height;
int s_w_out = w_out / stride_width;
if (h_out % stride_height == 0 && w_out % stride_width == 0 &&
s_h_out >= 0 && s_h_out < output_height && s_w_out >= 0 &&
s_w_out < output_width) {
const int output_grad_offset =
((batch * output_channels + c_in) * output_height +
s_h_out) *
output_width +
s_w_out;
value +=
output_grad_data[output_grad_offset] *
r_weight[h_f * filter_width + w_f];
}
}
}
input_grad_data[index] = value;
}
}
}
// Cuda kernel to compute the depthwise convolution backprop w.r.t. filter.
template <typename T>
__global__ void KernelDepthwiseConvFilterGrad(
const T* output_grad_data, const T* input_data, const int num,
const int output_channels, const int output_height, const int output_width,
const int input_channels, const int input_height, const int input_width,
const int filter_height,
const int filter_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width, const int dilate_height,
const int dilate_width, T* filter_grad_data) {
T s = 0;
int gbid = (((blockIdx.z * blockDim.z + threadIdx.z) * gridDim.y) + blockIdx.y) * gridDim.x + blockIdx.x;
for (int image_w = threadIdx.x; image_w < output_width;
image_w += blockDim.x) {
for (int bid = 0; bid < num; bid++) {
//for (int bid = threadIdx.z; bid < num; bid+=blockDim.z) {
for (int image_h = threadIdx.y; image_h < output_height;
image_h += blockDim.y) {
int kernel_id = blockIdx.z;
int kernel_h = blockIdx.y * dilate_height - padding_height;
int kernel_w = blockIdx.x * dilate_width - padding_width;
int image_hk = image_h * stride_height + kernel_h;
int image_wk = image_w * stride_width + kernel_w;
if (image_hk < 0 || image_hk >= input_height) continue;
if (image_wk < 0 || image_wk >= input_width) continue;
#define gaid(N, C, H, W) \
((((N)*gridDim.z + (C)) * output_height + (H)) * output_width + (W))
int input_id = ((bid * gridDim.z +
kernel_id) *
input_height +
image_hk) *
input_width +
image_wk;
s += output_grad_data[gaid(bid, kernel_id, image_h, image_w)] *
input_data[input_id];
#undef gaid
}
}
}
CudaAtomicAddWithWarp(&filter_grad_data[gbid], s);
}
""",
cuda_src=f"""
// source for backward to data
@alias(input, in0)
@alias(filter, in1)
@alias(output_grad, in2)
@alias(input_grad, out0)
@alias(filter_grad, out1)
const int batch_size = input_shape0;
const int input_channels = input_shape1;
const int input_height = input_shape2;
const int input_width = input_shape3;
const int output_channels = output_grad_shape1;
const int output_height = output_grad_shape2;
const int output_width = output_grad_shape3;
const int ksize_height = {Kh};
const int ksize_width = {Kw};
const int stride_height = {self.stride[0]};
const int stride_width = {self.stride[1]};
const int padding_height = {self.padding[0]};
const int padding_width = {self.padding[1]};
const int dilate_height = {self.dilation[0]};
const int dilate_width = {self.dilation[1]};
int thread = 512;
if (input_width > 1024 && input_width <= 2048)
thread = (input_width - 1) / 2 + 1;
else if (input_width > 512 && input_width <= 1024)
thread = input_width;
int blocks = std::min(std::max(thread / input_width, 1), input_height);
dim3 threads(std::min(input_width, thread), blocks, 1);
dim3 grid(input_channels, batch_size, 1);
KernelDepthwiseConvInputGradCFilter<
input_type, ksize_height, ksize_width
, stride_height, stride_width>
<<<grid, threads, 0>>>(
input_p, output_grad_p, filter_p, batch_size,
output_channels, output_height, output_width, input_channels,
input_height, input_width, padding_height,
padding_width, dilate_height, dilate_width, input_grad_p);
// source for backward to filter
int block_size = 512;
if (output_width > 1024 && output_width <= 2048)
block_size = (output_width - 1) / 2 + 1;
else if (output_width > 512 && output_width <= 1024)
block_size = output_width;
int crop_output_height =
std::min(std::max(block_size / output_width, 1), output_height);
grid = dim3(ksize_width, ksize_height, output_channels);
threads = dim3(std::min(output_width, block_size), crop_output_height, 1);
cudaMemsetAsync(filter_grad_p, 0, filter_grad->size);
KernelDepthwiseConvFilterGrad<
input_type><<<grid, threads, 0>>>(
output_grad_p, input_p, batch_size, output_channels,
output_height, output_width, input_channels, input_height,
input_width, ksize_height, ksize_width,
stride_height, stride_width, padding_height, padding_width,
dilate_height, dilate_width, filter_grad_p);
"""
)
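# A minimal usage sketch (the CUDA path above is taken when jt.flags.use_cuda
# is set; otherwise execute falls back to nn.conv2d):
#   conv = DepthwiseConv(stride=1, padding=1)
#   x = jt.random((2, 8, 16, 16))   # NCHW input
#   w = jt.random((8, 1, 3, 3))     # one 3x3 filter per channel
#   y = conv(x, w)                  # output shape (2, 8, 16, 16)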

View File

@ -0,0 +1,190 @@
# ***************************************************************
# Copyright (c) 2023 Jittor. All Rights Reserved.
# Maintainers:
# Haoyang Peng <2247838039@qq.com>
# Dun Liang <randonlang@gmail.com>.
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import math
import os
import numpy as np
import jittor as jt
from jittor import nn
from jittor.nn import binary_cross_entropy_with_logits
from jittor import lgamma, igamma
from jittor.math_util.gamma import gamma_grad, sample_gamma
def simple_presum(x):
src = '''
__inline_static__
@python.jittor.auto_parallel(1)
void kernel(int n0, int i0, in0_type* x, in0_type* out, int nl) {
out[i0*(nl+1)] = 0;
for (int i=0; i<nl; i++)
out[i0*(nl+1)+i+1] = out[i0*(nl+1)+i] + x[i0*nl+i];
}
kernel(in0->num/in0->shape[in0->shape.size()-1], 0, in0_p, out0_p, in0->shape[in0->shape.size()-1]);
'''
return jt.code(x.shape[:-1]+(x.shape[-1]+1,), x.dtype, [x],
cpu_src=src, cuda_src=src)
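# Usage sketch (illustrative values): simple_presum prepends a zero and returns
# inclusive prefix sums along the last axis, so shape (..., n) -> (..., n + 1).
#   simple_presum(jt.array([[0.2, 0.3, 0.5]]))  # -> [[0.0, 0.2, 0.5, 1.0]]
# Categorical below slices this into left/right cumulative bounds per class.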
class OneHotCategorical:
def __init__(self, probs=None, logits=None):
Categorical.__init__(self, probs, logits)
def sample(self, sample_shape=[]):
shape = sample_shape + self.probs.shape[:-1] + (1,)
rand = jt.rand(shape)
one_hot = jt.logical_and(self.cum_probs_l < rand, rand <= self.cum_probs_r).float()
return one_hot
def log_prob(self, x):
x = jt.argmax(x, dim=-1)[0]
return Categorical.log_prob(self, x)
def entropy(self):
p_log_p = self.logits * self.probs
return -p_log_p.sum(-1)
class Categorical:
def __init__(self, probs=None, logits=None):
assert not (probs is None and logits is None)
if probs is None:
            # cannot align to pytorch, which derives probs from logits via softmax
probs = jt.sigmoid(logits)
probs = probs / probs.sum(-1, True)
if logits is None:
logits = jt.safe_log(probs)
with jt.no_grad():
self.probs = probs
self.logits = logits
self.cum_probs = simple_presum(self.probs)
self.cum_probs_l = self.cum_probs[..., :-1]
self.cum_probs_r = self.cum_probs[..., 1:]
def sample(self, sample_shape=()):
shape = sample_shape + self.probs.shape[:-1] + (1,)
rand = jt.rand(shape)
one_hot = jt.logical_and(self.cum_probs_l < rand, rand <= self.cum_probs_r)
index = one_hot.index(one_hot.ndim - 1)
return (one_hot * index).sum(-1)
def log_prob(self, x):
a = self.probs.ndim
b = x.ndim
indexes = tuple( f'i{i}' for i in range(b-a+1, b) )
indexes = indexes + (x,)
return jt.safe_log(self.probs).getitem(indexes)
def entropy(self):
p_log_p = self.logits * self.probs
return -p_log_p.sum(-1)
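# Usage sketch (hypothetical values, assuming the interface above):
#   c = Categorical(probs=jt.array([0.1, 0.2, 0.7]))
#   idx = c.sample((4,))    # class indices drawn by bracketing a uniform
#                           # sample between cum_probs_l and cum_probs_r
#   lp = c.log_prob(idx)    # log of the probabilities at those indices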
class Normal:
def __init__(self, mu, sigma):
self.mu = mu
self.sigma = sigma
    def sample(self, sample_shape=None):
        return jt.normal(jt.array(self.mu), jt.array(self.sigma), size=sample_shape)
def log_prob(self, x):
var = self.sigma**2
log_scale = jt.safe_log(self.sigma)
return -((x-self.mu)**2) / (2*var) - log_scale-np.log(np.sqrt(2*np.pi))
def entropy(self):
return 0.5+0.5*np.log(2*np.pi)+jt.safe_log(self.sigma)
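# Sanity-check sketch: for Normal(0., 1.), log_prob(0.) reduces to
# -log(sqrt(2*pi)) ~ -0.9189, and entropy() to 0.5 + 0.5*log(2*pi) ~ 1.4189.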
class Uniform:
def __init__(self,low,high):
self.low = low
self.high = high
assert high > low
def sample(self,sample_shape):
return jt.uniform(self.low,self.high,sample_shape)
def log_prob(self,x):
        if x < self.low or x >= self.high:
            # outside the support the density is 0, so the log-prob is -inf
            return -math.inf
        return -jt.safe_log(self.high - self.low)
def entropy(self):
return jt.safe_log(self.high - self.low)
class Geometric:
    def __init__(self,p=None,logits=None):
        assert (p is not None) or (logits is not None)
        if p is None:
            self.prob = jt.sigmoid(logits)
            self.logits = logits
        elif logits is None:
            # validate p only when it is given (logits-only construction
            # leaves p as None, so asserting unconditionally would fail)
            assert 0 < p and p < 1
            self.prob = p
            self.logits = -jt.safe_log(1. / p - 1)
    def sample(self, sample_shape):
        u = jt.rand(sample_shape)
        # inverse-CDF draw using self.prob
        return (jt.safe_log(u) / (jt.safe_log(-self.prob+1))).floor_int()
def log_prob(self, x):
return x*jt.safe_log(-self.prob+1)+jt.safe_log(self.prob)
def entropy(self):
return binary_cross_entropy_with_logits(jt.array(self.logits),jt.array(self.prob)) / self.prob
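# Sampling note (illustrative): floor(log(u) / log(1 - p)) for u ~ Uniform(0, 1)
# is the standard inverse-CDF draw for a Geometric(p) counting the number of
# failures before the first success, which is what sample() implements above.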
class GammaDistribution:
    '''
    For now, only the gamma distribution is supported.
    '''
def __init__(self, concentration, rate):
self.concentration = concentration
self.rate = rate
self.lgamma_alpha = lgamma.apply(jt.array([concentration,]))
def sample(self, shape):
return sample_gamma(self.concentration, shape)
def cdf(self, value):
return igamma(self.concentration, value)
def log_prob(self, value):
return (self.concentration * jt.log(self.rate) +
(self.concentration - 1) * jt.log(value) -
self.rate * value - self.lgamma_alpha)
def mean(self):
return self.concentration / self.rate
    def mode(self):
        # the gamma mode is (concentration - 1) / rate, clamped at 0 when
        # concentration < 1 (where the density peaks at the origin)
        return np.maximum((self.concentration - 1) / self.rate, 0)
def variance(self):
return self.concentration / (self.rate * self.rate)
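# Quick numeric check (illustrative): for concentration=2.0, rate=3.0 the
# closed forms above give mean 2/3, mode 1/3 and variance 2/9.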
def kl_divergence(cur_dist, old_dist):
assert isinstance(cur_dist, type(old_dist))
if isinstance(cur_dist, Normal):
vr = (cur_dist.sigma / old_dist.sigma)**2
t1 = ((cur_dist.mu - old_dist.mu) / old_dist.sigma)**2
return 0.5*(vr+t1-1-jt.safe_log(vr))
if isinstance(cur_dist, Categorical) or isinstance(cur_dist,OneHotCategorical):
t = cur_dist.probs * (cur_dist.logits-old_dist.logits)
return t.sum(-1)
if isinstance(cur_dist, Uniform):
res = jt.safe_log((old_dist.high - old_dist.low) / (cur_dist.high - cur_dist.low))
if old_dist.low > cur_dist.low or old_dist.high < cur_dist.high:
res = math.inf
return res
if isinstance(cur_dist, Geometric):
return -cur_dist.entropy() - jt.safe_log(-old_dist.prob+1) / cur_dist.prob - old_dist.logits
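# Cross-check sketch for the Normal branch above: with vr = (s1/s2)**2 and
# t1 = ((m1-m2)/s2)**2, 0.5*(vr + t1 - 1 - log(vr)) expands to the textbook
# KL(N(m1,s1) || N(m2,s2)) = log(s2/s1) + (s1**2 + (m1-m2)**2)/(2*s2**2) - 0.5.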

View File

@ -0,0 +1,8 @@
class EinopsError(RuntimeError):
""" Runtime error thrown by einops """
pass
__all__ = ['rearrange', 'reduce', 'repeat', 'parse_shape', 'asnumpy', 'EinopsError']
from jittor.einops.einops import rearrange, reduce, repeat, parse_shape, asnumpy
