mirror of https://github.com/Jittor/Jittor
226 lines
7.8 KiB
Python
226 lines
7.8 KiB
Python
import sys, os

# Suffix appended to the docker image tag and log-file names; main() sets it
# to "_1_4" when benchmarking against torch 1.4 (env var use_torch_1_4=1).
suffix = ""

import jittor as jt

import time

from pathlib import Path

# All benchmark artifacts (copied jittor sources, per-run FPS result files,
# aggregated logs) live under ~/.cache/jittor_perf; main() mounts this
# directory into the benchmark container.
home_path = str(Path.home())

perf_path = os.path.join(home_path, ".cache", "jittor_perf")
|
def main():
    """Orchestrate the full jittor-vs-torch benchmark suite on the host.

    Steps:
      1. copy the installed jittor sources into ~/.cache/jittor_perf/src
      2. build the jittor/jittor-perf docker image (cuda 10.2 + torch) if absent
      3. lock GPU clocks, then run every (model, batch-size) combination for
         both frameworks via test_main(), logging FPS and jittor/torch ratios
      4. write the log to jittor-perf{suffix}-latest.txt plus a timestamped copy
    """
    os.makedirs(perf_path+"/src/jittor", exist_ok=True)
    os.makedirs(perf_path+"/src/jittor_utils", exist_ok=True)
    # cp -L dereferences symlinks so the container sees real files
    os.system(f"cp -rL {jt.flags.jittor_path} {perf_path+'/src/'}")
    os.system(f"cp -rL {jt.flags.jittor_path}/../jittor_utils {perf_path+'/src/'}")
    # set use_torch_1_4=1 in the environment to benchmark against torch 1.4
    use_torch_1_4 = os.environ.get("use_torch_1_4", "0") == "1"
    dockerfile_src = r"""
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04

RUN echo \
"deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-updates main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-backports main restricted universe multiverse\n\
deb [trusted=yes] https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ bionic-security main restricted universe multiverse" > /etc/apt/sources.list

# RUN rm -rf /var/lib/apt/lists/*
RUN apt update || true

RUN apt install wget \
python3.7 python3.7-dev \
g++ build-essential -y

WORKDIR /usr/src

RUN apt download python3-distutils && dpkg-deb -x ./python3-distutils* / \
&& wget -O - https://bootstrap.pypa.io/get-pip.py | python3.7

# change tsinghua mirror
RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

RUN pip3 install \
pybind11 \
numpy \
tqdm \
pillow \
astunparse

RUN pip3 install torch torchvision
"""
    global suffix
    if use_torch_1_4:
        suffix = "_1_4"
        # pin the legacy framework versions inside the dockerfile
        dockerfile_src = dockerfile_src.replace("torch ", "torch==1.4.0 ")
        dockerfile_src = dockerfile_src.replace("torchvision", "torchvision==0.5.0")
    with open("/tmp/perf_dockerfile", 'w') as f:
        f.write(dockerfile_src)
    # lock GPU clocks (1500 MHz) so measurements are stable across runs
    assert os.system("sudo nvidia-smi -lgc 1500") == 0

    # if the docker image is not built
    if os.system(f"sudo docker image inspect jittor/jittor-perf{suffix}"):
        assert os.system(f"sudo docker build --tag jittor/jittor-perf{suffix} -f /tmp/perf_dockerfile .") == 0

    # run once for compile source
    jt_fps = test_main("jittor", "resnet50", 1)

    logs = ""
    # resnext50_32x4d with bs=8 cannot pass this test
    #### inference test
    for model_name in ["resnet50", "wide_resnet50_2", # "resnext50_32x4d",
                       "resnet152", "wide_resnet101_2", "resnext101_32x8d",
                       "alexnet", "vgg11", "squeezenet1_1", "mobilenet_v2",
                       "densenet121", "densenet169", "densenet201",
                       "res2net50", "res2net101"]:
        for bs in [1, 2, 4, 8, 16, 32, 64, 128]:
            jt_fps = test_main("jittor", model_name, bs)
            logs += f"jittor-{model_name}-{bs} {jt_fps}\n"
            tc_fps = test_main("torch", model_name, bs)
            logs += f"torch-{model_name}-{bs} {tc_fps}\n"
            # FPS ratio jittor/torch; ratio > 1 means jittor is faster
            logs += f"compare-{model_name}-{bs} {jt_fps/tc_fps}\n"
            print(logs)
    #### train test
    for model_name in ["train_resnet50", "train_resnet101"
                       ]:
        for bs in [1, 2, 4, 8, 16, 32, 64, 128]:
            jt_fps = test_main("jittor", model_name, bs)
            logs += f"jittor-{model_name}-{bs} {jt_fps}\n"
            tc_fps = test_main("torch", model_name, bs)
            logs += f"torch-{model_name}-{bs} {tc_fps}\n"
            logs += f"compare-{model_name}-{bs} {jt_fps/tc_fps}\n"
            print(logs)
    with open(f"{perf_path}/jittor-perf{suffix}-latest.txt", "w") as f:
        f.write(logs)
    from datetime import datetime
    with open(f"{perf_path}/jittor-perf{suffix}-{datetime.now()}.txt", "w") as f:
        f.write(logs)
|
|
|
def test_main(name, model_name, bs):
    """Run one (framework, model, batch-size) benchmark inside the perf
    docker image and return the measured FPS, or -1 on any failure.

    The container invokes this very script (perf.py) with CLI args; it
    writes its result into perf_path (mounted at /root/.cache/jittor),
    which we read back here.
    """
    cmd = f"sudo docker run --gpus all --rm -v {perf_path}:/root/.cache/jittor --network host jittor/jittor-perf{suffix} bash -c 'PYTHONPATH=/root/.cache/jittor/src python3.7 /root/.cache/jittor/src/jittor/test/perf/perf.py {name} {model_name} {bs}'"
    fps = -1
    try:
        print("run cmd:", cmd)
        if os.system(cmd) == 0:
            with open(f"{perf_path}/{name}-{model_name}-{bs}.txt", 'r') as f:
                # result file format: "duration: <seconds> FPS: <fps>"
                fps = float(f.read().split()[3])
    except (OSError, ValueError, IndexError) as e:
        # best-effort: a missing or garbled result file just reports fps=-1,
        # but log why instead of swallowing the error silently
        print(f"test_main({name}, {model_name}, {bs}) failed:", e)
    return fps
|
|
|
|
def time_iter(duration=2, min_iter=5):
    """Yield successive iteration indices (0, 1, 2, ...) until at least
    `duration` seconds have elapsed AND at least `min_iter + 1` iterations
    have run, hard-capped at 10,000,000 iterations as a safety net.

    The clock is checked after control returns from the consumer, so the
    consumer's per-iteration work is included in the measured time.
    """
    begin = time.time()
    index = 0
    while index < 10000000:
        yield index
        elapsed = time.time() - begin
        if elapsed > duration and index >= min_iter:
            break
        index += 1
|
|
|
|
def test(name, model_name, bs):
    """Benchmark a single model on one framework and record its FPS.

    Args:
        name: "jittor" or "torch" -- which framework to benchmark.
        model_name: model key in the framework's model zoo; a "train_"
            prefix switches from inference to training measurement.
        bs: batch size.

    Writes "duration: <sec> FPS: <fps>" to
    ~/.cache/jittor/<name>-<model_name>-<bs>.txt (read back by test_main).
    Requires a CUDA device.
    """
    print("hello", name, model_name, bs)
    import numpy as np
    import time
    is_train = False
    _model_name = model_name  # keep the original (maybe "train_"-prefixed) name for the result file
    if model_name.startswith("train_"):
        is_train = True
        model_name = model_name[6:]  # strip the "train_" prefix
    if name == "torch":
        import torch
        import torchvision.models as tcmodels
        from torch import optim
        from torch import nn
        # benchmark mode: let cudnn autotune the fastest algorithms
        torch.backends.cudnn.deterministic = False
        torch.backends.cudnn.benchmark = True
        model = tcmodels.__dict__[model_name]()
        model = model.cuda()
    else:
        import jittor as jt
        from jittor import optim
        from jittor import nn
        jt.flags.use_cuda = 1
        jt.cudnn.set_algorithm_cache_size(10000)
        import jittor.models as jtmodels
        model = jtmodels.__dict__[model_name]()
        # BUGFIX: this used to compare the model *object* against a string
        # (model == "resnet152"), which is always False, so the workspace
        # limit was never applied -- compare model_name instead.
        if (model_name == "resnet152" or model_name == "resnet101") and bs == 128 and is_train:
            # shrink the cudnn workspace so the biggest train configs fit in memory
            jt.cudnn.set_max_workspace_ratio(0.05)
    if is_train:
        model.train()
    else:
        model.eval()
    img_size = 224
    if model_name == "inception_v3":
        img_size = 300
    test_img = np.random.random((bs, 3, img_size, img_size)).astype("float32")
    if is_train:
        label = (np.random.random((bs,)) * 1000).astype("int32")
    if name == "torch":
        test_img = torch.Tensor(test_img).cuda()
        if is_train:
            label = torch.LongTensor(label).cuda()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: torch.cuda.synchronize()
        jt = torch  # alias so the framework-agnostic code below can use jt.* uniformly
    else:
        test_img = jt.array(test_img).stop_grad()
        if is_train:
            label = jt.array(label).stop_grad()
            opt = optim.SGD(model.parameters(), 0.001)
        sync = lambda: jt.sync_all(True)

    sync()
    use_profiler = os.environ.get("use_profiler", "0") == "1"
    # BUGFIX: was hasattr(jt, "nograd") (missing underscore), which matched
    # neither framework, so inference silently ran with autograd enabled.
    # Gate on "not is_train" as well -- training needs gradients.
    if not is_train and hasattr(jt, "no_grad"):
        ng = jt.no_grad()
        ng.__enter__()
    def run_iter():
        # one benchmark step: forward pass (+ backward/optimizer when training)
        x = model(test_img)
        if isinstance(x, tuple):
            x = x[0]  # some zoo models return (logits, aux) tuples
        if is_train:
            loss = nn.CrossEntropyLoss()(x, label)
            if name == "jittor":
                opt.step(loss)  # jittor's optimizer runs backward itself
            else:
                opt.zero_grad()
                loss.backward()
                opt.step()
        else:
            if name == "jittor":
                x.sync()  # force jittor's lazy graph to actually execute
    sync()
    # two warm-up rounds (kernel compilation, cudnn autotuning) before timing
    for i in time_iter():
        run_iter()
    sync()
    for i in time_iter():
        run_iter()
    sync()
    if use_profiler:
        if name == "torch":
            prof = torch.autograd.profiler.profile(use_cuda=True)
        else:
            prof = jt.profile_scope()
        prof.__enter__()
    if name == "jittor":
        # everything is compiled by now; parallel op compilation only adds noise
        if hasattr(jt.flags, "use_parallel_op_compiler"):
            jt.flags.use_parallel_op_compiler = 0
    start = time.time()
    for i in time_iter(10):  # timed run: at least 10 seconds
        run_iter()
    sync()
    end = time.time()
    if use_profiler:
        prof.__exit__(None, None, None)
        if name == "torch":
            print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=30))
    total_iter = i + 1
    print("duration:", end-start, "FPS:", total_iter*bs/(end-start))
    fpath = f"{home_path}/.cache/jittor/{name}-{_model_name}-{bs}.txt"
    with open(fpath, 'w') as f:
        f.write(f"duration: {end-start} FPS: {total_iter*bs/(end-start)}")
    # BUGFIX: 0x666 is a hex literal (= 0o3146, weird sticky/exec bits);
    # the intended world-readable/writable mode is octal 0o666 (rw-rw-rw-)
    os.chmod(fpath, 0o666)
|
|
|
|
if __name__ == "__main__":
    # Guarded entry point so importing this module does not launch benchmarks.
    # No CLI args: orchestrate the whole suite on the host (build the docker
    # image, sweep all models).  With args "<framework> <model> <bs>": run a
    # single measurement -- this is how the script invokes itself inside the
    # container (see test_main).
    if len(sys.argv) <= 1:
        main()
    else:
        name, model, bs = sys.argv[1:]
        bs = int(bs)
        test(name, model, bs)