mirror of https://github.com/Jittor/Jittor
Fix some bugs & add weight synchronization
This commit is contained in:
parent
f22a8ec1fe
commit
178d7b259a
|
@ -42,7 +42,6 @@ void NcclBroadcastOp::jit_run() {
|
|||
auto* __restrict__ xp = x->ptr<Tx>();
|
||||
auto* __restrict__ yp = y->ptr<Tx>();
|
||||
checkCudaErrors(ncclBroadcast(xp, yp, size, ncclFloat, root, comm, 0));
|
||||
checkCudaErrors(cudaStreamSynchronize(0));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -41,8 +41,7 @@ void NcclReduceOp::jit_run() {
|
|||
int size = 1 @for(i, 0, XDIM, * xshape@{i});
|
||||
auto* __restrict__ xp = x->ptr<Tx>();
|
||||
auto* __restrict__ yp = y->ptr<Tx>();
|
||||
checkCudaErrors(ncclReduce(xp, yp, size, ncclFloat, root, comm, 0));
|
||||
checkCudaErrors(cudaStreamSynchronize(0));
|
||||
checkCudaErrors(ncclReduce(xp, yp, size, ncclFloat, ncclSum, root, comm, 0));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -161,6 +161,14 @@ class SGD(object):
|
|||
# sync such parameters to reduce memory consumption
|
||||
jt.sync(self.no_grad_parameters)
|
||||
|
||||
def sync(self):
    """Overwrite every optimized parameter with the copy held by rank 0.

    Each entry of ``self.parameters`` is passed to the NCCL broadcast op
    with root 0 and the result is written back into the parameter in
    place.

    When ``jt.compile_extern.nccl_ops`` is unavailable (falsy) there is no
    broadcast op to call, so the method returns without touching any
    parameter instead of failing with an AttributeError.
    """
    nccl_ops = jt.compile_extern.nccl_ops
    if not nccl_ops:
        # No NCCL ops loaded: nothing to broadcast.
        return
    for p in self.parameters:
        temp = nccl_ops.nccl_broadcast(p, 0)
        # Replace the value in place: zero the parameter, then add the
        # broadcast value.
        # NOTE(review): `p -= p` yields NaN wherever p already holds
        # NaN/Inf, so such entries would not be overwritten -- confirm
        # whether a direct assignment would be preferable here.
        p -= p
        p += temp
        # presumably drops the history of the in-place ops above -- verify
        p.detach_inplace()
|
||||
|
||||
class Adam(object):
|
||||
""" Usage:
|
||||
optimizer = nn.Adam(model.parameters(), lr)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# ***************************************************************
|
||||
# Copyright (c) 2020 Jittor. Authors:
|
||||
# Guoye Yang <498731903@qq.com>
|
||||
# Guowei Yang <471184555@qq.com>
|
||||
# Dun Liang <randonlang@gmail.com>.
|
||||
# All Rights Reserved.
|
||||
|
@ -10,12 +11,16 @@ import unittest
|
|||
import os, sys
|
||||
import jittor as jt
|
||||
import numpy as np
|
||||
from jittor import nn
|
||||
from jittor import nn, Module
|
||||
import copy
|
||||
# Number of MPI processes these tests are written for: it is the `-np` value
# passed to mpirun and the factor expected from the reduce / all-reduce checks.
n = 2
|
||||
|
||||
def test_all_reduce():
|
||||
print("test all_reduce")
|
||||
x = jt.random([5, 5])
|
||||
y = jt.compile_extern.nccl_ops.nccl_all_reduce(x)
|
||||
assert np.allclose(y.data, (x*3).data)
|
||||
assert np.allclose(y.data, (x*n).data)
|
||||
|
||||
def test_broadcast():
|
||||
print("test broadcast")
|
||||
|
@ -32,15 +37,45 @@ def test_reduce():
|
|||
print("test reduce")
|
||||
mpi = jt.compile_extern.mpi
|
||||
x = jt.random([5, 5])
|
||||
y = jt.compile_extern.nccl_ops.nccl_all_reduce(x)
|
||||
y = jt.compile_extern.nccl_ops.nccl_reduce(x, 0)
|
||||
y_ = y.data
|
||||
x_ = (x*n).data
|
||||
if mpi.world_rank() == 0:
|
||||
assert np.allclose(y.data, (x*3).data)
|
||||
assert np.allclose(y_, x_)
|
||||
|
||||
class Model(Module):
    """Small two-layer network used as a fixture by the sync test.

    ``linear1`` is a 3 -> 3 linear layer; ``linear2`` is a 3 -> 1024 linear
    layer constructed with ``False`` as its third argument.
    """

    def __init__(self):
        self.linear1 = nn.Linear(3, 3)
        self.linear2 = nn.Linear(3, 1024, False)

    def execute(self, x):
        """Apply linear1, then relu, then linear2, and return the result."""
        hidden = nn.relu(self.linear1(x))
        return self.linear2(hidden)
|
||||
|
||||
def test_sync():
    """Check that SGD.sync() leaves every rank with rank 0's parameters.

    Rank 0 rewrites its two weights and one bias to all ones; after sync()
    each of those parameters must compare equal to a ones array of the same
    shape on whichever rank runs this function.
    """
    mpi = jt.compile_extern.mpi
    net = Model()
    optimizer = nn.SGD(net.parameters(), 0.1, 0.9, 0.00001)

    is_root = mpi.world_rank() == 0
    if is_root:
        # Zero all three parameters first, then add one to each.
        net.linear1.weight *= 0
        net.linear2.weight *= 0
        net.linear1.bias *= 0
        net.linear1.weight += 1
        net.linear2.weight += 1
        net.linear1.bias += 1

    optimizer.sync()

    # Same order as the parameters were written above.
    for param in (net.linear1.weight, net.linear2.weight, net.linear1.bias):
        actual = param.data
        expected = jt.ones(param.shape).data
        assert np.allclose(actual, expected)
|
||||
|
||||
|
||||
def main():
|
||||
np.random.seed(0)
|
||||
jt.set_seed(3)
|
||||
with jt.flag_scope(use_cuda=1):
|
||||
if jt.compile_extern.nccl_ops:
|
||||
test_sync()
|
||||
test_all_reduce()
|
||||
test_broadcast()
|
||||
test_reduce()
|
||||
|
@ -49,9 +84,9 @@ def main():
|
|||
class TestNcclOps(unittest.TestCase):
|
||||
def test(self):
|
||||
mpi = jt.compile_extern.mpi
|
||||
if mpi.world_size() == 1:
|
||||
if mpi.world_size() == 1 and n != 1:
|
||||
mpirun_path = jt.compiler.env_or_try_find('mpirun_path', 'mpirun')
|
||||
cmd = f"{mpirun_path} -np 3 {sys.executable} -m jittor.test.test_nccl_ops"
|
||||
cmd = f"{mpirun_path} -np {n} {sys.executable} -m jittor.test.test_nccl_ops"
|
||||
print("run cmd", cmd)
|
||||
jt.compiler.run_cmd(cmd)
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue