some comment

Gword 2020-06-03 16:38:47 +08:00
parent b8f67748aa
commit 8a4113ed67
4 changed files with 29 additions and 67 deletions

View File

@@ -134,14 +134,17 @@ class SGD(Optimizer):
class RMSprop(Optimizer):
    """ RMSprop Optimizer.
    Args:
        params(list): parameters of model.
        lr(float): learning rate.
        eps(float): term added to the denominator to avoid division by zero, default 1e-8.
        alpha(float): smoothing constant, default 0.99.
    Example:
    ```
    optimizer = nn.RMSprop(model.parameters(), lr, eps=1e-8, betas=(0.9, 0.999))
    optimizer.step(loss)
    ```
        optimizer = nn.RMSprop(model.parameters(), lr)
        optimizer.step(loss)
    """
    def __init__(self, params, lr=1e-2, eps=1e-8, alpha=0.99):
        # def __init__(self, params, lr, eps=1e-8, betas=(0.9, 0.999), weight_decay=0):
        super().__init__(params, lr)
        self.eps = eps
        self.alpha = alpha

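For context, a minimal usage sketch of the RMSprop interface documented above, assuming the optimizer is exposed as jittor.nn.RMSprop as the docstring suggests; the toy model, input, and loss below are hypothetical placeholders and are not part of this commit.

```python
import jittor as jt
from jittor import nn

model = nn.Linear(10, 2)  # hypothetical toy model, not part of this commit
optimizer = nn.RMSprop(model.parameters(), lr=1e-2, eps=1e-8, alpha=0.99)

x = jt.random((4, 10))            # hypothetical input batch
loss = (model(x) ** 2).mean()     # hypothetical loss
optimizer.step(loss)              # Jittor optimizers take the loss directly
```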
View File

@@ -1,59 +0,0 @@
# ***************************************************************
# Copyright (c) Jittor 2020, Author:
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
import unittest
import numpy as np
from jittor import models

pass_this_test = False
try:
    jt.dirty_fix_pytorch_runtime_error()
    import torch
    import torchvision
except Exception as e:
    pass_this_test = True

def get_error(a, b):
    return np.abs(a-b) / max(np.abs(a), np.abs(b), 1e-5), np.abs(a-b)

def check(jt_mod, torch_mod, rtol=1e-2, atol=1e-5, mean_atol=1e-5):
    pa = [ p for p in jt_mod.parameters() if not p.is_stop_grad() ]
    pb = list(torch_mod.parameters())
    assert len(pa) == len(pb)
    error_count = 0
    for a, b in zip(pa, pb):
        assert a.shape == list(b.shape), (a.shape, b.shape, a.name())
        stda, meana = np.std(a.numpy()), np.mean(a.numpy())
        stdb, meanb = np.std(b.detach().numpy()), np.mean(b.detach().numpy())
        r_err, a_err = get_error(stda, stdb)
        if r_err > rtol and a_err > atol:
            error_count += 1
            print("compare std error", stda, stdb, r_err, a_err, a.name(), a.shape)
        r_err, a_err = get_error(meana, meanb)
        if r_err > rtol and a_err > mean_atol:
            error_count += 1
            print("compare mean error", meana, meanb, r_err, a_err, a.name(), a.shape)
    assert error_count == 0

@unittest.skipIf(pass_this_test, f"pass init check, no torch found")
class TestInit(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        jt.seed(0)
        np.random.seed(0)
        torch.manual_seed(0)

    def test_conv(self):
        check(jt.nn.Conv(64, 256, 3), torch.nn.Conv2d(64, 256, 3), rtol=1e-1, mean_atol=1e-3)

    def test_resnet(self):
        check(models.resnet152(), torchvision.models.resnet152(), rtol=2e-2, mean_atol=1e-2)

if __name__ == "__main__":
    unittest.main()

View File

@@ -41,6 +41,8 @@ static void move_rely(KernelIR* inner_loop, KernelIR* outer_loop, KernelIR* def)
}
}
// sorder: Array that saves the allocation order of "tn"
// sfunc: Array of function names
static void tune_atomic(Pass* pass, KernelIR* ir, bool is_cuda, int tdim, vector<vector<int>> &sorder, vector<string> &sfunc) {
    LOGvvvv << "tune_atomic" << ir->children;
    vector<string> relys;
@@ -224,6 +226,19 @@ void AtomicTunerPass::run() {
        if (func_call->get_attr("dtype") != "__global__ void") continue;
        tune_atomic(this, func_call.get(), is_cuda, 4, sorder, sfunc);
    }
    // Re-adjust the allocation order of "tn" according to atomic coverage:
    // preferentially allocate the ranges that are not covered by the atomic operation, for example:
    // for (op0_index_t id0 = tid0; id0<range0; id0+=tnum0) {
    //     for (op0_index_t id1 = tid1; id1<range1; id1+=tnum1) {
    //         for (op0_index_t id2 = tid2; id2<range2; id2+=tnum2) {
    //             for (op0_index_t id3 = tid3; id3<range3; id3+=tnum3) {
    //                 ...
    //             }
    //         }
    //         atomicAdd(...);
    //     }
    // }
    // The allocation order of "tn" will be: tn1, tn0, tn3, tn2
    for (uint j=0;j<sfunc.size();j++)
        for (uint i=0; i<ir->children.size(); i++) {
            auto& func_call = ir->children[i];

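To make the reordering rule in the comment above concrete, here is a small sketch in Python (not Jittor's actual C++ implementation) of how the allocation order of "tn" could be derived; tn_alloc_order and atomic_depth are illustrative names only.

```python
def tn_alloc_order(nloops, atomic_depth):
    # atomic_depth: loop level whose body contains the atomicAdd, so every
    # deeper loop level is "covered" by the atomic
    uncovered = list(range(atomic_depth, -1, -1))        # uncovered levels, inner first
    covered = list(range(nloops - 1, atomic_depth, -1))  # covered levels, inner first
    return uncovered + covered

# 4 nested loops with atomicAdd in the body of loop id1 -> covered loops are id2, id3
print([f"tn{i}" for i in tn_alloc_order(4, 1)])  # ['tn1', 'tn0', 'tn3', 'tn2']
```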
View File

@@ -343,9 +343,12 @@ void ParallelPass::run() {
    new_block.swap(*func_call, true);
    auto code = func_def->to_string();
    bool has_atomic = code.find("atomic") != string::npos;
    if (has_atomic && !fix_thread_num) {
        func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
    }
    if (!fix_thread_num) {
        if (has_atomic) {
            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
        } else {
            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + ")-2),0)," + S(thread_num) + ")";
        }
    }
    ir->remove_all_unused();
}
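As a rough illustration of the thread_num formula above, the sketch below assumes NanoVector::get_nbits(x) behaves like Python's int.bit_length (the number of bits needed to represent x); pick_thread_num and its arguments are hypothetical names used only for this example.

```python
def get_nbits(x):
    # assumption: NanoVector::get_nbits(x) == bit length of x
    return int(x).bit_length()

def pick_thread_num(total_range, max_thread_num, has_atomic):
    # when the kernel contains atomics, the range is divided by 16 first,
    # which yields a smaller power-of-two thread count
    n = total_range // 16 if has_atomic else total_range
    return min(1 << max(get_nbits(n) - 2, 0), max_thread_num)

print(pick_thread_num(4096, 1024, has_atomic=True))   # 128
print(pick_thread_num(4096, 1024, has_atomic=False))  # 1024
```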