mirror of https://github.com/Jittor/Jittor
some comment
This commit is contained in:
parent b8f67748aa
commit 8a4113ed67
@@ -134,14 +134,17 @@ class SGD(Optimizer):
class RMSprop(Optimizer):
    """ RMSprop Optimizer.

    Args:
        params(list): parameters of model.
        lr(float): learning rate.
        eps(float): term added to the denominator to avoid division by zero, default 1e-8.
        alpha(float): smoothing constant, default 0.99.

    Example:
        ```
        optimizer = nn.RMSprop(model.parameters(), lr, eps=1e-8, alpha=0.99)
        optimizer.step(loss)

        optimizer = nn.RMSprop(model.parameters(), lr)
        optimizer.step(loss)
        ```
    """
    def __init__(self, params, lr=1e-2, eps=1e-8, alpha=0.99):
        super().__init__(params, lr)
        self.eps = eps
        self.alpha = alpha
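For reference, the update that RMSprop's `step` applies (the `step` body is outside this hunk) is the usual running-average-of-squared-gradients rule. A minimal NumPy sketch of that rule, with made-up names, just to make the roles of `alpha` and `eps` concrete:

```python
import numpy as np

def rmsprop_step(param, grad, sq_avg, lr=1e-2, alpha=0.99, eps=1e-8):
    # running average of squared gradients, smoothed by alpha
    sq_avg = alpha * sq_avg + (1 - alpha) * grad * grad
    # eps keeps the denominator away from zero
    return param - lr * grad / (np.sqrt(sq_avg) + eps), sq_avg

# one toy update
p = np.array([1.0, -2.0])
g = np.array([0.1, 0.3])
p, v = rmsprop_step(p, g, np.zeros_like(p))
```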
@@ -1,59 +0,0 @@
# ***************************************************************
# Copyright (c) Jittor 2020, Author:
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
import unittest
import numpy as np
from jittor import models

pass_this_test = False
try:
    jt.dirty_fix_pytorch_runtime_error()
    import torch
    import torchvision
except Exception as e:
    pass_this_test = True

def get_error(a, b):
    return np.abs(a-b) / max(np.abs(a), np.abs(b), 1e-5), np.abs(a-b)

def check(jt_mod, torch_mod, rtol=1e-2, atol=1e-5, mean_atol=1e-5):
    pa = [ p for p in jt_mod.parameters() if not p.is_stop_grad() ]
    pb = list(torch_mod.parameters())
    assert len(pa) == len(pb)
    error_count = 0
    for a, b in zip(pa, pb):
        assert a.shape == list(b.shape), (a.shape, b.shape, a.name())
        stda, meana = np.std(a.numpy()), np.mean(a.numpy())
        stdb, meanb = np.std(b.detach().numpy()), np.mean(b.detach().numpy())

        r_err, a_err = get_error(stda, stdb)
        if r_err > rtol and a_err > atol:
            error_count += 1
            print("compare std error", stda, stdb, r_err, a_err, a.name(), a.shape)

        r_err, a_err = get_error(meana, meanb)
        if r_err > rtol and a_err > mean_atol:
            error_count += 1
            print("compare mean error", meana, meanb, r_err, a_err, a.name(), a.shape)
    assert error_count == 0

@unittest.skipIf(pass_this_test, f"pass init check, no torch found")
class TestInit(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        jt.seed(0)
        np.random.seed(0)
        torch.manual_seed(0)

    def test_conv(self):
        check(jt.nn.Conv(64, 256, 3), torch.nn.Conv2d(64, 256, 3), rtol=1e-1, mean_atol=1e-3)

    def test_resnet(self):
        check(models.resnet152(), torchvision.models.resnet152(), rtol=2e-2, mean_atol=1e-2)

if __name__ == "__main__":
    unittest.main()
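The deleted test only flags a parameter when both the relative and the absolute error exceed their thresholds; a small illustration with made-up values shows why that pairing matters (near-zero statistics would otherwise trip the relative check):

```python
import numpy as np

def get_error(a, b):
    return np.abs(a-b) / max(np.abs(a), np.abs(b), 1e-5), np.abs(a-b)

# near-zero means: huge relative error, negligible absolute error -> not flagged
r_err, a_err = get_error(1e-7, 3e-7)
print(r_err > 1e-2 and a_err > 1e-5)   # False

# larger statistics: both thresholds exceeded -> flagged
r_err, a_err = get_error(0.10, 0.12)
print(r_err > 1e-2 and a_err > 1e-5)   # True
```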
@@ -41,6 +41,8 @@ static void move_rely(KernelIR* inner_loop, KernelIR* outer_loop, KernelIR* def)
    }
}

// sorder: array that saves the allocation order of "tn" for each kernel function
// sfunc: array of the corresponding kernel function names
static void tune_atomic(Pass* pass, KernelIR* ir, bool is_cuda, int tdim, vector<vector<int>> &sorder, vector<string> &sfunc) {
    LOGvvvv << "tune_atomic" << ir->children;
    vector<string> relys;
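A purely hypothetical picture of how these two parallel arrays relate (names and values invented for illustration): entry `j` of `sfunc` names a generated kernel, and entry `j` of `sorder` lists the order in which that kernel's `tn` dimensions receive thread counts.

```python
# hypothetical contents, for illustration only
sfunc  = ["func_0", "func_1"]        # generated kernel function names
sorder = [[1, 0, 3, 2], [0, 1]]      # per-kernel allocation order of the tn dims
for name, order in zip(sfunc, sorder):
    print(name, "allocates tn in order", order)
```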
@@ -224,6 +226,19 @@ void AtomicTunerPass::run() {
        if (func_call->get_attr("dtype") != "__global__ void") continue;
        tune_atomic(this, func_call.get(), is_cuda, 4, sorder, sfunc);
    }

    // Re-adjust the allocation order of "tn" according to the atomic coverage,
    // preferentially allocating the ranges not covered by the atomic, for example:
    // for (op0_index_t id0 = tid0; id0<range0; id0+=tnum0) {
    //     for (op0_index_t id1 = tid1; id1<range1; id1+=tnum1) {
    //         for (op0_index_t id2 = tid2; id2<range2; id2+=tnum2) {
    //             for (op0_index_t id3 = tid3; id3<range3; id3+=tnum3) {
    //                 ...
    //             }
    //         }
    //         atomicAdd(...);
    //     }
    // }
    // The allocation order of "tn" will be: tn1, tn0, tn3, tn2
    for (uint j=0; j<sfunc.size(); j++)
        for (uint i=0; i<ir->children.size(); i++) {
            auto& func_call = ir->children[i];
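The re-ordering that comment describes can be pictured with a small, hypothetical sketch (not the pass's actual C++ logic): dimensions whose outputs are written without the atomic get thread counts first, innermost first, and the dimensions accumulated through `atomicAdd` follow under the same rule.

```python
def tn_allocation_order(ndim, atomic_dims):
    # dims not accumulated through atomicAdd are preferred (no write contention),
    # innermost (largest index) first; the atomic-covered dims follow, same rule
    preferred = [d for d in range(ndim - 1, -1, -1) if d not in atomic_dims]
    covered   = [d for d in range(ndim - 1, -1, -1) if d in atomic_dims]
    return preferred + covered

# with 4 loop dims and the atomicAdd accumulating over dims 2 and 3,
# the order matches the comment above: tn1, tn0, tn3, tn2
print(tn_allocation_order(4, {2, 3}))   # [1, 0, 3, 2]
```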
@@ -343,9 +343,12 @@ void ParallelPass::run() {
     new_block.swap(*func_call, true);
     auto code = func_def->to_string();
     bool has_atomic = code.find("atomic") != string::npos;
-    if (has_atomic && !fix_thread_num) {
-        func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
-    }
+    if (!fix_thread_num) {
+        if (has_atomic) {
+            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
+        } else {
+            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + ")-2),0)," + S(thread_num) + ")";
+        }
+    }
     ir->remove_all_unused();
 }
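As a rough picture of what the `thread_num` expression does, here is a hedged Python sketch (assuming `NanoVector::get_nbits` behaves like an integer bit length, which is an assumption): the launch width grows with the problem size but is capped, and the atomic variant divides the size by 16 first so kernels dominated by `atomicAdd` get fewer threads and less write contention.

```python
def pick_thread_num(nums, thread_num_cap, has_atomic):
    # bit_length stands in for NanoVector::get_nbits here (an assumption)
    n = nums // 16 if has_atomic else nums
    return min(1 << max(n.bit_length() - 2, 0), thread_num_cap)

print(pick_thread_num(4096, 1024, has_atomic=False))  # 1024 (capped)
print(pick_thread_num(4096, 1024, has_atomic=True))   # 128 (narrower launch)
```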