some comment

Gword 2020-06-03 16:38:47 +08:00
parent b8f67748aa
commit 8a4113ed67
4 changed files with 29 additions and 67 deletions

View File

@@ -134,14 +134,17 @@ class SGD(Optimizer):
class RMSprop(Optimizer):
    """ RMSprop Optimizer.
    Args:
        params(list): parameters of model.
        lr(float): learning rate.
        eps(float): term added to the denominator to avoid division by zero, default 1e-8.
        alpha(float): smoothing constant, default 0.99.
    Example:
    ```
    optimizer = nn.RMSprop(model.parameters(), lr, eps=1e-8, betas=(0.9, 0.999))
    optimizer.step(loss)
    ```
        optimizer = nn.RMSprop(model.parameters(), lr)
        optimizer.step(loss)
    """
    def __init__(self, params, lr=1e-2, eps=1e-8, alpha=0.99):
        # def __init__(self, params, lr, eps=1e-8, betas=(0.9, 0.999), weight_decay=0):
        super().__init__(params, lr)
        self.eps = eps
        self.alpha = alpha

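For context, a minimal usage sketch of the RMSprop interface documented above, assuming the optimizer is exposed as jittor.nn.RMSprop as the docstring suggests; the toy model, input, and loss below are hypothetical placeholders and are not part of this commit.

```python
import jittor as jt
from jittor import nn

model = nn.Linear(10, 2)  # hypothetical toy model, not part of this commit
optimizer = nn.RMSprop(model.parameters(), lr=1e-2, eps=1e-8, alpha=0.99)

x = jt.random((4, 10))            # hypothetical input batch
loss = (model(x) ** 2).mean()     # hypothetical loss
optimizer.step(loss)              # Jittor optimizers take the loss directly
```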
View File

@@ -1,59 +0,0 @@
# ***************************************************************
# Copyright (c) Jittor 2020, Author:
# All Rights Reserved.
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
import jittor as jt
import unittest
import numpy as np
from jittor import models

pass_this_test = False
try:
    jt.dirty_fix_pytorch_runtime_error()
    import torch
    import torchvision
except Exception as e:
    pass_this_test = True

def get_error(a, b):
    return np.abs(a-b) / max(np.abs(a), np.abs(b), 1e-5), np.abs(a-b)

def check(jt_mod, torch_mod, rtol=1e-2, atol=1e-5, mean_atol=1e-5):
    pa = [ p for p in jt_mod.parameters() if not p.is_stop_grad() ]
    pb = list(torch_mod.parameters())
    assert len(pa) == len(pb)
    error_count = 0
    for a, b in zip(pa, pb):
        assert a.shape == list(b.shape), (a.shape, b.shape, a.name())
        stda, meana = np.std(a.numpy()), np.mean(a.numpy())
        stdb, meanb = np.std(b.detach().numpy()), np.mean(b.detach().numpy())
        r_err, a_err = get_error(stda, stdb)
        if r_err > rtol and a_err > atol:
            error_count += 1
            print("compare std error", stda, stdb, r_err, a_err, a.name(), a.shape)
        r_err, a_err = get_error(meana, meanb)
        if r_err > rtol and a_err > mean_atol:
            error_count += 1
            print("compare mean error", meana, meanb, r_err, a_err, a.name(), a.shape)
    assert error_count == 0

@unittest.skipIf(pass_this_test, f"pass init check, no torch found")
class TestInit(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        jt.seed(0)
        np.random.seed(0)
        torch.manual_seed(0)

    def test_conv(self):
        check(jt.nn.Conv(64, 256, 3), torch.nn.Conv2d(64, 256, 3), rtol=1e-1, mean_atol=1e-3)

    def test_resnet(self):
        check(models.resnet152(), torchvision.models.resnet152(), rtol=2e-2, mean_atol=1e-2)

if __name__ == "__main__":
    unittest.main()

View File

@@ -41,6 +41,8 @@ static void move_rely(KernelIR* inner_loop, KernelIR* outer_loop, KernelIR* def)
}
}
// sorder: Array that saves the allocation order of "tn"
// sfunc: Array of function names
static void tune_atomic(Pass* pass, KernelIR* ir, bool is_cuda, int tdim, vector<vector<int>> &sorder, vector<string> &sfunc) {
    LOGvvvv << "tune_atomic" << ir->children;
    vector<string> relys;
@@ -224,6 +226,19 @@ void AtomicTunerPass::run() {
        if (func_call->get_attr("dtype") != "__global__ void") continue;
        tune_atomic(this, func_call.get(), is_cuda, 4, sorder, sfunc);
    }
    // Re-adjust the allocation order of "tn" according to atomic coverage:
    // preferentially allocate the ranges that are not covered by the atomic operation, for example:
    // for (op0_index_t id0 = tid0; id0<range0; id0+=tnum0) {
    //     for (op0_index_t id1 = tid1; id1<range1; id1+=tnum1) {
    //         for (op0_index_t id2 = tid2; id2<range2; id2+=tnum2) {
    //             for (op0_index_t id3 = tid3; id3<range3; id3+=tnum3) {
    //                 ...
    //             }
    //         }
    //         atomicAdd(...);
    //     }
    // }
    // The allocation order of "tn" will be: tn1, tn0, tn3, tn2
    for (uint j=0;j<sfunc.size();j++)
        for (uint i=0; i<ir->children.size(); i++) {
            auto& func_call = ir->children[i];

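To make the reordering rule in the comment above concrete, here is a small sketch in Python (not Jittor's actual C++ implementation) of how the allocation order of "tn" could be derived; tn_alloc_order and atomic_depth are illustrative names only.

```python
def tn_alloc_order(nloops, atomic_depth):
    # atomic_depth: loop level whose body contains the atomicAdd, so every
    # deeper loop level is "covered" by the atomic
    uncovered = list(range(atomic_depth, -1, -1))        # uncovered levels, inner first
    covered = list(range(nloops - 1, atomic_depth, -1))  # covered levels, inner first
    return uncovered + covered

# 4 nested loops with atomicAdd in the body of loop id1 -> covered loops are id2, id3
print([f"tn{i}" for i in tn_alloc_order(4, 1)])  # ['tn1', 'tn0', 'tn3', 'tn2']
```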
View File

@@ -343,9 +343,12 @@ void ParallelPass::run() {
    new_block.swap(*func_call, true);
    auto code = func_def->to_string();
    bool has_atomic = code.find("atomic") != string::npos;
    if (has_atomic && !fix_thread_num) {
        func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
    }
    if (!fix_thread_num) {
        if (has_atomic) {
            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + "/16)-2),0)," + S(thread_num) + ")";
        } else {
            func_call->find_define("thread_num")->attrs["rvalue"] = "min(1<<max((NanoVector::get_nbits(" + nums + ")-2),0)," + S(thread_num) + ")";
        }
    }
    ir->remove_all_unused();
}
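As a rough illustration of the thread_num formula above, the sketch below assumes NanoVector::get_nbits(x) behaves like Python's int.bit_length (the number of bits needed to represent x); pick_thread_num and its arguments are hypothetical names used only for this example.

```python
def get_nbits(x):
    # assumption: NanoVector::get_nbits(x) == bit length of x
    return int(x).bit_length()

def pick_thread_num(total_range, max_thread_num, has_atomic):
    # when the kernel contains atomics, the range is divided by 16 first,
    # which yields a smaller power-of-two thread count
    n = total_range // 16 if has_atomic else total_range
    return min(1 << max(get_nbits(n) - 2, 0), max_thread_num)

print(pick_thread_num(4096, 1024, has_atomic=True))   # 128
print(pick_thread_num(4096, 1024, has_atomic=False))  # 1024
```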