mirror of https://github.com/Jittor/Jittor
add rnncell and grucell
parent 295393b556
commit 46e6a66411
@@ -1460,7 +1460,7 @@ class LSTMCell(jt.Module):
     def execute(self, input, hx=None):
         if hx is None:
-            zeros = jt.zeros(input.shape[0], self.hidden_size, dtype=input.dtype)
+            zeros = jt.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)
             h, c = zeros, zeros
         else:
             h, c = hx
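
The one-line fix above packs the two dimensions into a single shape tuple, leaving dtype as the only keyword argument; judging from the change, the previous form did not construct the intended (batch, hidden_size) variable. A minimal sketch of the corrected call (assuming a working jittor install):

    import jittor as jt

    x = jt.randn((3, 10))        # a (batch, features) input
    hidden_size = 20

    # shape passed as one tuple, as in the + line above
    zeros = jt.zeros((x.shape[0], hidden_size), dtype=x.dtype)
    print(zeros.shape)           # expected: [3, 20]
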
@@ -1481,6 +1481,120 @@ class LSTMCell(jt.Module):
         return h, c
+
+class RNNCell(jt.Module):
+    def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh"):
+        ''' An Elman RNN cell with tanh or ReLU non-linearity.
+
+        :param input_size: The number of expected features in the input
+        :type input_size: int
+
+        :param hidden_size: The number of features in the hidden state
+        :type hidden_size: int
+
+        :param bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True.
+        :type bias: bool, optional
+
+        :param nonlinearity: The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'.
+        :type nonlinearity: str, optional
+
+        Example:
+
+        >>> rnn = nn.RNNCell(10, 20)
+        >>> input = jt.randn((6, 3, 10))
+        >>> hx = jt.randn((3, 20))
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx = rnn(input[i], hx)
+        ...     output.append(hx)
+        '''
+        super().__init__()
+
+        self.hidden_size = hidden_size
+        self.bias = bias
+        self.nonlinearity = nonlinearity
+
+        k = math.sqrt(1 / hidden_size)
+        self.weight_ih = init.uniform((hidden_size, input_size), 'float32', -k, k)
+        self.weight_hh = init.uniform((hidden_size, hidden_size), 'float32', -k, k)
+
+        if bias:
+            self.bias_ih = init.uniform((hidden_size,), 'float32', -k, k)
+            self.bias_hh = init.uniform((hidden_size,), 'float32', -k, k)
+
+    def execute(self, input, hx=None):
+        if hx is None:
+            hx = jt.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)
+
+        y = matmul_transpose(input, self.weight_ih) + matmul_transpose(hx, self.weight_hh)
+
+        if self.bias:
+            y = y + self.bias_ih + self.bias_hh
+
+        if self.nonlinearity == 'tanh':
+            y = y.tanh()
+        elif self.nonlinearity == 'relu':
+            y = relu(y)
+        else:
+            raise RuntimeError("Unknown nonlinearity: {}".format(self.nonlinearity))
+
+        return y
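
For reference, the cell above implements the standard Elman update h' = tanh(x @ W_ih.T + b_ih + h @ W_hh.T + b_hh), with matmul_transpose multiplying by the transposed weight. A quick NumPy cross-check of that formula against the Jittor cell (a sketch, assuming jittor and numpy are installed and reading parameters as NumPy arrays via .data):

    import numpy as np
    import jittor as jt
    from jittor import nn

    cell = nn.RNNCell(10, 20)                      # tanh non-linearity by default
    x = np.random.rand(3, 10).astype(np.float32)   # (batch, input_size)
    h = np.random.rand(3, 20).astype(np.float32)   # (batch, hidden_size)

    jy = cell(jt.array(x), jt.array(h)).data       # run the Jittor cell

    # recompute with plain NumPy from the cell's own parameters
    ref = np.tanh(x @ cell.weight_ih.data.T + cell.bias_ih.data
                  + h @ cell.weight_hh.data.T + cell.bias_hh.data)
    assert np.allclose(jy, ref, rtol=1e-3, atol=1e-6)
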
+class GRUCell(jt.Module):
+    def __init__(self, input_size, hidden_size, bias=True):
+        ''' A gated recurrent unit (GRU) cell.
+
+        :param input_size: The number of expected features in the input
+        :type input_size: int
+
+        :param hidden_size: The number of features in the hidden state
+        :type hidden_size: int
+
+        :param bias: If False, then the layer does not use bias weights b_ih and b_hh. Default: True.
+        :type bias: bool, optional
+
+        Example:
+
+        >>> rnn = nn.GRUCell(10, 20)
+        >>> input = jt.randn((6, 3, 10))
+        >>> hx = jt.randn((3, 20))
+        >>> output = []
+        >>> for i in range(6):
+        ...     hx = rnn(input[i], hx)
+        ...     output.append(hx)
+        '''
+        super().__init__()
+
+        self.hidden_size = hidden_size
+        self.bias = bias
+
+        k = math.sqrt(1 / hidden_size)
+        self.weight_ih = init.uniform((3*hidden_size, input_size), 'float32', -k, k)
+        self.weight_hh = init.uniform((3*hidden_size, hidden_size), 'float32', -k, k)
+
+        if bias:
+            self.bias_ih = init.uniform((3*hidden_size,), 'float32', -k, k)
+            self.bias_hh = init.uniform((3*hidden_size,), 'float32', -k, k)
+
+    def execute(self, input, hx=None):
+        if hx is None:
+            hx = jt.zeros((input.shape[0], self.hidden_size), dtype=input.dtype)
+
+        gi = matmul_transpose(input, self.weight_ih)
+        gh = matmul_transpose(hx, self.weight_hh)
+
+        if self.bias:
+            gi += self.bias_ih
+            gh += self.bias_hh
+
+        i_r, i_i, i_n = gi.chunk(3, 1)
+        h_r, h_i, h_n = gh.chunk(3, 1)
+
+        resetgate = jt.sigmoid(i_r + h_r)
+        inputgate = jt.sigmoid(i_i + h_i)
+        newgate = jt.tanh(i_n + resetgate * h_n)
+        hy = newgate + inputgate * (hx - newgate)
+        return hy
+
 class RNNBase(Module):
     def __init__(self, mode: str, input_size: int, hidden_size: int,
                  num_layers: int = 1, bias: bool = True, batch_first: bool = False,
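
A note on the GRUCell arithmetic: with r = sigmoid(i_r + h_r), z = sigmoid(i_i + h_i) and n = tanh(i_n + r * h_n), the final line hy = newgate + inputgate * (hx - newgate) is an algebraic rearrangement of the textbook GRU update h' = (1 - z) * n + z * h. A tiny NumPy check of that identity:

    import numpy as np

    rng = np.random.default_rng(0)
    n, z, h = rng.random(5), rng.random(5), rng.random(5)

    # committed form vs. textbook form of the GRU state update
    assert np.allclose(n + z * (h - n), (1 - z) * n + z * h)
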
@@ -119,6 +119,60 @@ class TestRNN(unittest.TestCase):
         j_output = j_output.data
         assert np.allclose(t_output, j_output, rtol=1e-03, atol=1e-06)
+
+    def test_rnn_cell(self):
+        np_h0 = torch.randn(3, 20).numpy()
+
+        t_rnn = tnn.RNNCell(10, 20)
+        input = torch.randn(2, 3, 10)
+        h0 = torch.from_numpy(np_h0)
+        t_output = []
+        for i in range(input.size()[0]):
+            h0 = t_rnn(input[i], h0)
+            t_output.append(h0)
+        t_output = torch.stack(t_output, dim=0)
+
+        j_rnn = nn.RNNCell(10, 20)
+        j_rnn.load_state_dict(t_rnn.state_dict())
+
+        input = jt.float32(input.numpy())
+        h0 = jt.float32(np_h0)
+        j_output = []
+        for i in range(input.size()[0]):
+            h0 = j_rnn(input[i], h0)
+            j_output.append(h0)
+        j_output = jt.stack(j_output, dim=0)
+
+        t_output = t_output.detach().numpy()
+        j_output = j_output.data
+        assert np.allclose(t_output, j_output, rtol=1e-03, atol=1e-06)
+
+    def test_gru_cell(self):
+        np_h0 = torch.randn(3, 20).numpy()
+
+        t_rnn = tnn.GRUCell(10, 20)
+        input = torch.randn(2, 3, 10)
+        h0 = torch.from_numpy(np_h0)
+        t_output = []
+        for i in range(input.size()[0]):
+            h0 = t_rnn(input[i], h0)
+            t_output.append(h0)
+        t_output = torch.stack(t_output, dim=0)
+
+        j_rnn = nn.GRUCell(10, 20)
+        j_rnn.load_state_dict(t_rnn.state_dict())
+
+        input = jt.float32(input.numpy())
+        h0 = jt.float32(np_h0)
+        j_output = []
+        for i in range(input.size()[0]):
+            h0 = j_rnn(input[i], h0)
+            j_output.append(h0)
+        j_output = jt.stack(j_output, dim=0)
+
+        t_output = t_output.detach().numpy()
+        j_output = j_output.data
+        assert np.allclose(t_output, j_output, rtol=1e-03, atol=1e-06)
+
     def test_lstm(self):
         h0 = np.random.rand(1, 24, 200).astype(np.float32)
         c0 = np.random.rand(1, 24, 200).astype(np.float32)
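
The two new tests use PyTorch as the reference implementation. A torch-free check in the same spirit (a sketch, assuming only jittor and numpy): run the Jittor GRUCell once and recompute the update by hand from the cell's own parameters, mirroring execute() above.

    import numpy as np
    import jittor as jt
    from jittor import nn

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    cell = nn.GRUCell(10, 20)
    x = np.random.rand(3, 10).astype(np.float32)
    h = np.random.rand(3, 20).astype(np.float32)

    jy = cell(jt.array(x), jt.array(h)).data

    # NumPy re-implementation of GRUCell.execute
    gi = x @ cell.weight_ih.data.T + cell.bias_ih.data
    gh = h @ cell.weight_hh.data.T + cell.bias_hh.data
    i_r, i_i, i_n = np.split(gi, 3, axis=1)
    h_r, h_i, h_n = np.split(gh, 3, axis=1)
    r = sigmoid(i_r + h_r)
    z = sigmoid(i_i + h_i)
    n = np.tanh(i_n + r * h_n)
    assert np.allclose(jy, n + z * (h - n), rtol=1e-3, atol=1e-6)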