From 6e08781df42059bfe5731715d6acf32d33661347 Mon Sep 17 00:00:00 2001 From: zwy <576825820@qq.com> Date: Sun, 19 Apr 2020 22:01:32 +0800 Subject: [PATCH] models & converter & flatten/view/permute/adapool --- python/jittor/__init__.py | 29 +- python/jittor/models/__init__.py | 9 +- python/jittor/models/alexnet.py | 53 ++ python/jittor/models/googlenet.py | 143 ++++ python/jittor/models/inception.py | 268 +++++++ python/jittor/models/mnasnet.py | 99 +++ python/jittor/models/mobilenet.py | 88 +++ python/jittor/models/resnet.py | 281 +++---- python/jittor/models/shufflenetv2.py | 106 +++ python/jittor/models/squeezenet.py | 90 +++ python/jittor/models/vgg.py | 33 +- python/jittor/nn.py | 53 +- python/jittor/pool.py | 33 +- python/jittor/test/test_models.py | 116 +++ python/jittor/test/test_pytorch_converter.py | 514 ++++++------- python/jittor/test/test_pytorch_converter2.py | 264 ------- python/jittor/utils/pytorch_converter.py | 688 +++++++++++------- python/jittor/utils/pytorch_converter2.py | 117 --- src/var_holder.h | 13 + 19 files changed, 1818 insertions(+), 1179 deletions(-) create mode 100644 python/jittor/models/alexnet.py create mode 100644 python/jittor/models/googlenet.py create mode 100644 python/jittor/models/inception.py create mode 100644 python/jittor/models/mnasnet.py create mode 100644 python/jittor/models/mobilenet.py create mode 100644 python/jittor/models/shufflenetv2.py create mode 100644 python/jittor/models/squeezenet.py create mode 100644 python/jittor/test/test_models.py delete mode 100644 python/jittor/test/test_pytorch_converter2.py delete mode 100644 python/jittor/utils/pytorch_converter2.py diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index 6598fd5e..3b253556 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -340,6 +340,32 @@ def detach(x): return x.clone().stop_grad().clone() Var.detach = detach +def view(x, *shape): + if isinstance(shape[0], tuple): + shape = shape[0] + return x.reshape(shape) +Var.view = view + +def permute(x, *dim): + if isinstance(dim[0], tuple): + dim = dim[0] + return transpose(x, dim) +Var.permute = permute + +def flatten(input, start_dim=0, end_dim=-1): + in_shape = input.shape + start_dim = len(in_shape) + start_dim if start_dim < 0 else start_dim + end_dim = len(in_shape) + end_dim if end_dim < 0 else end_dim + assert end_dim > start_dim, "end_dim should be larger than start_dim for flatten function" + out_shape = [] + for i in range(0,start_dim,1): out_shape.append(in_shape[i]) + dims = 1 + for i in range(start_dim, end_dim+1, 1): dims *= in_shape[i] + out_shape.append(dims) + for i in range(end_dim+1,len(in_shape),1): out_shape.append(in_shape[i]) + return input.reshape(out_shape) +Var.flatten = flatten + def detach_inplace(x): return x.swap(x.stop_grad().clone()) Var.start_grad = Var.detach_inplace = detach_inplace @@ -537,7 +563,8 @@ class Module: end = 1 break if end ==1: - print(f'init {key} fail ...') + # print(f'init {key} fail ...') + pass else: # print(f'init {key} success ...') if isinstance(params[key], np.ndarray) or isinstance(params[key], list): diff --git a/python/jittor/models/__init__.py b/python/jittor/models/__init__.py index ee464188..f604a905 100644 --- a/python/jittor/models/__init__.py +++ b/python/jittor/models/__init__.py @@ -1,2 +1,9 @@ from . import resnet -from . import vgg \ No newline at end of file +from . import vgg +from . import alexnet +from . import squeezenet +from . import inception +from . import googlenet +from . import mobilenet +from . 
import mnasnet +from . import shufflenetv2 \ No newline at end of file diff --git a/python/jittor/models/alexnet.py b/python/jittor/models/alexnet.py new file mode 100644 index 00000000..afe90854 --- /dev/null +++ b/python/jittor/models/alexnet.py @@ -0,0 +1,53 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. +import jittor as jt +import jittor.nn as nn + +__all__ = ['AlexNet', 'alexnet'] + +class AlexNet(nn.Module): + + def __init__(self, num_classes=1000): + super(AlexNet, self).__init__() + self.features = nn.Sequential( + nn.Conv(3, 64, kernel_size=11, stride=4, padding=2), + nn.Relu(), + nn.Pool(kernel_size=3, stride=2, op='maximum'), + nn.Conv(64, 192, kernel_size=5, padding=2), + nn.Relu(), nn.Pool(kernel_size=3, stride=2, op='maximum'), + nn.Conv(192, 384, kernel_size=3, padding=1), + nn.Relu(), + nn.Conv(384, 256, kernel_size=3, padding=1), + nn.Relu(), + nn.Conv(256, 256, kernel_size=3, padding=1), + nn.Relu(), + nn.Pool(kernel_size=3, stride=2, op='maximum') + ) + self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) + self.classifier = nn.Sequential( + nn.Dropout(), + nn.Linear(((256 * 6) * 6), 4096), + nn.Relu(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.Relu(), + nn.Linear(4096, num_classes) + ) + + def execute(self, x): + x = self.features(x) + x = self.avgpool(x) + x = jt.reshape(x, (x.shape[0], (- 1))) + x = self.classifier(x) + return x + +def alexnet(**kwargs): + model = AlexNet(**kwargs) + return model diff --git a/python/jittor/models/googlenet.py b/python/jittor/models/googlenet.py new file mode 100644 index 00000000..26f4b933 --- /dev/null +++ b/python/jittor/models/googlenet.py @@ -0,0 +1,143 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. 
+import jittor as jt +from jittor import nn + +__all__ = ['GoogLeNet', 'googlenet'] + +def googlenet(**kwargs): + return GoogLeNet(**kwargs) + +class GoogLeNet(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=True, init_weights=True, blocks=None): + super(GoogLeNet, self).__init__() + if (blocks is None): + blocks = [BasicConv2d, Inception, InceptionAux] + assert (len(blocks) == 3) + conv_block = blocks[0] + inception_block = blocks[1] + inception_aux_block = blocks[2] + self.aux_logits = aux_logits + self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3) + self.maxpool1 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum') + self.conv2 = conv_block(64, 64, kernel_size=1) + self.conv3 = conv_block(64, 192, kernel_size=3, padding=1) + self.maxpool2 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum') + self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32) + self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64) + self.maxpool3 = nn.Pool(3, stride=2, ceil_mode=True, op='maximum') + self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64) + self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64) + self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64) + self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64) + self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128) + self.maxpool4 = nn.Pool(2, stride=2, ceil_mode=True, op='maximum') + self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128) + self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128) + if aux_logits: + self.aux1 = inception_aux_block(512, num_classes) + self.aux2 = inception_aux_block(528, num_classes) + else: + self.aux1 = None + self.aux2 = None + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.dropout = nn.Dropout(0.2) + self.fc = nn.Linear(1024, num_classes) + + def _forward(self, x): + x = self.conv1(x) + x = self.maxpool1(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.maxpool2(x) + x = self.inception3a(x) + x = self.inception3b(x) + x = self.maxpool3(x) + x = self.inception4a(x) + if (self.aux1 is not None): + aux1 = self.aux1(x) + x = self.inception4b(x) + x = self.inception4c(x) + x = self.inception4d(x) + if (self.aux2 is not None): + aux2 = self.aux2(x) + x = self.inception4e(x) + x = self.maxpool4(x) + x = self.inception5a(x) + x = self.inception5b(x) + x = self.avgpool(x) + + x = jt.reshape(x, (x.shape[0], (- 1))) + x = self.dropout(x) + x = self.fc(x) + return (x, aux2, aux1) + + def eager_outputs(self, x, aux2, aux1): + return x + + def execute(self, x): + (x, aux1, aux2) = self._forward(x) + aux_defined = (self.aux_logits) + return self.eager_outputs(x, aux2, aux1) + +class Inception(nn.Module): + + def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, conv_block=None): + super(Inception, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1) + self.branch2 = nn.Sequential(conv_block(in_channels, ch3x3red, kernel_size=1), conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)) + self.branch3 = nn.Sequential(conv_block(in_channels, ch5x5red, kernel_size=1), conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1)) + self.branch4 = nn.Sequential(nn.Pool(kernel_size=3, stride=1, padding=1, ceil_mode=True, op='maximum'), conv_block(in_channels, pool_proj, kernel_size=1)) + + def _forward(self, x): + branch1 = self.branch1(x) + branch2 = self.branch2(x) + branch3 = 
self.branch3(x) + branch4 = self.branch4(x) + outputs = [branch1, branch2, branch3, branch4] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.conv = conv_block(in_channels, 128, kernel_size=1) + self.fc1 = nn.Linear(2048, 1024) + self.fc2 = nn.Linear(1024, num_classes) + + def execute(self, x): + x = nn.AdaptiveAvgPool2d(4)(x) + x = self.conv(x) + x = jt.reshape(x, (x.shape[0], (- 1))) + x = nn.relu(self.fc1(x)) + x = nn.Dropout(0.7)(x) + x = self.fc2(x) + return x + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm(out_channels, eps=0.001) + + def execute(self, x): + x = self.conv(x) + x = self.bn(x) + return nn.relu(x) diff --git a/python/jittor/models/inception.py b/python/jittor/models/inception.py new file mode 100644 index 00000000..7fd154f8 --- /dev/null +++ b/python/jittor/models/inception.py @@ -0,0 +1,268 @@ + +import jittor as jt +from jittor import nn +__all__ = ['Inception3', 'inception_v3'] + +def inception_v3(pretrained=False, progress=True, **kwargs): + return Inception3(**kwargs) + +class Inception3(nn.Module): + + def __init__(self, num_classes=1000, aux_logits=True, inception_blocks=None, init_weights=True): + super(Inception3, self).__init__() + if (inception_blocks is None): + inception_blocks = [BasicConv2d, InceptionA, InceptionB, InceptionC, InceptionD, InceptionE, InceptionAux] + assert (len(inception_blocks) == 7) + conv_block = inception_blocks[0] + inception_a = inception_blocks[1] + inception_b = inception_blocks[2] + inception_c = inception_blocks[3] + inception_d = inception_blocks[4] + inception_e = inception_blocks[5] + inception_aux = inception_blocks[6] + self.aux_logits = aux_logits + self.Conv2d_1a_3x3 = conv_block(3, 32, kernel_size=3, stride=2) + self.Conv2d_2a_3x3 = conv_block(32, 32, kernel_size=3) + self.Conv2d_2b_3x3 = conv_block(32, 64, kernel_size=3, padding=1) + self.Conv2d_3b_1x1 = conv_block(64, 80, kernel_size=1) + self.Conv2d_4a_3x3 = conv_block(80, 192, kernel_size=3) + self.Mixed_5b = inception_a(192, pool_features=32) + self.Mixed_5c = inception_a(256, pool_features=64) + self.Mixed_5d = inception_a(288, pool_features=64) + self.Mixed_6a = inception_b(288) + self.Mixed_6b = inception_c(768, channels_7x7=128) + self.Mixed_6c = inception_c(768, channels_7x7=160) + self.Mixed_6d = inception_c(768, channels_7x7=160) + self.Mixed_6e = inception_c(768, channels_7x7=192) + if aux_logits: + self.AuxLogits = inception_aux(768, num_classes) + self.Mixed_7a = inception_d(768) + self.Mixed_7b = inception_e(1280) + self.Mixed_7c = inception_e(2048) + self.fc = nn.Linear(2048, num_classes) + + def _forward(self, x): + x = self.Conv2d_1a_3x3(x) + x = self.Conv2d_2a_3x3(x) + x = self.Conv2d_2b_3x3(x) + x = nn.pool(x, 3, "maximum", stride=2) + x = self.Conv2d_3b_1x1(x) + x = self.Conv2d_4a_3x3(x) + x = nn.pool(x, 3, "maximum", stride=2) + x = self.Mixed_5b(x) + x = self.Mixed_5c(x) + x = self.Mixed_5d(x) + x = self.Mixed_6a(x) + x = self.Mixed_6b(x) + x = self.Mixed_6c(x) + x = self.Mixed_6d(x) + x = self.Mixed_6e(x) + aux_defined = self.aux_logits + if aux_defined: + aux = self.AuxLogits(x) + else: + aux = None + x = 
self.Mixed_7a(x) + x = self.Mixed_7b(x) + x = self.Mixed_7c(x) + x = nn.AdaptiveAvgPool2d(1)(x) + x = nn.Dropout()(x) + x = jt.reshape(x, (x.shape[0], (- 1))) + x = self.fc(x) + return (x, aux) + + def eager_outputs(self, x, aux): + return x + + def execute(self, x): + (x, aux) = self._forward(x) + aux_defined = self.aux_logits + return self.eager_outputs(x, aux) + +class InceptionA(nn.Module): + + def __init__(self, in_channels, pool_features, conv_block=None): + super(InceptionA, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 64, kernel_size=1) + self.branch5x5_1 = conv_block(in_channels, 48, kernel_size=1) + self.branch5x5_2 = conv_block(48, 64, kernel_size=5, padding=2) + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, padding=1) + self.branch_pool = conv_block(in_channels, pool_features, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + branch5x5 = self.branch5x5_1(x) + branch5x5 = self.branch5x5_2(branch5x5) + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + branch_pool = nn.pool(x, 3, "mean", stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionB(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionB, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch3x3 = conv_block(in_channels, 384, kernel_size=3, stride=2) + self.branch3x3dbl_1 = conv_block(in_channels, 64, kernel_size=1) + self.branch3x3dbl_2 = conv_block(64, 96, kernel_size=3, padding=1) + self.branch3x3dbl_3 = conv_block(96, 96, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3(x) + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + branch_pool = nn.pool(x, 3, "maximum", stride=2) + outputs = [branch3x3, branch3x3dbl, branch_pool] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionC(nn.Module): + + def __init__(self, in_channels, channels_7x7, conv_block=None): + super(InceptionC, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 192, kernel_size=1) + c7 = channels_7x7 + self.branch7x7_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7_2 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7_3 = conv_block(c7, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_1 = conv_block(in_channels, c7, kernel_size=1) + self.branch7x7dbl_2 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_3 = conv_block(c7, c7, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7dbl_4 = conv_block(c7, c7, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7dbl_5 = conv_block(c7, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + branch7x7 = self.branch7x7_1(x) + branch7x7 = self.branch7x7_2(branch7x7) + branch7x7 = 
self.branch7x7_3(branch7x7) + branch7x7dbl = self.branch7x7dbl_1(x) + branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl) + branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl) + branch_pool = nn.pool(x, kernel_size=3, op="mean", stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionD(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionD, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch3x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch3x3_2 = conv_block(192, 320, kernel_size=3, stride=2) + self.branch7x7x3_1 = conv_block(in_channels, 192, kernel_size=1) + self.branch7x7x3_2 = conv_block(192, 192, kernel_size=(1, 7), padding=(0, 3)) + self.branch7x7x3_3 = conv_block(192, 192, kernel_size=(7, 1), padding=(3, 0)) + self.branch7x7x3_4 = conv_block(192, 192, kernel_size=3, stride=2) + + def _forward(self, x): + branch3x3 = self.branch3x3_1(x) + branch3x3 = self.branch3x3_2(branch3x3) + branch7x7x3 = self.branch7x7x3_1(x) + branch7x7x3 = self.branch7x7x3_2(branch7x7x3) + branch7x7x3 = self.branch7x7x3_3(branch7x7x3) + branch7x7x3 = self.branch7x7x3_4(branch7x7x3) + branch_pool = nn.pool(x, kernel_size=3, op="maximum", stride=2) + outputs = [branch3x3, branch7x7x3, branch_pool] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionE(nn.Module): + + def __init__(self, in_channels, conv_block=None): + super(InceptionE, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.branch1x1 = conv_block(in_channels, 320, kernel_size=1) + self.branch3x3_1 = conv_block(in_channels, 384, kernel_size=1) + self.branch3x3_2a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3_2b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + self.branch3x3dbl_1 = conv_block(in_channels, 448, kernel_size=1) + self.branch3x3dbl_2 = conv_block(448, 384, kernel_size=3, padding=1) + self.branch3x3dbl_3a = conv_block(384, 384, kernel_size=(1, 3), padding=(0, 1)) + self.branch3x3dbl_3b = conv_block(384, 384, kernel_size=(3, 1), padding=(1, 0)) + self.branch_pool = conv_block(in_channels, 192, kernel_size=1) + + def _forward(self, x): + branch1x1 = self.branch1x1(x) + branch3x3 = self.branch3x3_1(x) + branch3x3 = [self.branch3x3_2a(branch3x3), self.branch3x3_2b(branch3x3)] + branch3x3 = jt.contrib.concat(branch3x3, dim=1) + branch3x3dbl = self.branch3x3dbl_1(x) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = [self.branch3x3dbl_3a(branch3x3dbl), self.branch3x3dbl_3b(branch3x3dbl)] + branch3x3dbl = jt.contrib.concat(branch3x3dbl, dim=1) + branch_pool = nn.pool(x, kernel_size=3, op="mean", stride=1, padding=1) + branch_pool = self.branch_pool(branch_pool) + outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool] + return outputs + + def execute(self, x): + outputs = self._forward(x) + return jt.contrib.concat(outputs, dim=1) + +class InceptionAux(nn.Module): + + def __init__(self, in_channels, num_classes, conv_block=None): + super(InceptionAux, self).__init__() + if (conv_block is None): + conv_block = BasicConv2d + self.conv0 = conv_block(in_channels, 128, kernel_size=1) + self.conv1 = 
conv_block(128, 768, kernel_size=5) + self.conv1.stddev = 0.01 + self.fc = nn.Linear(768, num_classes) + self.fc.stddev = 0.001 + + def execute(self, x): + x = nn.pool(x, kernel_size=5, op="mean", stride=3) + x = self.conv0(x) + x = self.conv1(x) + + + x = nn.AdaptiveAvgPool2d(1)(x) + x = jt.reshape(x, (x.shape[0], (- 1))) + x = self.fc(x) + return x + +class BasicConv2d(nn.Module): + + def __init__(self, in_channels, out_channels, **kwargs): + super(BasicConv2d, self).__init__() + self.conv = nn.Conv(in_channels, out_channels, bias=False, **kwargs) + self.bn = nn.BatchNorm(out_channels, eps=0.001) + + def execute(self, x): + x = self.conv(x) + x = self.bn(x) + return nn.relu(x) diff --git a/python/jittor/models/mnasnet.py b/python/jittor/models/mnasnet.py new file mode 100644 index 00000000..be8ea873 --- /dev/null +++ b/python/jittor/models/mnasnet.py @@ -0,0 +1,99 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. + +import jittor as jt +from jittor import nn +__all__ = ['MNASNet', 'mnasnet0_5', 'mnasnet0_75', 'mnasnet1_0', 'mnasnet1_3'] +_BN_MOMENTUM = (1 - 0.9997) + +class _InvertedResidual(nn.Module): + + def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, bn_momentum=0.1): + super(_InvertedResidual, self).__init__() + assert (stride in [1, 2]) + assert (kernel_size in [3, 5]) + mid_ch = (in_ch * expansion_factor) + self.apply_residual = ((in_ch == out_ch) and (stride == 1)) + self.layers = nn.Sequential(nn.Conv(in_ch, mid_ch, 1, bias=False), nn.BatchNorm(mid_ch, momentum=bn_momentum), nn.Relu(), nn.Conv(mid_ch, mid_ch, kernel_size, padding=(kernel_size // 2), stride=stride, groups=mid_ch, bias=False), nn.BatchNorm(mid_ch, momentum=bn_momentum), nn.Relu(), nn.Conv(mid_ch, out_ch, 1, bias=False), nn.BatchNorm(out_ch, momentum=bn_momentum)) + + def execute(self, input): + if self.apply_residual: + return (self.layers(input) + input) + else: + return self.layers(input) + +def _stack(in_ch, out_ch, kernel_size, stride, exp_factor, repeats, bn_momentum): + assert (repeats >= 1) + first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, bn_momentum=bn_momentum) + remaining = [] + for _ in range(1, repeats): + remaining.append(_InvertedResidual(out_ch, out_ch, kernel_size, 1, exp_factor, bn_momentum=bn_momentum)) + return nn.Sequential(first, *remaining) + +def _round_to_multiple_of(val, divisor, round_up_bias=0.9): + assert (0.0 < round_up_bias < 1.0) + new_val = max(divisor, ((int((val + (divisor / 2))) // divisor) * divisor)) + return (new_val if (new_val >= (round_up_bias * val)) else (new_val + divisor)) + +def _get_depths(alpha): + depths = [24, 40, 80, 96, 192, 320] + return [_round_to_multiple_of((depth * alpha), 8) for depth in depths] + +class MNASNet(nn.Module): + _version = 2 + + def __init__(self, alpha, num_classes=1000, dropout=0.2): + super(MNASNet, self).__init__() + assert (alpha > 0.0) + self.alpha = alpha + self.num_classes = num_classes + depths = _get_depths(alpha) + layers = [ + nn.Conv(3, 32, 3, padding=1, stride=2, bias=False), + nn.BatchNorm(32, momentum=_BN_MOMENTUM), + nn.Relu(), + nn.Conv(32, 32, 3, padding=1, stride=1, groups=32, bias=False), + 
nn.BatchNorm(32, momentum=_BN_MOMENTUM), + nn.Relu(), + nn.Conv(32, 16, 1, padding=0, stride=1, bias=False), + nn.BatchNorm(16, momentum=_BN_MOMENTUM), + _stack(16, depths[0], 3, 2, 3, 3, _BN_MOMENTUM), + _stack(depths[0], depths[1], 5, 2, 3, 3, _BN_MOMENTUM), + _stack(depths[1], depths[2], 5, 2, 6, 3, _BN_MOMENTUM), + _stack(depths[2], depths[3], 3, 1, 6, 2, _BN_MOMENTUM), + _stack(depths[3], depths[4], 5, 2, 6, 4, _BN_MOMENTUM), + _stack(depths[4], depths[5], 3, 1, 6, 1, _BN_MOMENTUM), + nn.Conv(depths[5], 1280, 1, padding=0, stride=1, bias=False), + nn.BatchNorm(1280, momentum=_BN_MOMENTUM), + nn.Relu() + ] + self.layers = nn.Sequential(*layers) + self.classifier = nn.Sequential(nn.Dropout(p=dropout), nn.Linear(1280, num_classes)) + + def execute(self, x): + x = self.layers(x) + x = x.mean([2, 3]) + return self.classifier(x) + +def mnasnet0_5(**kwargs): + model = MNASNet(0.5, **kwargs) + return model + +def mnasnet0_75(**kwargs): + model = MNASNet(0.75, **kwargs) + return model + +def mnasnet1_0(**kwargs): + model = MNASNet(1.0, **kwargs) + return model + +def mnasnet1_3(**kwargs): + model = MNASNet(1.3, **kwargs) + return model diff --git a/python/jittor/models/mobilenet.py b/python/jittor/models/mobilenet.py new file mode 100644 index 00000000..305c4f86 --- /dev/null +++ b/python/jittor/models/mobilenet.py @@ -0,0 +1,88 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. + +import jittor as jt +from jittor import init +from jittor import nn +__all__ = ['MobileNetV2', 'mobilenet_v2'] + +def _make_divisible(v, divisor, min_value=None): + if (min_value is None): + min_value = divisor + new_v = max(min_value, ((int((v + (divisor / 2))) // divisor) * divisor)) + if (new_v < (0.9 * v)): + new_v += divisor + return new_v + +class ConvBNReLU(nn.Sequential): + + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = ((kernel_size - 1) // 2) + super(ConvBNReLU, self).__init__(nn.Conv(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), nn.BatchNorm(out_planes), nn.ReLU6()) + +class InvertedResidual(nn.Module): + + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert (stride in [1, 2]) + hidden_dim = int(round((inp * expand_ratio))) + self.use_res_connect = ((self.stride == 1) and (inp == oup)) + layers = [] + if (expand_ratio != 1): + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), nn.Conv(hidden_dim, oup, 1, 1, 0, bias=False), nn.BatchNorm(oup)]) + self.conv = nn.Sequential(*layers) + + def execute(self, x): + if self.use_res_connect: + return (x + self.conv(x)) + else: + return self.conv(x) + +class MobileNetV2(nn.Module): + + def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8, block=None): + super(MobileNetV2, self).__init__() + if (block is None): + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + if (inverted_residual_setting is None): + inverted_residual_setting = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 
2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1]] + if ((len(inverted_residual_setting) == 0) or (len(inverted_residual_setting[0]) != 4)): + raise ValueError('inverted_residual_setting should be non-empty or a 4-element list, got {}'.format(inverted_residual_setting)) + input_channel = _make_divisible((input_channel * width_mult), round_nearest) + self.last_channel = _make_divisible((last_channel * max(1.0, width_mult)), round_nearest) + features = [ConvBNReLU(3, input_channel, stride=2)] + for (t, c, n, s) in inverted_residual_setting: + output_channel = _make_divisible((c * width_mult), round_nearest) + for i in range(n): + stride = (s if (i == 0) else 1) + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + self.features = nn.Sequential(*features) + self.classifier = nn.Sequential(nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes)) + + def _forward_impl(self, x): + x = self.features(x) + x = nn.AdaptiveAvgPool2d(1)(x) + x = jt.reshape(x, (x.shape[0], -1)) + x = self.classifier(x) + return x + + def execute(self, x): + return self._forward_impl(x) + +def mobilenet_v2(): + model = MobileNetV2() + return model + diff --git a/python/jittor/models/resnet.py b/python/jittor/models/resnet.py index cb4fbe7f..eda56056 100644 --- a/python/jittor/models/resnet.py +++ b/python/jittor/models/resnet.py @@ -7,200 +7,127 @@ # This file is subject to the terms and conditions defined in # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** +# This model is generated by pytorch converter. import jittor as jt from jittor import nn -from jittor import Module -@jt.var_scope('basic_block') -def basic_block(x, is_train, in_planes, out_planes, stride = 1): - identity = x - x = nn.conv(x, in_planes, out_planes, 3, 1, stride) - x = nn.batch_norm(x, is_train) - x = nn.relu(x) - x = nn.conv(x, out_planes, out_planes, 3, 1) - x = nn.batch_norm(x, is_train) - if in_planes!=out_planes: - identity = nn.conv(identity, in_planes, out_planes, 1, 0, stride) - identity = nn.batch_norm(identity, is_train) - x = x+identity - x = nn.relu(x) - return x +__all__ = ['ResNet', 'Resnet18', 'Resnet34', 'Resnet50', 'Resnet101', 'Resnet152', 'Resnext50_32x4d', 'Resnext101_32x8d', 'Wide_resnet50_2', 'Wide_resnet101_2'] -@jt.var_scope('make_layer') -def make_layer(x, is_train, out_planes, blocks, layer_in_planes, stride = 1): - x = basic_block(x, is_train, layer_in_planes, out_planes, stride) - layer_in_planes = out_planes +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + return nn.Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, groups=groups, bias=False, dilation=dilation) - for i in range(1, blocks): - x = basic_block(x, is_train, layer_in_planes, out_planes) - return x, layer_in_planes +def conv1x1(in_planes, out_planes, stride=1): + return nn.Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) -@jt.var_scope('bottleneck_block') -def bottleneck_block(x, is_train, in_planes, out_planes, stride = 1): - expansion = 4 - width = out_planes - identity = x - - x = nn.conv(x, in_planes, width, 1, 0) - x = nn.batch_norm(x, is_train) - x = nn.relu(x) - - x = nn.conv(x, width, width, 3, 1, stride) - x = nn.batch_norm(x, is_train) - x = nn.relu(x) - - x = nn.conv(x, width, out_planes * expansion, 1, 0) - x = nn.batch_norm(x, is_train) - - if in_planes != 
out_planes * expansion: - identity = nn.conv(identity, in_planes, out_planes * expansion, 1, 0, stride) - identity = nn.batch_norm(identity, is_train) - - x = x+identity - x = nn.relu(x) - return x - -@jt.var_scope('make_layer_bottleneck') -def make_layer_bottleneck(x, is_train, out_planes, blocks, layer_in_planes, stride = 1): - expansion = 4 - x = bottleneck_block(x, is_train, layer_in_planes, out_planes, stride) - layer_in_planes = out_planes * expansion - for i in range(1, blocks): - x = bottleneck_block(x, is_train, layer_in_planes, out_planes) - return x, layer_in_planes - -@jt.var_scope('resnet') -def resnet(x, is_train, block, layers, num_classes = 1000): - layer_in_planes = 64 - x = nn.conv(x, 3, layer_in_planes, 7, 3, 2) - x = nn.batch_norm(x, is_train) - x = nn.relu(x) - x = nn.pool(x, 3, "maximum", 1, 2) - x, layer_in_planes = block(x, is_train, 64, layers[0], layer_in_planes) - x, layer_in_planes = block(x, is_train, 128, layers[1], layer_in_planes, 2) - x, layer_in_planes = block(x, is_train, 256, layers[2], layer_in_planes, 2) - x, layer_in_planes = block(x, is_train, 512, layers[3], layer_in_planes, 2) - - x = x.reindex_reduce("add", [x.shape[0],x.shape[1]], ["i0","i1"])/x.shape[2]/x.shape[3] - x = nn.linear(x, num_classes) - - return x - -@jt.var_scope('resnet18', unique=True) -def resnet18(x, is_train): - return resnet(x, is_train, make_layer, [2, 2, 2, 2]) - -@jt.var_scope('resnet34', unique=True) -def resnet34(x, is_train): - return resnet(x, is_train, make_layer, [3, 4, 6, 3]) - -@jt.var_scope('resnet50', unique=True) -def resnet50(x, is_train): - return resnet(x, is_train, make_layer_bottleneck, [3, 4, 6, 3]) - -@jt.var_scope('resnet101', unique=True) -def resnet101(x, is_train): - return resnet(x, is_train, make_layer_bottleneck, [3, 4, 23, 3]) - -@jt.var_scope('resnet152', unique=True) -def resnet152(x, is_train): - return resnet(x, is_train, make_layer_bottleneck, [3, 8, 36, 3]) - -class BasicBlock(Module): +class BasicBlock(nn.Module): expansion = 1 - def __init__(self, inplanes, planes, stride=1, downsample=None): - self.conv1 = nn.Conv(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm(planes) + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if (norm_layer is None): + norm_layer = nn.BatchNorm + if ((groups != 1) or (base_width != 64)): + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if (dilation > 1): + raise NotImplementedError('Dilation > 1 not supported in BasicBlock') + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) self.relu = nn.Relu() - self.conv2 = nn.Conv(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn2 = nn.BatchNorm(planes) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) self.downsample = downsample self.stride = stride - self.planes = planes def execute(self, x): - residual = x + identity = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) out = self.conv2(out) out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual + if (self.downsample is not None): + identity = self.downsample(x) + out += identity out = self.relu(out) return out -class Bottleneck(Module): +class Bottleneck(nn.Module): expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - self.conv1 = nn.Conv(inplanes, planes, kernel_size=1, 
bias=False) - self.bn1 = nn.BatchNorm(planes) - self.conv2 = nn.Conv(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = nn.BatchNorm(planes) - self.conv3 = nn.Conv(planes, planes * self.expansion, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm(planes * self.expansion) + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if (norm_layer is None): + norm_layer = nn.BatchNorm + width = (int((planes * (base_width / 64.0))) * groups) + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, (planes * self.expansion)) + self.bn3 = norm_layer((planes * self.expansion)) self.relu = nn.Relu() self.downsample = downsample self.stride = stride - + def execute(self, x): - residual = x - + identity = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) - out = self.conv2(out) out = self.bn2(out) out = self.relu(out) - out = self.conv3(out) out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual + if (self.downsample is not None): + identity = self.downsample(x) + out += identity out = self.relu(out) return out -class ResNet(Module): - def __init__(self, block, layers, num_classes=1000): +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None): + super(ResNet, self).__init__() + if (norm_layer is None): + norm_layer = nn.BatchNorm + self._norm_layer = norm_layer self.inplanes = 64 - self.conv1 = nn.Conv(3, 64, kernel_size=7, stride=2, padding=3, bias=False) - self.bn1 = nn.BatchNorm(64) + self.dilation = 1 + if (replace_stride_with_dilation is None): + replace_stride_with_dilation = [False, False, False] + if (len(replace_stride_with_dilation) != 3): + raise ValueError('replace_stride_with_dilation should be None or a 3-element tuple, got {}'.format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) + self.bn1 = norm_layer(self.inplanes) self.relu = nn.Relu() - self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1) + self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum') self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.Pool(7, stride=1, op="mean") - self.fc = nn.Linear(512 * block.expansion, num_classes) - - def _make_layer(self, block, planes, blocks, stride=1): + self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear((512 * block.expansion), num_classes) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - 
downsample = nn.Sequential( - nn.Conv(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm(planes * block.expansion), - ) - + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if ((stride != 1) or (self.inplanes != (planes * block.expansion))): + downsample = nn.Sequential(conv1x1(self.inplanes, (planes * block.expansion), stride), norm_layer((planes * block.expansion))) layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer)) + self.inplanes = (planes * block.expansion) + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer)) return nn.Sequential(*layers) - - def execute(self, x): + + def _forward_impl(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) @@ -209,29 +136,47 @@ class ResNet(Module): x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) - x = self.avgpool(x) - x = jt.reshape(x, [x.shape[0],-1]) + x = jt.reshape(x, (x.shape[0], (- 1))) x = self.fc(x) - return x -def Resnet18(): - model = ResNet(BasicBlock, [2,2,2,2]) + def execute(self, x): + return self._forward_impl(x) + +def _resnet(block, layers, **kwargs): + model = ResNet(block, layers, **kwargs) return model -def Resnet34(): - model = ResNet(BasicBlock, [3,4,6,3]) - return model +def Resnet18(**kwargs): + return _resnet(BasicBlock, [2, 2, 2, 2], **kwargs) -def Resnet50(): - model = ResNet(Bottleneck, [3,4,6,3]) - return model +def Resnet34(**kwargs): + return _resnet( BasicBlock, [3, 4, 6, 3], **kwargs) -def Resnet101(): - model = ResNet(Bottleneck, [3,4,23,3]) - return model +def Resnet50(**kwargs): + return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs) -def Resnet152(): - model = ResNet(Bottleneck, [3,8,36,3]) - return model \ No newline at end of file +def Resnet101(**kwargs): + return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs) + +def Resnet152(**kwargs): + return _resnet(Bottleneck, [3, 8, 36, 3], **kwargs) + +def Resnext50_32x4d(**kwargs): + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs) + +def Resnext101_32x8d(**kwargs): + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs) + +def Wide_resnet50_2(**kwargs): + kwargs['width_per_group'] = (64 * 2) + return _resnet(Bottleneck, [3, 4, 6, 3], **kwargs) + +def Wide_resnet101_2(**kwargs): + kwargs['width_per_group'] = (64 * 2) + return _resnet(Bottleneck, [3, 4, 23, 3], **kwargs) diff --git a/python/jittor/models/shufflenetv2.py b/python/jittor/models/shufflenetv2.py new file mode 100644 index 00000000..5c817a70 --- /dev/null +++ b/python/jittor/models/shufflenetv2.py @@ -0,0 +1,106 @@ + +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. 
+import jittor as jt +from jittor import nn + +__all__ = ['ShuffleNetV2', 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'shufflenet_v2_x2_0'] + +def channel_shuffle(x, groups): + (batchsize, num_channels, height, width) = x.data.shape + channels_per_group = (num_channels // groups) + x = jt.reshape(x, [batchsize, groups, channels_per_group, height, width]) + x = jt.transpose(x, (0,2,1,3,4)) + x = jt.reshape(x, [batchsize, (- 1), height, width]) + return x + +class InvertedResidual(nn.Module): + + def __init__(self, inp, oup, stride): + super(InvertedResidual, self).__init__() + if (not (1 <= stride <= 3)): + raise ValueError('illegal stride value') + self.stride = stride + branch_features = (oup // 2) + assert ((self.stride != 1) or (inp == (branch_features << 1))) + if (self.stride > 1): + self.branch1 = nn.Sequential(self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm(inp), nn.Conv(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu()) + else: + self.branch1 = nn.Sequential() + self.branch2 = nn.Sequential(nn.Conv((inp if (self.stride > 1) else branch_features), branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu(), self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1), nn.BatchNorm(branch_features), nn.Conv(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False), nn.BatchNorm(branch_features), nn.Relu()) + + @staticmethod + def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): + return nn.Conv(i, o, kernel_size, stride, padding, bias=bias, groups=i) + + def execute(self, x): + if (self.stride == 1): + x1 = x[:,0:x.shape[1]//2] + x2 = x[:,x.shape[1]//2:x.shape[1]] + out = jt.contrib.concat([x1, self.branch2(x2)], dim=1) + else: + out = jt.contrib.concat([self.branch1(x), self.branch2(x)], dim=1) + out = channel_shuffle(out, 2) + return out + +class ShuffleNetV2(nn.Module): + + def __init__(self, stages_repeats, stages_out_channels, num_classes=1000, inverted_residual=InvertedResidual): + super(ShuffleNetV2, self).__init__() + if (len(stages_repeats) != 3): + raise ValueError('expected stages_repeats as list of 3 positive ints') + if (len(stages_out_channels) != 5): + raise ValueError('expected stages_out_channels as list of 5 positive ints') + self._stage_out_channels = stages_out_channels + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential(nn.Conv(input_channels, output_channels, 3, 2, 1, bias=False), nn.BatchNorm(output_channels), nn.Relu()) + input_channels = output_channels + self.maxpool = nn.Pool(kernel_size=3, stride=2, padding=1, op='maximum') + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for (name, repeats, output_channels) in zip(stage_names, stages_repeats, self._stage_out_channels[1:]): + seq = [inverted_residual(input_channels, output_channels, 2)] + for i in range((repeats - 1)): + seq.append(inverted_residual(output_channels, output_channels, 1)) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + output_channels = self._stage_out_channels[(- 1)] + self.conv5 = nn.Sequential(nn.Conv(input_channels, output_channels, 1, 1, 0, bias=False), nn.BatchNorm(output_channels), nn.Relu()) + self.fc = nn.Linear(output_channels, num_classes) + + def _forward_impl(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.stage2(x) + x = 
self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + x = x.mean([2, 3]) + x = self.fc(x) + return x + + def execute(self, x): + return self._forward_impl(x) + +def _shufflenetv2(arch, *args): + model = ShuffleNetV2(*args) + return model + +def shufflenet_v2_x0_5(): + return _shufflenetv2('shufflenetv2_x0.5', [4, 8, 4], [24, 48, 96, 192, 1024]) + +def shufflenet_v2_x1_0(): + return _shufflenetv2('shufflenetv2_x1.0', [4, 8, 4], [24, 116, 232, 464, 1024]) + +def shufflenet_v2_x1_5(): + return _shufflenetv2('shufflenetv2_x1.5', [4, 8, 4], [24, 176, 352, 704, 1024]) + +def shufflenet_v2_x2_0(): + return _shufflenetv2('shufflenetv2_x2.0', [4, 8, 4], [24, 244, 488, 976, 2048]) diff --git a/python/jittor/models/squeezenet.py b/python/jittor/models/squeezenet.py new file mode 100644 index 00000000..20fabbb2 --- /dev/null +++ b/python/jittor/models/squeezenet.py @@ -0,0 +1,90 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +# This model is generated by pytorch converter. +import jittor as jt +from jittor import nn +__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1'] + +class Fire(nn.Module): + + def __init__(self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes): + super(Fire, self).__init__() + self.inplanes = inplanes + self.squeeze = nn.Conv(inplanes, squeeze_planes, kernel_size=1) + self.squeeze_activation = nn.Relu() + self.expand1x1 = nn.Conv(squeeze_planes, expand1x1_planes, kernel_size=1) + self.expand1x1_activation = nn.Relu() + self.expand3x3 = nn.Conv(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1) + self.expand3x3_activation = nn.Relu() + + def execute(self, x): + x = self.squeeze_activation(self.squeeze(x)) + return jt.contrib.concat([self.expand1x1_activation(self.expand1x1(x)), self.expand3x3_activation(self.expand3x3(x))], dim=1) + +class SqueezeNet(nn.Module): + + def __init__(self, version='1_0', num_classes=1000): + super(SqueezeNet, self).__init__() + self.num_classes = num_classes + if (version == '1_0'): + self.features = nn.Sequential( + nn.Conv(3, 96, kernel_size=7, stride=2), + nn.Relu(), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(96, 16, 64, 64), + Fire(128, 16, 64, 64), + Fire(128, 32, 128, 128), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(256, 32, 128, 128), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(512, 64, 256, 256) + ) + elif (version == '1_1'): + self.features = nn.Sequential( + nn.Conv(3, 64, kernel_size=3, stride=2), + nn.Relu(), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(64, 16, 64, 64), + Fire(128, 16, 64, 64), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(128, 32, 128, 128), + Fire(256, 32, 128, 128), + nn.Pool(kernel_size=3, stride=2, ceil_mode=True, op='maximum'), + Fire(256, 48, 192, 192), + Fire(384, 48, 192, 192), + Fire(384, 64, 256, 256), + Fire(512, 64, 256, 256) + ) + else: + raise ValueError('Unsupported SqueezeNet version {version}:1_0 or 1_1 expected'.format(version=version)) + final_conv = nn.Conv(512, self.num_classes, kernel_size=1) + self.classifier = 
nn.Sequential( + nn.Dropout(p=0.5), + final_conv, + nn.Relu(), + nn.AdaptiveAvgPool2d((1, 1)) + ) + + def execute(self, x): + x = self.features(x) + x = self.classifier(x) + return jt.reshape(x, (x.shape[0], (- 1))) + +def _squeezenet(version, **kwargs): + model = SqueezeNet(version, **kwargs) + return model + +def squeezenet1_0(**kwargs): + return _squeezenet('1_0', **kwargs) + +def squeezenet1_1(**kwargs): + return _squeezenet('1_1', **kwargs) diff --git a/python/jittor/models/vgg.py b/python/jittor/models/vgg.py index f215cdce..ef593a4d 100644 --- a/python/jittor/models/vgg.py +++ b/python/jittor/models/vgg.py @@ -6,21 +6,21 @@ # This file is subject to the terms and conditions defined in # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** +# This model is generated by pytorch converter. import jittor as jt from jittor import nn - __all__ = [ - 'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', - 'vgg19_bn', 'vgg19', + 'VGG', 'VGG11', 'VGG11_bn', 'VGG13', 'VGG13_bn', 'VGG16', 'VGG16_bn', + 'VGG19_bn', 'VGG19', ] - class VGG(nn.Module): def __init__(self, features, num_classes=1000, init_weights=True): super(VGG, self).__init__() self.features = features + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) self.classifier = nn.Sequential( nn.Linear(512 * 7 * 7, 4096), nn.ReLU(), @@ -33,6 +33,7 @@ class VGG(nn.Module): def execute(self, x): x = self.features(x) + x = self.avgpool(x) x = jt.reshape(x, [x.shape[0],-1]) x = self.classifier(x) return x @@ -67,56 +68,32 @@ def _vgg(arch, cfg, batch_norm, **kwargs): def VGG11(**kwargs): - r"""VGG 11-layer model (configuration "A") from - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg11', 'A', False, **kwargs) def VGG11_bn(**kwargs): - r"""VGG 11-layer model (configuration "A") with batch normalization - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg11_bn', 'A', True, **kwargs) def VGG13(**kwargs): - r"""VGG 13-layer model (configuration "B") - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg13', 'B', False, **kwargs) def VGG13_bn(**kwargs): - r"""VGG 13-layer model (configuration "B") with batch normalization - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg13_bn', 'B', True, **kwargs) def VGG16(**kwargs): - r"""VGG 16-layer model (configuration "D") - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg16', 'D', False, **kwargs) def VGG16_bn(**kwargs): - r"""VGG 16-layer model (configuration "D") with batch normalization - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg16_bn', 'D', True, **kwargs) def VGG19(**kwargs): - r"""VGG 19-layer model (configuration "E") - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg19', 'E', False, **kwargs) def VGG19_bn(**kwargs): - r"""VGG 19-layer model (configuration 'E') with batch normalization - `"Very Deep Convolutional Networks For Large-Scale Image Recognition" `_ - """ return _vgg('vgg19_bn', 'E', True, **kwargs) \ No newline at end of file diff --git a/python/jittor/nn.py b/python/jittor/nn.py index 8f50f561..23d34618 100644 --- a/python/jittor/nn.py +++ b/python/jittor/nn.py @@ -13,7 +13,7 @@ import jittor as jt from jittor import init, Module import numpy as np import math -from 
jittor.pool import Pool, pool +from jittor.pool import Pool, pool, AdaptiveAvgPool2d def matmul_transpose(a, b): ''' @@ -99,6 +99,7 @@ def linear(x, n): def relu(x): return jt.maximum(x, 0) def leaky_relu(x, scale): return jt.ternary(x>0, x, x*scale) +def relu6(x): return jt.minimum(jt.maximum(x, 0), 6) #TODO dims is 4 will cause slowly execution def cross_entropy_loss(output, target, ignore_index=None): @@ -271,21 +272,22 @@ class BatchNorm(Module): Relu = jt.make_module(relu) ReLU = Relu Leaky_relu = jt.make_module(leaky_relu, 2) +ReLU6 = jt.make_module(relu6) Softmax = jt.make_module(softmax, 2) class Conv(Module): def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): - assert groups == 1 - self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size) self.stride = stride if isinstance(stride, tuple) else (stride, stride) self.padding = padding if isinstance(padding, tuple) else (padding, padding) self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation) + self.groups = groups + assert in_channels % groups == 0, 'in_channels must be divisible by groups' + assert out_channels % groups == 0, 'out_channels must be divisible by groups' Kh, Kw = self.kernel_size - assert groups==1, "Group conv not supported yet." - self.weight = init.relu_invariant_gauss([out_channels, in_channels, Kh, Kw], dtype="float", mode="fan_out") + self.weight = init.relu_invariant_gauss([out_channels, in_channels // groups, Kh, Kw], dtype="float", mode="fan_out") if bias: self.bias = init.uniform([out_channels], dtype="float", low=-1, high=1) else: @@ -295,17 +297,36 @@ class Conv(Module): N,C,H,W = x.shape Kh, Kw = self.kernel_size assert C==self.in_channels - oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1 - ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1 - xx = x.reindex([N,self.out_channels,C,oh,ow,Kh,Kw], [ - 'i0', # Nid - 'i2', # Cid - f'i3*{self.stride[0]}-{self.padding[0]}+i5*{self.dilation[0]}', # Hid+Khid - f'i4*{self.stride[1]}-{self.padding[1]}+i6*{self.dilation[1]}', # Wid+KWid - ]) - ww = self.weight.broadcast(xx.shape, [0,3,4]) - yy = xx*ww - y = yy.sum([2,5,6]) # Kc, Kh, Kw + if self.groups == 1: + oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1 + ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1 + xx = x.reindex([N,self.out_channels,C,oh,ow,Kh,Kw], [ + 'i0', # Nid + 'i2', # Cid + f'i3*{self.stride[0]}-{self.padding[0]}+i5*{self.dilation[0]}', # Hid+Khid + f'i4*{self.stride[1]}-{self.padding[1]}+i6*{self.dilation[1]}', # Wid+KWid + ]) + ww = self.weight.broadcast(xx.shape, [0,3,4]) + yy = xx*ww + y = yy.sum([2,5,6]) # Kc, Kh, Kw + else: + G = self.groups + oc = self.out_channels + oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1 + ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1 + xx = x.reshape((N, G, C//G, H, W)) + xx = xx.reindex([N,G,oc//G,C//G,oh,ow,Kh,Kw], [ + 'i0', # Nid + 'i1', # Gid + 'i3', # C//G id + f'i4*{self.stride[0]}-{self.padding[0]}+i6*{self.dilation[0]}', # Hid+Khid + f'i5*{self.stride[1]}-{self.padding[1]}+i7*{self.dilation[1]}', # Wid+KWid + ]) + ww = self.weight.reshape((G, oc//G, C//G, Kh, Kw)) + ww = ww.broadcast(xx.shape, [0,4,5]) + yy = xx*ww + yy = yy.sum([3,6,7]) # oc//G, Kh, Kw + y = 
yy.reshape((N, oc, oh, ow)) if self.bias is not None: b = self.bias.broadcast(y.shape, [0,2,3]) y = y + b diff --git a/python/jittor/pool.py b/python/jittor/pool.py index 6b8afe15..79dbe95e 100644 --- a/python/jittor/pool.py +++ b/python/jittor/pool.py @@ -161,5 +161,34 @@ class Pool(Module): ]) return xx.reduce(self.op, [4,5]) -def pool(x, size, op, padding, stride = 1): - return Pool(size, stride, padding, op=op)(x) \ No newline at end of file + +class AdaptiveAvgPool2d(Module): + def __init__(self, output_size): + self.output_size = output_size + + def execute(self, x): + if isinstance(self.output_size, int): + oh = self.output_size + ow = self.output_size + elif isinstance(self.output_size, tuple) or isinstance(self.output_size, list): + oh = x.shape[2] if self.output_size[0] is None else self.output_size[0] + ow = x.shape[3] if self.output_size[1] is None else self.output_size[1] + else: + raise TypeError(f"AdaptiveAvgPool2d only support int, typle or list input. Not support {type(self.output_size)} yet.") + N,C,H,W = x.shape + self.sh = math.floor(H / oh) + self.sw = math.floor(W / ow) + self.ksh = H - (oh - 1) * self.sh + self.ksw = W - (ow - 1) * self.sw + h = (H-self.ksh)//self.sh+1 + w = (W-self.ksw)//self.sw+1 + xx = x.reindex([N,C,h,w,self.ksh,self.ksw], [ + "i0", # Nid + "i1", # Cid + f"i2*{self.sh}+i4", # Hid + f"i3*{self.sw}+i5", # Wid + ]) + return xx.reduce("mean", [4,5]) + +def pool(x, kernel_size, op, padding=0, stride = 1): + return Pool(kernel_size, stride, padding, op=op)(x) \ No newline at end of file diff --git a/python/jittor/test/test_models.py b/python/jittor/test/test_models.py new file mode 100644 index 00000000..cde0ea0d --- /dev/null +++ b/python/jittor/test/test_models.py @@ -0,0 +1,116 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. 
+# *************************************************************** +import unittest +import jittor as jt +import numpy as np +import jittor.models as jtmodels + +try: + jt.dirty_fix_pytorch_runtime_error() + import torch + import torchvision.models as tcmodels + from torch import nn +except: + torch = None + + +skip_this_test = False + + +@unittest.skipIf(skip_this_test, "skip_this_test") +class test_models(unittest.TestCase): + @classmethod + def setUpClass(self): + self.models = [ + ['inception_v3','inception_v3'], + ['squeezenet1_0','squeezenet1_0'], + ['squeezenet1_1','squeezenet1_1'], + ['alexnet','alexnet'], + ['resnet18','Resnet18'], + ['resnet34','Resnet34'], + ['resnet50','Resnet50'], + ['resnet101','Resnet101'], + ['resnet152','Resnet152'], + ['resnext50_32x4d','Resnext50_32x4d'], + ['resnext101_32x8d','Resnext101_32x8d'], + ['vgg11','VGG11'], + ['vgg11_bn','VGG11_bn'], + ['vgg13','VGG13'], + ['vgg13_bn','VGG13_bn'], + ['vgg16','VGG16'], + ['vgg16_bn','VGG16_bn'], + ['vgg19','VGG19'], + ['vgg19_bn','VGG19_bn'], + ['wide_resnet50_2','Wide_resnet50_2'], + ['wide_resnet101_2','Wide_resnet101_2'], + ['googlenet','googlenet'], + ['mobilenet_v2','mobilenet_v2'], + ['mnasnet0_5','mnasnet0_5'], + ['mnasnet0_75','mnasnet0_75'], + ['mnasnet1_0','mnasnet1_0'], + ['mnasnet1_3','mnasnet1_3'], + ['shufflenet_v2_x0_5','shufflenet_v2_x0_5'], + ['shufflenet_v2_x1_0','shufflenet_v2_x1_0'], + ['shufflenet_v2_x1_5','shufflenet_v2_x1_5'], + ['shufflenet_v2_x2_0','shufflenet_v2_x2_0'] + ] + + @unittest.skipIf(not jt.has_cuda, "Cuda not found") + @jt.flag_scope(use_cuda=1, use_stat_allocator=1) + def test_models(self): + threshold = 1e-2 + # Define numpy input image + bs = 1 + test_img = np.random.random((bs,3,224,224)).astype('float32') + # Define pytorch & jittor input image + pytorch_test_img = torch.Tensor(test_img).cuda() + jittor_test_img = jt.array(test_img) + for test_model in self.models: + if test_model[0] == "inception_v3": + test_img = np.random.random((bs,3,300,300)).astype('float32') + pytorch_test_img = torch.Tensor(test_img).cuda() + jittor_test_img = jt.array(test_img) + # Define pytorch & jittor model + pytorch_model = tcmodels.__dict__[test_model[0]]().cuda() + if 'resne' in test_model[0]: + jittor_model = jtmodels.resnet.__dict__[test_model[1]]() + elif 'vgg' in test_model[0]: + jittor_model = jtmodels.vgg.__dict__[test_model[1]]() + elif 'alexnet' in test_model[0]: + jittor_model = jtmodels.alexnet.__dict__[test_model[1]]() + elif 'squeezenet' in test_model[0]: + jittor_model = jtmodels.squeezenet.__dict__[test_model[1]]() + elif 'inception' in test_model[0]: + jittor_model = jtmodels.inception.__dict__[test_model[1]]() + elif 'googlenet' in test_model[0]: + jittor_model = jtmodels.googlenet.__dict__[test_model[1]]() + elif 'mobilenet' in test_model[0]: + jittor_model = jtmodels.mobilenet.__dict__[test_model[1]]() + elif 'mnasnet' in test_model[0]: + jittor_model = jtmodels.mnasnet.__dict__[test_model[1]]() + elif 'shufflenet' in test_model[0]: + jittor_model = jtmodels.shufflenetv2.__dict__[test_model[1]]() + # Set eval to avoid dropout layer + pytorch_model.eval() + jittor_model.eval() + # Jittor loads pytorch parameters to ensure forward alignment + jittor_model.load_parameters(pytorch_model.state_dict()) + # Compare the relative error of the pytorch & jittor forward results. If the difference is lower than the threshold, this test passes.
+ pytorch_result = pytorch_model(pytorch_test_img) + jittor_result = jittor_model(jittor_test_img) + x = pytorch_result.detach().cpu().numpy() + 1 + y = jittor_result.data + 1 + relative_error = abs(x - y) / abs(y) + diff = relative_error.mean() + assert diff < threshold, f"[*] {test_model[1]} forward fails..., Relative Error: {diff}" + print(f"[*] {test_model[1]} forward passes with Relative Error {diff}") + print('all models pass test.') + +if __name__ == "__main__": + unittest.main() diff --git a/python/jittor/test/test_pytorch_converter.py b/python/jittor/test/test_pytorch_converter.py index 42090c45..21e92bc2 100644 --- a/python/jittor/test/test_pytorch_converter.py +++ b/python/jittor/test/test_pytorch_converter.py @@ -5,325 +5,233 @@ # *************************************************************** import unittest import jittor as jt -import math import numpy as np +from jittor.utils.pytorch_converter import convert +import os try: jt.dirty_fix_pytorch_runtime_error() import torch from torch import nn - from jittor.utils import pytorch_converter except: torch = None +code=""" +import torch.nn as nn +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + '''3x3 convolution with padding''' + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU() + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + out += residual + out = self.relu(out) + return out + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + out = self.conv3(out) + out = self.bn3(out) + if self.downsample is not None: + residual = self.downsample(x) + out += residual + out = self.relu(out) + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + 
self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + +def resnet18(pretrained=False, **kwargs): + '''Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + ''' + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + '''Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + ''' + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + '''Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + ''' + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def resnet101(pretrained=False, **kwargs): + '''Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + ''' + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + '''Constructs a ResNet-152 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + ''' + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model +""" + @unittest.skipIf(torch is None, "pytorch not found.") class TestPytorchConverter(unittest.TestCase): - def test_simple(self): - def model(c): - a = torch.Tensor([1,2,3,4,0]) - b = a+a - b = b*2 - b = b[:2] - a = a[1. All Rights Reserved. 
-# This file is subject to the terms and conditions defined in -# file 'LICENSE.txt', which is part of this source code package. -# *************************************************************** -import unittest -import jittor as jt -import numpy as np -from jittor.utils.pytorch_converter2 import convert -import os - -try: - jt.dirty_fix_pytorch_runtime_error() - import torch - from torch import nn -except: - torch = None - -code=""" -import torch.nn as nn -import torch.utils.model_zoo as model_zoo - -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'] - - -model_urls = { - 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', - 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', - 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', - 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', -} - - -def conv3x3(in_planes, out_planes, stride=1): - '''3x3 convolution with padding''' - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * self.expansion) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNet(nn.Module): - - def __init__(self, block, layers, num_classes=1000): - self.inplanes = 64 - super(ResNet, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AvgPool2d(7, stride=1) - self.fc = nn.Linear(512 * block.expansion, num_classes) - - 
for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - -def resnet18(pretrained=False, **kwargs): - '''Constructs a ResNet-18 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - ''' - model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) - return model - - -def resnet34(pretrained=False, **kwargs): - '''Constructs a ResNet-34 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - ''' - model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) - return model - - -def resnet50(pretrained=False, **kwargs): - '''Constructs a ResNet-50 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - ''' - model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) - return model - - -def resnet101(pretrained=False, **kwargs): - '''Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - ''' - model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) - return model - - -def resnet152(pretrained=False, **kwargs): - '''Constructs a ResNet-152 model. 
- Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - ''' - model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) - if pretrained: - model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) - return model - -import numpy as np -import torch -import random - -# setup random seed -def setup_seed(seed): - np.random.seed(seed) - random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = True -""" - -@unittest.skipIf(torch is None, "pytorch not found.") -class TestPytorchConverter2(unittest.TestCase): - def test_pytorch_converter2(self): - name1 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter2_1.py') - print(f"save source code into {name1}") - with open(name1, 'w') as f: - f.write(code) - - ret = convert(code) - - name2 = os.path.join(jt.flags.cache_path, 'test_pytorch_converter2_2.py') - print(f"save destination code into {name2}") - with open(name2, 'w') as f: - f.write(ret) - - from test_pytorch_converter2_1 import resnet18 as torch_resnet18 - from test_pytorch_converter2_2 import resnet18 as jittor_resnet18 - model_torch = torch_resnet18(False) - model_jittor = jittor_resnet18(False) - model_jittor.load_parameters(model_torch.state_dict()) - - img = np.random.randn(1,3,224,224).astype("float32") - img_torch = torch.Tensor(img) - img_jittor = jt.array(img) - - out_torch = model_torch(img_torch) - out_jittor = model_jittor(img_jittor) - assert abs((out_torch.cpu().detach().numpy() - out_jittor.data)).mean() < 1e-4 - -if __name__ == "__main__": - unittest.main() diff --git a/python/jittor/utils/pytorch_converter.py b/python/jittor/utils/pytorch_converter.py index 351ff358..06b8515d 100644 --- a/python/jittor/utils/pytorch_converter.py +++ b/python/jittor/utils/pytorch_converter.py @@ -1,288 +1,418 @@ # *************************************************************** -# Copyright (c) 2020 Jittor. Authors: Dun Liang . All Rights Reserved. +# Copyright (c) 2020 Jittor. Authors: +# Wenyang Zhou <576825820@qq.com> +# Dun Liang . +# All Rights Reserved. # This file is subject to the terms and conditions defined in # file 'LICENSE.txt', which is part of this source code package. 
# *************************************************************** -import sys -import contextlib -import os -import signal -import jittor as jt -jt.dirty_fix_pytorch_runtime_error() -import torch +import ast, astunparse +import numpy as np -class CallTree: - def __init__(self, parent, name): - self.parent = parent - self.name = name - self.children = [] - self.input = [] - self.output = [] - self.args = None - if parent is not None: - parent.children.append(self) +pjmap = { + # *************************************************************** + # Module + # *************************************************************** + 'Conv2d': { + 'pytorch': { + 'args': "in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'" + }, + 'jittor': { + 'module': 'nn', + 'name': 'Conv', + 'args': 'in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True' + }, + 'links': {}, + 'extras': {}, + }, + 'MaxPool2d': { + 'pytorch': { + 'args': 'kernel_size, stride=None, padding=0, dilation=1, return_indices=False', + }, + 'jittor': { + 'module': 'nn', + 'name': 'Pool', + 'args': 'kernel_size, stride=None, padding=0, dilation=None, return_indices=None, ceil_mode=False, op="maximum"' + }, + 'links': {}, + 'extras': { + "op": "'maximum'", + }, + }, + 'AvgPool2d': { + 'pytorch': { + 'args': 'kernel_size, stride=None, padding=0, dilation=1, return_indices=False', + }, + 'jittor': { + 'module': 'nn', + 'name': 'Pool', + 'args': 'kernel_size, stride=None, padding=0, dilation=None, return_indices=None, ceil_mode=False, op="maximum"' + }, + 'links': {}, + 'extras': { + "op": "'mean'", + }, + }, + 'ReLU': { + 'pytorch': { + 'args': 'inplace=False', + }, + 'jittor': { + 'module': 'nn', + 'name': 'ReLU', + 'args': '' + }, + 'links': {}, + 'extras': {}, + }, + 'BatchNorm2d': { + 'pytorch': { + 'args': 'num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True', + }, + 'jittor': { + 'module': 'nn', + 'name': 'BatchNorm', + 'args': 'num_features, eps=1e-5, momentum=0.1, affine=None, is_train=True' + }, + 'links': {}, + 'extras': {}, + }, + 'LeakyReLU': { + 'pytorch': { + 'args': 'negative_slope=0.01, inplace=False', + }, + 'jittor': { + 'module': 'nn', + 'name': 'Leaky_relu', + 'args': '' + }, + 'links': {}, + 'extras': {}, + }, + 'kaiming_normal_': { + 'pytorch': { + 'args': "tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'", + }, + 'jittor': { + 'module': 'init', + 'name': 'relu_invariant_gauss_', + 'args': 'var, mode="fan_in"' + }, + 'links': {'tensor': 'var'}, + 'extras': {}, + }, + 'constant_': { + 'pytorch': { + 'args': "tensor, val", + }, + 'jittor': { + 'module': 'init', + 'name': 'constant_', + 'args': 'var, value=0.0' + }, + 'links': {'tensor': 'var', 'val': 'value'}, + 'extras': {}, + }, + 'normal_': { + 'pytorch': { + 'args': "tensor, mean=0.0, std=1.0", + }, + 'jittor': { + 'module': 'init', + 'name': 'gauss_', + 'args': 'var, mean=0.0, std=1.0' + }, + 'links': {'tensor': 'var'}, + 'extras': {}, + }, + 'cat': { + 'pytorch': { + 'args': "tensors, dim=0, out=None", + }, + 'jittor': { + 'module': 'jt.contrib', + 'name': 'concat', + 'args': 'vars, dim=0' + }, + 'links': {'tensors': 'vars'}, + 'extras': {}, + }, + # *************************************************************** + # torch.Tensor.xxx(...) and torch.xxx(torch.Tensor, ...) 
+ # Example: x.reshape([2,3]) and torch.reshape(x, [2,3]) + # *************************************************************** + 'flatten': { + 'pytorch': { + 'prefix': ['torch'], + 'args_prefix': 'input, start_dim=0, end_dim=-1', + 'args': 'start_dim=0, end_dim=-1', + }, + 'jittor': { + 'prefix': 'jt', + 'module': '', + 'name': 'flatten', + 'args_prefix': 'input, start_dim=0, end_dim=-1', + 'args': 'start_dim=0, end_dim=-1' + }, + 'links': {'aaaaa': 'bbbb'}, + 'extras': {}, + }, + 'reshape': { + 'pytorch': { + 'prefix': ['torch'], + 'args_prefix': 'input, shape', + 'args': 'shape', + }, + 'jittor': { + 'prefix': 'jt', + 'module': '', + 'name': 'reshape', + 'args_prefix': 'input, shape', + 'args': 'shape' + }, + 'links': {}, + 'extras': {}, + }, + 'permute': { + 'pytorch': { + 'prefix': [], + 'args_prefix': '', + 'args': '*dim', + }, + 'jittor': { + 'prefix': '', + 'module': '', + 'name': 'permute', + 'args_prefix': '', + 'args': '*dim' + }, + 'links': {}, + 'extras': {}, + }, + # Probably not needed if the mapping is exactly the same + 'view': { + 'pytorch': { + 'prefix': [], + 'args_prefix': '', + 'args': '*shape', + }, + 'jittor': { + 'prefix': '', + 'module': '', + 'name': 'view', + 'args_prefix': '', + 'args': '*shape' + }, + 'links': {}, + 'extras': {}, + } +} - def __str__(self): - ss = [] - def dfs(v, depth): - s = " "*depth+f"{v.name} in:{v.input} out:{v.output}" - if v.args is not None: - s += f" args:{v.args}" - ss.append(s) - if len(v.children): - for c in v.children: - dfs(c, depth+1) - ss.append(s + " end") - dfs(self, 0) - return "\n".join(ss) - - def to_jt(self): - defs = [] - template = { - "add": "{0} + {1}", - "mul": "{0} * {1}", - "getitem": "{0}[{1}]", - "gt": "{0} > {1}", - } - def dfs(v): - if len(v.children)==0: - return - code = [] - code.append(f"def {v.name.split('.')[0]}({','.join(map(str,v.input))}):") - for c in v.children: - # parse the argument into jittor code - # code.append(f" # {c.args}") - if c.name == "BatchNorm2d.forward": - bn = c.args["self"] - code.append(f" {c.output[0]} = jt.nn.batch_norm({c.input[0]}, is_train={bn.training}, eps={bn.eps}, momentum={bn.momentum})") - continue - if c.name == "ReLU.forward": - code.append(f" {c.output[0]} = jt.nn.relu({c.input[0]})") - continue - if c.name == "MaxPool2d.forward": - po = c.args["self"] - code.append(f" {c.output[0]} = jt.nn.pool({c.input[0]}, size={po.kernel_size}, op='maximum', padding={po.padding}, stride={po.stride})") - continue - if c.name == "Conv2d.forward": - mod = c.args["self"] - code.append(f" # {mod}") - assert mod.kernel_size[0] == mod.kernel_size[1] - assert mod.padding[0] == mod.padding[1] - assert mod.stride[0] == mod.stride[1] - assert mod.bias == False - code.append(f" {c.output[0]} = nn.conv({c.output[0]}, {mod.in_channels}, {mod.out_channels}, {mod.kernel_size[0]}, {mod.padding[0]}, {mod.stride[0]})") - continue - if c.name.startswith("inj"): - if c.name.endswith("__init__"): - code.append(f" {c.args[0]} = jt.array({c.args[1]})") - else: - assert c.name.startswith("inj_torch_Tensor___") and \ - c.name.endswith("__") - name = c.name[19:-2] - if name in template: - code.append(f" {c.output[0]} = {template[name].format(*c.args)}") - else: - code.append(f" {c.output[0]} = __{name}__({', '.join(map(str,c.args))})") - else: - dfs(c) - out = "" - if len(c.output): - out = f"{','.join(map(str, c.output))} = " - code.append(f" {out}{c.name.split('.')[0]}({','.join(map(str,c.input))})") - if len(v.output): - code.append(f" return {','.join(map(str, v.output))}") - defs.extend(code) - dfs(self) - return "\n".join(defs) - -class 
TNode: - def __init__(self, s, v): - self.s = s - self.v = v - def __str__(self): - return self.s - def __repr__(self): - return self.s - -trace_depth = 0 -stack = [] -g_vars = {} -g_var_id = 0 -g_func_names = [] -call_tree = CallTree(None, "root") - -def push_stack(name=None, input=[]): - global trace_depth, call_tree - trace_depth += 1 - if name is not None: - # Do not re record functional - if len(stack) and ( - stack[-1][1].startswith("functional.") or - stack[-1][1].startswith("inj_") - ): - return - call_tree = CallTree(call_tree, name) - call_tree.input = input - stack.append((trace_depth, name)) - return call_tree - return None - -def pop_stack(output=[]): - global trace_depth, call_tree - if len(stack) and stack[-1][0] == trace_depth: - stack.pop() - call_tree.output = output - call_tree = call_tree.parent - trace_depth -= 1 - -def trace_calls(frame, event, arg): - def dfs(obj, func): - if isinstance(obj, list): - for i,v in enumerate(obj): - dfs(v, func) - if isinstance(v, torch.Tensor): - obj[i] = g_vars[id(v)] - elif isinstance(obj, dict): - for k,v in obj.items(): - if isinstance(v, tuple): - v = list(v) - obj[k] = v - dfs(v, func) - if isinstance(v, torch.Tensor): - obj[k] = g_vars[id(v)] - elif isinstance(obj, torch.Tensor): - func(obj) - global g_var_id - if event.endswith('call'): - co = frame.f_code - func_name = co.co_name - func_line_no = frame.f_lineno - func_filename = co.co_filename - args = "???" - t_values = [] - if event == "c_call": - func_name = arg.__name__ - else: - args = list(frame.f_locals.keys()) - if "self" in frame.f_locals: - func_name = type(frame.f_locals["self"]).__name__ + "." + func_name - - val = {k:frame.f_locals[k] for k in args} - def func(v): - global g_var_id - if id(v) not in g_vars: - if func_name.endswith("__init__"): - g_vars[id(v)] = TNode("array_"+str(g_var_id), v) - else: - g_vars[id(v)] = TNode("input_"+str(g_var_id), v) - g_var_id += 1 - t_values.append(g_vars[id(v)]) - dfs(val, func) +def replace(a): + if hasattr(a, "attr"): + if a.attr == "Conv2d": a.attr = "Conv" + if a.attr == "BatchNorm2d": a.attr = "BatchNorm" + if a.attr == "ReLU": a.attr = "Relu" + if a.attr == "AvgPool2d": a.attr = "Pool" + if a.attr == "MaxPool2d": a.attr = "Pool" + if a.attr == "LeakyReLU": a.attr = "Leaky_relu" - # get arguments you want - if func_name.endswith(".forward"): - ct = push_stack(func_name, t_values) - ct.args = val - elif func_filename.endswith("functional.py"): # TODO: not stable - push_stack("functional."+func_name, t_values) - elif func_name.startswith("inj_"): - ct = push_stack(func_name, t_values) - ct.args = val["a"] - elif func_name in g_func_names: - push_stack(func_name, t_values) + if hasattr(a, "id"): + if a.id == "Conv2d": a.id = "Conv" + if a.id == "BatchNorm2d": a.id = "BatchNorm" + if a.id == "ReLU": a.id = "Relu" + if a.id == "AvgPool2d": a.id = "Pool" + if a.id == "MaxPool2d": a.id = "Pool" + if a.id == "LeakyReLU": a.id = "Leaky_relu" + +import_flag = [] +def convert(code): + a = ast.parse(code) + dfs(a) + a.body.insert(0, ast.parse('import jittor as jt').body[0]) + if 'init' not in import_flag: + a.body.insert(1, ast.parse('from jittor import init').body[0]) + if 'nn' not in import_flag: + a.body.insert(2, ast.parse('from jittor import nn').body[0]) + return astunparse.unparse(a) + +def convert_(prefix, func_name, ags, kws): + info = pjmap[func_name] + p_prefix = info['pytorch']['prefix'] if 'prefix' in info['pytorch'].keys() else None + if p_prefix is not None and prefix in p_prefix: + p_ags = 
info['pytorch']['args_prefix'] + j_ags = info['jittor']['args_prefix'] + else: + p_ags = info['pytorch']['args'] + j_ags = info['jittor']['args'] + j_prefix = info['jittor']['prefix'] if 'prefix' in info['jittor'].keys() else None + j_module = info['jittor']['module'] + j_name = info['jittor']['name'] + links = info['links'] + extras = info['extras'] + jj_ags = [] + jj_kws = {} + pp_ags = [] + pp_kws = {} + if j_ags == '' and p_ags == '': + # no args in Pytorch and Jittor. + if p_prefix is None: + return f"{j_module}.{j_name}()" else: - push_stack() - jt.LOG.vvvv("----"*trace_depth+f"call: {func_name}({args}){t_values} # {func_filename}:{func_line_no}") - elif event.endswith('return'): - ret = [] - if event == "c_return": - jt.LOG.vvvv("----"*trace_depth+f"return {arg.__name__}: ???") - else: - co = frame.f_code - func_name = co.co_name - def func(arg): - global g_var_id - if id(arg) not in g_vars: - node = TNode(f"out_{g_var_id}", arg) - g_vars[id(arg)] = node - else: - node = g_vars[id(arg)] - ret.append(node) - g_var_id += 1 - dfs(arg, func) - if "self" in frame.f_locals: - func_name = type(frame.f_locals["self"]).__name__ + "." + func_name - jt.LOG.vvvv("----"*trace_depth+f"return {func_name}: {ret}") - pop_stack(ret) - return trace_calls - -@contextlib.contextmanager -def trace_scope(func_names=[]): - global g_func_names - g_func_names = func_names - with func_injection(): - try: - global trace_depth, g_var_id - sys.settrace(trace_calls) - trace_depth = 1 - stack.clear() - g_vars.clear() - call_tree.children.clear() - - g_var_id = 0 - yield - finally: - sys.settrace(None) - jt.LOG.v("="*20) - jt.LOG.v(call_tree) - - -@contextlib.contextmanager -def func_injection(): - names = [ - "torch.Tensor.__init__", - "torch.Tensor.__add__", - "torch.Tensor.__mul__", - "torch.Tensor.__sub__", - "torch.Tensor.__truediv__", - "torch.Tensor.__floordiv__", - "torch.Tensor.__getitem__", - # "torch.Tensor.__setitem__", - "torch.Tensor.__pow__", - "torch.Tensor.__mod__", - "torch.Tensor.__lt__", - "torch.Tensor.__le__", - "torch.Tensor.__gt__", - "torch.Tensor.__ge__", - "torch.Tensor.__eq__", - "torch.Tensor.__ne__", - "torch.Tensor.__lshift__", - "torch.Tensor.__rshift__", - "torch.Tensor.__and__", - "torch.Tensor.__or__", - "torch.Tensor.__xor__", - "torch.Tensor.__abs__", - "torch.Tensor.__neg__", - ] - try: - global inject_prevs - inject_prevs = [] - for name in names: - inject_prevs.append(eval(name)) - for i, name in enumerate(names): - new_name = "inj_" + name.replace(".", "_") - if name.endswith("__getitem__"): - exec(f"def {new_name}(*a): return torch._C._TensorBase.__getitem__(a[0], a[1] if isinstance(a[1], tuple) else (a[1],))") - elif name.endswith("__init__"): - exec(f"def {new_name}(*a, **b): return None") + if prefix in p_prefix: + return f"{j_prefix}.{j_name}()" else: - exec(f"def {new_name}(*a, **b): return inject_prevs[{i}](*a, **b)") - jt.LOG.v("inject", new_name) - exec(f"{name} = {new_name}") - yield - finally: - for i, name in enumerate(names): - prev = inject_prevs[i] - exec(f"{name} = prev") - torch.Tensor.__getitem__ = \ - lambda s, a: torch._C._TensorBase.__getitem__(s, a if isinstance(a, tuple) else (a,)) + return f"{prefix}.{j_name}()" + else: + j_ags = j_ags.replace(' ','').split(',') + for j_ag in j_ags: + if '=' in j_ag: + k,v = j_ag.split('=') + jj_kws[k] = v + else: + jj_ags.append(j_ag) + p_ags = p_ags.replace(' ','').split(',') + for p_ag in p_ags: + if '=' in p_ag: + k,v = p_ag.split('=') + pp_kws[k] = v + else: + pp_ags.append(p_ag) + if len(jj_ags) == 0 and 
len(pp_ags) != 0: + raise AttributeError(f"{func_name} in Jittor has no Attribute {pp_ags[0]}") + if len(pp_ags) > len(ags) + len(kws): + raise RuntimeError(f'There are {len(pp_ags) + len(list(pp_kws.keys()))} args needed in the Pytorch {func_name} function, but you only provided {len(ags) + len(kws)}') + ags_ = [] + for i in range(len(pp_ags)): + if i < len(ags): + if '*' in pp_ags[i]: + ags_.append('(' + ', '.join(ags[i:]) + ')') + ags = ags_ + break + else: + ags_.append(ags[i]) + else: + break + if len(pp_ags) + len(list(pp_kws.keys())) < len(ags) + len(kws): + raise RuntimeError(f'There are only {len(pp_ags) + len(list(pp_kws.keys()))} args in the Pytorch {func_name} function, but you provided {len(ags) + len(kws)}') + j_ags_flag = np.zeros(len(jj_ags)) + j_ags_values = {} + j_kws_values = {} + for i,ag in enumerate(ags): + if len(pp_ags) == 0: + ag_name = list(pp_kws.keys())[i] + elif i < len(pp_ags): + ag_name = pp_ags[i] + elif i >= len(pp_ags) and (i-len(pp_ags)) <= len(list(pp_kws.keys())): + ag_name = list(pp_kws.keys())[i-len(pp_ags)] + else: + raise RuntimeError(f'The number of args does not match for {func_name}') + if ag_name in links.keys(): + ag_name = links[ag_name] + if ag_name in jj_ags: + j_ags_flag[jj_ags.index(ag_name)] = 1 + j_ags_values[str(jj_ags.index(ag_name))] = ag + elif ag_name in jj_kws.keys(): + j_kws_values[ag_name] = ag + else: + raise AttributeError(f'{func_name} in Jittor has no Attribute {ag_name}') + for i,kw in enumerate(kws): + kw_name, kw_value = kw.split('=') + if kw_name in links.keys(): + kw_name = links[kw_name] + if kw_name in jj_ags: + j_ags_flag[jj_ags.index(kw_name)] = 1 + j_ags_values[str(jj_ags.index(kw_name))] = kw_value + elif kw_name in jj_kws.keys(): + j_kws_values[kw_name] = kw_value + else: + raise AttributeError(f'{func_name} in Jittor has no Attribute {kw_name}') + len_jj_ags = len(jj_ags) if len(jj_ags) == 0 or jj_ags[0] != '' else 0 + if j_ags_flag.sum() < len_jj_ags: + missing_args = [] + for i in range(len(jj_ags)): + if j_ags_flag[i] == 0: + missing_args.append(jj_ags[i]) + raise AttributeError(f"the needed args of {func_name} in Jittor are {', '.join(jj_ags)}, so you need to provide values for {', '.join(missing_args)}.") + if extras: + for k in extras.keys(): + if k in jj_ags: + j_ags_values[str(jj_ags.index(k))] = extras[k] + elif k in jj_kws.keys(): + j_kws_values[k] = extras[k] + else: + raise AttributeError(f"there is no attribute named {k} in Jittor {func_name}, you should delete it from the {func_name} extras.") + j_ags_ = [j_ags_values[str(i)] for i in range(len(list(j_ags_values.keys())))] + j_kws_ = [key + "=" + j_kws_values[key] for key in j_kws_values.keys()] + j_func = f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})" + if p_prefix is None: + return f"{j_module}.{j_name}({', '.join(j_ags_+j_kws_)})" + else: + if prefix in p_prefix: + return f"{j_prefix}.{j_name}({', '.join(j_ags_+j_kws_)})" + else: + return f"{prefix}.{j_name}({', '.join(j_ags_+j_kws_)})" + return j_func + +def dfs(a): + if isinstance(a, ast.Import): + if 'torch' in astunparse.unparse(a) and 'init' in astunparse.unparse(a): + import_flag.append('init') + return ast.parse('from jittor import init').body[0] + if 'torch' in astunparse.unparse(a) and 'nn' in astunparse.unparse(a): + import_flag.append('nn') + return ast.parse('from jittor import nn').body[0] + if a.names[0].name == 'torch': + return 'delete' + elif isinstance(a, ast.ImportFrom): + if 'torch' in a.module: + return 'delete' + elif isinstance(a, ast.Call): + for idx, ag in 
enumerate(a.args): + ret = dfs(ag) + if ret is not None: + a.args[idx] = ret + for idx, kw in enumerate(a.keywords): + ret = dfs(kw) + if ret is not None: + a.keywords[idx] = ret + func = astunparse.unparse(a.func).strip('\n').split('.') + prefix = '.'.join(func[0:-1]) + func_name = func[-1] + if func_name in pjmap.keys(): + ags = [astunparse.unparse(ag).strip('\n') for ag in a.args] + kws = [astunparse.unparse(kw).strip('\n') for kw in a.keywords] + ret = convert_(prefix, func_name, ags, kws) + return ast.parse(ret).body[0].value + if ".load_state_dict" in astunparse.unparse(a.func): + a.func.attr = 'load_parameters' + if astunparse.unparse(a.func).strip('\n').endswith(".size"): + ags = [astunparse.unparse(ag).strip('\n') for ag in a.args] + if len(ags) != 0: + con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']' + else: + con = astunparse.unparse(a.func).replace('size', 'shape') + return ast.parse(con).body[0].value + elif isinstance(a, ast.Expr): pass + elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name): replace(a) + elif isinstance(a, ast.FunctionDef): + if a.name == 'forward': a.name = 'execute' + if hasattr(a, '__dict__'): + for k in a.__dict__.keys(): + if isinstance(a.__dict__[k], list): + delete_flag = [] + for i,a_ in enumerate(a.__dict__[k]): + ret = dfs(a_) + if ret == 'delete': + delete_flag.append(True) + del a.__dict__[k][i] + continue + if ret is not None: + a.__dict__[k][i] = ret + delete_flag.append(False) + tmp = [a_ for i,a_ in enumerate(a.__dict__[k]) if delete_flag[i] == False] + a.__dict__[k] = tmp + else: + ret = dfs(a.__dict__[k]) + if ret is not None: + a.__dict__[k] = ret \ No newline at end of file diff --git a/python/jittor/utils/pytorch_converter2.py b/python/jittor/utils/pytorch_converter2.py deleted file mode 100644 index c174dae8..00000000 --- a/python/jittor/utils/pytorch_converter2.py +++ /dev/null @@ -1,117 +0,0 @@ -# *************************************************************** -# Copyright (c) 2020 Jittor. Authors: Dun Liang . All Rights Reserved. -# This file is subject to the terms and conditions defined in -# file 'LICENSE.txt', which is part of this source code package.
-# *************************************************************** -import ast, astunparse - -def convert(code): - a = ast.parse(code) - a.body.insert(0, ast.parse('import jittor as jt').body[0]) - a.body.insert(1, ast.parse('from jittor import init').body[0]) - dfs(a) - return astunparse.unparse(a) - -def replace(a): - if hasattr(a, "attr"): - if a.attr == "Conv2d": a.attr = "Conv" - if a.attr == "BatchNorm2d": a.attr = "BatchNorm" - if a.attr == "ReLU": a.attr = "Relu" - if a.attr == "AvgPool2d": a.attr = "Pool" - if a.attr == "MaxPool2d": a.attr = "Pool" - if hasattr(a, "id"): - if a.id == "Conv2d": a.id = "Conv" - if a.id == "BatchNorm2d": a.id = "BatchNorm" - if a.id == "ReLU": a.id = "Relu" - if a.id == "AvgPool2d": a.id = "Pool" - if a.id == "MaxPool2d": a.id = "Pool" - -def dfs(a): - if isinstance(a, ast.Import): - if a.names[0].name == 'torch.nn' and a.names[0].asname == 'nn': - a.names[0].name = 'jittor.nn' - a.names[0].asname = 'nn' - elif isinstance(a, ast.ImportFrom): - if a.module == 'torch': - a.module = a.module.replace('torch', 'jittor') - return a - elif isinstance(a, ast.Call): - for idx, ag in enumerate(a.args): - ret = dfs(ag) - if ret is not None: - a.args[idx] = ret - for idx, kw in enumerate(a.keywords): - ret = dfs(kw) - if ret is not None: - a.keywords[idx] = ret - if ".load_state_dict" in astunparse.unparse(a.func): - a.func.attr = 'load_parameters' - if astunparse.unparse(a.func).startswith("torch.Tensor"): - a.func.value.id = 'jt' - a.func.attr = 'array' - if ".cat" in astunparse.unparse(a.func): - if len(a.args) == 1: - dim = a.keywords[0].value.n - else: - dim = a.args[1].n - if isinstance(a.args[0], ast.List): - objs = [elt.id for elt in a.args[0].elts] - con = 'jt.contrib.concat([' + ','.join(objs) + '], dim=' + str(dim) + ')' - else: - con = 'jt.contrib.concat(' + a.args[0].id + ', dim=' + str(dim) + ')' - return ast.parse(con).body[0].value - if "view" in astunparse.unparse(a.func): - ags = [astunparse.unparse(ag).strip('\n') for ag in a.args] - con = 'jt.reshape(' + a.func.value.id + ', [' + ','.join(ags) + '])' - return ast.parse(con).body[0].value - if "permute" in astunparse.unparse(a.func): - ags = [astunparse.unparse(ag).strip('\n') for ag in a.func.value.args] - con = 'jt.transpose(' + a.func.value.func.value.id + ', [' + ','.join(ags) + '])' - return ast.parse(con).body[0].value - if astunparse.unparse(a.func).strip('\n').endswith(".size"): - ags = [astunparse.unparse(ag).strip('\n') for ag in a.args] - con = astunparse.unparse(a.func).split('.size')[0] + '.shape[' + ','.join(ags) + ']' - return ast.parse(con).body[0].value - if astunparse.unparse(a.func).startswith("F."): - a.func.value.id = "nn" - return a - if "kaiming_normal_" in astunparse.unparse(a.func): - ag = astunparse.unparse(a.args[0]).strip('\n') - kws = {} - for kw in a.keywords: - tmp = astunparse.unparse(kw).split('=') - kws[tmp[0]] = tmp[1].strip('\n') - con = 'init.relu_invariant_gauss_(' + ag + ', mode=' + kws['mode'] + ')' - return ast.parse(con).body[0].value - if "constant_" in astunparse.unparse(a.func): - ags = [astunparse.unparse(ag).strip('\n') for ag in a.args] - con = 'init.constant_(' + ','.join(ags) + ')' - return ast.parse(con).body[0].value - if "ReLU" in astunparse.unparse(a.func): - a.args.clear() - a.keywords.clear() - elif "Conv2d" in astunparse.unparse(a.func): - pass - elif "AvgPool2d" in astunparse.unparse(a.func): - a.keywords.append(ast.keyword(arg='op', value=ast.Str(s='mean'))) - elif "MaxPool2d" in astunparse.unparse(a.func): - 
a.keywords.append(ast.keyword(arg='op', value=ast.Str(s='maximum'))) - for kw in a.keywords: - if kw.arg in ['return_indices', 'groups']: - kw.value = ast.NameConstant(value=None) - elif isinstance(a, ast.Expr): pass - elif isinstance(a, ast.Attribute) or isinstance(a, ast.Name): replace(a) - elif isinstance(a, ast.FunctionDef): - if a.name == 'forward': a.name = 'execute' - if hasattr(a, '__dict__'): - for k in a.__dict__.keys(): - if isinstance(a.__dict__[k], list): - for i,a_ in enumerate(a.__dict__[k]): - ret = dfs(a_) - if ret is not None: - a.__dict__[k][i] = ret - - else: - ret = dfs(a.__dict__[k]) - if ret is not None: - a.__dict__[k] = ret \ No newline at end of file diff --git a/src/var_holder.h b/src/var_holder.h index eae903b4..9504be2e 100644 --- a/src/var_holder.h +++ b/src/var_holder.h @@ -62,6 +62,19 @@ struct VarHolder { return var->name.c_str(); } + // @pyjt(size) + inline NanoVector size() { + if (var->num<0) sync(); + return var->shape; + } + + // @pyjt(size) + inline int64 size(int64 dim) { + if (var->num<0) sync(); + ASSERT(dim>=0 && dim<var->shape.size()) << "dim is out of index"; + return var->shape[dim]; + } + // @pyjt(stop_grad) // @attrs(return_self) inline VarHolder* stop_grad() {
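As a quick, hedged sanity check of what this patch adds, the following sketch exercises the converter entry point and the new Python-level APIs. It assumes astunparse is installed; the file names my_torch_model.py and my_jittor_model.py are hypothetical placeholders and are not part of this patch.

import jittor as jt
from jittor import nn
from jittor.utils.pytorch_converter import convert

# Convert a PyTorch source file into Jittor source code (hypothetical file names).
pytorch_src = open("my_torch_model.py").read()
jittor_src = convert(pytorch_src)   # convert() returns the translated source as a string
with open("my_jittor_model.py", "w") as f:
    f.write(jittor_src)

# Smoke-test the new ops introduced above: adaptive average pooling, grouped Conv, ReLU6, size().
x = jt.random((2, 3, 32, 32))
y = nn.AdaptiveAvgPool2d((4, 4))(x)             # adaptive average pooling -> shape [2, 3, 4, 4]
z = nn.Conv(3, 6, 3, padding=1, groups=3)(x)    # grouped-convolution path in nn.Conv
r = nn.ReLU6()(x)                               # relu6 activation
print(x.size(0), y.shape, z.shape, r.shape)     # size(dim) is backed by the new VarHolder::size binding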