From 2f722bd8774066af2ec60bf65b99eb769fb8f4ae Mon Sep 17 00:00:00 2001 From: yaox12 Date: Thu, 1 Oct 2020 11:29:06 +0800 Subject: [PATCH 01/16] update basic transforms --- python/jittor/transform/__init__.py | 324 +------------ python/jittor/transform/function_pil.py | 334 +++++++++++++ python/jittor/transform/transform.py | 602 ++++++++++++++++++++++++ 3 files changed, 938 insertions(+), 322 deletions(-) create mode 100644 python/jittor/transform/function_pil.py create mode 100644 python/jittor/transform/transform.py diff --git a/python/jittor/transform/__init__.py b/python/jittor/transform/__init__.py index facfbbd5..1163af61 100644 --- a/python/jittor/transform/__init__.py +++ b/python/jittor/transform/__init__.py @@ -1,331 +1,11 @@ # *************************************************************** # Copyright (c) 2020 Jittor. # Authors: +# Xin Yao # Dun Liang . # All Rights Reserved. # This file is subject to the terms and conditions defined in # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -from PIL import Image -import random -import math -import numpy as np -import warnings -from collections.abc import Sequence, Mapping -def crop(img, top, left, height, width): - ''' - Function for cropping image. - - Args:: - - [in] img(Image.Image): Input image. - [in] top(int): the top boundary of the cropping box. - [in] left(int): the left boundary of the cropping box. - [in] height(int): height of the cropping box. - [in] width(int): width of the cropping box. - - Example:: - - img = Image.open(...) - img_ = transform.crop(img, 10, 10, 100, 100) - ''' - return img.crop((left, top, left + width, top + height)) - -def resize(img, size, interpolation=Image.BILINEAR): - ''' - Function for resizing image. - - Args:: - - [in] img(Image.Image): Input image. - [in] size: resize size. - [in] interpolation(int): type of resize. default: PIL.Image.BILINEAR - - Example:: - - img = Image.open(...) - img_ = transform.resize(img, (100, 100)) - ''' - return img.resize(size[::-1], interpolation) - -def crop_and_resize(img, top, left, height, width, size, interpolation=Image.BILINEAR): - ''' - Function for cropping and resizing image. - - Args:: - - [in] img(Image.Image): Input image. - [in] top(int): the top boundary of the cropping box. - [in] left(int): the left boundary of the cropping box. - [in] height(int): height of the cropping box. - [in] width(int): width of the cropping box. - [in] size: resize size. - [in] interpolation(int): type of resize. default: PIL.Image.BILINEAR - - Example:: - - img = Image.open(...) - img_ = transform.resize(img, 10,10,200,200,100) - ''' - img = crop(img, top, left, height, width) - img = resize(img, size, interpolation) - return img - -class Crop: - """Crop and the PIL Image to given size. - - Args: - - * top(int): top pixel indexes - * left(int): left pixel indexes - * height(int): image height - * width(int): image width - """ - def __init__(self, top, left, height, width): - self.top = top - self.left = left - self.height = height - self.width = width - def __call__(self, img): - return crop(img, self.top, self.left, self.height, self.width) - - -class RandomCropAndResize: - """Random crop and resize the given PIL Image to given size. - - Args:: - - [in] size(int or tuple): width and height of the output image. - [in] scale(tuple): range of scale ratio of the area. - [in] ratio(tuple): range of aspect ratio. - [in] interpolation: type of resize. default: PIL.Image.BILINEAR. 
- - Example:: - - transform = transform.RandomCropAndResize(224) - img_ = transform(img) - """ - def __init__(self, size, scale:tuple=(0.08, 1.0), ratio:tuple=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - assert scale[0] <= scale[1] and ratio[0] <= ratio[1] - - self.size = size - self.scale = scale - self.ratio = ratio - self.interpolation = interpolation - - def __call__(self, img:Image.Image): - width, height = img.size - scale = self.scale - ratio = self.ratio - area = height * width - - for _ in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if 0 < w <= width and 0 < h <= height: - i = random.randint(0, height - h) - j = random.randint(0, width - w) - break - else: - # Fallback to central crop - in_ratio = float(width) / float(height) - if in_ratio < min(ratio): - w = width - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = height - w = int(round(h * max(ratio))) - else: - w = width - h = height - i = (height - h) // 2 - j = (width - w) // 2 - return crop_and_resize(img, i, j, h, w, self.size, self.interpolation) - -def hflip(img): - return img.transpose(Image.FLIP_LEFT_RIGHT) - -class RandomHorizontalFlip: - """ - Random flip the image horizontally. - - Args:: - - [in] p(float): The probability of image flip, default: 0.5. - - Example:: - - transform = transform.RandomHorizontalFlip(0.6) - img_ = transform(img) - """ - def __init__(self, p=0.5): - self.p = p - - def __call__(self, img:Image.Image): - if random.random() < self.p: - return img.transpose(Image.FLIP_LEFT_RIGHT) - return img - -class CenterCrop: - ''' - Class for cropping image centrally. - - Args:: - - [in] size(int or tuple): Size want to crop. - - Example:: - - transform = transform.CenterCrop(224) - img_ = transform(img) - ''' - def __init__(self, size): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - self.size = size - - def __call__(self, img:Image.Image): - width, height = img.size - return crop(img, (height - self.size[0]) / 2, (width - self.size[1]) / 2, self.size[0], self.size[1]) - -def to_tensor(img): - """ - Function for turning Image.Image to jt.array. - - Args:: - - [in] img(Image.Image): Input image. - - Example:: - - img = Image.open(...) - img_ = transform.to_tensor(img) - """ - if isinstance(img, Image.Image): - return np.array(img).transpose((2,0,1)) / np.float32(255) - return img - -class ImageNormalize: - ''' - Class for normalizing the input image. - - Args:: - - [in] mean(list): the mean value of Normalization. - [in] std(list): the std value of Normalization. - - Example:: - - transform = transform.ImageNormalize(mean=[0.5], std=[0.5]) - img_ = transform(img) - ''' - - def __init__(self, mean, std): - self.mean = np.float32(mean).reshape(-1,1,1) - self.std = np.float32(std).reshape(-1,1,1) - - def __call__(self, img): - if isinstance(img, Image.Image): - img = (np.array(img).transpose((2,0,1)) \ - - self.mean*np.float32(255.)) \ - / (self.std*np.float32(255.)) - else: - img = (img - self.mean) / self.std - return img - -class Compose: - ''' - Base class for combining various transformations. - - Args:: - - [in] transforms(list): a list of transform. 
- - Example:: - - transform = transform.Compose([ - transform.Resize(opt.img_size), - transform.Gray(), - transform.ImageNormalize(mean=[0.5], std=[0.5]), - ]) - img_ = transform(img) - ''' - def __init__(self, transforms): - self.transforms = transforms - def __call__(self, data): - for t in self.transforms: - data = t(data) - return data - -class Resize: - ''' - Class for resizing image. - - Args:: - - [in] size(int or tuple): Size want to resize. - [in] mode(int): type of resize. - - Example:: - - transform = transform.Resize(224) - img_ = transform(img) - ''' - def __init__(self, size, mode=Image.BILINEAR): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - self.size = size - self.mode = mode - def __call__(self, img:Image.Image): - return img.resize(self.size, self.mode) - -class Gray: - ''' - Convert image to grayscale. - - Example:: - - transform = transform.Gray() - img_ = transform(img) - ''' - def __call__(self, img:Image.Image): - img = np.array(img.convert('L')) - img = img[np.newaxis, :] - return np.array((img / 255.0), dtype = np.float32) - -class RandomCrop: - ''' - Class for randomly cropping the input image. - - Args:: - - [in] size(tuple or int): the size want to crop. - - Example:: - - transform = transform.RandomCrop(128) - img_ = transform(img) - ''' - def __init__(self, size): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - self.size = size - def __call__(self, img:Image.Image): - width, height = img.size - assert self.size[0] <= height and self.size[1] <= width, f"crop size exceeds the input image in RandomCrop" - top = np.random.randint(0,height-self.size[0]+1) - left = np.random.randint(0,width-self.size[1]+1) - return crop(img, top, left, self.size[0], self.size[1]) - \ No newline at end of file +from .transform import * diff --git a/python/jittor/transform/function_pil.py b/python/jittor/transform/function_pil.py new file mode 100644 index 00000000..c4bc3799 --- /dev/null +++ b/python/jittor/transform/function_pil.py @@ -0,0 +1,334 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. +# Authors: +# Xin Yao +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** + +from typing import Sequence +from PIL import Image, ImageOps, ImageEnhance +import numpy as np + + +def _is_pil_image(img): + return isinstance(img, Image.Image) + +def _get_image_size(img): + if _is_pil_image(img): + return img.size + raise TypeError(f"Unexpected type {img}") + +def hflip(img): + """ + Function for horizontally flipping the given image. + + Args:: + + [in] img(PIL Image.Image): Input image. + + Example:: + + img = Image.open(...) + img_ = transform.hflip(img) + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + return img.transpose(Image.FLIP_LEFT_RIGHT) + + +def vflip(img): + """ + Function for vertically flipping the given image. + + Args:: + + [in] img(PIL Image.Image): Input image. + + Example:: + + img = Image.open(...) + img_ = transform.vflip(img) + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + return img.transpose(Image.FLIP_TOP_BOTTOM) + + +def adjust_brightness(img, brightness_factor): + """ + Function for adjusting brightness of an RGB image. 
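+    A brightness_factor of 1 leaves the image unchanged; internally this
+    wraps PIL's ImageEnhance.Brightness enhancer.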
+ + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] brightness_factor (float): How much to adjust the brightness. + Can be any non negative number. 0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns:: + [out] PIL Image.Image: Brightness adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_brightness(img, 0.5) + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + enhancer = ImageEnhance.Brightness(img) + img = enhancer.enhance(brightness_factor) + return img + + +def adjust_contrast(img, contrast_factor): + """ + Function for adjusting contrast of an image. + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] contrast_factor (float): How much to adjust the contrast. + Can be any non negative number. 0 gives a solid gray image, + 1 gives the original image while 2 increases the contrast by a factor of 2. + + Returns:: + [out] PIL Image.Image: Contrast adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_contrast(img, 0.5) + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(contrast_factor) + return img + + +def adjust_saturation(img, saturation_factor): + """ + Function for adjusting saturation of an image. + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] saturation_factor (float): How much to adjust the saturation. + 0 will give a black and white image, 1 will give the original image + while 2 will enhance the saturation by a factor of 2. + + Returns:: + [out] PIL Image.Image: Saturation adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_saturation(img, 0.5) + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + enhancer = ImageEnhance.Color(img) + img = enhancer.enhance(saturation_factor) + return img + + +def adjust_hue(img, hue_factor): + """ + Function for adjusting hue of an image. + + The image hue is adjusted by converting the image to HSV and + cyclically shifting the intensities in the hue channel (H). + The image is then converted back to original image mode. + + `hue_factor` is the amount of shift in H channel and must be in the + interval `[-0.5, 0.5]`. + + See `Hue`_ for more details. + + .. _Hue: https://en.wikipedia.org/wiki/Hue + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] hue_factor (float): How much to shift the hue channel. + Should be in [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of + hue channel in HSV space in positive and negative direction respectively. + 0 means no shift. Therefore, both -0.5 and 0.5 will give an image + with complementary colors while 0 gives the original image. + + Returns:: + [out] PIL Image.Image: Saturation adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_hue(img, 0.1) + """ + if not(-0.5 <= hue_factor <= 0.5): + raise ValueError(f'hue_factor ({hue_factor}) is not in [-0.5, 0.5].') + + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. 
Got {type(img)}')
+
+    input_mode = img.mode
+    if input_mode in {'L', '1', 'I', 'F'}:
+        return img
+
+    h, s, v = img.convert('HSV').split()
+
+    np_h = np.array(h, dtype=np.uint8)
+    # uint8 addition takes care of rotation across boundaries
+    with np.errstate(over='ignore'):
+        np_h += np.uint8(hue_factor * 255)
+    h = Image.fromarray(np_h, 'L')
+
+    img = Image.merge('HSV', (h, s, v)).convert(input_mode)
+    return img
+
+
+def adjust_gamma(img, gamma, gain=1):
+    r"""
+    Function for performing gamma correction on an image.
+
+    Also known as Power Law Transform. Intensities in RGB mode are adjusted
+    based on the following equation:
+
+    .. math::
+        I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
+
+    See `Gamma Correction`_ for more details.
+
+    .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
+
+    Args::
+
+        [in] img (PIL Image.Image): Image to be adjusted.
+        [in] gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
+            gamma larger than 1 makes the shadows darker,
+            while gamma smaller than 1 makes dark regions lighter.
+        [in] gain (float): The constant multiplier.
+
+    Returns::
+        [out] PIL Image.Image: Gamma adjusted image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError(f'img should be PIL Image. Got {type(img)}')
+
+    if gamma < 0:
+        raise ValueError('Gamma should be a non-negative real number')
+
+    input_mode = img.mode
+    img = img.convert('RGB')
+    gamma_map = [(255 + 1 - 1e-3) * gain * pow(ele / 255., gamma) for ele in range(256)] * 3
+    img = img.point(gamma_map)  # use PIL's point-function to accelerate this part
+
+    img = img.convert(input_mode)
+    return img
+
+
+def crop(img, top, left, height, width):
+    """
+    Function for cropping image.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] top(int): the top boundary of the cropping box.
+        [in] left(int): the left boundary of the cropping box.
+        [in] height(int): height of the cropping box.
+        [in] width(int): width of the cropping box.
+
+    Returns::
+        [out] PIL Image.Image: Cropped image.
+
+    Example::
+
+        img = Image.open(...)
+        img_ = transform.crop(img, 10, 10, 100, 100)
+    """
+    if not _is_pil_image(img):
+        raise TypeError(f'img should be PIL Image. Got {type(img)}')
+
+    return img.crop((left, top, left + width, top + height))
+
+
+def resize(img, size, interpolation=Image.BILINEAR):
+    """
+    Function for resizing the input image to the given size.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] size(sequence or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio. If a tuple or list of length 1 is provided, it is
+            interpreted as a single int.
+        [in] interpolation(int, optional): type of interpolation. default: PIL.Image.BILINEAR
+
+    Returns::
+        [out] PIL Image.Image: Resized image.
+
+    Example::
+
+        img = Image.open(...)
+        img_ = transform.resize(img, (100, 100))
+    """
+    if not _is_pil_image(img):
+        raise TypeError(f'img should be PIL Image. 
Got {type(img)}') + if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))): + raise TypeError(f'Got inappropriate size arg: {size}') + + if isinstance(size, int) or len(size) == 1: + if isinstance(size, Sequence): + size = size[0] + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +def to_grayscale(img, num_output_channels): + """ + Function for converting PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. + + Args:: + + [in] img(PIL Image.Image): Input image. + [in] num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. + + Returns:: + [out] PIL Image: Grayscale version of the image. + if num_output_channels = 1 : returned image is single channel + if num_output_channels = 3 : returned image is 3 channel with r = g = b + """ + if not _is_pil_image(img): + raise TypeError(f'img should be PIL Image. Got {type(img)}') + + if num_output_channels == 1: + img = img.convert('L') + elif num_output_channels == 3: + img = img.convert('L') + np_img = np.array(img, dtype=np.uint8) + np_img = np.dstack([np_img, np_img, np_img]) + img = Image.fromarray(np_img, 'RGB') + else: + raise ValueError('num_output_channels should be either 1 or 3') + + return img diff --git a/python/jittor/transform/transform.py b/python/jittor/transform/transform.py new file mode 100644 index 00000000..87e017b9 --- /dev/null +++ b/python/jittor/transform/transform.py @@ -0,0 +1,602 @@ +# *************************************************************** +# Copyright (c) 2020 Jittor. +# Authors: +# Xin Yao +# Dun Liang . +# All Rights Reserved. +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# *************************************************************** +from PIL import Image +import random +import math +import numpy as np +import warnings +from collections.abc import Sequence, Mapping + +from . import function_pil as F_pil + +__all__ = ["hflip", "vflip", "adjust_brightness", "adjust_contrast", "adjust_saturation", "adjust_hue", "adjust_gamma", + "crop", "resize", "to_grayscale", "center_crop", "crop_and_resize", "to_tensor", "image_normalize", + "Crop", "RandomCropAndResize", "RandomHorizontalFlip", "CenterCrop", "ImageNormalize", "Compose", + "Resize", "Gray", "RandomCrop",] + + +def _get_image_size(img): + """ + Return image size as (w, h) + """ + return F_pil._get_image_size(img) + + +def hflip(img): + """ + Function for horizontally flipping the given image. + + Args:: + + [in] img(PIL Image.Image): Input image. + + Example:: + + img = Image.open(...) + img_ = transform.hflip(img) + """ + return F_pil.hflip(img) + + +def vflip(img): + """ + Function for vertically flipping the given image. + + Args:: + + [in] img(PIL Image.Image): Input image. + + Example:: + + img = Image.open(...) + img_ = transform.vflip(img) + """ + return F_pil.vflip(img) + + +def adjust_brightness(img, brightness_factor): + """ + Function for adjusting brightness of an RGB image. + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] brightness_factor (float): How much to adjust the brightness. + Can be any non negative number. 
0 gives a black image, 1 gives the + original image while 2 increases the brightness by a factor of 2. + + Returns:: + [out] PIL Image.Image: Brightness adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_brightness(img, 0.5) + """ + return F_pil.adjust_brightness(img, brightness_factor) + + +def adjust_contrast(img, contrast_factor): + """ + Function for adjusting contrast of an image. + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] contrast_factor (float): How much to adjust the contrast. + Can be any non negative number. 0 gives a solid gray image, + 1 gives the original image while 2 increases the contrast by a factor of 2. + + Returns:: + [out] PIL Image.Image: Contrast adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_contrast(img, 0.5) + """ + return F_pil.adjust_contrast(img, contrast_factor) + + +def adjust_saturation(img, saturation_factor): + """ + Function for adjusting saturation of an image. + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] saturation_factor (float): How much to adjust the saturation. + 0 will give a black and white image, 1 will give the original image + while 2 will enhance the saturation by a factor of 2. + + Returns:: + [out] PIL Image.Image: Saturation adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_saturation(img, 0.5) + """ + return F_pil.adjust_saturation(img, saturation_factor) + + +def adjust_hue(img, hue_factor): + """ + Function for adjusting hue of an image. + + The image hue is adjusted by converting the image to HSV and + cyclically shifting the intensities in the hue channel (H). + The image is then converted back to original image mode. + + `hue_factor` is the amount of shift in H channel and must be in the + interval `[-0.5, 0.5]`. + + See `Hue`_ for more details. + + .. _Hue: https://en.wikipedia.org/wiki/Hue + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] hue_factor (float): How much to shift the hue channel. + Should be in [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of + hue channel in HSV space in positive and negative direction respectively. + 0 means no shift. Therefore, both -0.5 and 0.5 will give an image + with complementary colors while 0 gives the original image. + + Returns:: + [out] PIL Image.Image: Saturation adjusted image. + + Example:: + + img = Image.open(...) + img_ = transform.adjust_hue(img, 0.1) + """ + return F_pil.adjust_hue(img, hue_factor) + + +def adjust_gamma(img, gamma, gain=1): + """ + Function for performing gamma correction on an image. + + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + .. math:: + I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} + + See `Gamma Correction`_ for more details. + + .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args:: + + [in] img (PIL Image.Image): Image to be adjusted. + [in] gamma (float): Non negative real number, same as :math:`\gamma` in the equation. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + [in] gain (float): The constant multiplier. + + Returns:: + [out] PIL Image.Image: Gamma adjusted image. + """ + return F_pil.adjust_gamma(img, gamma, gain) + + +def crop(img, top, left, height, width): + """ + Function for cropping image. + + Args:: + + [in] img(PIL Image.Image): Input image. 
+        [in] top(int): the top boundary of the cropping box.
+        [in] left(int): the left boundary of the cropping box.
+        [in] height(int): height of the cropping box.
+        [in] width(int): width of the cropping box.
+
+    Returns::
+        [out] PIL Image.Image: Cropped image.
+
+    Example::
+
+        img = Image.open(...)
+        img_ = transform.crop(img, 10, 10, 100, 100)
+    """
+    return F_pil.crop(img, top, left, height, width)
+
+
+def resize(img, size, interpolation=Image.BILINEAR):
+    """
+    Function for resizing the input image to the given size.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] size(sequence or int): Desired output size. If size is a sequence like
+            (h, w), the output size will be matched to this. If size is an int,
+            the smaller edge of the image will be matched to this number maintaining
+            the aspect ratio. If a tuple or list of length 1 is provided, it is
+            interpreted as a single int.
+        [in] interpolation(int, optional): type of interpolation. default: PIL.Image.BILINEAR
+
+    Returns::
+        [out] PIL Image.Image: Resized image.
+
+    Example::
+
+        img = Image.open(...)
+        img_ = transform.resize(img, (100, 100))
+    """
+    return F_pil.resize(img, size, interpolation)
+
+
+def to_grayscale(img, num_output_channels):
+    """
+    Function for converting PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
+
+    Returns::
+        [out] PIL Image: Grayscale version of the image.
+            if num_output_channels = 1 : returned image is single channel
+            if num_output_channels = 3 : returned image is 3 channel with r = g = b
+    """
+    return F_pil.to_grayscale(img, num_output_channels)
+
+
+def center_crop(img, output_size):
+    """
+    Function for cropping the given image at the center.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] output_size (sequence or int): (height, width) of the crop box.
+            If int or sequence with single int, it is used for both directions.
+
+    Returns::
+        PIL Image.Image: Cropped image.
+    """
+    if isinstance(output_size, int):
+        output_size = (int(output_size), int(output_size))
+    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
+        output_size = (output_size[0], output_size[0])
+
+    image_width, image_height = _get_image_size(img)
+    crop_height, crop_width = output_size
+
+    crop_top = int(round((image_height - crop_height) / 2.))
+    crop_left = int(round((image_width - crop_width) / 2.))
+    return crop(img, crop_top, crop_left, crop_height, crop_width)
+
+
+def crop_and_resize(img, top, left, height, width, size, interpolation=Image.BILINEAR):
+    """
+    Function for cropping and resizing image.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+        [in] top(int): the top boundary of the cropping box.
+        [in] left(int): the left boundary of the cropping box.
+        [in] height(int): height of the cropping box.
+        [in] width(int): width of the cropping box.
+        [in] size: resize size.
+        [in] interpolation(int): type of resize. default: PIL.Image.BILINEAR
+
+    Example::
+
+        img = Image.open(...)
+        img_ = transform.crop_and_resize(img, 10, 10, 200, 200, 100)
+    """
+    img = crop(img, top, left, height, width)
+    img = resize(img, size, interpolation)
+    return img
+
+
+def to_tensor(img):
+    """
+    Function for converting Image.Image to jt.array.
+
+    Args::
+
+        [in] img(PIL Image.Image): Input image.
+
+    Example::
+
+        img = Image.open(...)
+ img_ = transform.to_tensor(img) + """ + # todo: handle image with various modes + if isinstance(img, Image.Image): + return np.array(img).transpose((2, 0, 1)) / np.float32(255) + return img + + +def image_normalize(img, mean, std): + """ + Function for normalizing image. + + Class for normalizing the input image. + + Args:: + + [in] image(PIL Image.Image or np.ndarray): input image. + If type of input image is np.ndarray, it should be in shape (C, H, W). + [in] mean(list): the mean value of Normalization. + [in] std(list): the std value of Normalization. + + Example:: + img = Image.open(...) + img_ = transform.image_normalize(img, mean=[0.5], std=[0.5]) + """ + if isinstance(img, Image.Image): + img = (np.array(img).transpose((2, 0, 1)) \ + - mean * np.float32(255.)) \ + / (std * np.float32(255.)) + else: + img = (img - mean) / std + return img + + +class Crop: + """Crop and the PIL Image to given size. + + Args: + + * top(int): top pixel indexes + * left(int): left pixel indexes + * height(int): image height + * width(int): image width + """ + def __init__(self, top, left, height, width): + self.top = top + self.left = left + self.height = height + self.width = width + + def __call__(self, img): + return crop(img, self.top, self.left, self.height, self.width) + + +class RandomCropAndResize: + """Random crop and resize the given PIL Image to given size. + + Args:: + + [in] size(int or tuple): width and height of the output image. + [in] scale(tuple): range of scale ratio of the area. + [in] ratio(tuple): range of aspect ratio. + [in] interpolation: type of resize. default: PIL.Image.BILINEAR. + + Example:: + + transform = transform.RandomCropAndResize(224) + img_ = transform(img) + """ + def __init__(self, size, scale:tuple=(0.08, 1.0), ratio:tuple=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): + if isinstance(size, int): + size = (size, size) + assert isinstance(size, tuple) + assert scale[0] <= scale[1] and ratio[0] <= ratio[1] + + self.size = size + self.scale = scale + self.ratio = ratio + self.interpolation = interpolation + + def __call__(self, img:Image.Image): + width, height = img.size + scale = self.scale + ratio = self.ratio + area = height * width + + for _ in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if 0 < w <= width and 0 < h <= height: + i = random.randint(0, height - h) + j = random.randint(0, width - w) + break + else: + # Fallback to central crop + in_ratio = float(width) / float(height) + if in_ratio < min(ratio): + w = width + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = height + w = int(round(h * max(ratio))) + else: + w = width + h = height + i = (height - h) // 2 + j = (width - w) // 2 + return crop_and_resize(img, i, j, h, w, self.size, self.interpolation) + + +class RandomHorizontalFlip: + """ + Random flip the image horizontally. + + Args:: + + [in] p(float): The probability of image flip, default: 0.5. + + Example:: + + transform = transform.RandomHorizontalFlip(0.6) + img_ = transform(img) + """ + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img: Image.Image): + if random.random() < self.p: + return hflip(img) + return img + + +class CenterCrop: + ''' + Class for cropping image centrally. + + Args:: + + [in] size(int or tuple): Size want to crop. 
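+            If an int is given, a square crop of (size, size) is made.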
+ + Example:: + + transform = transform.CenterCrop(224) + img_ = transform(img) + ''' + def __init__(self, size): + if isinstance(size, int): + size = (int(size), int(size)) + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + self.size = size + + def __call__(self, img: Image.Image): + return center_crop(img, self.size) + + +class ImageNormalize: + ''' + Class for normalizing the input image. + + Args:: + + [in] mean(list): the mean value of Normalization. + [in] std(list): the std value of Normalization. + + Example:: + + transform = transform.ImageNormalize(mean=[0.5], std=[0.5]) + img_ = transform(img) + ''' + + def __init__(self, mean, std): + self.mean = np.float32(mean).reshape(-1, 1, 1) + self.std = np.float32(std).reshape(-1, 1, 1) + + def __call__(self, img): + return image_normalize(img, self.mean, self.std) + + +class Compose: + ''' + Base class for combining various transformations. + + Args:: + + [in] transforms(list): a list of transform. + + Example:: + + transform = transform.Compose([ + transform.Resize(opt.img_size), + transform.Gray(), + transform.ImageNormalize(mean=[0.5], std=[0.5]), + ]) + img_ = transform(img) + ''' + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, data): + for t in self.transforms: + data = t(data) + return data + + +class Resize: + ''' + Class for resizing image. + + Args:: + + [in] size(int or tuple): Size want to resize. + [in] interpolation(int): type of resize. + + Example:: + + transform = transform.Resize(224) + img_ = transform(img) + ''' + def __init__(self, size, interpolation=Image.BILINEAR): + if isinstance(size, int): + size = (size, size) + assert isinstance(size, tuple) + self.size = size + self.interpolation = interpolation + + def __call__(self, img: Image.Image): + return resize(img, self.size, self.interpolation) + + +class Gray: + ''' + Convert image to grayscale. + + Example:: + + transform = transform.Gray() + img_ = transform(img) + ''' + def __init__(self, num_output_channels=1): + self.num_output_channels = num_output_channels + + def __call__(self, img: Image.Image): + return to_grayscale(img, self.num_output_channels) + + +class RandomCrop: + ''' + Class for randomly cropping the input image. + + Args:: + + [in] size(tuple or int): the size want to crop. + + Example:: + + transform = transform.RandomCrop(128) + img_ = transform(img) + ''' + def __init__(self, size): + if isinstance(size, int): + size = (size, size) + assert isinstance(size, tuple) + self.size = size + + def __call__(self, img: Image.Image): + width, height = _get_image_size(img) + assert self.size[0] <= height and self.size[1] <= width, f"crop size exceeds the input image in RandomCrop" + top = np.random.randint(0, height - self.size[0] + 1) + left = np.random.randint(0, width - self.size[1] + 1) + return crop(img, top, left, self.size[0], self.size[1]) + + +class ToTensor: + """ + Convert PIL Image to jt.array. 
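+
+    For an RGB input, the output is a float array of shape (C, H, W) with
+    values scaled to [0, 1], e.g. transform.ToTensor()(Image.open(...)).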
+ """ + def __call__(self, img: Image.Image): + return to_tensor(img) From 3d07af7917d5533686c2386d9d8443e66ebf45ab Mon Sep 17 00:00:00 2001 From: yaox12 Date: Sat, 3 Oct 2020 16:59:56 +0800 Subject: [PATCH 02/16] add more transforming modules --- python/jittor/transform/function_pil.py | 16 ++ python/jittor/transform/transform.py | 283 ++++++++++++++++++++++-- 2 files changed, 276 insertions(+), 23 deletions(-) diff --git a/python/jittor/transform/function_pil.py b/python/jittor/transform/function_pil.py index c4bc3799..5f681806 100644 --- a/python/jittor/transform/function_pil.py +++ b/python/jittor/transform/function_pil.py @@ -15,11 +15,19 @@ import numpy as np def _is_pil_image(img): return isinstance(img, Image.Image) + def _get_image_size(img): if _is_pil_image(img): return img.size raise TypeError(f"Unexpected type {img}") + +def _get_image_num_channels(img): + if _is_pil_image(img): + return 1 if img.mode == 'L' else 3 + raise TypeError(f"Unexpected type {img}") + + def hflip(img): """ Function for horizontally flipping the given image. @@ -70,6 +78,7 @@ def adjust_brightness(img, brightness_factor): original image while 2 increases the brightness by a factor of 2. Returns:: + [out] PIL Image.Image: Brightness adjusted image. Example:: @@ -97,6 +106,7 @@ def adjust_contrast(img, contrast_factor): 1 gives the original image while 2 increases the contrast by a factor of 2. Returns:: + [out] PIL Image.Image: Contrast adjusted image. Example:: @@ -124,6 +134,7 @@ def adjust_saturation(img, saturation_factor): while 2 will enhance the saturation by a factor of 2. Returns:: + [out] PIL Image.Image: Saturation adjusted image. Example:: @@ -164,6 +175,7 @@ def adjust_hue(img, hue_factor): with complementary colors while 0 gives the original image. Returns:: + [out] PIL Image.Image: Saturation adjusted image. Example:: @@ -216,6 +228,7 @@ def adjust_gamma(img, gamma, gain=1): [in] gain (float): The constant multiplier. Returns:: + [out] PIL Image.Image: Gamma adjusted image. """ if not _is_pil_image(img): @@ -246,6 +259,7 @@ def crop(img, top, left, height, width): [in] width(int): width of the cropping box. Returns:: + [out] PIL Image.Image: Cropped image. Example:: @@ -274,6 +288,7 @@ def resize(img, size, interpolation=Image.BILINEAR): [in] interpolation(int, optional): type of interpolation. default: PIL.Image.BILINEAR Returns:: + [out] PIL Image.Image: Resized image. Example:: @@ -314,6 +329,7 @@ def to_grayscale(img, num_output_channels): [in] num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. Returns:: + [out] PIL Image: Grayscale version of the image. if num_output_channels = 1 : returned image is single channel if num_output_channels = 3 : returned image is 3 channel with r = g = b diff --git a/python/jittor/transform/transform.py b/python/jittor/transform/transform.py index 87e017b9..6c226bc1 100644 --- a/python/jittor/transform/transform.py +++ b/python/jittor/transform/transform.py @@ -7,9 +7,10 @@ # This file is subject to the terms and conditions defined in # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -from PIL import Image +import numbers import random import math +from PIL import Image import numpy as np import warnings from collections.abc import Sequence, Mapping @@ -19,7 +20,8 @@ from . 
import function_pil as F_pil __all__ = ["hflip", "vflip", "adjust_brightness", "adjust_contrast", "adjust_saturation", "adjust_hue", "adjust_gamma", "crop", "resize", "to_grayscale", "center_crop", "crop_and_resize", "to_tensor", "image_normalize", "Crop", "RandomCropAndResize", "RandomHorizontalFlip", "CenterCrop", "ImageNormalize", "Compose", - "Resize", "Gray", "RandomCrop",] + "Resize", "Gray", "RandomGray", "RandomCrop", "ToTensor", "Lambda", "RandomApply", "RandomOrder", + "RandomChoice", "RandomVerticalFlip", "ColorJitter"] def _get_image_size(img): @@ -28,6 +30,8 @@ def _get_image_size(img): """ return F_pil._get_image_size(img) +def _get_image_num_channels(img): + return F_pil._get_image_num_channels(img) def hflip(img): """ @@ -73,6 +77,7 @@ def adjust_brightness(img, brightness_factor): original image while 2 increases the brightness by a factor of 2. Returns:: + [out] PIL Image.Image: Brightness adjusted image. Example:: @@ -95,6 +100,7 @@ def adjust_contrast(img, contrast_factor): 1 gives the original image while 2 increases the contrast by a factor of 2. Returns:: + [out] PIL Image.Image: Contrast adjusted image. Example:: @@ -117,6 +123,7 @@ def adjust_saturation(img, saturation_factor): while 2 will enhance the saturation by a factor of 2. Returns:: + [out] PIL Image.Image: Saturation adjusted image. Example:: @@ -152,6 +159,7 @@ def adjust_hue(img, hue_factor): with complementary colors while 0 gives the original image. Returns:: + [out] PIL Image.Image: Saturation adjusted image. Example:: @@ -185,6 +193,7 @@ def adjust_gamma(img, gamma, gain=1): [in] gain (float): The constant multiplier. Returns:: + [out] PIL Image.Image: Gamma adjusted image. """ return F_pil.adjust_gamma(img, gamma, gain) @@ -203,6 +212,7 @@ def crop(img, top, left, height, width): [in] width(int): width of the cropping box. Returns:: + [out] PIL Image.Image: Cropped image. Example:: @@ -228,6 +238,7 @@ def resize(img, size, interpolation=Image.BILINEAR): [in] interpolation(int, optional): type of interpolation. default: PIL.Image.BILINEAR Returns:: + [out] PIL Image.Image: Resized image. Example:: @@ -248,6 +259,7 @@ def to_grayscale(img, num_output_channels): [in] num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. Returns:: + [out] PIL Image: Grayscale version of the image. if num_output_channels = 1 : returned image is single channel if num_output_channels = 3 : returned image is 3 channel with r = g = b @@ -266,12 +278,11 @@ def center_crop(img, output_size): If int or sequence with single int, it is used for both directions. Returns:: + PIL Image.Image: Cropped image. """ - if isinstance(output_size, int): - output_size = (int(output_size), int(output_size)) - elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: - output_size = (output_size[0], output_size[0]) + + output_size = _setup_size(output_size, error_msg="If size is a sequence, it should have 2 values") image_width, image_height = _get_image_size(img) crop_height, crop_width = output_size @@ -386,12 +397,9 @@ class RandomCropAndResize: img_ = transform(img) """ def __init__(self, size, scale:tuple=(0.08, 1.0), ratio:tuple=(3. / 4., 4. 
/ 3.), interpolation=Image.BILINEAR): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) + self.size = _setup_size(size, error_msg="If size is a sequence, it should have 2 values") assert scale[0] <= scale[1] and ratio[0] <= ratio[1] - self.size = size self.scale = scale self.ratio = ratio self.interpolation = interpolation @@ -467,11 +475,7 @@ class CenterCrop: img_ = transform(img) ''' def __init__(self, size): - if isinstance(size, int): - size = (int(size), int(size)) - elif isinstance(size, (tuple, list)) and len(size) == 1: - size = (size[0], size[0]) - self.size = size + self.size = _setup_size(size, error_msg="If size is a sequence, it should have 2 values") def __call__(self, img: Image.Image): return center_crop(img, self.size) @@ -541,10 +545,7 @@ class Resize: img_ = transform(img) ''' def __init__(self, size, interpolation=Image.BILINEAR): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - self.size = size + self.size = _setup_size(size, error_msg="If size is a sequence, it should have 2 values") self.interpolation = interpolation def __call__(self, img: Image.Image): @@ -567,6 +568,35 @@ class Gray: return to_grayscale(img, self.num_output_channels) +class RandomGray: + ''' + Randomly convert image to grayscale. + + Args:: + [in] p (float): probability that image should be converted to grayscale, default: 0.1 + + Returns:: + + [out] PIL Image: Grayscale version of the image with probability p and unchanged + with probability (1-p). + - If input image is 1 channel: grayscale version is 1 channel + - If input image is 3 channel: grayscale version is 3 channel with r == g == b + + Example:: + + transform = transform.Gray() + img_ = transform(img) + ''' + def __init__(self, p=0.1): + self.p = p + + def __call__(self, img: Image.Image): + num_output_channels = _get_image_num_channels(img) + if random.random() < self.p: + return to_grayscale(img, num_output_channels=num_output_channels) + return img + + class RandomCrop: ''' Class for randomly cropping the input image. @@ -581,10 +611,7 @@ class RandomCrop: img_ = transform(img) ''' def __init__(self, size): - if isinstance(size, int): - size = (size, size) - assert isinstance(size, tuple) - self.size = size + self.size = _setup_size(size, error_msg="If size is a sequence, it should have 2 values") def __call__(self, img: Image.Image): width, height = _get_image_size(img) @@ -600,3 +627,213 @@ class ToTensor: """ def __call__(self, img: Image.Image): return to_tensor(img) + + +class Lambda: + """ + Apply a user-defined lambda as a transform. + + Args:: + + [in] lambd (function): Lambda/function to be used for transform. + """ + + def __init__(self, lambd): + if not callable(lambd): + raise TypeError(f"Argument lambd should be callable, got {repr(type(lambd).__name__)}") + self.lambd = lambd + + def __call__(self, img): + return self.lambd(img) + + +class RandomApply: + """ + Apply randomly a list of transformations with a given probability + + Args:: + [in] transforms (list or tuple): list of transformations + [in] p (float): probability + """ + + def __init__(self, transforms, p=0.5): + assert isinstance(transforms, (list, tuple)) + self.transforms = transforms + self.p = p + + def __call__(self, img): + if self.p < random.random(): + return img + for t in self.transforms: + img = t(img) + return img + + +class RandomOrder: + """ + Apply a list of transformations in a random order. 
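+
+    For example, the sketch below applies Resize and CenterCrop in a
+    randomly shuffled order::
+
+        trans = transform.RandomOrder([transform.Resize(20), transform.CenterCrop(10)])
+        img_ = trans(img)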
+ + Args:: + [in] transforms (list or tuple): list of transformations + [in] p (float): probability + """ + + def __init__(self, transforms): + assert isinstance(transforms, (list, tuple)) + self.transforms = transforms + + def __call__(self, img): + order = list(range(len(self.transforms))) + random.shuffle(order) + for i in order: + img = self.transforms[i](img) + return img + + +class RandomChoice: + """ + Apply single transformation randomly picked from a list. + + Args:: + [in] transforms (list or tuple): list of transformations + [in] p (float): probability + """ + + def __init__(self, transforms): + assert isinstance(transforms, (list, tuple)) + self.transforms = transforms + + def __call__(self, img): + t = random.choice(self.transforms) + return t(img) + + +class RandomVerticalFlip: + """ + Random flip the image vertically. + + Args:: + + [in] p(float): The probability of image flip, default: 0.5. + + Example:: + + transform = transform.RandomVerticalFlip(0.6) + img_ = transform(img) + """ + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img: Image.Image): + if random.random() < self.p: + return vflip(img) + return img + + +class ColorJitter: + """ + Randomly change the brightness, contrast, saturation and hue of an image. + + Args:: + + [in] brightness (float or tuple of float (min, max)): How much to jitter brightness. + brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness] + or the given [min, max]. Should be non negative numbers. + [in] contrast (float or tuple of float (min, max)): How much to jitter contrast. + contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast] + or the given [min, max]. Should be non negative numbers. + [in] saturation (float or tuple of float (min, max)): How much to jitter saturation. + saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation] + or the given [min, max]. Should be non negative numbers. + [in] hue (float or tuple of float (min, max)): How much to jitter hue. + hue_factor is chosen uniformly from [-hue, hue] or the given [min, max]. + Should have 0<= hue <= 0.5 or -0.5 <= min <= max <= 0.5. + """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + self.brightness = self._check_input(brightness, 'brightness') + self.contrast = self._check_input(contrast, 'contrast') + self.saturation = self._check_input(saturation, 'saturation') + self.hue = self._check_input(hue, 'hue', center=0, bound=(-0.5, 0.5), + clip_first_on_zero=False) + + @staticmethod + def _check_input(value, name, center=1, bound=(0, float('inf')), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def _get_transform(brightness, contrast, saturation, hue): + """ + Get a randomized transform to be applied on image. + + Arguments are same as that of __init__. 
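+
+        Each enabled adjustment is wrapped in a Lambda transform and the
+        list is shuffled, so the adjustments are applied in a random order.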
+ + Returns:: + Transform which randomly adjusts brightness, contrast, saturation + and hue in a random order. + """ + transforms = [] + + if brightness is not None: + brightness_factor = random.uniform(brightness[0], brightness[1]) + transforms.append(Lambda(lambda img: adjust_brightness(img, brightness_factor))) + + if contrast is not None: + contrast_factor = random.uniform(contrast[0], contrast[1]) + transforms.append(Lambda(lambda img: adjust_contrast(img, contrast_factor))) + + if saturation is not None: + saturation_factor = random.uniform(saturation[0], saturation[1]) + transforms.append(Lambda(lambda img: adjust_saturation(img, saturation_factor))) + + if hue is not None: + hue_factor = random.uniform(hue[0], hue[1]) + transforms.append(Lambda(lambda img: adjust_hue(img, hue_factor))) + + random.shuffle(transforms) + transform = Compose(transforms) + + return transform + + def __call__(self, img): + """ + Args:: + + [in] img (PIL Image): Input image. + + Returns:: + + [out] PIL Image: Color jittered image. + """ + transform = self._get_transform(self.brightness, self.contrast, self.saturation, self.hue) + + return transform(img) + + +def _setup_size(size, error_msg): + if isinstance(size, numbers.Number): + return int(size), int(size) + + if isinstance(size, Sequence) and len(size) == 1: + return size[0], size[0] + + if len(size) != 2: + raise ValueError(error_msg) + + return size From 513507c6a07acfff1b2f2cd8c248035b7e0bcc55 Mon Sep 17 00:00:00 2001 From: yaox12 Date: Tue, 6 Oct 2020 16:30:41 +0800 Subject: [PATCH 03/16] add to_pil_image --- python/jittor/transform/transform.py | 109 +++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/python/jittor/transform/transform.py b/python/jittor/transform/transform.py index 6c226bc1..4981755e 100644 --- a/python/jittor/transform/transform.py +++ b/python/jittor/transform/transform.py @@ -12,6 +12,7 @@ import random import math from PIL import Image import numpy as np +import jittor as jt import warnings from collections.abc import Sequence, Mapping @@ -335,6 +336,84 @@ def to_tensor(img): return img +def to_pil_image(pic, mode=None): + """Convert a jt.array or an np.ndarray to PIL Image. + + Args:: + + [in] pic (jt.array or numpy.ndarray): Image to be converted to PIL Image. + [in] mode (`PIL.Image mode`): color space and pixel depth of input data (optional). + + Returns:: + + [out] PIL Image: Image converted to PIL Image. + """ + if not(isinstance(pic, jt.array) or isinstance(pic, np.ndarray)): + raise TypeError(f'pic should be Tensor or ndarray. Got {type(pic)}.') + + elif isinstance(pic, jt.array): + if pic.ndim not in {2, 3}: + raise ValueError(f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.') + + elif pic.ndim == 2: + # if 2D image, convert to np.ndarray and add channel dimension (CHW) + pic = np.expand_dims(pic.data, 2) + + elif isinstance(pic, np.ndarray): + if pic.ndim not in {2, 3}: + raise ValueError(f'pic should be 2/3 dimensional. 
Got {pic.ndim} dimensions.') + + elif pic.ndim == 2: + # if 2D image, add channel dimension (HWC) + pic = np.expand_dims(pic, 2) + + npimg = pic + if not isinstance(npimg, np.ndarray): + raise TypeError(f'Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}.') + + if npimg.shape[2] == 1: + expected_mode = None + npimg = npimg[:, :, 0] + if npimg.dtype == np.uint8: + expected_mode = 'L' + elif npimg.dtype == np.int16: + expected_mode = 'I;16' + elif npimg.dtype == np.int32: + expected_mode = 'I' + elif npimg.dtype == np.float32: + expected_mode = 'F' + if mode is not None and mode != expected_mode: + raise ValueError(f"Incorrect mode ({mode}) supplied for input type {np.dtype}. Should be {expected_mode}.") + mode = expected_mode + + elif npimg.shape[2] == 2: + permitted_2_channel_modes = ['LA'] + if mode is not None and mode not in permitted_2_channel_modes: + raise ValueError(f"Only modes {permitted_2_channel_modes} are supported for 2D inputs") + + if mode is None and npimg.dtype == np.uint8: + mode = 'LA' + + elif npimg.shape[2] == 4: + permitted_4_channel_modes = ['RGBA', 'CMYK', 'RGBX'] + if mode is not None and mode not in permitted_4_channel_modes: + raise ValueError(f"Only modes {permitted_4_channel_modes} are supported for 4D inputs") + + if mode is None and npimg.dtype == np.uint8: + mode = 'RGBA' + else: + permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV'] + if mode is not None and mode not in permitted_3_channel_modes: + raise ValueError(f"Only modes {permitted_3_channel_modes} are supported for 3D inputs") + if mode is None and npimg.dtype == np.uint8: + mode = 'RGB' + + if mode is None: + raise TypeError('Input type {} is not supported'.format(npimg.dtype)) + + return Image.fromarray(npimg, mode=mode) + + def image_normalize(img, mean, std): """ Function for normalizing image. @@ -629,6 +708,36 @@ class ToTensor: return to_tensor(img) +class ToPILImage: + """ + Converts a jt.array of shape C x H x W or a numpy ndarray of shape + H x W x C to a PIL Image while preserving the value range. + + Args:: + + [in] mode (`PIL.Image mode`): color space and pixel depth of input data (optional). + If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. + - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. + - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, + ``float``, ``short``). + """ + def __init__(self, mode=None): + self.mode = mode + + def __call__(self, pic): + """ + Args:: + [in] pic (jt.array or numpy.ndarray): Image to be converted to PIL Image. + + Returns: + + [out] PIL Image: Image converted to PIL Image. + """ + return to_pil_image(pic, self.mode) + + class Lambda: """ Apply a user-defined lambda as a transform. 
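
A minimal round-trip sketch for the conversions added above (shapes and
values are illustrative):

    import numpy as np
    import jittor.transform as transform

    arr = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)  # HWC, uint8
    pil = transform.ToPILImage()(arr)    # mode is inferred as 'RGB'
    back = transform.ToTensor()(pil)     # CHW float array scaled to [0, 1]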
From 130bc3db5aab8aecd5cfcf4a4ac7ffedb13e6b89 Mon Sep 17 00:00:00 2001 From: yaox12 Date: Wed, 7 Oct 2020 16:13:37 +0800 Subject: [PATCH 04/16] add test for transforms --- python/jittor/test/test_transform.py | 916 ++++++++++++++++++++++++ python/jittor/transform/function_pil.py | 6 +- python/jittor/transform/transform.py | 181 +++-- 3 files changed, 1041 insertions(+), 62 deletions(-) create mode 100644 python/jittor/test/test_transform.py diff --git a/python/jittor/test/test_transform.py b/python/jittor/test/test_transform.py new file mode 100644 index 00000000..e1485c7c --- /dev/null +++ b/python/jittor/test/test_transform.py @@ -0,0 +1,916 @@ +import unittest +import random +from PIL import Image +import numpy as np +from numpy.testing import assert_array_almost_equal +import jittor as jt +import jittor.transform as transform + +try: + from scipy import stats +except ImportError: + stats = None + + +class Tester(unittest.TestCase): + + def test_crop(self): + height = random.randint(10, 32) * 2 + width = random.randint(10, 32) * 2 + oheight = random.randint(5, (height - 2) / 2) * 2 + owidth = random.randint(5, (width - 2) / 2) * 2 + + img = np.ones([3, height, width]) + oh1 = (height - oheight) // 2 + ow1 = (width - owidth) // 2 + imgnarrow = img[:, oh1:oh1 + oheight, ow1:ow1 + owidth] + imgnarrow.fill(0) + img = jt.array(img) + result = transform.Compose([ + transform.ToPILImage(), + transform.CenterCrop((oheight, owidth)), + transform.ToTensor(), + ])(img) + self.assertEqual(result.sum().data, 0, + f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}") + oheight += 1 + owidth += 1 + result = transform.Compose([ + transform.ToPILImage(), + transform.CenterCrop((oheight, owidth)), + transform.ToTensor(), + ])(img) + sum1 = result.sum().data + self.assertGreater(sum1, 1, + f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}") + oheight += 1 + owidth += 1 + result = transform.Compose([ + transform.ToPILImage(), + transform.CenterCrop((oheight, owidth)), + transform.ToTensor(), + ])(img) + sum2 = result.sum().data + self.assertGreater(sum2, 0, + f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}") + self.assertGreater(sum2, sum1, + f"height: {height} width: {width} oheight: {oheight} owdith: {owidth}") + + def test_randomresized_params(self): + height = random.randint(24, 32) * 2 + width = random.randint(24, 32) * 2 + img = jt.ones([3, height, width]) + to_pil_image = transform.ToPILImage() + img = to_pil_image(img) + size = 100 + epsilon = 0.05 + min_scale = 0.25 + for _ in range(10): + scale_min = max(round(random.random(), 2), min_scale) + scale_range = (scale_min, scale_min + round(random.random(), 2)) + aspect_min = max(round(random.random(), 2), epsilon) + aspect_ratio_range = (aspect_min, aspect_min + round(random.random(), 2)) + randresizecrop = transform.RandomCropAndResize(size, scale_range, aspect_ratio_range) + i, j, h, w = randresizecrop.get_params(img, scale_range, aspect_ratio_range) + aspect_ratio_obtained = w / h + self.assertTrue((min(aspect_ratio_range) - epsilon <= aspect_ratio_obtained and + aspect_ratio_obtained <= max(aspect_ratio_range) + epsilon) or + aspect_ratio_obtained == 1.0) + self.assertIsInstance(i, int) + self.assertIsInstance(j, int) + self.assertIsInstance(h, int) + self.assertIsInstance(w, int) + + def test_resize(self): + height = random.randint(24, 32) * 2 + width = random.randint(24, 32) * 2 + osize = random.randint(5, 12) * 2 + + img = jt.ones([3, height, width]) + result = transform.Compose([ + 
transform.ToPILImage(), + transform.Resize(osize), + transform.ToTensor(), + ])(img) + self.assertIn(osize, result.size()) + if height < width: + self.assertLessEqual(result.size(1), result.size(2)) + elif width < height: + self.assertGreaterEqual(result.size(1), result.size(2)) + + result = transform.Compose([ + transform.ToPILImage(), + transform.Resize([osize, osize]), + transform.ToTensor(), + ])(img) + self.assertIn(osize, result.size()) + self.assertEqual(result.size(1), osize) + self.assertEqual(result.size(2), osize) + + oheight = random.randint(5, 12) * 2 + owidth = random.randint(5, 12) * 2 + result = transform.Compose([ + transform.ToPILImage(), + transform.Resize((oheight, owidth)), + transform.ToTensor(), + ])(img) + self.assertEqual(result.size(1), oheight) + self.assertEqual(result.size(2), owidth) + + result = transform.Compose([ + transform.ToPILImage(), + transform.Resize([oheight, owidth]), + transform.ToTensor(), + ])(img) + self.assertEqual(result.size(1), oheight) + self.assertEqual(result.size(2), owidth) + + def test_random_crop(self): + height = random.randint(10, 32) * 2 + width = random.randint(10, 32) * 2 + oheight = random.randint(5, (height - 2) / 2) * 2 + owidth = random.randint(5, (width - 2) / 2) * 2 + img = jt.ones([3, height, width]) + result = transform.Compose([ + transform.ToPILImage(), + transform.RandomCrop((oheight, owidth)), + transform.ToTensor(), + ])(img) + self.assertEqual(result.size(1), oheight) + self.assertEqual(result.size(2), owidth) + + result = transform.Compose([ + transform.ToPILImage(), + transform.RandomCrop((oheight, owidth)), + transform.ToTensor(), + ])(img) + self.assertEqual(result.size(1), oheight) + self.assertEqual(result.size(2), owidth) + + result = transform.Compose([ + transform.ToPILImage(), + transform.RandomCrop((height, width)), + transform.ToTensor() + ])(img) + self.assertEqual(result.size(1), height) + self.assertEqual(result.size(2), width) + self.assertTrue(np.allclose(img.numpy(), result.numpy())) + + with self.assertRaises(AssertionError): + result = transform.Compose([ + transform.ToPILImage(), + transform.RandomCrop((height + 1, width + 1)), + transform.ToTensor(), + ])(img) + + def test_lambda(self): + trans = transform.Lambda(lambda x: x.add(10)) + x = jt.random([10]) + y = trans(x) + self.assertTrue(np.allclose(y.data, jt.add(x, 10).data)) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_apply(self): + random_state = random.getstate() + random.seed(42) + random_apply_transform = transform.RandomApply( + [ + transform.RandomHorizontalFlip(), + transform.RandomVerticalFlip(), + ], p=0.4 + ) + img = transform.ToPILImage()(jt.random((3, 10, 10))) + num_samples = 250 + num_applies = 0 + for _ in range(num_samples): + out = random_apply_transform(img) + if out != img: + num_applies += 1 + + p_value = stats.binom_test(num_applies, num_samples, p=0.3) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_choice(self): + random_state = random.getstate() + random.seed(42) + random_choice_transform = transform.RandomChoice( + [ + transform.Resize(15), + transform.Resize(20), + transform.CenterCrop(10) + ] + ) + img = transform.ToPILImage()(jt.random((3, 25, 25))) + num_samples = 250 + num_resize_15 = 0 + num_resize_20 = 0 + num_crop_10 = 0 + for _ in range(num_samples): + out = random_choice_transform(img) + if out.size == (15, 15): + num_resize_15 += 1 + elif out.size == (20, 20): + 
num_resize_20 += 1 + elif out.size == (10, 10): + num_crop_10 += 1 + + p_value = stats.binom_test(num_resize_15, num_samples, p=0.33333) + self.assertGreater(p_value, 0.0001) + p_value = stats.binom_test(num_resize_20, num_samples, p=0.33333) + self.assertGreater(p_value, 0.0001) + p_value = stats.binom_test(num_crop_10, num_samples, p=0.33333) + self.assertGreater(p_value, 0.0001) + + random.setstate(random_state) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_order(self): + random_state = random.getstate() + random.seed(42) + random_order_transform = transform.RandomOrder( + [ + transform.Resize(20), + transform.CenterCrop(10) + ] + ) + img = transform.ToPILImage()(jt.random((3, 25, 25))) + num_samples = 250 + num_normal_order = 0 + resize_crop_out = transform.CenterCrop(10)(transform.Resize(20)(img)) + for _ in range(num_samples): + out = random_order_transform(img) + if out == resize_crop_out: + num_normal_order += 1 + + p_value = stats.binom_test(num_normal_order, num_samples, p=0.5) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + def test_to_tensor(self): + test_channels = [1, 3, 4] + height, width = 4, 4 + trans = transform.ToTensor() + + with self.assertRaises(TypeError): + trans(np.random.rand(1, height, width).tolist()) + + with self.assertRaises(ValueError): + trans(np.random.rand(height)) + trans(np.random.rand(1, 1, height, width)) + + for channels in test_channels: + input_data = jt.array(np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8)).float().divide(255) + img = transform.ToPILImage()(input_data) + output = trans(img) + self.assertTrue(np.allclose(input_data.data, output.data)) + + ndarray = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8) + output = trans(ndarray) + expected_output = ndarray.transpose((2, 0, 1)) / 255.0 + self.assertTrue(np.allclose(output.numpy(), expected_output)) + + ndarray = np.random.rand(height, width, channels).astype(np.float32) + output = trans(ndarray) + expected_output = ndarray.transpose((2, 0, 1)) + self.assertTrue(np.allclose(output.numpy(), expected_output)) + + # separate test for mode '1' PIL images + input_data = jt.array(np.random.binomial(1, 0.5, size=(1, height, width)).astype(np.uint8)) + img = transform.ToPILImage()(input_data.multiply(255)).convert('1') + output = trans(img) + self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) + + def test_1_channel_tensor_to_pil_image(self): + to_tensor = transform.ToTensor() + + img_data_float = jt.array(np.random.rand(1, 4, 4), dtype='float32') + img_data_byte = jt.array(np.random.randint(0, 255, (1, 4, 4)), dtype='uint8') + img_data_short = jt.array(np.random.randint(0, 32767, (1, 4, 4)), dtype='int16') + img_data_int = jt.array(np.random.randint(0, 2147483647, (1, 4, 4)), dtype='int32') + + inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] + expected_outputs = [img_data_float.multiply(255).int().float().divide(255).numpy(), + img_data_byte.float().divide(255.0).numpy(), + img_data_short.numpy(), + img_data_int.numpy()] + expected_modes = ['L', 'L', 'I;16', 'I'] + + for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes): + for t in [transform.ToPILImage(), transform.ToPILImage(mode=mode)]: + img = t(img_data) + self.assertEqual(img.mode, mode) + self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy())) + # 'F' mode for torch.FloatTensor + img_F_mode = 
transform.ToPILImage(mode='F')(img_data_float) + self.assertEqual(img_F_mode.mode, 'F') + self.assertTrue(np.allclose(np.array(Image.fromarray(img_data_float.squeeze(0).numpy(), mode='F')), + np.array(img_F_mode))) + + def test_1_channel_ndarray_to_pil_image(self): + img_data_float = np.random.rand(4, 4, 1).astype(np.float32) + img_data_byte = np.random.randint(0, 255, (4, 4, 1)).astype(np.uint8) + img_data_short = np.random.randint(0, 32767, (4, 4, 1)).astype(np.int16) + img_data_int = np.random.randint(0, 2147483647, (4, 4, 1)).astype(np.int32) + + inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] + expected_modes = ['F', 'L', 'I;16', 'I'] + for img_data, mode in zip(inputs, expected_modes): + for t in [transform.ToPILImage(), transform.ToPILImage(mode=mode)]: + img = t(img_data) + self.assertEqual(img.mode, mode) + self.assertTrue(np.allclose(img_data[:, :, 0], img)) + + def test_2_channel_ndarray_to_pil_image(self): + def verify_img_data(img_data, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'LA') # default should assume LA + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + split = img.split() + for i in range(2): + self.assertTrue(np.allclose(img_data[:, :, i], split[i])) + + img_data = np.random.randint(0, 255, (4, 4, 2)).astype(np.uint8) + for mode in [None, 'LA']: + verify_img_data(img_data, mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 4 or 1 or 3 channel images + transform.ToPILImage(mode='RGBA')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='RGB')(img_data) + + def test_2_channel_tensor_to_pil_image(self): + def verify_img_data(img_data, expected_output, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'LA') # default should assume LA + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + split = img.split() + for i in range(2): + self.assertTrue(np.allclose(expected_output[i].numpy(), transform.to_tensor(split[i]).numpy())) + + img_data = jt.random((2, 4, 4)) + expected_output = img_data.multiply(255).int().float().divide(255) + for mode in [None, 'LA']: + verify_img_data(img_data, expected_output, mode=mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 4 or 1 or 3 channel images + transform.ToPILImage(mode='RGBA')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='RGB')(img_data) + + def test_3_channel_tensor_to_pil_image(self): + def verify_img_data(img_data, expected_output, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'RGB') # default should assume RGB + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + split = img.split() + for i in range(3): + self.assertTrue(np.allclose(expected_output[i].numpy(), transform.to_tensor(split[i]).numpy())) + + img_data = jt.random((3, 4, 4)) + expected_output = img_data.multiply(255).int().float().divide(255) + for mode in [None, 'RGB', 'HSV', 'YCbCr']: + verify_img_data(img_data, expected_output, mode=mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 4 or 1 or 2 channel images + transform.ToPILImage(mode='RGBA')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='LA')(img_data) + + with self.assertRaises(ValueError): + 
transform.ToPILImage()(jt.random((1, 3, 4, 4))) + + def test_3_channel_ndarray_to_pil_image(self): + def verify_img_data(img_data, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'RGB') # default should assume RGB + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + split = img.split() + for i in range(3): + self.assertTrue(np.allclose(img_data[:, :, i], split[i])) + + img_data = np.random.randint(0, 255, (4, 4, 3)).astype(np.uint8) + for mode in [None, 'RGB', 'HSV', 'YCbCr']: + verify_img_data(img_data, mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 4 or 1 or 2 channel images + transform.ToPILImage(mode='RGBA')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='LA')(img_data) + + def test_4_channel_tensor_to_pil_image(self): + def verify_img_data(img_data, expected_output, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'RGBA') # default should assume RGBA + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + + split = img.split() + for i in range(4): + self.assertTrue(np.allclose(expected_output[i].numpy(), transform.to_tensor(split[i]).numpy())) + + img_data = jt.random((4, 4, 4)) + expected_output = img_data.multiply(255).int().float().divide(255) + for mode in [None, 'RGBA', 'CMYK', 'RGBX']: + verify_img_data(img_data, expected_output, mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 3 or 1 or 2 channel images + transform.ToPILImage(mode='RGB')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='LA')(img_data) + + def test_4_channel_ndarray_to_pil_image(self): + def verify_img_data(img_data, mode): + if mode is None: + img = transform.ToPILImage()(img_data) + self.assertEqual(img.mode, 'RGBA') # default should assume RGBA + else: + img = transform.ToPILImage(mode=mode)(img_data) + self.assertEqual(img.mode, mode) + split = img.split() + for i in range(4): + self.assertTrue(np.allclose(img_data[:, :, i], split[i])) + + img_data = np.random.randint(0, 255, (4, 4, 4)).astype(np.uint8) + for mode in [None, 'RGBA', 'CMYK', 'RGBX']: + verify_img_data(img_data, mode) + + with self.assertRaises(ValueError): + # should raise if we try a mode for 3 or 1 or 2 channel images + transform.ToPILImage(mode='RGB')(img_data) + transform.ToPILImage(mode='P')(img_data) + transform.ToPILImage(mode='LA')(img_data) + + def test_2d_tensor_to_pil_image(self): + to_tensor = transform.ToTensor() + + img_data_float = jt.array(np.random.rand(4, 4), dtype='float32') + img_data_byte = jt.array(np.random.randint(0, 255, (4, 4)), dtype='uint8') + img_data_short = jt.array(np.random.randint(0, 32767, (4, 4)), dtype='int16') + img_data_int = jt.array(np.random.randint(0, 2147483647, (4, 4)), dtype='int32') + + inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] + expected_outputs = [img_data_float.multiply(255).int().float().divide(255).numpy(), + img_data_byte.float().divide(255.0).numpy(), + img_data_short.numpy(), + img_data_int.numpy()] + expected_modes = ['L', 'L', 'I;16', 'I'] + + for img_data, expected_output, mode in zip(inputs, expected_outputs, expected_modes): + for t in [transform.ToPILImage(), transform.ToPILImage(mode=mode)]: + img = t(img_data) + self.assertEqual(img.mode, mode) + self.assertTrue(np.allclose(expected_output, to_tensor(img).numpy())) + + def 
test_2d_ndarray_to_pil_image(self): + img_data_float = np.random.rand(4, 4).astype(np.float32) + img_data_byte = np.random.randint(0, 255, (4, 4)).astype(np.uint8) + img_data_short = np.random.randint(0, 32767, (4, 4)).astype(np.int16) + img_data_int = np.random.randint(0, 2147483647, (4, 4)).astype(np.int32) + + inputs = [img_data_float, img_data_byte, img_data_short, img_data_int] + expected_modes = ['F', 'L', 'I;16', 'I'] + for img_data, mode in zip(inputs, expected_modes): + for t in [transform.ToPILImage(), transform.ToPILImage(mode=mode)]: + img = t(img_data) + self.assertEqual(img.mode, mode) + self.assertTrue(np.allclose(img_data, img)) + + def test_tensor_bad_types_to_pil_image(self): + with self.assertRaises(ValueError): + transform.ToPILImage()(jt.ones((1, 3, 4, 4))) + + def test_ndarray_bad_types_to_pil_image(self): + trans = transform.ToPILImage() + with self.assertRaises(TypeError): + trans(np.ones([4, 4, 1], np.int64)) + trans(np.ones([4, 4, 1], np.uint16)) + trans(np.ones([4, 4, 1], np.uint32)) + trans(np.ones([4, 4, 1], np.float64)) + + with self.assertRaises(ValueError): + transform.ToPILImage()(np.ones([1, 4, 4, 3])) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_vertical_flip(self): + random_state = random.getstate() + random.seed(42) + img = transform.ToPILImage()(jt.random((3, 10, 10))) + vimg = img.transpose(Image.FLIP_TOP_BOTTOM) + + num_samples = 250 + num_vertical = 0 + for _ in range(num_samples): + out = transform.RandomVerticalFlip()(img) + if out == vimg: + num_vertical += 1 + + p_value = stats.binom_test(num_vertical, num_samples, p=0.5) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + num_samples = 250 + num_vertical = 0 + for _ in range(num_samples): + out = transform.RandomVerticalFlip(p=0.7)(img) + if out == vimg: + num_vertical += 1 + + p_value = stats.binom_test(num_vertical, num_samples, p=0.7) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_horizontal_flip(self): + random_state = random.getstate() + random.seed(42) + img = transform.ToPILImage()(jt.random((3, 10, 10))) + himg = img.transpose(Image.FLIP_LEFT_RIGHT) + + num_samples = 250 + num_horizontal = 0 + for _ in range(num_samples): + out = transform.RandomHorizontalFlip()(img) + if out == himg: + num_horizontal += 1 + + p_value = stats.binom_test(num_horizontal, num_samples, p=0.5) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + num_samples = 250 + num_horizontal = 0 + for _ in range(num_samples): + out = transform.RandomHorizontalFlip(p=0.7)(img) + if out == himg: + num_horizontal += 1 + + p_value = stats.binom_test(num_horizontal, num_samples, p=0.7) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + @unittest.skipIf(stats is None, 'scipy.stats is not available') + def test_normalize(self): + def samples_from_standard_normal(tensor): + p_value = stats.kstest(list(tensor.reshape(-1).data), 'norm', args=(0, 1)).pvalue + return p_value > 0.0001 + + random_state = random.getstate() + random.seed(42) + for channels in [1, 3]: + img = jt.random((channels, 10, 10)) + mean = [img[c].mean().item() for c in range(channels)] + std = [img[c].std().item() for c in range(channels)] + normalized = transform.ImageNormalize(mean, std)(img) + self.assertTrue(samples_from_standard_normal(normalized)) + random.setstate(random_state) + + def test_normalize_different_dtype(self): + for dtype1 in 
['float32', 'float64']: + img = jt.random((3, 10, 10), dtype=dtype1) + for dtype2 in ['int64', 'float32', 'float64']: + mean = jt.array([1, 2, 3], dtype=dtype2) + std = jt.array([1, 2, 1], dtype=dtype2) + # checks that it doesn't crash + transform.image_normalize(img, mean, std) + + def test_normalize_3d_tensor(self): + jt.seed(28) + n_channels = 3 + img_size = 10 + mean = jt.random((n_channels,)) + std = jt.random((n_channels,)) + img = jt.random((n_channels, img_size, img_size)) + target = transform.image_normalize(img, mean, std) + + mean_unsqueezed = mean.reshape(-1, 1, 1) + std_unsqueezed = std.reshape(-1, 1, 1) + result1 = transform.image_normalize(img, mean_unsqueezed, std_unsqueezed) + result2 = transform.image_normalize(img, + mean_unsqueezed.repeat(1, img_size, img_size), + std_unsqueezed.repeat(1, img_size, img_size)) + assert_array_almost_equal(target.data, result1.data) + assert_array_almost_equal(target.data, result2.data) + + def test_adjust_brightness(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + # test 0 + y_pil = transform.adjust_brightness(x_pil, 1) + y_np = np.array(y_pil) + self.assertTrue(np.allclose(y_np, x_np)) + + # test 1 + y_pil = transform.adjust_brightness(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [0, 2, 6, 27, 67, 113, 18, 4, 117, 45, 127, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = transform.adjust_brightness(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 10, 26, 108, 255, 255, 74, 16, 255, 180, 255, 2] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + def test_adjust_contrast(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + # test 0 + y_pil = transform.adjust_contrast(x_pil, 1) + y_np = np.array(y_pil) + self.assertTrue(np.allclose(y_np, x_np)) + + # test 1 + y_pil = transform.adjust_contrast(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [43, 45, 49, 70, 110, 156, 61, 47, 160, 88, 170, 43] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = transform.adjust_contrast(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 0, 0, 22, 184, 255, 0, 0, 255, 94, 255, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # @unittest.skipIf(Image.__version__ >= '7', "Temporarily disabled") + def test_adjust_saturation(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + # test 0 + y_pil = transform.adjust_saturation(x_pil, 1) + y_np = np.array(y_pil) + self.assertTrue(np.allclose(y_np, x_np)) + + # test 1 + y_pil = transform.adjust_saturation(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 216, 89] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = transform.adjust_saturation(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 3, 255, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + 
self.assertTrue(np.allclose(y_np, y_ans)) + + def test_adjust_hue(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + with self.assertRaises(ValueError): + transform.adjust_hue(x_pil, -0.7) + transform.adjust_hue(x_pil, 1) + + # test 0: almost same as x_data but not exact. + # probably because hsv <-> rgb floating point ops + y_pil = transform.adjust_hue(x_pil, 0) + y_np = np.array(y_pil) + y_ans = [0, 5, 13, 54, 139, 226, 35, 8, 234, 91, 255, 1] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 1 + y_pil = transform.adjust_hue(x_pil, 0.25) + y_np = np.array(y_pil) + y_ans = [13, 0, 12, 224, 54, 226, 234, 8, 99, 1, 222, 255] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = transform.adjust_hue(x_pil, -0.25) + y_np = np.array(y_pil) + y_ans = [0, 13, 2, 54, 226, 58, 8, 234, 152, 255, 43, 1] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + def test_adjust_gamma(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + # test 0 + y_pil = transform.adjust_gamma(x_pil, 1) + y_np = np.array(y_pil) + self.assertTrue(np.allclose(y_np, x_np)) + + # test 1 + y_pil = transform.adjust_gamma(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = transform.adjust_gamma(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + def test_adjusts_L_mode(self): + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_rgb = Image.fromarray(x_np, mode='RGB') + + x_l = x_rgb.convert('L') + self.assertEqual(transform.adjust_brightness(x_l, 2).mode, 'L') + self.assertEqual(transform.adjust_saturation(x_l, 2).mode, 'L') + self.assertEqual(transform.adjust_contrast(x_l, 2).mode, 'L') + self.assertEqual(transform.adjust_hue(x_l, 0.4).mode, 'L') + self.assertEqual(transform.adjust_gamma(x_l, 0.5).mode, 'L') + + def test_color_jitter(self): + color_jitter = transform.ColorJitter(2, 2, 2, 0.1) + + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + x_pil_2 = x_pil.convert('L') + + for i in range(10): + y_pil = color_jitter(x_pil) + self.assertEqual(y_pil.mode, x_pil.mode) + + y_pil_2 = color_jitter(x_pil_2) + self.assertEqual(y_pil_2.mode, x_pil_2.mode) + + def test_gray(self): + """Unit tests for grayscale transform""" + + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + x_pil_2 = x_pil.convert('L') + gray_np = np.array(x_pil_2) + + # Test Set: Gray an image with desired number of output channels + # Case 1: RGB -> 1 channel grayscale + trans1 = transform.Gray(num_output_channels=1) + 
gray_pil_1 = trans1(x_pil) + gray_np_1 = np.array(gray_pil_1) + self.assertEqual(gray_pil_1.mode, 'L', 'mode should be L') + self.assertEqual(gray_np_1.shape, tuple(x_shape[0:2]), 'should be 1 channel') + np.testing.assert_equal(gray_np, gray_np_1) + + # Case 2: RGB -> 3 channel grayscale + trans2 = transform.Gray(num_output_channels=3) + gray_pil_2 = trans2(x_pil) + gray_np_2 = np.array(gray_pil_2) + self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') + self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') + np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) + np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) + np.testing.assert_equal(gray_np, gray_np_2[:, :, 0]) + + # Case 3: 1 channel grayscale -> 1 channel grayscale + trans3 = transform.Gray(num_output_channels=1) + gray_pil_3 = trans3(x_pil_2) + gray_np_3 = np.array(gray_pil_3) + self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') + self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') + np.testing.assert_equal(gray_np, gray_np_3) + + # Case 4: 1 channel grayscale -> 3 channel grayscale + trans4 = transform.Gray(num_output_channels=3) + gray_pil_4 = trans4(x_pil_2) + gray_np_4 = np.array(gray_pil_4) + self.assertEqual(gray_pil_4.mode, 'RGB', 'mode should be RGB') + self.assertEqual(gray_np_4.shape, tuple(x_shape), 'should be 3 channel') + np.testing.assert_equal(gray_np_4[:, :, 0], gray_np_4[:, :, 1]) + np.testing.assert_equal(gray_np_4[:, :, 1], gray_np_4[:, :, 2]) + np.testing.assert_equal(gray_np, gray_np_4[:, :, 0]) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_gray(self): + """Unit tests for random grayscale transform""" + + # Test Set 1: RGB -> 3 channel grayscale + random_state = random.getstate() + random.seed(42) + x_shape = [2, 2, 3] + x_np = np.random.randint(0, 256, x_shape, np.uint8) + x_pil = Image.fromarray(x_np, mode='RGB') + x_pil_2 = x_pil.convert('L') + gray_np = np.array(x_pil_2) + + num_samples = 250 + num_gray = 0 + for _ in range(num_samples): + gray_pil_2 = transform.RandomGray(p=0.5)(x_pil) + gray_np_2 = np.array(gray_pil_2) + if np.array_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) and \ + np.array_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) and \ + np.array_equal(gray_np, gray_np_2[:, :, 0]): + num_gray = num_gray + 1 + + p_value = stats.binom_test(num_gray, num_samples, p=0.5) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + # Test Set 2: grayscale -> 1 channel grayscale + random_state = random.getstate() + random.seed(42) + x_shape = [2, 2, 3] + x_np = np.random.randint(0, 256, x_shape, np.uint8) + x_pil = Image.fromarray(x_np, mode='RGB') + x_pil_2 = x_pil.convert('L') + gray_np = np.array(x_pil_2) + + num_samples = 250 + num_gray = 0 + for _ in range(num_samples): + gray_pil_3 = transform.RandomGray(p=0.5)(x_pil_2) + gray_np_3 = np.array(gray_pil_3) + if np.array_equal(gray_np, gray_np_3): + num_gray = num_gray + 1 + + p_value = stats.binom_test(num_gray, num_samples, p=1.0) # Note: grayscale is always unchanged + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + # Test set 3: Explicit tests + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + x_pil_2 = x_pil.convert('L') + gray_np = np.array(x_pil_2) + + # Case 3a: RGB -> 3 channel grayscale (grayscaled) + trans2 = transform.RandomGray(p=1.0) + 
gray_pil_2 = trans2(x_pil) + gray_np_2 = np.array(gray_pil_2) + self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') + self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') + np.testing.assert_equal(gray_np_2[:, :, 0], gray_np_2[:, :, 1]) + np.testing.assert_equal(gray_np_2[:, :, 1], gray_np_2[:, :, 2]) + np.testing.assert_equal(gray_np, gray_np_2[:, :, 0]) + + # Case 3b: RGB -> 3 channel grayscale (unchanged) + trans2 = transform.RandomGray(p=0.0) + gray_pil_2 = trans2(x_pil) + gray_np_2 = np.array(gray_pil_2) + self.assertEqual(gray_pil_2.mode, 'RGB', 'mode should be RGB') + self.assertEqual(gray_np_2.shape, tuple(x_shape), 'should be 3 channel') + np.testing.assert_equal(x_np, gray_np_2) + + # Case 3c: 1 channel grayscale -> 1 channel grayscale (grayscaled) + trans3 = transform.RandomGray(p=1.0) + gray_pil_3 = trans3(x_pil_2) + gray_np_3 = np.array(gray_pil_3) + self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') + self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') + np.testing.assert_equal(gray_np, gray_np_3) + + # Case 3d: 1 channel grayscale -> 1 channel grayscale (unchanged) + trans3 = transform.RandomGray(p=0.0) + gray_pil_3 = trans3(x_pil_2) + gray_np_3 = np.array(gray_pil_3) + self.assertEqual(gray_pil_3.mode, 'L', 'mode should be L') + self.assertEqual(gray_np_3.shape, tuple(x_shape[0:2]), 'should be 1 channel') + np.testing.assert_equal(gray_np, gray_np_3) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/jittor/transform/function_pil.py b/python/jittor/transform/function_pil.py index 5f681806..aa62d3e7 100644 --- a/python/jittor/transform/function_pil.py +++ b/python/jittor/transform/function_pil.py @@ -19,13 +19,13 @@ def _is_pil_image(img): def _get_image_size(img): if _is_pil_image(img): return img.size - raise TypeError(f"Unexpected type {img}") + raise TypeError(f"Unexpected type {type(img)}") def _get_image_num_channels(img): if _is_pil_image(img): return 1 if img.mode == 'L' else 3 - raise TypeError(f"Unexpected type {img}") + raise TypeError(f"Unexpected type {type(img)}") def hflip(img): @@ -319,7 +319,7 @@ def resize(img, size, interpolation=Image.BILINEAR): return img.resize(size[::-1], interpolation) -def to_grayscale(img, num_output_channels): +def gray(img, num_output_channels): """ Function for converting PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. diff --git a/python/jittor/transform/transform.py b/python/jittor/transform/transform.py index 4981755e..bd538093 100644 --- a/python/jittor/transform/transform.py +++ b/python/jittor/transform/transform.py @@ -13,16 +13,15 @@ import math from PIL import Image import numpy as np import jittor as jt -import warnings -from collections.abc import Sequence, Mapping +from collections.abc import Sequence from . 
import function_pil as F_pil __all__ = ["hflip", "vflip", "adjust_brightness", "adjust_contrast", "adjust_saturation", "adjust_hue", "adjust_gamma", - "crop", "resize", "to_grayscale", "center_crop", "crop_and_resize", "to_tensor", "image_normalize", + "crop", "resize", "gray", "center_crop", "crop_and_resize", "to_tensor", "to_pil_image", "image_normalize", "Crop", "RandomCropAndResize", "RandomHorizontalFlip", "CenterCrop", "ImageNormalize", "Compose", "Resize", "Gray", "RandomGray", "RandomCrop", "ToTensor", "Lambda", "RandomApply", "RandomOrder", - "RandomChoice", "RandomVerticalFlip", "ColorJitter"] + "RandomChoice", "RandomVerticalFlip", "ColorJitter", "ToPILImage"] def _get_image_size(img): @@ -34,6 +33,13 @@ def _get_image_size(img): def _get_image_num_channels(img): return F_pil._get_image_num_channels(img) +def _is_numpy(img): + return isinstance(img, np.ndarray) + +def _is_numpy_image(img): + return img.ndim in {2, 3} + + def hflip(img): """ Function for horizontally flipping the given image. @@ -250,7 +256,7 @@ def resize(img, size, interpolation=Image.BILINEAR): return F_pil.resize(img, size, interpolation) -def to_grayscale(img, num_output_channels): +def gray(img, num_output_channels): """ Function for converting PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. @@ -265,7 +271,7 @@ def to_grayscale(img, num_output_channels): if num_output_channels = 1 : returned image is single channel if num_output_channels = 3 : returned image is 3 channel with r = g = b """ - return F_pil.to_grayscale(img, num_output_channels) + return F_pil.gray(img, num_output_channels) def center_crop(img, output_size): @@ -317,59 +323,96 @@ def crop_and_resize(img, top, left, height, width, size, interpolation=Image.BIL return img -def to_tensor(img): +def to_tensor(pic): """ - Function for turning Image.Image to jt.array. + Function for turning Image.Image or np.ndarray to jt.Var. Args:: - [in] img(PIL Image.Image): Input image. + [in] img(PIL Image.Image or np.ndarray): Input image. Example:: img = Image.open(...) img_ = transform.to_tensor(img) """ - # todo: handle image with various modes - if isinstance(img, Image.Image): - return np.array(img).transpose((2, 0, 1)) / np.float32(255) - return img + if not(F_pil._is_pil_image(pic) or _is_numpy(pic)): + raise TypeError(f'img should be PIL Image or ndarray. Got {type(pic)}.') + if _is_numpy(pic) and not _is_numpy_image(pic): + raise ValueError(f'img should be 2/3 dimensional. Got {pic.ndim} dimensions.') -def to_pil_image(pic, mode=None): - """Convert a jt.array or an np.ndarray to PIL Image. 
+ if _is_numpy(pic): + # handle numpy array + if pic.ndim == 2: + pic = pic[:, :, None] + + img = jt.array(pic.transpose((2, 0, 1))) + # backward compatibility + if img.dtype == 'uint8': + return img.float().divide(255) + else: + return img + + # handle PIL Image + if pic.mode == 'I': + img = jt.array(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = jt.array(np.array(pic, np.int16, copy=False)) + elif pic.mode == 'F': + img = jt.array(np.array(pic, np.float32, copy=False)) + elif pic.mode == '1': + img = jt.array(np.array(pic, np.uint8, copy=False) * 255, dtype='uint8') + else: + img = jt.array(np.array(pic, np.uint8, copy=False)) + + # put it from HWC to CHW format + img = img.reshape(pic.size[1], pic.size[0], len(pic.getbands())) + img = img.permute((2, 0, 1)) + if img.dtype == 'uint8': + return img.float().divide(255) + else: + return img + +def to_pil_image(img, mode=None): + """Convert a jt.Var or an np.ndarray to PIL Image. Args:: - [in] pic (jt.array or numpy.ndarray): Image to be converted to PIL Image. + [in] img (jt.Var or numpy.ndarray): Image to be converted to PIL Image. [in] mode (`PIL.Image mode`): color space and pixel depth of input data (optional). Returns:: [out] PIL Image: Image converted to PIL Image. """ - if not(isinstance(pic, jt.array) or isinstance(pic, np.ndarray)): - raise TypeError(f'pic should be Tensor or ndarray. Got {type(pic)}.') + if not(isinstance(img, jt.Var) or isinstance(img, np.ndarray)): + raise TypeError(f'img should be jt.Var or ndarray. Got {type(img)}.') - elif isinstance(pic, jt.array): - if pic.ndim not in {2, 3}: - raise ValueError(f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.') + elif isinstance(img, jt.Var): + if img.ndim not in {2, 3}: + raise ValueError(f'img should be 2/3 dimensional. Got {img.ndim} dimensions.') - elif pic.ndim == 2: - # if 2D image, convert to np.ndarray and add channel dimension (CHW) - pic = np.expand_dims(pic.data, 2) + elif img.ndim == 2: + # if 2D image, add channel dimension (CHW) + img = img.unsqueeze(0) - elif isinstance(pic, np.ndarray): - if pic.ndim not in {2, 3}: - raise ValueError(f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.') + elif isinstance(img, np.ndarray): + if img.ndim not in {2, 3}: + raise ValueError(f'img should be 2/3 dimensional. Got {img.ndim} dimensions.') - elif pic.ndim == 2: + elif img.ndim == 2: # if 2D image, add channel dimension (HWC) - pic = np.expand_dims(pic, 2) + img = np.expand_dims(img, 2) + + npimg = img + if isinstance(img, jt.Var): + if img.dtype in ('float32', 'float64') and mode != 'F': + img = img.multiply(255).uint8() + npimg = np.transpose(img.data, (1, 2, 0)) - npimg = pic if not isinstance(npimg, np.ndarray): - raise TypeError(f'Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}.') + raise TypeError(f'Input img must be a jt.Var or NumPy ndarray, not {type(npimg)}.') if npimg.shape[2] == 1: expected_mode = None @@ -418,8 +461,6 @@ def image_normalize(img, mean, std): """ Function for normalizing image. - Class for normalizing the input image. - Args:: [in] image(PIL Image.Image or np.ndarray): input image. @@ -431,11 +472,30 @@ def image_normalize(img, mean, std): img = Image.open(...) img_ = transform.image_normalize(img, mean=[0.5], std=[0.5]) """ - if isinstance(img, Image.Image): + if not isinstance(img, (Image.Image, jt.Var, np.ndarray)): + raise TypeError(f'Input type should be in (PIL Image, jt.Var, np.ndarray). 
Got {type(img)}.') + elif isinstance(img, Image.Image): + assert img.mode == 'RGB', f"input image mode should be 'RGB'. Got {img.mode}." img = (np.array(img).transpose((2, 0, 1)) \ - mean * np.float32(255.)) \ / (std * np.float32(255.)) else: + if img.ndim < 3: + raise ValueError(f'Expected input to be a array image of size (..., C, H, W). Got {img.shape}.') + if isinstance(img, jt.Var): + mean = jt.array(mean) + std = jt.array(std) + if (std.data == 0).any(): + raise ValueError('std cannot be zero.') + else: + mean = np.asarray(mean) + std = np.asarray(std) + if (std == 0).any(): + raise ValueError('std cannot be zero.') + if mean.ndim == 1: + mean = mean.reshape(-1, 1, 1) + if std.ndim == 1: + std = std.reshape(-1, 1, 1) img = (img - mean) / std return img @@ -483,10 +543,9 @@ class RandomCropAndResize: self.ratio = ratio self.interpolation = interpolation - def __call__(self, img:Image.Image): - width, height = img.size - scale = self.scale - ratio = self.ratio + @staticmethod + def get_params(img: Image.Image, scale, ratio): + width, height = _get_image_size(img) area = height * width for _ in range(10): @@ -500,21 +559,25 @@ class RandomCropAndResize: if 0 < w <= width and 0 < h <= height: i = random.randint(0, height - h) j = random.randint(0, width - w) - break + return i, j, h, w + + # Fallback to central crop + in_ratio = float(width) / float(height) + if in_ratio < min(ratio): + w = width + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = height + w = int(round(h * max(ratio))) else: - # Fallback to central crop - in_ratio = float(width) / float(height) - if in_ratio < min(ratio): - w = width - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = height - w = int(round(h * max(ratio))) - else: - w = width - h = height - i = (height - h) // 2 - j = (width - w) // 2 + w = width + h = height + i = (height - h) // 2 + j = (width - w) // 2 + return i, j, h, w + + def __call__(self, img: Image.Image): + i, j, h, w = self.get_params(img, self.scale, self.ratio) return crop_and_resize(img, i, j, h, w, self.size, self.interpolation) @@ -644,7 +707,7 @@ class Gray: self.num_output_channels = num_output_channels def __call__(self, img: Image.Image): - return to_grayscale(img, self.num_output_channels) + return gray(img, self.num_output_channels) class RandomGray: @@ -672,7 +735,7 @@ class RandomGray: def __call__(self, img: Image.Image): num_output_channels = _get_image_num_channels(img) if random.random() < self.p: - return to_grayscale(img, num_output_channels=num_output_channels) + return gray(img, num_output_channels=num_output_channels) return img @@ -702,7 +765,7 @@ class RandomCrop: class ToTensor: """ - Convert PIL Image to jt.array. + Convert PIL Image to jt.Var. """ def __call__(self, img: Image.Image): return to_tensor(img) @@ -710,7 +773,7 @@ class ToTensor: class ToPILImage: """ - Converts a jt.array of shape C x H x W or a numpy ndarray of shape + Converts a jt.Var of shape C x H x W or a numpy ndarray of shape H x W x C to a PIL Image while preserving the value range. Args:: @@ -726,16 +789,16 @@ class ToPILImage: def __init__(self, mode=None): self.mode = mode - def __call__(self, pic): + def __call__(self, img): """ Args:: - [in] pic (jt.array or numpy.ndarray): Image to be converted to PIL Image. + [in] img (jt.Var or numpy.ndarray): Image to be converted to PIL Image. Returns: [out] PIL Image: Image converted to PIL Image. 
""" - return to_pil_image(pic, self.mode) + return to_pil_image(img, self.mode) class Lambda: From 672df913852ccb57a16ef68a0ad5cea8b67bcb95 Mon Sep 17 00:00:00 2001 From: yaox12 Date: Wed, 7 Oct 2020 16:22:33 +0800 Subject: [PATCH 05/16] declare the input format for to_tensor and to_pil_image --- python/jittor/transform/transform.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/jittor/transform/transform.py b/python/jittor/transform/transform.py index bd538093..ecab1a5f 100644 --- a/python/jittor/transform/transform.py +++ b/python/jittor/transform/transform.py @@ -325,12 +325,16 @@ def crop_and_resize(img, top, left, height, width, size, interpolation=Image.BIL def to_tensor(pic): """ - Function for turning Image.Image or np.ndarray to jt.Var. + Function for turning Image.Image or np.ndarray (HWC) to jt.Var (CHW). Args:: [in] img(PIL Image.Image or np.ndarray): Input image. - + If input type is np.ndarray, the shape should be in HWC. + + Return: + [out] jt.Var in shape CHW. + Example:: img = Image.open(...) @@ -375,11 +379,11 @@ def to_tensor(pic): return img def to_pil_image(img, mode=None): - """Convert a jt.Var or an np.ndarray to PIL Image. + """Convert a jt.Var (CHW) or an np.ndarray (HWC) to PIL Image. Args:: - [in] img (jt.Var or numpy.ndarray): Image to be converted to PIL Image. + [in] img (jt.Var (CHW) or numpy.ndarray (HWC)): Image to be converted to PIL Image. [in] mode (`PIL.Image mode`): color space and pixel depth of input data (optional). Returns:: From b026c3b702ae17352d72bbaf5d5902a2a7d05b22 Mon Sep 17 00:00:00 2001 From: jwzxgy2007 <823951506@qq.com> Date: Sun, 13 Dec 2020 21:06:34 +0800 Subject: [PATCH 06/16] add conv3d --- python/jittor/nn.py | 155 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 2 deletions(-) diff --git a/python/jittor/nn.py b/python/jittor/nn.py index 5ac11954..0672b004 100644 --- a/python/jittor/nn.py +++ b/python/jittor/nn.py @@ -18,7 +18,7 @@ import math from collections import OrderedDict from jittor.pool import Pool, pool, AdaptiveAvgPool2d from jittor.optim import * -from jittor.misc import _pair +from jittor.misc import _pair, _triple def matmul_transpose(a, b): @@ -590,6 +590,92 @@ class Conv1d(Module): y = x.squeeze(-1) return y +class Conv3d(Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size, kernel_size) + self.stride = stride if isinstance(stride, tuple) else (stride, stride, stride) + self.padding = padding if isinstance(padding, tuple) else (padding, padding, padding) + self.dilation = dilation if isinstance(dilation, tuple) else (dilation, dilation, dilation) + self.groups = groups + assert in_channels % groups == 0, 'in_channels must be divisible by groups' + assert out_channels % groups == 0, 'out_channels must be divisible by groups' + Kh, Kw, Kd = self.kernel_size + self.groups = groups + assert in_channels % groups == 0, 'in_channels must be divisible by groups' + assert out_channels % groups == 0, 'out_channels must be divisible by groups' + + self.weight = init.invariant_uniform([out_channels, in_channels//groups, Kh, Kw, Kd], dtype="float") + if bias: + fan=1 + for i in self.weight.shape[1:]: + fan *= i + bound = 1 / math.sqrt(fan) + self.bias = init.uniform([out_channels], dtype="float", low=-bound, high=bound) + else: 
+ self.bias = None + + def execute(self, x): + if self.groups == 1: + N,C,H,W,D = x.shape + Kh, Kw, Kd = self.kernel_size + assert C==self.in_channels + oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1 + ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1 + od = (D+self.padding[2]*2-Kd*self.dilation[2]+self.dilation[2]-1)//self.stride[2]+1 + xx = x.reindex([N,self.out_channels,C,oh,ow,od,Kh,Kw,Kd], [ + 'i0', # Nid + 'i2', # Cid + f'i3*{self.stride[0]}-{self.padding[0]}+i6*{self.dilation[0]}', # Hid+Khid + f'i4*{self.stride[1]}-{self.padding[1]}+i7*{self.dilation[1]}', # Wid+KWid + f'i5*{self.stride[2]}-{self.padding[2]}+i8*{self.dilation[2]}', # Did+KDid + ]) + ww = self.weight.broadcast(xx.shape, [0,3,4,5]) + yy = xx*ww + y = yy.sum([2,6,7,8]) # Kc, Kh, Kw, Kd + if self.bias is not None: + b = self.bias.broadcast(y.shape, [0,2,3,4]) + y = y + b + return y + else: + N,C,H,W,D = x.shape + Kh, Kw, Kd = self.kernel_size + G = self.groups + CpG = C // G # channels per group + assert C==self.in_channels + oc = self.out_channels + oh = (H+self.padding[0]*2-Kh*self.dilation[0]+self.dilation[0]-1)//self.stride[0]+1 + ow = (W+self.padding[1]*2-Kw*self.dilation[1]+self.dilation[1]-1)//self.stride[1]+1 + od = (D+self.padding[2]*2-Kd*self.dilation[2]+self.dilation[2]-1)//self.stride[2]+1 + xx = x.reindex([N,G,oc//G,CpG,oh,ow,od,Kh,Kw,Kd], [ + 'i0', # Nid + f'i1*{CpG}+i3', # Gid + f'i4*{self.stride[0]}-{self.padding[0]}+i7*{self.dilation[0]}', # Hid+Khid + f'i5*{self.stride[1]}-{self.padding[1]}+i8*{self.dilation[1]}', # Wid+KWid + f'i6*{self.stride[2]}-{self.padding[2]}+i9*{self.dilation[2]}', # Did+KDid + ]) + # w: [oc, CpG, Kh, Kw, Kd] + ww = self.weight.reindex([N, G, oc//G, CpG, oh, ow, od, Kh, Kw, Kd], [ + f'i1*{oc//G}+i2', + 'i3', + 'i7', + 'i8', + 'i9' + ]) + ww.compile_options = xx.compile_options = {"G":G,"C":C} + yy = xx*ww + y = yy.reindex_reduce('add', [N, oc, oh, ow, od], [ + 'i0', + f'i1*{oc//G}+i2', + 'i4', + 'i5', + 'i6' + ]) + if self.bias is not None: + b = self.bias.broadcast(y.shape, [0,2,3,4]) + y = y + b + return y def conv2d(x, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): padding = _pair(padding) @@ -647,7 +733,72 @@ def conv2d(x, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): if bias is not None: b = bias.broadcast(y.shape, [0,2,3]) y = y + b - return y + return y + +def conv3d(x, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): + padding = _triple(padding) + stride = _triple(stride) + dilation = _triple(dilation) + out_channels = weight.shape[0] + + if groups == 1: + N,C,H,W,D = x.shape + Kh, Kw, Kd = weight.shape[-3:] + oh = (H+padding[0]*2-Kh*dilation[0]+dilation[0]-1)//stride[0]+1 + ow = (W+padding[1]*2-Kw*dilation[1]+dilation[1]-1)//stride[1]+1 + od = (D+padding[2]*2-Kd*dilation[2]+dilation[2]-1)//stride[2]+1 + xx = x.reindex([N,out_channels,C,oh,ow,od,Kh,Kw,Kd], [ + 'i0', # Nid + 'i2', # Cid + f'i3*{stride[0]}-{padding[0]}+i6*{dilation[0]}', # Hid+Khid + f'i4*{stride[1]}-{padding[1]}+i7*{dilation[1]}', # Wid+KWid + f'i5*{stride[2]}-{padding[2]}+i8*{dilation[2]}', # Did+KDid + ]) + ww = weight.broadcast(xx.shape, [0,3,4,5]) + yy = xx*ww + y = yy.sum([2,6,7,8]) # Kc, Kh, Kw,Kd + if bias is not None: + b = bias.broadcast(y.shape, [0,2,3,4]) + y = y + b + return y + else: + N,C,H,W,D = x.shape + Kh, Kw, Kd = weight.shape[-3:] + G = groups + CpG = C // G # channels per group + oc = out_channels + oh = (H+padding[0]*2-Kh*dilation[0]+dilation[0]-1)//stride[0]+1 + ow 
= (W+padding[1]*2-Kw*dilation[1]+dilation[1]-1)//stride[1]+1 + od = (D+padding[2]*2-Kd*dilation[2]+dilation[2]-1)//stride[2]+1 + xx = x.reindex([N,G,oc//G,CpG,oh,ow,od,Kh,Kw,Kd], [ + 'i0', # Nid + f'i1*{CpG}+i3', # Gid + f'i4*{stride[0]}-{padding[0]}+i7*{dilation[0]}', # Hid+Khid + f'i5*{stride[1]}-{padding[1]}+i8*{dilation[1]}', # Wid+KWid + f'i6*{stride[2]}-{padding[2]}+i9*{dilation[2]}', # Did+KDid + ]) + xx.compile_options = {"G":G} + # w: [oc, CpG, Kh, Kw, Kd] + ww = weight.reindex([N, G, oc//G, CpG, oh, ow, od, Kh, Kw, Kd], [ + f'i1*{oc//G}+i2', + 'i3', + 'i7', + 'i8', + 'i9' + ]) + yy = xx*ww + y = yy.reindex_reduce('add', [N, oc, oh, ow, od], [ + 'i0', + f'i1*{oc//G}+i2', + 'i4', + 'i5', + 'i6' + ]) + + if bias is not None: + b = bias.broadcast(y.shape, [0,2,3,4]) + y = y + b + return y class ConvTranspose(Module): def __init__(self, in_channels, out_channels, kernel_size, stride=1, \ From 81d8c528db3727f581e4248a67e909e6d4915327 Mon Sep 17 00:00:00 2001 From: jwzxgy2007 <823951506@qq.com> Date: Wed, 16 Dec 2020 22:32:36 +0800 Subject: [PATCH 07/16] add pool3d --- python/jittor/pool.py | 206 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/python/jittor/pool.py b/python/jittor/pool.py index 54a6410a..1456e101 100644 --- a/python/jittor/pool.py +++ b/python/jittor/pool.py @@ -157,6 +157,212 @@ class Pool(Module): return xx.reduce(self.op, [4,5]) +class Pool3d(Module): + def __init__(self, kernel_size, stride=None, padding=0, dilation=None, return_indices=None, ceil_mode=False, count_include_pad=True, op="maximum"): + assert dilation == None + assert return_indices == None + self.kernel_size = kernel_size + self.op = op + self.stride = stride if stride else kernel_size + self.padding = padding + self.ceil_mode = ceil_mode + self.count_include_pad = count_include_pad and padding != 0 + + def execute(self, x): + N,C,H,W,D = x.shape + if self.ceil_mode == False: + h = (H+self.padding*2-self.kernel_size)//self.stride+1 + w = (W+self.padding*2-self.kernel_size)//self.stride+1 + d = (D+self.padding*2-self.kernel_size)//self.stride+1 + else: + h = (H+self.padding*2-self.kernel_size + self.stride - 1)//self.stride+1 + w = (W+self.padding*2-self.kernel_size + self.stride - 1)//self.stride+1 + d = (D+self.padding*2-self.kernel_size + self.stride - 1)//self.stride+1 + + if self.op in ['maximum', 'minimum', 'mean']: + if self.op == 'mean': + if self.count_include_pad: + count = f"int count = {self.kernel_size*self.kernel_size*self.kernel_size};" + else: + count = "int count = (k2_ - k2) * (k3_ - k3)* (k4_ - k4);" + count += "float32 rcount = 1.0f / count;" + else: + count = "" + forward_body = f'''{{ + int k4 = i4*{self.stride}-{self.padding}; + int k3 = i3*{self.stride}-{self.padding}; + int k2 = i2*{self.stride}-{self.padding}; + int k4_ = min(k4 + {self.kernel_size}, in0_shape4); + int k3_ = min(k3 + {self.kernel_size}, in0_shape3); + int k2_ = min(k2 + {self.kernel_size}, in0_shape2); + k4 = max(0, k4); + k3 = max(0, k3); + k2 = max(0, k2); + @out(i0, i1, i2, i3, i4) = init_{self.op}(out_type); + {count} + for (int p = k2; p < k2_; ++p) + for (int q = k3; q < k3_; ++q) + for (int r = k4; r < k4_; ++r) + @out(i0, i1, i2, i3, i4) = {self.op}(out_type, @out(i0, i1, i2, i3, i4), @in0(i0, i1, p, q, r)); + }}''' + backward_body = f'''{{ + int k4 = i4*{self.stride}-{self.padding}; + int k3 = i3*{self.stride}-{self.padding}; + int k2 = i2*{self.stride}-{self.padding}; + int k4_ = min(k4 + {self.kernel_size}, in0_shape4); + int k3_ = min(k3 + 
{self.kernel_size}, in0_shape3); + int k2_ = min(k2 + {self.kernel_size}, in0_shape2); + k4 = max(0, k4); + k3 = max(0, k3); + k2 = max(0, k2); + {count} + int bo=1; + for (int p = k2; p < k2_ && bo; ++p) + for (int q = k3; q < k3_ && bo; ++q) + for (int r = k4; r < k4_ && bo; ++r) {{ + {"atomicAdd(&@out(i0,i1,p,q,r), @dout(i0,i1,i2,i3,i4)/count);" + if self.op == "mean" else + f"""if (@pout(i0,i1,i2,i3,i4) == @in0(i0,i1,p,q,r)) {{ + atomicAdd(&@out(i0,i1,p,q,r), @dout(i0,i1,i2,i3,i4)), + bo=0; + }}"""} + }} + }}''' + out = jt.code([N,C,h,w,d], x.dtype, [x], + cuda_header=""" + #include + #include + """, + cuda_src=f''' + __global__ static void kernel1(@ARGS_DEF) {{ + @PRECALC + int res_x = (in0_shape4 - 1) / blockDim.x + 1; + int res_y = (in0_shape3 - 1) / blockDim.y + 1; + int res_z = (in0_shape2 - 1) / blockDim.z + 1; + + int idx4 = blockIdx.x / (res_y * res_z); + int idx3 = (blockIdx.x - idx4 * res_y * res_z) / res_z; + int idx2 = blockIdx.x - idx4 * res_y * res_z - idx3 * res_y; + + + int p4 = threadIdx.x + idx4 * blockDim.x; + int s4 = blockDim.x * res_x; + + int p3 = threadIdx.y + idx3 * blockDim.y; + int p3 = blockDim.y * res_y; + + int p2 = threadIdx.z + idx2 * blockDim.z; + int p2 = blockDim.z * res_z; + + int i1 = blockIdx.y; + int i0 = blockIdx.z; + for (int i4 = p4; i4 < out_shape4; i4 += s4) + for (int i3 = p3; i3 < out_shape3; i3 += s3) + for (int i2 = p2; i2 < out_shape2; i2 += s2) + {forward_body} + }} + + int tx = min(1024, out_shape4); + int ty = min(1024 / tx, out_shape3); + int tz = min(1024 / tx / ty, out_shape2); + + + int res_x = (out_shape4 - 1) / tx + 1; + int res_y = (out_shape3 - 1) / ty + 1; + int res_z = (out_shape2 - 1) / tz + 1; + + + + int bx = res_x * res_y * res_z; + int by = out_shape1; + int bz = out_shape0; + + dim3 s1(bx, by, bz); + dim3 s2(tx, ty, tz); + kernel1<<>>(@ARGS); + ''', + cuda_grad_src=[f''' + __global__ static void kernel3(@ARGS_DEF) {{ + @PRECALC + + + int res_x = (in0_shape4 - 1) / blockDim.x + 1; + int res_y = (in0_shape3 - 1) / blockDim.y + 1; + int res_z = (in0_shape2 - 1) / blockDim.z + 1; + + int idx4 = blockIdx.x / (res_y * res_z); + int idx3 = (blockIdx.x - idx4 * res_y * res_z) / res_z; + int idx2 = blockIdx.x - idx4 * res_y * res_z - idx3 * res_y; + + + int p4 = threadIdx.x + idx4 * blockDim.x; + int s4 = blockDim.x * res_x; + + int p3 = threadIdx.y + idx3 * blockDim.y; + int p3 = blockDim.y * res_y; + + int p2 = threadIdx.z + idx2 * blockDim.z; + int p2 = blockDim.z * res_z; + + int i1 = blockIdx.y; + int i0 = blockIdx.z; + for (int i4 = p4; i4 < pout_shape4; i4 += s4) + for (int i3 = p3; i3 < pout_shape3; i3 += s3) + for (int i2 = p2; i2 < pout_shape2; i2 += s2) + {backward_body} + }} + cudaMemsetAsync(out_p, 0, out->size); + + int tx = min(1024, pout_shape4); + int ty = min(1024 / tx, pout_shape3); + int tz = min(1024 / tx / ty, pout_shape2); + + int res_x = (pout_shape4 - 1) / tx + 1; + int res_y = (pout_shape3 - 1) / ty + 1; + int res_z = (pout_shape2 - 1) / tz + 1; + + int bx = res_x * res_y * res_z; + + int by = pout_shape1; + + int bz = pout_shape0; + dim3 s1_(bx, by, bz); + dim3 s2_(tx, ty, tz); + kernel3<<>>(@ARGS); + '''], + cpu_header='#include ', + cpu_src=f''' + using namespace std; + for (int i0=0; i0size); + #define atomicAdd(a,b) (*a) += b + for (int i0=0; i0 Date: Fri, 5 Feb 2021 16:45:16 +0800 Subject: [PATCH 08/16] fix bug --- python/jittor/pool.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/jittor/pool.py b/python/jittor/pool.py index 1456e101..027c327d 100644 --- 
a/python/jittor/pool.py +++ b/python/jittor/pool.py @@ -228,6 +228,7 @@ class Pool3d(Module): }}"""} }} }}''' + out = jt.code([N,C,h,w,d], x.dtype, [x], cuda_header=""" #include @@ -249,10 +250,10 @@ class Pool3d(Module): int s4 = blockDim.x * res_x; int p3 = threadIdx.y + idx3 * blockDim.y; - int p3 = blockDim.y * res_y; + int s3 = blockDim.y * res_y; int p2 = threadIdx.z + idx2 * blockDim.z; - int p2 = blockDim.z * res_z; + int s2 = blockDim.z * res_z; int i1 = blockIdx.y; int i0 = blockIdx.z; @@ -299,10 +300,10 @@ class Pool3d(Module): int s4 = blockDim.x * res_x; int p3 = threadIdx.y + idx3 * blockDim.y; - int p3 = blockDim.y * res_y; + int s3 = blockDim.y * res_y; int p2 = threadIdx.z + idx2 * blockDim.z; - int p2 = blockDim.z * res_z; + int s2 = blockDim.z * res_z; int i1 = blockIdx.y; int i0 = blockIdx.z; From 23eb540b7d1be3aa8422df021aa6ad414790803d Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Fri, 4 Jun 2021 14:03:10 +0800 Subject: [PATCH 09/16] support pool3d --- python/jittor/utils/pytorch_converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/jittor/utils/pytorch_converter.py b/python/jittor/utils/pytorch_converter.py index b91dfc30..85c9a070 100644 --- a/python/jittor/utils/pytorch_converter.py +++ b/python/jittor/utils/pytorch_converter.py @@ -358,9 +358,9 @@ unsupport_ops = [ # *************************************************************** 'ModuleDict', 'ParameterList', 'ParameterDict', 'Conv1d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose3d', 'Unfold', 'Fold', - 'MaxPool1d', 'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'AvgPool1d', - 'AvgPool3d', 'FractionalMaxPool2d', 'LPPool1d', 'LPPool2d', 'AdaptiveMaxPool1d', - 'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool3d', + 'MaxPool1d', 'MaxUnpool1d', 'MaxUnpool2d', 'AvgPool1d', + 'FractionalMaxPool2d', 'LPPool1d', 'LPPool2d', 'AdaptiveMaxPool1d', + 'AdaptiveAvgPool1d', 'ReflectionPad1d', 'ReplicationPad1d', 'ReplicationPad3d', 'ConstantPad1d', 'ConstantPad3d', 'ELU', 'Hardshrink', 'Hardtanh', 'LogSigmoid', 'MultiheadAttention', 'RReLU', 'SELU', 'CELU', 'GELU', 'Softshrink', 'Softsign', 'Tanhshrink', From 88483fedbc1745c30e3bb41ebccbc84521ea3c12 Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Fri, 4 Jun 2021 14:07:07 +0800 Subject: [PATCH 10/16] add layernorm3d --- python/jittor/nn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/jittor/nn.py b/python/jittor/nn.py index 0ebb3f4a..5d33a493 100644 --- a/python/jittor/nn.py +++ b/python/jittor/nn.py @@ -423,7 +423,7 @@ class BatchNorm(Module): norm_x = x * w.broadcast(x, dims) + b.broadcast(x, dims) return norm_x -BatchNorm2d = BatchNorm1d = BatchNorm +BatchNorm3d = BatchNorm2d = BatchNorm1d = BatchNorm class InstanceNorm(Module): def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, is_train=True, sync=True): @@ -447,7 +447,7 @@ class InstanceNorm(Module): b = self.bias - xmean * w return x * w.broadcast(x, dims) + b.broadcast(x, dims) -InstanceNorm2d = InstanceNorm1d = InstanceNorm +InstanceNorm3d = InstanceNorm2d = InstanceNorm1d = InstanceNorm class LayerNorm(Module): def __init__(self, normalized_shape, eps: float = 1e-5, elementwise_affine: bool = True) -> None: @@ -470,7 +470,7 @@ class LayerNorm(Module): return x * w + b -LayerNorm2d = LayerNorm1d = LayerNorm +LayerNorm3d = LayerNorm2d = LayerNorm1d = LayerNorm class GroupNorm(Module): def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, is_train=True): From 
1931f2fb4140b19f11630e5e7b4c74c6dfbb338b Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Fri, 4 Jun 2021 14:25:51 +0800 Subject: [PATCH 11/16] update cuda md5 --- python/jittor/__init__.py | 2 +- python/jittor_utils/install_cuda.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index 7765c63e..c7a5ecc9 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -__version__ = '1.2.3.15' +__version__ = '1.2.3.16' from jittor_utils import lock with lock.lock_scope(): ori_int = int diff --git a/python/jittor_utils/install_cuda.py b/python/jittor_utils/install_cuda.py index 2d571e1e..14dbcb3d 100644 --- a/python/jittor_utils/install_cuda.py +++ b/python/jittor_utils/install_cuda.py @@ -44,7 +44,7 @@ def install_cuda(): md5 = "5dbdb43e35b4db8249027997720bf1ca" elif cuda_driver_version >= [10,2]: cuda_tgz = "cuda10.2_cudnn7_linux.tgz" - md5 = "a78f296746d97e9d76615289c2fe98ac" + md5 = "40f0563e8eb176f53e55943f6d212ad7" elif cuda_driver_version >= [10,]: cuda_tgz = "cuda10.0_cudnn7_linux.tgz" md5 = "f16d3ff63f081031d21faec3ec8b7dac" From 2c2f5b156ddee908e273f17d5f6017889aabaf53 Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Fri, 4 Jun 2021 22:39:29 +0800 Subject: [PATCH 12/16] python trace data --- python/jittor/__init__.py | 2 +- python/jittor/src/pybind/py_var_tracer.cc | 41 +++++++++++++++++++++++ python/jittor/test/test_trace_var.py | 32 ++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index c7a5ecc9..7e6885e1 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. 
# ***************************************************************
-__version__ = '1.2.3.16'
+__version__ = '1.2.3.17'
 from jittor_utils import lock
 with lock.lock_scope():
     ori_int = int
diff --git a/python/jittor/src/pybind/py_var_tracer.cc b/python/jittor/src/pybind/py_var_tracer.cc
index e668b0ae..4012b953 100644
--- a/python/jittor/src/pybind/py_var_tracer.cc
+++ b/python/jittor/src/pybind/py_var_tracer.cc
@@ -20,6 +20,7 @@ namespace jittor {
 DEFINE_FLAG(int, trace_py_var, 0, "Trace py stack max depth for debug.");
+DEFINE_FLAG(int, trace_var_data, 0, "Record var data when tracing, for debug.");
 Op* trace_grad_op = nullptr;
 TraceData trace_data;
@@ -185,6 +186,44 @@ static vector<Stack> get_stack_info() {
     return stacks;
 }
+template<class T>
+string get_str(T* t, int64 num) {
+    string s = "";
+    for (int64 i=0; i<num; i++) {
+        s += S(t[i]);
+        if (i != num-1) s += ',';
+    }
+    return s;
+}
+
+static string get_var_data_str(Var* v) {
+    if (v->dtype() == ns_int8)
+        return get_str(v->ptr<int8>(), v->num);
+    if (v->dtype() == ns_int16)
+        return get_str(v->ptr<int16>(), v->num);
+    if (v->dtype() == ns_int32)
+        return get_str(v->ptr<int32>(), v->num);
+    if (v->dtype() == ns_int64)
+        return get_str(v->ptr<int64>(), v->num);
+
+
+    if (v->dtype() == ns_uint8)
+        return get_str(v->ptr<uint8>(), v->num);
+    if (v->dtype() == ns_uint16)
+        return get_str(v->ptr<uint16>(), v->num);
+    if (v->dtype() == ns_uint32)
+        return get_str(v->ptr<uint32>(), v->num);
+    if (v->dtype() == ns_uint64)
+        return get_str(v->ptr<uint64>(), v->num);
+
+    if (v->dtype() == ns_float32)
+        return get_str(v->ptr<float32>(), v->num);
+    if (v->dtype() == ns_float64)
+        return get_str(v->ptr<float64>(), v->num);
+    return "";
+}
+
 void TraceData::record_node(Node* node, bool record_stack) {
     if (thread_name.size()) return;
     NodeData data;
@@ -255,6 +294,8 @@ void TraceData::record_exe_node(Node* node) {
         data.attrs["dsize"] = S(v->dtype().dsize());
         data.attrs["name"] = v->name.c_str();
         data.attrs["is_var"] = "1";
+        if (trace_var_data && v->mem_ptr)
+            data.attrs["data"] = get_var_data_str(v);
     } else {
         auto op = node->op();
         data.attrs["name"] = op->name_ex();
diff --git a/python/jittor/test/test_trace_var.py b/python/jittor/test/test_trace_var.py
index a866c0dc..658c5926 100644
--- a/python/jittor/test/test_trace_var.py
+++ b/python/jittor/test/test_trace_var.py
@@ -10,6 +10,7 @@ import numpy as np
 from jittor import Module
 from jittor.models import resnet
 import pickle
+from PIL import Image
 f32 = jt.float32
@@ -117,6 +118,37 @@ class TestTraceVar(unittest.TestCase):
                 if i not in data["node_data"]:
                     assert 0, (i, "not found")
+
+    def test_resnet_infer_with_feature(self):
+        cat_url = "https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=3782485413,1118109468&fm=26&gp=0.jpg"
+        import jittor_utils
+        cat_path = f"{jt.flags.cache_path}/cat.jpg"
+        print("download")
+        jittor_utils.download(cat_url, cat_path)
+        with open(cat_path, 'rb') as f:
+            img = Image.open(f).convert('RGB')
+        img = jt.array(np.array(img))
+        print(img.shape, img.dtype)
+        img = ((img.float() - 128) / 255).transpose(2,0,1)
+
+
+        with jt.flag_scope(trace_py_var=2, trace_var_data=1):
+            img = img[None,...]
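+            # img[None,...] prepends a batch axis: the HWC image was transposed
+            # to CHW above, so the model below receives NCHW input of shape [1,3,H,W].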
+ + resnet18 = resnet.Resnet18(pretrained=True) + x = jt.float32(img) + y = resnet18(x) + y.sync() + + data = jt.dump_trace_data() + jt.clear_trace_data() + with open(f"{jt.flags.cache_path}/resnet_with_feature.pkl", "wb") as f: + pickle.dump(data, f) + for k,v in data["execute_op_info"].items(): + for i in v['fused_ops']: + if i not in data["node_data"]: + assert 0, (i, "not found") + + def test_resnet_trainx(self): with jt.flag_scope(trace_py_var=2): From fd7d68e6aa387bde151cb3a20c6f8b04d171a4f1 Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Sat, 5 Jun 2021 10:59:06 +0800 Subject: [PATCH 13/16] add /usr/lib as cuda lib search path --- python/jittor/__init__.py | 2 +- python/jittor/compile_extern.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index 7e6885e1..f942e214 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -__version__ = '1.2.3.17' +__version__ = '1.2.3.18' from jittor_utils import lock with lock.lock_scope(): ori_int = int diff --git a/python/jittor/compile_extern.py b/python/jittor/compile_extern.py index 401a386b..2d3ea5a3 100644 --- a/python/jittor/compile_extern.py +++ b/python/jittor/compile_extern.py @@ -180,11 +180,11 @@ def setup_cuda_lib(lib_name, link=True, extra_flags=""): prefer_version = () if nvcc_version[0] == 11: prefer_version = ("8",) - culib_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu"], f"lib{lib_name}.so", prefer_version) + culib_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu", "/usr/lib"], f"lib{lib_name}.so", prefer_version) if lib_name == "cublas" and nvcc_version[0] >= 10: # manual link libcublasLt.so - cublas_lt_lib_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu"], f"libcublasLt.so", nvcc_version) + cublas_lt_lib_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu", "/usr/lib"], f"libcublasLt.so", nvcc_version) ctypes.CDLL(cublas_lt_lib_path, dlopen_flags) @@ -193,7 +193,7 @@ def setup_cuda_lib(lib_name, link=True, extra_flags=""): if nvcc_version >= (11,0,0): libs = ["libcudnn_ops_infer.so", "libcudnn_ops_train.so", "libcudnn_cnn_infer.so", "libcudnn_cnn_train.so"] for l in libs: - ex_cudnn_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu"], l, prefer_version) + ex_cudnn_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu", "/usr/lib"], l, prefer_version) ctypes.CDLL(ex_cudnn_path, dlopen_flags) # dynamic link cuda library From a07eb6bc121eb4e153fc2d18679061b90f236a79 Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Sat, 5 Jun 2021 11:46:07 +0800 Subject: [PATCH 14/16] add nvcc search path --- python/jittor/__init__.py | 2 +- python/jittor/compiler.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index f942e214..44c41389 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. 
# *************************************************************** -__version__ = '1.2.3.18' +__version__ = '1.2.3.19' from jittor_utils import lock with lock.lock_scope(): ori_int = int diff --git a/python/jittor/compiler.py b/python/jittor/compiler.py index 4d3588a2..d33a89ec 100644 --- a/python/jittor/compiler.py +++ b/python/jittor/compiler.py @@ -877,7 +877,10 @@ if install_cuda.has_installation(): nvcc_path = try_find_exe(nvcc_path) # check system installed cuda if not nvcc_path: - nvcc_path = env_or_try_find('nvcc_path', 'nvcc') or try_find_exe('/usr/local/cuda/bin/nvcc') or try_find_exe('/usr/bin/nvcc') + nvcc_path = env_or_try_find('nvcc_path', 'nvcc') or \ + try_find_exe('/usr/local/cuda/bin/nvcc') or \ + try_find_exe('/usr/bin/nvcc') or \ + try_find_exe('/opt/cuda/bin/nvcc') # if system has no cuda, install jtcuda if not nvcc_path: nvcc_path = install_cuda.install_cuda() From b14a60ad744e82c8db2a85e40725e1bed9e4314b Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Sat, 5 Jun 2021 23:05:01 +0800 Subject: [PATCH 15/16] add roll --- python/jittor/__init__.py | 2 +- python/jittor/misc.py | 41 +++++++++++++++++++++++++++++- python/jittor/test/test_setitem.py | 9 +++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index 44c41389..5ef63a55 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. # *************************************************************** -__version__ = '1.2.3.19' +__version__ = '1.2.3.20' from jittor_utils import lock with lock.lock_scope(): ori_int = int diff --git a/python/jittor/misc.py b/python/jittor/misc.py index ec06260f..73f71255 100644 --- a/python/jittor/misc.py +++ b/python/jittor/misc.py @@ -1197,7 +1197,7 @@ def gather(x, dim, index): Parameters:: - * input (jt.Var) – the source array + * x (jt.Var) – the source array * dim (int) – the axis along which to index * index (jt.Var) – the indices of elements to gather @@ -1216,3 +1216,42 @@ Example:: return x.getitem(tuple(indexes)) jt.Var.gather = gather + +def roll(x, shifts, dims=None): + '''Roll the tensor along the given dimension(s). 
+ +Parameters:: + + * x (jt.Var) – the source array + * shifts (int or tuple) – shift offset of dims + * dims (int or tuple) – shift dims + +Examples:: + + x = jt.array([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2) + y = x.roll(1, 0) + assert (y.numpy() == [[7,8],[1,2],[3,4],[5,6]]).all() + y = x.roll(-1, 0) + assert (y.numpy() == [[3,4],[5,6],[7,8],[1,2]]).all() + y = x.roll(shifts=(2, 1), dims=(0, 1)) + assert (y.numpy() == [[6,5],[8,7],[2,1],[4,3]]).all() + + ''' + if isinstance(shifts, int): + shifts = (shifts,) + if dims is None: + dims = tuple(range(len(shifts))) + elif isinstance(dims, int): + dims = (dims,) + assert len(dims) == len(shifts) + ids = [ f'i{i}' for i in range(x.ndim) ] + for i in range(len(dims)): + shift = shifts[i] + d = dims[i] + size = x.shape[d] + shift = shift % size + if shift<0: shift += size + ids[d] = f'(i{d}<{shift}?i{d}+{size-shift}:(i{d}-{shift}))' + return x.reindex(x.shape, ids) + +jt.Var.roll = roll diff --git a/python/jittor/test/test_setitem.py b/python/jittor/test/test_setitem.py index 2e86266b..1f3468f7 100644 --- a/python/jittor/test/test_setitem.py +++ b/python/jittor/test/test_setitem.py @@ -201,6 +201,15 @@ class TestSetitem(unittest.TestCase): a = jt.array([1,2]) assert a[None,:,None,None,...,None].shape == (1,2,1,1,1) + def test_roll(self): + x = jt.array([1, 2, 3, 4, 5, 6, 7, 8]).view(4, 2) + y = x.roll(1, 0) + assert (y.numpy() == [[7,8],[1,2],[3,4],[5,6]]).all(), y + y = x.roll(-1, 0) + assert (y.numpy() == [[3,4],[5,6],[7,8],[1,2]]).all() + y = x.roll(shifts=(2, 1), dims=(0, 1)) + assert (y.numpy() == [[6,5],[8,7],[2,1],[4,3]]).all() + if __name__ == "__main__": unittest.main() \ No newline at end of file From 77b293b6b8edbb2a749d85e883d447481b5875be Mon Sep 17 00:00:00 2001 From: Dun Liang Date: Wed, 9 Jun 2021 15:52:44 +0800 Subject: [PATCH 16/16] bug fix --- python/jittor/__init__.py | 2 +- python/jittor/compile_extern.py | 13 +++++++++++++ python/jittor/src/jit_compiler.cc | 0 python/jittor/src/opt/pass/loop_var_analyze_pass.cc | 1 + python/jittor/test/test_batchnorm.py | 1 + python/jittor/test/test_default_var.py | 1 + python/jittor/test/test_grad.py | 1 + python/jittor/test/test_resize_and_crop.py | 7 +++++-- python/jittor/test/test_slice.py | 1 + python/jittor/test/test_ternary_op.py | 8 ++++---- python/jittor/utils/polish_centos.py | 4 ++-- python/jittor/version | 2 +- 12 files changed, 31 insertions(+), 10 deletions(-) mode change 100755 => 100644 python/jittor/src/jit_compiler.cc diff --git a/python/jittor/__init__.py b/python/jittor/__init__.py index bca90383..37719b09 100644 --- a/python/jittor/__init__.py +++ b/python/jittor/__init__.py @@ -9,7 +9,7 @@ # file 'LICENSE.txt', which is part of this source code package. 
# ***************************************************************
-__version__ = '1.2.3.21'
+__version__ = '1.2.3.22'
 from jittor_utils import lock
 with lock.lock_scope():
     ori_int = int
diff --git a/python/jittor/compile_extern.py b/python/jittor/compile_extern.py
index 2d3ea5a3..4327c6bd 100644
--- a/python/jittor/compile_extern.py
+++ b/python/jittor/compile_extern.py
@@ -52,6 +52,19 @@ def setup_mkl():
     use_mkl = os.environ.get("use_mkl", "1")=="1"
     mkl_ops = None
     if not use_mkl: return
+
+    # PyTorch's MKL conflicts with Jittor's MKL and can
+    # yield errors such as "free(): invalid size" or
+    # "mmap error".
+    # Importing pytorch (>1.8) first works around this problem.
+
+    try:
+        # jt.dirty_fix_pytorch_runtime_error()
+        import torch
+        from torch import nn
+    except:
+        torch = None
+
     mkl_include_path = os.environ.get("mkl_include_path")
     mkl_lib_path = os.environ.get("mkl_lib_path")
diff --git a/python/jittor/src/jit_compiler.cc b/python/jittor/src/jit_compiler.cc
old mode 100755
new mode 100644
diff --git a/python/jittor/src/opt/pass/loop_var_analyze_pass.cc b/python/jittor/src/opt/pass/loop_var_analyze_pass.cc
index 48177ead..d7eecb1a 100644
--- a/python/jittor/src/opt/pass/loop_var_analyze_pass.cc
+++ b/python/jittor/src/opt/pass/loop_var_analyze_pass.cc
@@ -130,6 +130,7 @@ void LoopVarAnalyzePass::run() {
     }
     loop_vars.reserve(loop_var->shape.size());
     string vname = pm->oc->get_name_by_op_var(op, loop_var);
+    ASSERT(vname!="__fill__");
     for (uint j=0; j<loop_var->shape.size(); j++)
         loop_vars.emplace_back(vname+"->shape["+S(j)+"]");
     break;
diff --git a/python/jittor/test/test_batchnorm.py b/python/jittor/test/test_batchnorm.py
index 964f0fdd..fcf7141c 100644
--- a/python/jittor/test/test_batchnorm.py
+++ b/python/jittor/test/test_batchnorm.py
@@ -51,6 +51,7 @@ def check_equal_without_istrain(arr, j_layer, p_layer, threshold=1e-5):
 @unittest.skipIf(skip_this_test, "No Torch found")
 class TestBatchNorm(unittest.TestCase):
+    @jt.flag_scope(auto_convert_64_to_32=0)
     def test_batchnorm(self):
         # ***************************************************************
         # Test BatchNorm Layer
diff --git a/python/jittor/test/test_default_var.py b/python/jittor/test/test_default_var.py
index 6928372d..84561fa2 100644
--- a/python/jittor/test/test_default_var.py
+++ b/python/jittor/test/test_default_var.py
@@ -21,6 +21,7 @@ class TestDefaultVar(unittest.TestCase):
     def setUpClass(self):
         return
+    @jt.flag_scope(auto_convert_64_to_32=0)
     def test_default_var(self):
         a=jt.array((2,3,3), np.float32)
         b=a*2.0
diff --git a/python/jittor/test/test_grad.py b/python/jittor/test/test_grad.py
index 39fc4e09..e86cd0a5 100644
--- a/python/jittor/test/test_grad.py
+++ b/python/jittor/test/test_grad.py
@@ -73,6 +73,7 @@ class TestGrad(unittest.TestCase):
         assert dx.data == 0
     def test_random_graph(self):
+        @jt.flag_scope(auto_convert_64_to_32=0)
         def test(num_vars, num_ops, seed):
             np.random.seed(seed)
             vars = []
diff --git a/python/jittor/test/test_resize_and_crop.py b/python/jittor/test/test_resize_and_crop.py
index 4eb017db..5fad5e0c 100644
--- a/python/jittor/test/test_resize_and_crop.py
+++ b/python/jittor/test/test_resize_and_crop.py
@@ -91,7 +91,7 @@ def check_equal(arr, j_layer, p_layer):
     pytorch_arr = torch.Tensor(arr)
     jittor_result = j_layer(jittor_arr)
     pytorch_result = p_layer(pytorch_arr)
-    assert np.allclose(pytorch_result.detach().numpy(), jittor_result.numpy())
+    np.testing.assert_allclose(pytorch_result.detach().numpy(), jittor_result.numpy(), rtol=1e-6)
 class TestResizeAndCrop(unittest.TestCase):
     def test(self):
@@ -114,7 +114,10 @@ class
TestResizeAndCrop(unittest.TestCase):
     def test_upsample(self):
         arr = np.random.randn(2,3,224,224)
         check_equal(arr, jnn.Upsample(scale_factor=2), tnn.Upsample(scale_factor=2))
-        check_equal(arr, jnn.Upsample(scale_factor=0.2), tnn.Upsample(scale_factor=0.2))
+        check_equal(arr, jnn.Upsample(scale_factor=0.5), tnn.Upsample(scale_factor=0.5))
+        # PyTorch changes its behavior when scale_factor changes,
+        # so this test cannot pass:
+        # check_equal(arr, jnn.Upsample(scale_factor=0.2), tnn.Upsample(scale_factor=0.2))
     @unittest.skipIf(torch is None, "no torch found")
     def test_pixelshuffle(self):
diff --git a/python/jittor/test/test_slice.py b/python/jittor/test/test_slice.py
index 867d6b38..dbc3cc40 100644
--- a/python/jittor/test/test_slice.py
+++ b/python/jittor/test/test_slice.py
@@ -19,6 +19,7 @@ class TestSlice(unittest.TestCase):
         a[2] = 1
         assert a.dtype == "bool"
         a.sync()
+        assert np.equal(a.data, np.array([0,1,1,0,0,0,0,0,0,0])).all()
     def test_var_slices(self):
         def check(slices, msg):
diff --git a/python/jittor/test/test_ternary_op.py b/python/jittor/test/test_ternary_op.py
index 82f4cf01..33337d08 100644
--- a/python/jittor/test/test_ternary_op.py
+++ b/python/jittor/test/test_ternary_op.py
@@ -14,8 +14,8 @@ from .test_cuda import test_cuda
 class TestTernaryOp(unittest.TestCase):
     def test_with_np(self):
         np.random.seed(0)
-        a = np.random.rand(5,10)
-        b = np.random.rand(5,10)
+        a = np.random.rand(5,10).astype("float32")
+        b = np.random.rand(5,10).astype("float32")
         ja = jt.array(a)
         jb = jt.array(b)
         jc = jt.ternary(ja>jb, ja, jb)
@@ -26,8 +26,8 @@ class TestTernaryOp(unittest.TestCase):
     def test_min(self):
         np.random.seed(1)
-        a = np.random.rand(5,10)
-        b = np.random.rand(5,10)
+        a = np.random.rand(5,10).astype("float32")
+        b = np.random.rand(5,10).astype("float32")
         ja = jt.array(a)
         jb = jt.array(b)
         jc = jt.minimum(ja,jb)
diff --git a/python/jittor/utils/polish_centos.py b/python/jittor/utils/polish_centos.py
index 5706e29b..a2903775 100644
--- a/python/jittor/utils/polish_centos.py
+++ b/python/jittor/utils/polish_centos.py
@@ -52,8 +52,8 @@ def run_in_centos(env):
     centos_path = os.path.join(home_path, ".cache", "centos")
     os.makedirs(centos_path+"/src/jittor", exist_ok=True)
     os.makedirs(centos_path+"/src/jittor_utils", exist_ok=True)
-    os.system(f"cp -rL {jt.flags.jittor_path} {centos_path+'/src/'}")
-    os.system(f"cp -rL {jt.flags.jittor_path}/../jittor_utils {centos_path+'/src/'}")
+    os.system(f"sudo cp -rL {jt.flags.jittor_path} {centos_path+'/src/'}")
+    os.system(f"sudo cp -rL {jt.flags.jittor_path}/../jittor_utils {centos_path+'/src/'}")
     run_cmd(f"sudo docker build --tag centos_build_env -f /tmp/centos_build_env .")
     run_cmd(f"sudo docker run --rm -v {centos_path}:/root/.cache/jittor centos_build_env scl enable devtoolset-7 'PYTHONPATH=/root/.cache/jittor/src {env} python3.8 -m jittor.test.test_core'")
diff --git a/python/jittor/version b/python/jittor/version
index 4712868c..98d3c70f 100644
--- a/python/jittor/version
+++ b/python/jittor/version
@@ -1 +1 @@
-5f0e1aa2f9891c12fc1e190d6cc6177fc6498302
+939b29514b2e5cc591053aab614efd569772585d
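
A note on the Pool3d kernels patched in [PATCH 08/16]: each output axis is covered by a grid-stride loop, where p2/p3/p4 are a thread's start offsets and s2/s3/s4 are the strides (blockDim * res). The original code accidentally redeclared p3 and p2 instead of declaring the strides s3 and s2 that the inner loops use, which is what the patch repairs. The following pure-Python emulation is a sketch only (the covered() helper and the shapes are hypothetical, not part of the patch); it shows that the start/stride scheme visits every index exactly once:

    import itertools

    def covered(shape, block_dim):
        # shape ~ (in0_shape2, in0_shape3, in0_shape4); block_dim ~ (tz, ty, tx)
        d2, d3, d4 = shape
        t2, t3, t4 = block_dim
        r2, r3, r4 = ((d - 1) // t + 1 for d, t in zip(shape, block_dim))
        seen = []
        for idx2, idx3, idx4 in itertools.product(range(r2), range(r3), range(r4)):
            for th2, th3, th4 in itertools.product(range(t2), range(t3), range(t4)):
                p2, s2 = th2 + idx2 * t2, t2 * r2   # start offset and stride
                p3, s3 = th3 + idx3 * t3, t3 * r3   # (the patch restores s3/s2)
                p4, s4 = th4 + idx4 * t4, t4 * r4
                for i2 in range(p2, d2, s2):
                    for i3 in range(p3, d3, s3):
                        for i4 in range(p4, d4, s4):
                            seen.append((i2, i3, i4))
        return seen

    out = covered((5, 7, 9), (2, 3, 4))
    assert len(out) == len(set(out)) == 5 * 7 * 9   # each index visited exactly once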
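The 3-D norm aliases added in [PATCH 10/16] work because BatchNorm, InstanceNorm, and LayerNorm in nn.py are written shape-generically, so the same modules accept 5-D NCDHW input. A smoke-test sketch (shapes chosen arbitrarily):

    import jittor as jt
    from jittor import nn

    x = jt.random((2, 4, 8, 8, 8))         # NCDHW input
    print(nn.BatchNorm3d(4)(x).shape)      # per-channel stats over N,D,H,W
    print(nn.InstanceNorm3d(4)(x).shape)   # per-sample, per-channel stats
    print(nn.LayerNorm3d(8)(x).shape)      # normalized_shape=8 matches the last dim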
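The trace_var_data flag from [PATCH 12/16] complements trace_py_var: when both are set, record_exe_node() stores each variable's contents, serialized by get_var_data_str() as a comma-separated string, under a "data" attribute. A condensed usage sketch; the flag_scope/dump_trace_data calls mirror the test above, but the per-node attrs layout is an assumption read off py_var_tracer.cc, not a documented API:

    import numpy as np
    import jittor as jt

    with jt.flag_scope(trace_py_var=2, trace_var_data=1):
        x = jt.array(np.arange(6, dtype="float32").reshape(2, 3))
        y = x * 2 + 1
        y.sync()
        data = jt.dump_trace_data()
        jt.clear_trace_data()

    # Variable nodes should now carry a "data" attribute (only when mem_ptr
    # was allocated), holding the values as a comma-separated string.
    for node in data["node_data"].values():
        attrs = node["attrs"]
        if attrs.get("is_var") == "1" and "data" in attrs:
            print(attrs["name"], "=>", attrs["data"])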
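[PATCH 13/16] and [PATCH 14/16] simply extend first-hit search chains: /usr/lib is appended for the CUDA libraries, and /opt/cuda/bin/nvcc for the compiler. A sketch of the pattern; find_nvcc here is a hypothetical stand-in for the env_or_try_find/try_find_exe chain in compiler.py:

    import os
    import shutil

    def find_nvcc():
        # Environment override first, then PATH, then well-known locations,
        # mirroring the order added by the patch.
        candidates = [os.environ.get("nvcc_path"), shutil.which("nvcc"),
                      "/usr/local/cuda/bin/nvcc", "/usr/bin/nvcc",
                      "/opt/cuda/bin/nvcc"]
        for c in candidates:
            if c and os.path.isfile(c) and os.access(c, os.X_OK):
                return c
        return None  # caller then falls back to install_cuda.install_cuda()

    print(find_nvcc())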
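jt.roll from [PATCH 15/16] is a single reindex: output index i along a rolled dim reads input index (i - shift) mod size, which is exactly the ternary expression built in ids[d]. A quick cross-check against np.roll (sketch):

    import numpy as np
    import jittor as jt

    x = jt.array(np.arange(8).reshape(4, 2))
    for shifts, dims in [(1, 0), (-1, 0), ((2, 1), (0, 1)), (5, 0)]:
        expected = np.roll(x.numpy(), shifts, dims)
        assert (x.roll(shifts, dims).numpy() == expected).all()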