diff --git a/python/jittor/ccl/__init__.py b/python/jittor/ccl/__init__.py
new file mode 100644
index 00000000..23cfbaa7
--- /dev/null
+++ b/python/jittor/ccl/__init__.py
@@ -0,0 +1,3 @@
+from .ccl_2d import ccl_2d
+from .ccl_3d import ccl_3d
+from .ccl_link import ccl_link
\ No newline at end of file
diff --git a/python/jittor/ccl/ccl_2d.py b/python/jittor/ccl/ccl_2d.py
new file mode 100644
index 00000000..9bbd5d11
--- /dev/null
+++ b/python/jittor/ccl/ccl_2d.py
@@ -0,0 +1,177 @@
+import jittor as jt
+
+
+def ccl_2d(data_2d):
+    ''' 
+    2D connected component labelling, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm
+    Args:
+        [in]param data_2d: binary two-dimensional vector
+            type data_2d: jittor array
+
+    Returns:
+        [out]result: labeled two-dimensional vector
+
+    Example:
+    >>> import jittor as jt
+    >>> jt.flags.use_cuda = 1
+    >>> import cv2
+    >>> import numpy as np
+    >>> img = cv2.imread('testImg.png', 0)
+    >>> a = img.mean()
+    >>> img[img <= a] = 0
+    >>> img[img > a] = 1
+    >>> img = jt.Var(img)
+
+    >>> result = ccl_2d(img)
+    >>> print(jt.unique(result, return_counts=True, return_inverse=True)[0], jt.unique(result, return_counts=True, return_inverse=True)[2])
+    >>> cv2.imwrite('testImg_result.png', result.numpy().astype(np.uint8) * 50)
+    '''
+
+    data_2d = data_2d.astype(jt.uint32)
+    cY = data_2d.shape[0]
+    cX = data_2d.shape[1]
+    data_2d_copy = data_2d.clone()
+    changed = jt.ones([1], dtype=jt.uint32)
+    data_2d = data_2d.reshape(cX * cY)
+    result = jt.code(data_2d.shape,
+                     data_2d.dtype, [data_2d, changed],
+                     cuda_header='''
+                    @alias(g_image, in0)
+                    @alias(g_labels, out)
+                    ''',
+                     cuda_src=r'''
+                    __global__ void init_labels(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+                        @g_labels(iy*cX + ix) = iy*cX + ix;
+                    }
+
+                    __device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
+                        // Resolve Label
+                        unsigned int next = @g_labels(label);
+
+                        // Follow chain
+                        while(label != next) {
+                            // Move to next
+                            label = next;
+                            next = @g_labels(label);
+                        }
+
+                        // Return label
+                        return label;
+                    }
+
+                    __global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int id = ((blockIdx.y * blockDim.y) + threadIdx.y) * cX +
+                                                ((blockIdx.x * blockDim.x) + threadIdx.x);
+                        
+                        // Check Thread Range
+                        if(id < cX*cY) {
+                            // Resolve Label
+                            @g_labels(id) = find_root(@ARGS, @g_labels(id));
+                        }
+                    }
+
+                    __global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+                        // Check Thread Range
+                        if((ix < cX) && (iy < cY)) {
+                            // Get image and label values
+                            const unsigned char cyx   = @g_image( iy*cX + ix);
+                            
+                            // Get neighbour labels
+                            const unsigned int lym1x = (iy > 0)    ? @g_labels((iy-1)*cX +   ix) : 0;
+                            const unsigned int lyxm1 = (ix > 0)    ? @g_labels(iy    *cX + ix-1) : 0;
+                            const unsigned int lyx   =               @g_labels(iy    *cX +   ix);
+                            const unsigned int lyxp1 = (ix < cX-1) ? @g_labels(iy    *cX + ix+1) : 0;
+                            const unsigned int lyp1x = (iy < cY-1) ? @g_labels((iy+1)*cX +   ix) : 0;
+
+                            const unsigned int lym1xm1 = (iy > 0    && ix > 0   )    ? @g_labels((iy-1)*cX +   ix-1) : 0;
+                            const unsigned int lym1xp1 = (iy > 0    && ix < cX-1)    ? @g_labels((iy-1)*cX +   ix+1) : 0;
+                            const unsigned int lyp1xm1 = (iy < cY-1 && ix > 0   )    ? @g_labels((iy+1)*cX +   ix-1) : 0;
+                            const unsigned int lyp1xp1 = (iy < cY-1 && ix < cX-1)    ? @g_labels((iy+1)*cX +   ix+1) : 0;
+
+                            const bool nym1x = (iy > 0)    ? (cyx == (@g_image((iy-1)*cX +   ix))) : false;
+                            const bool nyxm1 = (ix > 0)    ? (cyx == (@g_image(iy    *cX + ix-1))) : false;
+                            const bool nyxp1 = (ix < cX-1) ? (cyx == (@g_image(iy    *cX + ix+1))) : false;
+                            const bool nyp1x = (iy > cY-1) ? (cyx == (@g_image((iy+1)*cX +   ix))) : false;
+
+                            const bool nym1xm1 = (iy > 0    && ix > 0   )    ? (cyx == (@g_image((iy-1)*cX +   ix-1))) : false;
+                            const bool nym1xp1 = (iy > 0    && ix < cX-1)    ? (cyx == (@g_image((iy-1)*cX +   ix+1))) : false;
+                            const bool nyp1xm1 = (iy < cY-1 && ix > 0   )    ? (cyx == (@g_image((iy+1)*cX +   ix-1))) : false;
+                            const bool nyp1xp1 = (iy < cY-1 && ix < cX-1)    ? (cyx == (@g_image((iy+1)*cX +   ix+1))) : false;
+
+                            // Lowest label
+                            unsigned int label = lyx;
+
+                            // Find lowest neighbouring label
+                            label = ((nym1x) && (lym1x < label)) ? lym1x : label;
+                            label = ((nyxm1) && (lyxm1 < label)) ? lyxm1 : label;
+                            label = ((nyxp1) && (lyxp1 < label)) ? lyxp1 : label;
+                            label = ((nyp1x) && (lyp1x < label)) ? lyp1x : label;
+
+                            label = ((nym1xm1) && (lym1xm1 < label)) ? lym1xm1 : label;
+                            label = ((nym1xp1) && (lym1xp1 < label)) ? lym1xp1 : label;
+                            label = ((nyp1xm1) && (lyp1xm1 < label)) ? lyp1xm1 : label;
+                            label = ((nyp1xp1) && (lyp1xp1 < label)) ? lyp1xp1 : label;
+
+                            // If labels are different, resolve them
+                            if(label < lyx) {
+                                // Update label
+                                // Nonatomic write may overwrite another label but on average seems to give faster results
+                                @g_labels(lyx) = label;
+
+                                // Record the change
+                                @in1(0) = 1;
+                            }
+                        }
+                    }
+                    ''' + f'''
+                    dim3 block(32, 32);
+                    const int cX= {cX};
+                    const int cY= {cY};''' + '''
+                    dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y));
+                    dim3 resolve_block(32, 32);
+                    dim3 resolve_grid(ceil(cX/(float)resolve_block.x), ceil(cY/(float)resolve_block.y));
+                    
+                    // Initialise labels
+                    init_labels <<< grid, block >>>(@ARGS, cX, cY);
+                    
+                    // Resolve the labels
+                    resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
+
+                    // Changed Flag
+                    int32 changed = 1;
+                    
+                    // While labels have changed
+                    while(changed) {
+                        // Copy changed to device
+                        cudaMemsetAsync(in1_p, 0, 4);
+                        
+                        // Label image
+                        label_equivalence <<< grid, block >>>(@ARGS, cX, cY);
+
+                        // Copy changed flag to host
+                        cudaMemcpy(&changed, in1_p, sizeof(int32), cudaMemcpyDeviceToHost);
+                        
+                        // Resolve the labels
+                        resolve_labels <<< resolve_grid, resolve_block>>>(@ARGS, cX, cY);
+                    }
+                    ''')
+    result = result.reshape((cY, cX)) * data_2d_copy
+    value = jt.unique(result)
+    value = value[value != 0]
+
+    map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
+    map_result[value] = jt.index(value.shape)[0] + 1
+    result = map_result[result]
+
+    return result
diff --git a/python/jittor/ccl/ccl_3d.py b/python/jittor/ccl/ccl_3d.py
new file mode 100644
index 00000000..e0611d98
--- /dev/null
+++ b/python/jittor/ccl/ccl_3d.py
@@ -0,0 +1,196 @@
+import jittor as jt
+
+
+def ccl_3d(data_3d):
+    ''' 
+    3D connected component labelling, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm
+    Args:
+        [in]param data_3d: binary three-dimensional vector
+            type data_3d: jittor array
+
+    Returns:
+        [out]result : labeled three-dimensional vector
+
+    Example:
+    >>> import jittor as jt
+    >>> jt.flags.use_cuda = 1
+    >>> data_3d = jt.zeros((10, 11, 12), dtype=jt.uint32)
+    >>> data_3d[2:4, :, :] = 1
+    >>> data_3d[5:7, :, :] = 1
+    >>> result = ccl_3d(data_3d)
+    >>> print(result[:, 0, 0])
+    >>> print(
+        jt.unique(result, return_counts=True, return_inverse=True)[0],
+        jt.unique(result, return_counts=True, return_inverse=True)[2])
+    '''
+
+    data_3d = data_3d.astype(jt.uint32)
+    cX = data_3d.shape[0]
+    cY = data_3d.shape[1]
+    cZ = data_3d.shape[2]
+    changed = jt.ones([1], dtype=jt.uint32)
+    data_3d_copy = data_3d.copy()
+    data_3d = data_3d.reshape(cX * cY * cZ)
+    result = jt.code(data_3d.shape,
+                     data_3d.dtype, [data_3d, changed],
+                     cuda_header='''
+                    @alias(g_image, in0)
+                    @alias(g_labels, out)
+                    ''',
+                     cuda_src=r'''
+                    __global__ void init_labels(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+                        const unsigned int iz = (blockIdx.z * blockDim.z) + threadIdx.z;
+
+                        if((ix < cX) && (iy < cY) && (iz < cZ)) {
+                            const unsigned char pzyx = @g_image(iz*pY + iy*pX + ix);
+                            
+                            // Neighbour Connections
+                            const bool nzm1yx   = (iz > 0) ? (pzyx == @g_image((iz-1)*pY +  iy   *pX + ix  )) : false;
+                            const bool nzym1x   = (iy > 0) ? (pzyx == @g_image( iz   *pY + (iy-1)*pX + ix  )) : false;
+                            const bool nzyxm1   = (ix > 0) ? (pzyx == @g_image( iz   *pY +  iy   *pX + ix-1)) : false;
+
+                            // Label
+                            unsigned int label;
+
+                            // Initialise Label
+                            label = (nzyxm1) ? (    iz*pY +     iy*pX + ix-1) : (iz*pY + iy*pX + ix);
+                            label = (nzym1x) ? (    iz*pY + (iy-1)*pX +   ix) : label;
+                            label = (nzm1yx) ? ((iz-1)*pY +     iy*pX +   ix) : label;
+                            // Write to Global Memory
+                            @g_labels(iz*pY + iy*pX + ix) = label;
+                        }
+                    }
+
+                    __device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
+                        // Resolve Label
+                        unsigned int next = @g_labels(label);
+
+                        // Follow chain
+                        while(label != next) {
+                            // Move to next
+                            label = next;
+                            next = @g_labels(label);
+                        }
+
+                        // Return label
+                        return label;
+                    }
+
+                    __global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int id = ((blockIdx.z * blockDim.z) + threadIdx.z) * pY +
+                                                ((blockIdx.y * blockDim.y) + threadIdx.y) * pX +
+                                                ((blockIdx.x * blockDim.x) + threadIdx.x);
+                        
+                        // Check Thread Range
+                        if(id < cX*cY*cZ) {
+                            // Resolve Label
+                            @g_labels(id) = find_root(@ARGS, @g_labels(id));
+                        }
+                    }
+
+                    __global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY, const int cZ, const int pX, const int pY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+                        const unsigned int iz = (blockIdx.z * blockDim.z) + threadIdx.z;
+
+                        // Check Thread Range
+                        if((ix < cX) && (iy < cY) && (iz < cZ)) {
+                            // Get image and label values
+                            const unsigned char pzyx  = @g_image(iz*pY + iy*pX + ix);
+                            
+                            // Neighbouring indexes
+                            const unsigned int xm1 = ix-1;
+                            const unsigned int xp1 = ix+1;
+                            const unsigned int ym1 = iy-1;
+                            const unsigned int yp1 = iy+1;
+                            const unsigned int zm1 = iz-1;
+                            const unsigned int zp1 = iz+1;
+
+                            // Get neighbour labels
+                            const unsigned int lzm1yx = (iz > 0)    ? @g_labels(zm1*pY +  iy*pX +  ix) : 0;
+                            const unsigned int lzym1x = (iy > 0)    ? @g_labels( iz*pY + ym1*pX +  ix) : 0;
+                            const unsigned int lzyxm1 = (ix > 0)    ? @g_labels( iz*pY +  iy*pX + xm1) : 0;
+                            const unsigned int lzyx   =               @g_labels( iz*pY +  iy*pX +  ix);
+                            const unsigned int lzyxp1 = (ix < cX-1) ? @g_labels( iz*pY +  iy*pX + xp1) : 0;
+                            const unsigned int lzyp1x = (iy < cY-1) ? @g_labels( iz*pY + yp1*pX +  ix) : 0;
+                            const unsigned int lzp1yx = (iz < cZ-1) ? @g_labels(zp1*pY +  iy*pX +  ix) : 0;
+
+                            const bool nzm1yx = (iz > 0)    ? (pzyx == @g_image(zm1*pY +  iy*pX +  ix)) : false;
+                            const bool nzym1x = (iy > 0)    ? (pzyx == @g_image( iz*pY + ym1*pX +  ix)) : false;
+                            const bool nzyxm1 = (ix > 0)    ? (pzyx == @g_image( iz*pY +  iy*pX + xm1)) : false;
+                            const bool nzyxp1 = (ix < cX-1) ? (pzyx == @g_image( iz*pY +  iy*pX + xp1)) : false;
+                            const bool nzyp1x = (iy < cY-1) ? (pzyx == @g_image( iz*pY + yp1*pX +  ix)) : false;
+                            const bool nzp1yx = (iz < cZ-1) ? (pzyx == @g_image(zp1*pY +  iy*pX +  ix)) : false;
+
+                            // Lowest label
+                            unsigned int label = lzyx;
+
+                            // Find lowest neighbouring label
+                            label = ((nzm1yx) && (lzm1yx < label)) ? lzm1yx : label;
+                            label = ((nzym1x) && (lzym1x < label)) ? lzym1x : label;
+                            label = ((nzyxm1) && (lzyxm1 < label)) ? lzyxm1 : label;
+                            label = ((nzyxp1) && (lzyxp1 < label)) ? lzyxp1 : label;
+                            label = ((nzyp1x) && (lzyp1x < label)) ? lzyp1x : label;
+                            label = ((nzp1yx) && (lzp1yx < label)) ? lzp1yx : label;
+
+                            // If labels are different, resolve them
+                            if(label < lzyx) {
+                                // Update label
+                                // Nonatomic write may overwrite another label but on average seems to give faster results
+                                @g_labels(lzyx) = label;
+
+                                // Record the change
+                                @in1(0) = 1;
+                            }
+                        }
+                    }
+                    ''' + f'''
+                    dim3 block(32, 4, 4);
+                    const int cX= {cX};
+                    const int cY= {cY};
+                    const int cZ= {cZ};
+                    const int pX= cX;
+                    const int pY= cX*cY;''' + '''
+                    dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y), ceil(cZ/(float)block.z));
+                    
+                    // Initialise labels
+                    init_labels <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
+                    
+                    // Resolve the labels
+                    resolve_labels <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
+
+                    // Changed Flag
+                    int32 changed = 1;
+                    
+                    // While labels have changed
+                    while(changed) {
+                        // Copy changed to device
+                        cudaMemsetAsync(in1_p, 0, 4);
+                        
+                        // Label image
+                        label_equivalence <<< grid, block >>>(@ARGS, cX, cY, cZ, pX, pY);
+
+                        // Copy changed flag to host
+                        cudaMemcpy(&changed, in1_p, sizeof(int32), cudaMemcpyDeviceToHost);
+                        
+                        // Resolve the labels
+                        resolve_labels <<< grid, block>>>(@ARGS, cX, cY, cZ, pX, pY);
+                    }
+                    ''')
+    result = result.reshape((cX, cY, cZ)) * data_3d_copy
+    value = jt.unique(result)
+    value = value[value != 0]
+
+    map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
+    map_result[value] = jt.index(value.shape)[0] + 1
+    result = map_result[result]
+
+    return result
diff --git a/python/jittor/ccl/ccl_link.py b/python/jittor/ccl/ccl_link.py
new file mode 100644
index 00000000..0e18cbc8
--- /dev/null
+++ b/python/jittor/ccl/ccl_link.py
@@ -0,0 +1,195 @@
+import jittor as jt
+
+
+def ccl_link(score_map, link_map, result_comp_area_thresh=6):
+    """
+    Find components in score map and link them with link map, original code from https://github.com/DanielPlayne/playne-equivalence-algorithm.
+    Args:
+        [in]param score_map: binary two-dimensional vector
+            type score_map: jittor array
+        [in]param link_map: two-dimensional vector with 8 channels
+            type link_map: jittor array
+        [in]param result_comp_area_thresh: threshold of component area
+            type result_comp_area_thresh: int
+    Returns:
+        [out]result: labeled two-dimensional vector
+    Example:
+    >>> import jittor as jt
+    >>> jt.flags.use_cuda = 1
+    >>> import cv2
+    >>> import numpy as np
+    >>> score_map = jt.Var(np.load("score_map.npy"))
+    >>> link_map = jt.Var(np.load("link_map.npy"))
+    >>> score_map = score_map >= 0.5
+    >>> link_map = link_map >= 0.8
+    >>> for i in range(8):
+    >>>     link_map[:, :, i] = link_map[:, :, i] & score_map
+    
+    >>> result = ccl_link(score_map, link_map)
+    >>> cv2.imwrite('pixellink.png', result.numpy().astype(np.uint8) * 50)
+    """
+    score_map = score_map.astype(jt.uint32)
+    link_map = link_map.astype(jt.uint32)
+    cY = score_map.shape[0]
+    cX = score_map.shape[1]
+    changed = jt.ones([1], dtype=jt.uint32)
+    score_map = score_map.reshape(cX * cY)
+    result = jt.code(score_map.shape,
+                     score_map.dtype, [score_map, link_map, changed],
+                     cuda_header='''
+                    @alias(score_map, in0)
+                    @alias(link_map, in1)
+                    @alias(g_labels, out)
+                    ''',
+                     cuda_src=r'''
+                    __global__ void init_labels(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+                        @g_labels(iy*cX + ix) = iy*cX + ix;
+                    }
+
+                    __device__ __inline__ unsigned int find_root(@ARGS_DEF, unsigned int label) {
+                        // Resolve Label
+                        unsigned int next = @g_labels(label);
+
+                        // Follow chain
+                        while(label != next) {
+                            // Move to next
+                            label = next;
+                            next = @g_labels(label);
+                        }
+
+                        // Return label
+                        return label;
+                    }
+
+                    __global__ void resolve_labels(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int id = ((blockIdx.y * blockDim.y) + threadIdx.y) * cX +
+                                                ((blockIdx.x * blockDim.x) + threadIdx.x);
+                        
+                        // Check Thread Range
+                        if(id < cX*cY) {
+                            // Resolve Label
+                            @g_labels(id) = find_root(@ARGS, @g_labels(id));
+                        }
+                    }
+
+                    __global__ void label_equivalence(@ARGS_DEF, const int cX, const int cY) {
+                        @PRECALC
+                        // Calculate index
+                        const unsigned int ix = (blockIdx.x * blockDim.x) + threadIdx.x;
+                        const unsigned int iy = (blockIdx.y * blockDim.y) + threadIdx.y;
+
+                        // Check Thread Range
+                        if((ix < cX) && (iy < cY)) {
+                            // Get image and label values
+                            const unsigned char cyx   = @score_map( iy*cX + ix);
+                            
+                            // Get neighbour labels
+                            const unsigned int lym1x = (iy > 0)    ? @g_labels((iy-1)*cX +   ix) : 0;
+                            const unsigned int lyxm1 = (ix > 0)    ? @g_labels(iy    *cX + ix-1) : 0;
+                            const unsigned int lyx   =               @g_labels(iy    *cX +   ix);
+                            const unsigned int lyxp1 = (ix < cX-1) ? @g_labels(iy    *cX + ix+1) : 0;
+                            const unsigned int lyp1x = (iy < cY-1) ? @g_labels((iy+1)*cX +   ix) : 0;
+
+                            const unsigned int lym1xm1 = (iy > 0    && ix > 0   )    ? @g_labels((iy-1)*cX +   ix-1) : 0;
+                            const unsigned int lym1xp1 = (iy > 0    && ix < cX-1)    ? @g_labels((iy-1)*cX +   ix+1) : 0;
+                            const unsigned int lyp1xm1 = (iy < cY-1 && ix > 0   )    ? @g_labels((iy+1)*cX +   ix-1) : 0;
+                            const unsigned int lyp1xp1 = (iy < cY-1 && ix < cX-1)    ? @g_labels((iy+1)*cX +   ix+1) : 0;
+                            bool nym1x, nyxm1, nyxp1, nyp1x, nym1xm1, nym1xp1, nyp1xm1, nyp1xp1;
+                            if(cyx) {
+                                nym1x = (iy > 0)    ? ((cyx == (@score_map((iy-1)*cX +   ix))) && (@link_map(iy, ix, 6) || @link_map(iy-1,   ix, 7))) : false; // up
+                                nyxm1 = (ix > 0)    ? ((cyx == (@score_map(iy    *cX + ix-1))) && (@link_map(iy, ix, 0) || @link_map(iy-1, ix-1, 3))) : false; // left
+                                nyxp1 = (ix < cX-1) ? ((cyx == (@score_map(iy    *cX + ix+1))) && (@link_map(iy, ix, 3) || @link_map(iy,   ix+1, 0))) : false; // right
+                                nyp1x = (iy > cY-1) ? ((cyx == (@score_map((iy+1)*cX +   ix))) && (@link_map(iy, ix, 7) || @link_map(iy+1,   ix, 6))) : false; // down
+
+                                nym1xm1 = (iy > 0    && ix > 0   )    ? ((cyx == (@score_map((iy-1)*cX +   ix-1))) && (@link_map(iy, ix, 2) || @link_map(iy-1,  ix-1, 4))) : false; // up-left
+                                nym1xp1 = (iy > 0    && ix < cX-1)    ? ((cyx == (@score_map((iy-1)*cX +   ix+1))) && (@link_map(iy, ix, 5) || @link_map(iy-1,  ix+1, 1))) : false; // up-right
+                                nyp1xm1 = (iy < cY-1 && ix > 0   )    ? ((cyx == (@score_map((iy+1)*cX +   ix-1))) && (@link_map(iy, ix, 1) || @link_map(iy+1,  ix-1, 5))) : false; // down-left
+                                nyp1xp1 = (iy < cY-1 && ix < cX-1)    ? ((cyx == (@score_map((iy+1)*cX +   ix+1))) && (@link_map(iy, ix, 4) || @link_map(iy+1,  ix+1, 2))) : false; // down-right
+                            }
+                            else {
+                                nym1x = (iy > 0)    ? (cyx == (@score_map((iy-1)*cX +   ix))) : false; // up
+                                nyxm1 = (ix > 0)    ? (cyx == (@score_map(iy    *cX + ix-1))) : false; // left
+                                nyxp1 = (ix < cX-1) ? (cyx == (@score_map(iy    *cX + ix+1))) : false; // right
+                                nyp1x = (iy > cY-1) ? (cyx == (@score_map((iy+1)*cX +   ix))) : false; // down
+
+                                nym1xm1 = (iy > 0    && ix > 0   )    ? (cyx == (@score_map((iy-1)*cX +   ix-1))) : false; // up-left
+                                nym1xp1 = (iy > 0    && ix < cX-1)    ? (cyx == (@score_map((iy-1)*cX +   ix+1))) : false; // up-right
+                                nyp1xm1 = (iy < cY-1 && ix > 0   )    ? (cyx == (@score_map((iy+1)*cX +   ix-1))) : false; // down-left
+                                nyp1xp1 = (iy < cY-1 && ix < cX-1)    ? (cyx == (@score_map((iy+1)*cX +   ix+1))) : false; // down-right
+                            }
+
+                            // Lowest label
+                            unsigned int label = lyx;
+
+                            // Find lowest neighbouring label
+                            label = ((nym1x) && (lym1x < label)) ? lym1x : label;
+                            label = ((nyxm1) && (lyxm1 < label)) ? lyxm1 : label;
+                            label = ((nyxp1) && (lyxp1 < label)) ? lyxp1 : label;
+                            label = ((nyp1x) && (lyp1x < label)) ? lyp1x : label;
+
+                            label = ((nym1xm1) && (lym1xm1 < label)) ? lym1xm1 : label;
+                            label = ((nym1xp1) && (lym1xp1 < label)) ? lym1xp1 : label;
+                            label = ((nyp1xm1) && (lyp1xm1 < label)) ? lyp1xm1 : label;
+                            label = ((nyp1xp1) && (lyp1xp1 < label)) ? lyp1xp1 : label;
+
+                            // If labels are different, resolve them
+                            if(label < lyx) {
+                                // Update label
+                                // Nonatomic write may overwrite another label but on average seems to give faster results
+                                @g_labels(lyx) = label;
+
+                                // Record the change
+                                @in2(0) = 1;
+                            }
+                        }
+                    }
+                    ''' + f'''
+                    dim3 block(32, 32);
+                    const int cX= {cX};
+                    const int cY= {cY};''' + '''
+                    dim3 grid(ceil(cX/(float)block.x), ceil(cY/(float)block.y));
+                    dim3 resolve_block(32, 32);
+                    dim3 resolve_grid(ceil(cX/(float)resolve_block.x), ceil(cY/(float)resolve_block.y));
+                    
+                    // Initialise labels
+                    init_labels <<< grid, block >>>(@ARGS, cX, cY);
+                    
+                    // Resolve the labels
+                    resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
+
+                    // Changed Flag
+                    int32 changed = 1;
+                    
+                    // While labels have changed
+                    while(changed) {
+                        // Copy changed to device
+                        cudaMemsetAsync(in2_p, 0, 4);
+                        
+                        // Label image
+                        label_equivalence <<< grid, block >>>(@ARGS, cX, cY);
+
+                        // Copy changed flag to host
+                        cudaMemcpy(&changed, in2_p, sizeof(int32), cudaMemcpyDeviceToHost);
+                        
+                        // Resolve the labels
+                        resolve_labels <<< resolve_grid, resolve_block >>>(@ARGS, cX, cY);
+                    }
+                    ''')
+
+    result = result.reshape((cY, cX))
+
+    value, _, cnt = jt.unique(result, return_inverse=True, return_counts=True)
+    value = (cnt > result_comp_area_thresh) * value
+    value = value[value != 0]
+
+    map_result = jt.zeros((int(value.max().numpy()[0]) + 1), dtype=jt.uint32)
+    map_result[value] = jt.index(value.shape)[0] + 1
+    result = map_result[result]
+
+    return result