forked from OSchip/llvm-project
relational: Implement shuffle builtin
This was added in CL 1.1
Tested with a Radeon HD 7850 (Pitcairn) using the CL CTS via:
test_conformance/relationals/test_relationals shuffle_built_in
v2: Add half-precision support to shuffle when available.
Move to misc/ and add section 6.12.12 to clc.h
Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 312403
This commit is contained in:
parent
da8dfefd1c
commit
880f15dae6
|
|
@ -237,6 +237,9 @@
|
|||
|
||||
/* 6.11.12 Miscellaneous Vector Functions */
#include <clc/misc/shuffle.h>

/* 6.11.13 Image Read and Write Functions */
#include <clc/image/image_defines.h>
#include <clc/image/image.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
//===-- generic/include/clc/misc/shuffle.h ------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under both the University of Illinois Open Source
|
||||
// License and the MIT license. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Declares a single shuffle() overload taking an input of type TYPE and a mask
// of type MASKTYPE and returning RETTYPE. The expansion already ends with a
// semicolon, so invocations are written without one.
#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \
|
||||
_CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask);
|
||||
|
||||
// The return type has the same element type as the input and the same vector width as the mask.
|
||||
// Each mask element must have the same size (number of bits) as each element of the input vector.
|
||||
//E.g. char8 ret = shuffle(char2 x, uchar8 mask);
|
||||
|
||||
// For one input vector type INTYPE (whose element type is INBASE), declares
// shuffle() once per mask width: 2, 4, 8 and 16. The result vector is built
// from INBASE and has the same width as the mask.
// The last line deliberately has no trailing backslash, so the macro cannot
// absorb whatever line happens to follow it.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16)
|
||||
|
||||
// For element type TYPE and mask element type MASKTYPE, declares shuffle() for
// every input vector width (2, 4, 8, 16); each of those in turn is declared
// against every mask width by _CLC_VECTOR_SHUFFLE_MASKSIZE.
// The last line deliberately has no trailing backslash, so the macro cannot
// absorb whatever line happens to follow it.
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE)
|
||||
|
||||
// Declare shuffle() for every supported element type. The second argument of
// each invocation is the unsigned integer type used for the mask elements.
// The double and half variants are declared only when the corresponding
// extension macro (cl_khr_fp64 / cl_khr_fp16) is defined.
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
// The helper macros exist only to generate the declarations in this header;
// undefine them so they are not visible to code that includes it.
#undef _CLC_SHUFFLE_DECL
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
||||
|
|
@ -123,6 +123,7 @@ math/sqrt.cl
|
|||
math/tan.cl
|
||||
math/tanh.cl
|
||||
math/tgamma.cl
|
||||
misc/shuffle.cl
|
||||
relational/all.cl
|
||||
relational/any.cl
|
||||
relational/bitselect.cl
|
||||
|
|
@ -139,6 +140,7 @@ relational/isnormal.cl
|
|||
relational/isnotequal.cl
|
||||
relational/isordered.cl
|
||||
relational/isunordered.cl
|
||||
relational/shuffle2.cl
|
||||
relational/signbit.cl
|
||||
shared/clamp.cl
|
||||
shared/max.cl
|
||||
|
|
|
|||
|
|
@ -0,0 +1,157 @@
|
|||
//===-- generic/lib/misc/shuffle.cl ------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under both the University of Illinois Open Source
|
||||
// License and the MIT license. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc.h>
|
||||
|
||||
// Case labels for a switch over a lane index: `case N` returns lane N of the
// vector expression VAR. Each wider list starts with the next narrower one, so
// _CLC_ELEMENT_CASES16 covers lanes 0-15 (s0..s9 followed by sA..sF).
// VAR is parenthesized so that any vector-valued expression, not just a plain
// identifier, can be passed.
#define _CLC_ELEMENT_CASES2(VAR) \
  case 0: return (VAR).s0; \
  case 1: return (VAR).s1;

#define _CLC_ELEMENT_CASES4(VAR) \
  _CLC_ELEMENT_CASES2(VAR) \
  case 2: return (VAR).s2; \
  case 3: return (VAR).s3;

#define _CLC_ELEMENT_CASES8(VAR) \
  _CLC_ELEMENT_CASES4(VAR) \
  case 4: return (VAR).s4; \
  case 5: return (VAR).s5; \
  case 6: return (VAR).s6; \
  case 7: return (VAR).s7;

#define _CLC_ELEMENT_CASES16(VAR) \
  _CLC_ELEMENT_CASES8(VAR) \
  case 8: return (VAR).s8; \
  case 9: return (VAR).s9; \
  case 10: return (VAR).sA; \
  case 11: return (VAR).sB; \
  case 12: return (VAR).sC; \
  case 13: return (VAR).sD; \
  case 14: return (VAR).sE; \
  case 15: return (VAR).sF;
|
||||
|
||||
// Defines the scalar helper __clc_get_el_<ARGTYPE><ARGSIZE>_<IDXTYPE>(x, idx),
// which returns lane `idx` of the ARGSIZE-wide vector x by switching over the
// lane index (_CLC_ELEMENT_CASES<ARGSIZE> supplies the case labels).
// The shuffle() definitions in this file mask every index with ARGSIZE-1
// before calling the helper, so the `default` branch (returning 0) is only a
// fallback for out-of-range indices.
// NOTE(review): the helper is declared with a bare `inline`; confirm the build
// never needs an out-of-line definition of it.
// The closing brace deliberately has no trailing backslash, so the macro
// cannot absorb whatever line happens to follow it.
#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
  inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \
      ARGTYPE##ARGSIZE x, IDXTYPE idx) { \
    switch (idx) { \
      _CLC_ELEMENT_CASES##ARGSIZE(x) \
      default: return 0; \
    } \
  }
|
||||
|
||||
// Fills one lane of the result: ret_val.s<INDEX> receives the element of x
// selected by mask.s<INDEX>, fetched through the matching __clc_get_el_*
// helper. Expects `x`, `mask` and `ret_val` to be in scope where it expands.
#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
  ret_val.s##INDEX = \
      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);

// _CLC_SHUFFLE_SET_<N>_ELEMENTS fills lanes 0..N-1 of ret_val. Each wider
// variant starts with the next narrower one and appends the remaining lanes
// through _CLC_SHUFFLE_SET_ONE_ELEMENT (lanes 10-15 use the suffixes A-F).
#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 0, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 1, MASKTYPE)

#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 2, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 3, MASKTYPE)

#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 4, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 5, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 6, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 7, MASKTYPE)

#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 8, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, 9, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, A, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, B, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, C, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, D, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, E, MASKTYPE) \
  _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, F, MASKTYPE)
|
||||
|
||||
// _CLC_SHUFFLE_DEFINE<N> defines the shuffle() overload that takes an
// ARGSIZE-wide input vector of ARGTYPE and an N-wide mask of MASKTYPE, and
// returns an N-wide vector of ARGTYPE.
// Every mask lane is first ANDed with ARGSIZE-1 so that it indexes a valid
// lane of x; the lanes of the result are then filled by the matching
// _CLC_SHUFFLE_SET_<N>_ELEMENTS macro, which refers to the names `x`, `mask`
// and `ret_val` used here.
#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##2 mask) { \
    mask &= (MASKTYPE##2)(ARGSIZE - 1); \
    ARGTYPE##2 ret_val; \
    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }

#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##4 mask) { \
    mask &= (MASKTYPE##4)(ARGSIZE - 1); \
    ARGTYPE##4 ret_val; \
    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }

#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, \
                                            MASKTYPE##8 mask) { \
    mask &= (MASKTYPE##8)(ARGSIZE - 1); \
    ARGTYPE##8 ret_val; \
    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }

#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
  _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, \
                                             MASKTYPE##16 mask) { \
    mask &= (MASKTYPE##16)(ARGSIZE - 1); \
    ARGTYPE##16 ret_val; \
    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
    return ret_val; \
  }
|
||||
|
||||
// For one input vector shape (element type INTYPE, width ARGSIZE) and mask
// element type MASKTYPE, emits the lane-lookup helper followed by the four
// shuffle() definitions, one per mask width (2, 4, 8, 16).
// The last line deliberately has no trailing backslash, so the macro cannot
// absorb whatever line happens to follow it.
#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
|
||||
|
||||
// For element type TYPE and mask element type MASKTYPE, emits the helper and
// shuffle() definitions for every input vector width (2, 4, 8, 16).
// The last line deliberately has no trailing backslash, so the macro cannot
// absorb whatever line happens to follow it.
#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
|
||||
|
||||
|
||||
|
||||
// Emit the shuffle() definitions for every supported element type. The second
// argument of each invocation is the unsigned integer type used for the mask
// elements. The double and half variants are emitted only when the
// corresponding extension macro is defined, and the extension is enabled with
// a pragma before the types are used.
_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
|
||||
#endif
|
||||
|
||||
// All generator macros defined in this file are implementation details;
// undefine every one of them now that the definitions have been emitted.
#undef _CLC_ELEMENT_CASES2
|
||||
#undef _CLC_ELEMENT_CASES4
|
||||
#undef _CLC_ELEMENT_CASES8
|
||||
#undef _CLC_ELEMENT_CASES16
|
||||
#undef _CLC_GET_ELEMENT_DEFINE
|
||||
#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
|
||||
#undef _CLC_SHUFFLE_SET_2_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_4_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_8_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_SET_16_ELEMENTS
|
||||
#undef _CLC_SHUFFLE_DEFINE2
|
||||
#undef _CLC_SHUFFLE_DEFINE4
|
||||
#undef _CLC_SHUFFLE_DEFINE8
|
||||
#undef _CLC_SHUFFLE_DEFINE16
|
||||
#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
|
||||
#undef _CLC_VECTOR_SHUFFLE_INSIZE
|
||||
Loading…
Reference in New Issue