[mlir] Remove old "tc" linalg ods generator.
* This could have been removed some time ago, as it only had one op left in it, which is redundant with the new approach.
* `matmul_i8_i8_i32` (the remaining op) can be trivially replaced by `matmul`, which natively supports mixed precision.

Differential Revision: https://reviews.llvm.org/D110792
This commit is contained in:
parent
9232ca4712
commit
267bb194f3
|
|
@ -1,36 +1,3 @@
|
|||
# Declare a function to generate ODS with mlir-linalg-ods-gen
|
||||
function(add_linalg_ods_tc_gen tc_filename output_file)
|
||||
set(TC_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/${tc_filename})
|
||||
set(GEN_ODS_FILE ${CMAKE_CURRENT_BINARY_DIR}/${output_file}.tcgen.td)
|
||||
set(GEN_CPP_FILE ${CMAKE_CURRENT_BINARY_DIR}/${output_file}.tcgen.cpp.inc)
|
||||
set_source_files_properties(
|
||||
${GEN_ODS_FILE}
|
||||
PROPERTIES GENERATED TRUE)
|
||||
set_source_files_properties(
|
||||
${GEN_CPP_FILE}
|
||||
PROPERTIES GENERATED TRUE)
|
||||
add_custom_command(
|
||||
OUTPUT ${GEN_ODS_FILE} ${GEN_CPP_FILE}
|
||||
COMMAND ${MLIR_LINALG_ODS_GEN_EXE} -gen-ods-decl ${TC_SOURCE} > ${GEN_ODS_FILE}
|
||||
COMMAND ${MLIR_LINALG_ODS_GEN_EXE} -gen-impl ${TC_SOURCE} > ${GEN_CPP_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${TC_SOURCE}
|
||||
DEPENDS
|
||||
${MLIR_LINALG_ODS_GEN_EXE}
|
||||
${MLIR_LINALG_ODS_GEN_TARGET}
|
||||
VERBATIM)
|
||||
add_custom_target(
|
||||
MLIR${output_file}TcIncGen
|
||||
DEPENDS
|
||||
${MLIR_LINALG_ODS_GEN_EXE}
|
||||
${MLIR_LINALG_ODS_GEN_TARGET}
|
||||
${GEN_ODS_FILE} ${GEN_CPP_FILE})
|
||||
# Setup the file dependencies needed for the subsequent tablegen step.
|
||||
# TODO: Once there is only one way of generating named ops remove this parent
|
||||
# scope manipulation and implement the tablegen generation in the same scope.
|
||||
set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} ${GEN_ODS_FILE} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# Declare a function to generate ODS with mlir-linalg-ods-yaml-gen
|
||||
function(add_linalg_ods_yaml_gen yaml_ast_file output_file)
|
||||
set(YAML_AST_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/${yaml_ast_file})
|
||||
|
|
@ -56,25 +23,21 @@ function(add_linalg_ods_yaml_gen yaml_ast_file output_file)
|
|||
${MLIR_LINALG_ODS_YAML_GEN_EXE}
|
||||
${MLIR_LINALG_ODS_YAML_GEN_TARGET}
|
||||
${GEN_ODS_FILE} ${GEN_CPP_FILE})
|
||||
# Setup the file dependencies needed for the subsequent tablegen step.
|
||||
# TODO: Once there is only one way of generating named ops remove this parent
|
||||
# scope manipulation and implement the tablegen generation in the same scope.
|
||||
set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} ${GEN_ODS_FILE} PARENT_SCOPE)
|
||||
list(APPEND LLVM_TARGET_DEPENDS ${GEN_ODS_FILE})
|
||||
set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# TODO: Delete tc generation and replace with the YAML variant once all ops are
|
||||
# ported. At the same time, move the YAML and TableGen generation to the same
|
||||
# scope to avoid the at a distance dependency manipulation via
|
||||
# LLVM_TARGET_DEPENDS.
|
||||
# NOTE: LLVM_TARGET_DEPENDS gets picked up by tablegen targets to add file
|
||||
# level dependencies. This is gross but CMake requires depending on both
|
||||
# targets and generated files, and it must be done when the custom target is
|
||||
# declared (there is no way to add after the fact).
|
||||
set(LLVM_TARGET_DEPENDS "")
|
||||
add_linalg_ods_tc_gen(LinalgNamedStructuredOpsSpec.tc LinalgNamedStructuredOps)
|
||||
add_linalg_ods_yaml_gen(LinalgNamedStructuredOps.yaml LinalgNamedStructuredOps)
|
||||
|
||||
# Provide a short name for all external dependencies that need to
|
||||
# include Linalg in ODS
|
||||
add_custom_target(LinalgOdsGen
|
||||
DEPENDS
|
||||
MLIRLinalgNamedStructuredOpsTcIncGen
|
||||
MLIRLinalgNamedStructuredOpsYamlIncGen
|
||||
)
|
||||
add_dependencies(mlir-headers LinalgOdsGen)
|
||||
|
|
|
|||
|
|
@ -1,7 +0,0 @@
|
|||
ods_def<MatmulI8I8I32Op>
|
||||
implements_interface<LinalgContractionOpInterface> :
|
||||
def matmul_i8_i8_i32(A: i8(M, K), B: i8(K, N)) -> (C: i32(M, N)) {
|
||||
// TODO: ideally something closer to
|
||||
// C(m, n) += cast<i32>(A(m, k)) * cast<i32>(B(k, n))
|
||||
C(m, n) = AddIOp<k>(C(m, n), MulIOp(SignExtendIOp32(A(m, k)), SignExtendIOp32(B(k, n))));
|
||||
}
|
||||
|
|
@ -671,8 +671,6 @@ def GenericOp : LinalgStructuredBase_Op<"generic", [
|
|||
// Named Linalg ops, implemented as a declarative configurations of generic ops.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// This file is auto-generated from a TC def specification.
|
||||
include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.tcgen.td"
|
||||
include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yamlgen.td"
|
||||
|
||||
#endif // LINALG_STRUCTURED_OPS
|
||||
|
|
|
|||
|
|
@ -2687,7 +2687,6 @@ DEFINE_POOLING_OP_GET_EFFECTS(PoolingMaxOp)
|
|||
DEFINE_POOLING_OP_GET_EFFECTS(PoolingMinOp)
|
||||
DEFINE_POOLING_OP_GET_EFFECTS(PoolingSumOp)
|
||||
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.tcgen.cpp.inc"
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yamlgen.cpp.inc"
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ set(MLIR_TEST_DEPENDS
|
|||
mlir-capi-pass-test
|
||||
mlir-capi-sparse-tensor-test
|
||||
mlir-cpu-runner
|
||||
mlir-linalg-ods-gen
|
||||
mlir-linalg-ods-yaml-gen
|
||||
mlir-lsp-server
|
||||
mlir-opt
|
||||
|
|
|
|||
|
|
@ -26,14 +26,14 @@ func @matmul_tensors(
|
|||
// CHECK: : tensor<?x?xi8> to tensor<4x3xi8>
|
||||
// CHECK: %[[pC:.*]] = linalg.pad_tensor %[[sTC]] packing low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]
|
||||
// CHECK: : tensor<?x?xi32> to tensor<2x3xi32>
|
||||
// CHECK: %[[pD:.*]] = linalg.matmul_i8_i8_i32 ins(%[[pA]], %[[pB]] : tensor<2x4xi8>, tensor<4x3xi8>)
|
||||
// CHECK: %[[pD:.*]] = linalg.matmul ins(%[[pA]], %[[pB]] : tensor<2x4xi8>, tensor<4x3xi8>)
|
||||
// CHECK-SAME: outs(%[[pC]] : tensor<2x3xi32>) -> tensor<2x3xi32>
|
||||
// CHECK: %[[sTD:.*]] = tensor.extract_slice %[[pD]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<2x3xi32> to tensor<?x?xi32>
|
||||
// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xi32> into tensor<?x?xi32>
|
||||
// CHECK: scf.yield %[[TD]] : tensor<?x?xi32>
|
||||
// CHECK: scf.yield %[[TD2]] : tensor<?x?xi32>
|
||||
// CHECK: scf.yield %[[TD1]] : tensor<?x?xi32>
|
||||
%0 = linalg.matmul_i8_i8_i32 {__internal_linalg_transform__ = "tile"}
|
||||
%0 = linalg.matmul {__internal_linalg_transform__ = "tile"}
|
||||
ins(%arg0, %arg1: tensor<?x?xi8>, tensor<?x?xi8>)
|
||||
outs(%arg2: tensor<?x?xi32>)
|
||||
-> tensor<?x?xi32>
|
||||
|
|
@ -82,19 +82,19 @@ func @generic_scalar_and_tensor(
|
|||
// CHECK-1DIM-TILE: %[[TB:[0-9a-z]+]]: tensor<?x?xi8>
|
||||
// CHECK-1DIM-TILE: %[[TC:[0-9a-z]+]]: tensor<?x?xi32>) -> tensor<?x?xi32> {
|
||||
// CHECK-1DIM-TILE-NOT: scf.for
|
||||
// CHECK-1DIM-TILE: linalg.matmul_i8_i8_i32 ins(%[[TA]], %[[TB]] : tensor<?x?xi8>, tensor<?x?xi8>) outs(%[[TC]] : tensor<?x?xi32>) -> tensor<?x?xi32>
|
||||
// CHECK-1DIM-TILE: linalg.matmul ins(%[[TA]], %[[TB]] : tensor<?x?xi8>, tensor<?x?xi8>) outs(%[[TC]] : tensor<?x?xi32>) -> tensor<?x?xi32>
|
||||
|
||||
func @matmul_partially_padded_tensors(
|
||||
%arg0: tensor<?x8xi8>, %arg1: tensor<8x?xi8>, %arg2: tensor<?x?xi32>)
|
||||
-> tensor<?x?xi32> {
|
||||
%0 = linalg.matmul_i8_i8_i32 {__internal_linalg_transform__ = "tile"}
|
||||
%0 = linalg.matmul {__internal_linalg_transform__ = "tile"}
|
||||
ins(%arg0, %arg1: tensor<?x8xi8>, tensor<8x?xi8>)
|
||||
outs(%arg2: tensor<?x?xi32>)
|
||||
-> tensor<?x?xi32>
|
||||
return %0 : tensor<?x?xi32>
|
||||
}
|
||||
// CHECK-LABEL: func @matmul_partially_padded_tensors(
|
||||
// CHECK: linalg.matmul_i8_i8_i32 ins({{.*}}, {{.*}} : tensor<2x4xi8>, tensor<4x3xi8>) outs({{.*}} : tensor<2x3xi32>) -> tensor<2x3xi32>
|
||||
// CHECK: linalg.matmul ins({{.*}}, {{.*}} : tensor<2x4xi8>, tensor<4x3xi8>) outs({{.*}} : tensor<2x3xi32>) -> tensor<2x3xi32>
|
||||
|
||||
|
||||
// Check that only the input operands are padded.
|
||||
|
|
@ -112,7 +112,7 @@ func @matmul_partially_padded_tensors(
|
|||
// CHECK-1DIM-TILE: : tensor<?x8xi8> to tensor<2x8xi8>
|
||||
// CHECK-1DIM-TILE: %[[pB:.*]] = linalg.pad_tensor %[[sTB]] packing low[%[[C0]], %[[C0]]] high[%{{.*}}, %{{.*}}]
|
||||
// CHECK-1DIM-TILE: : tensor<8x?xi8> to tensor<8x3xi8>
|
||||
// CHECK-1DIM-TILE: %[[pD:.*]] = linalg.matmul_i8_i8_i32 ins(%[[pA]], %[[pB]] : tensor<2x8xi8>, tensor<8x3xi8>)
|
||||
// CHECK-1DIM-TILE: %[[pD:.*]] = linalg.matmul ins(%[[pA]], %[[pB]] : tensor<2x8xi8>, tensor<8x3xi8>)
|
||||
// CHECK-1DIM-TILE: outs(%[[sTC]] : tensor<?x?xi32>) -> tensor<?x?xi32>
|
||||
|
||||
// Check that the tile-and-pad transformation actually introduces the padding
|
||||
|
|
|
|||
|
|
@ -538,35 +538,6 @@ func @matmul_tensors(
|
|||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @matmul_i8_i8_i32
|
||||
// CHECK-SAME: %[[ARG0:[a-z0-9]+]]: memref<4x6xi8>
|
||||
// CHECK-SAME: %[[ARG1:[a-z0-9]+]]: memref<6x12xi8>
|
||||
// CHECK-SAME: %[[ARG2:[a-z0-9]+]]: memref<4x12xi32>
|
||||
func @matmul_i8_i8_i32(%a: memref<4x6xi8>, %b: memref<6x12xi8>, %c: memref<4x12xi32>) {
|
||||
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
|
||||
// CHECK-DAG: %[[VEC_C0:.*]] = constant dense<0> : vector<4x12xi32>
|
||||
// CHECK-DAG: %[[V0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x6xi8>, vector<4x6xi8>
|
||||
// CHECK-DAG: %[[V1:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<6x12xi8>, vector<12x6xi8>
|
||||
// CHECK-DAG: %[[V2:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], {{.*}} : memref<4x12xi32>, vector<4x12xi32>
|
||||
// CHECK-DAG: %[[V0_32:.*]] = sexti %[[V0]] : vector<4x6xi8> to vector<4x6xi32>
|
||||
// CHECK-DAG: %[[V1_32:.*]] = sexti %[[V1]] : vector<12x6xi8> to vector<12x6xi32>
|
||||
//
|
||||
// linalg contraction lowers to %tmp = vector.contract %a, %b, %c0 followed by addi %c, %tmp.
|
||||
// a later canonicalization fuses the add into vector.contract.
|
||||
// CHECK: %[[C:.*]] = vector.contract
|
||||
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
|
||||
// CHECK-SAME: %[[V0_32]], %[[V1_32]], %[[VEC_C0]]
|
||||
// CHECK-SAME: vector<4x6xi32>, vector<12x6xi32> into vector<4x12xi32>
|
||||
// CHECK: %[[RES:.*]] = addi %[[V2]], %[[C]] : vector<4x12xi32>
|
||||
// CHECK: vector.transfer_write %[[RES]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]}
|
||||
// CHECK-SAME: vector<4x12xi32>, memref<4x12xi32>
|
||||
linalg.matmul_i8_i8_i32 ins(%a, %b : memref<4x6xi8>, memref<6x12xi8>)
|
||||
outs(%c: memref<4x12xi32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @pad_static(
|
||||
// CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
|
||||
// CHECK-NOT: linalg.pad_tensor
|
||||
|
|
|
|||
|
|
@ -1,111 +0,0 @@
|
|||
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
|
||||
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
|
||||
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul_i8_i8_i32 register-tile-sizes=12,32,16 vectorize" | \
|
||||
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \
|
||||
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
|
||||
// RUN: mlir-opt -canonicalize -convert-vector-to-scf -lower-affine -convert-linalg-to-loops | \
|
||||
|
||||
// RUN: mlir-opt -canonicalize -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts -mlir-disable-threading | \
|
||||
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
|
||||
// Activate to dump assembly
|
||||
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
|
||||
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
|
||||
// Use tee to both print to stderr and FileCheck
|
||||
// RUN: tee -a /dev/stderr | FileCheck %s
|
||||
|
||||
|
||||
!elem_type_a = type i8
|
||||
!elem_type_b = type i8
|
||||
!elem_type_c = type i32
|
||||
!row_major_A = type memref<${M}x${K}x!elem_type_a>
|
||||
!row_major_B = type memref<${K}x${N}x!elem_type_b>
|
||||
!row_major_C = type memref<${M}x${N}x!elem_type_c>
|
||||
|
||||
func @matmul(%a: !row_major_A, %b: !row_major_B, %c: !row_major_C)
|
||||
// TODO: activate manually for now.
|
||||
// attributes { passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
|
||||
{
|
||||
linalg.matmul_i8_i8_i32 ins(%a, %b : !row_major_A, !row_major_B)
|
||||
outs(%c: !row_major_C)
|
||||
return
|
||||
}
|
||||
|
||||
func @print_perf(%iters: index, %total_time: f64) {
|
||||
%c2 = constant 2 : index
|
||||
%cM = constant ${M} : index
|
||||
%cN = constant ${N} : index
|
||||
%cK = constant ${K} : index
|
||||
|
||||
%mn = muli %cM, %cN : index
|
||||
%mnk = muli %mn, %cK : index
|
||||
|
||||
// 2*M*N*K.
|
||||
%flops_per_iter = muli %c2, %mnk : index
|
||||
%flops = muli %iters, %flops_per_iter : index
|
||||
%flops_i64 = index_cast %flops : index to i64
|
||||
%flops_f = sitofp %flops_i64 : i64 to f64
|
||||
%flops_per_s = divf %flops_f, %total_time : f64
|
||||
vector.print %flops_per_s : f64
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func @main() {
|
||||
%v0 = constant 0 : !elem_type_c
|
||||
%v1 = constant 1 : !elem_type_a
|
||||
|
||||
%A = memref.alloc() : !row_major_A
|
||||
%B = memref.alloc() : !row_major_B
|
||||
%C = memref.alloc() : !row_major_C
|
||||
|
||||
linalg.fill(%v1, %A) : !elem_type_a, !row_major_A
|
||||
linalg.fill(%v1, %B) : !elem_type_b, !row_major_B
|
||||
linalg.fill(%v0, %C) : !elem_type_c, !row_major_C
|
||||
|
||||
%c0 = constant 0: index
|
||||
%c1 = constant 1: index
|
||||
%iters = constant 100: index
|
||||
|
||||
/// Run and dump performance for matmul.
|
||||
/// Preheating run:
|
||||
scf.for %arg0 = %c0 to %iters step %c1 {
|
||||
linalg.fill(%v0, %C) : !elem_type_c, !row_major_C
|
||||
call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
|
||||
}
|
||||
%t_start_matmul = call @rtclock() : () -> f64
|
||||
scf.for %arg0 = %c0 to %iters step %c1 {
|
||||
// linalg.matmul writes %C in place, need to reset it to zero every time.
|
||||
// This accounts for about a 10-15% perf hit on small sizes.
|
||||
// Once linalg on tensors is ready, fusing fill at the register level will
|
||||
// be easy.
|
||||
linalg.fill(%v0, %C) : !elem_type_c, !row_major_C
|
||||
call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
|
||||
}
|
||||
%t_end_matmul = call @rtclock() : () -> f64
|
||||
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
|
||||
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
|
||||
|
||||
// CHECK: {{^0$}}
|
||||
%C_ref = memref.alloc() : !row_major_C
|
||||
linalg.fill(%v0, %C_ref) : !elem_type_c, !row_major_C
|
||||
linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B)
|
||||
outs(%C_ref: !row_major_C)
|
||||
%res = memref.cast %C : !row_major_C to memref<*xi32>
|
||||
%exp = memref.cast %C_ref : !row_major_C to memref<*xi32>
|
||||
%errors = call @verifyMemRefI32(%res, %exp) : (memref<*xi32>, memref<*xi32>) -> i64
|
||||
vector.print %errors : i64
|
||||
memref.dealloc %C_ref : !row_major_C
|
||||
|
||||
memref.dealloc %A : !row_major_A
|
||||
memref.dealloc %B : !row_major_B
|
||||
memref.dealloc %C : !row_major_C
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func private @rtclock() -> f64
|
||||
func private @verifyMemRefI32(memref<*xi32>, memref<*xi32>) -> i64 attributes { llvm.emit_c_interface }
|
||||
|
||||
// TODO: init with random, run and check output.
|
||||
// func private @fill_random_f32(memref<*xf32>)
|
||||
|
|
@ -609,7 +609,6 @@ static void applyTilePattern(FuncOp funcOp, std::string loopType,
|
|||
linalgTilingOptions.setPaddingValueComputationFunction(paddingFunc);
|
||||
}
|
||||
tilingPattern.add<linalg::LinalgTilingPattern<linalg::MatmulOp>,
|
||||
linalg::LinalgTilingPattern<linalg::MatmulI8I8I32Op>,
|
||||
linalg::LinalgTilingPattern<linalg::GenericOp>>(
|
||||
context, linalgTilingOptions,
|
||||
linalg::LinalgTransformationFilter(Identifier::get("tile", context)));
|
||||
|
|
|
|||
|
|
@ -62,7 +62,6 @@ tools = [
|
|||
'mlir-capi-ir-test',
|
||||
'mlir-capi-pass-test',
|
||||
'mlir-cpu-runner',
|
||||
'mlir-linalg-ods-gen',
|
||||
'mlir-linalg-ods-yaml-gen',
|
||||
'mlir-reduce',
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,209 +0,0 @@
|
|||
// RUN: mlir-linalg-ods-gen %s -gen-ods-decl=1 | FileCheck %s --check-prefix=ODS
|
||||
// RUN: mlir-linalg-ods-gen %s -gen-impl=1 | FileCheck %s --check-prefix=IMPL
|
||||
|
||||
// ODS-LABEL: def Test1Op : LinalgStructuredBase_Op<"test1", [
|
||||
// ODS-NEXT: AttrSizedOperandSegments
|
||||
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
|
||||
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
|
||||
//
|
||||
// IMPL-LABEL: ArrayAttr Test1Op::iterator_types() {
|
||||
// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
|
||||
//
|
||||
// IMPL: ArrayAttr Test1Op::indexing_maps() {
|
||||
// IMPL: auto s0 = getAffineSymbolExpr(0, context); (void)s0;
|
||||
// IMPL-NEXT: auto s1 = getAffineSymbolExpr(1, context); (void)s1;
|
||||
// IMPL-NEXT: auto map0 = AffineMap::get(2, 2, {d0, d1}, context);
|
||||
// IMPL-NEXT: map0 = map0.replaceDimsAndSymbols({}, { s0, s1 }, 2, 0);
|
||||
// IMPL-NEXT: map0 = simplifyAffineMap(map0);
|
||||
// IMPL-NEXT: auto map1 = AffineMap::get(2, 2, {d1}, context);
|
||||
// IMPL-NEXT: map1 = map1.replaceDimsAndSymbols({}, { s0, s1 }, 2, 0);
|
||||
// IMPL-NEXT: map1 = simplifyAffineMap(map1);
|
||||
// IMPL-NEXT: auto map2 = AffineMap::get(2, 2, {d0}, context);
|
||||
// IMPL-NEXT: map2 = map2.replaceDimsAndSymbols({}, { s0, s1 }, 2, 0);
|
||||
// IMPL-NEXT: map2 = simplifyAffineMap(map2);
|
||||
// IMPL-NEXT: return {{.+}}.getAffineMapArrayAttr({ map0, map1, map2 });
|
||||
//
|
||||
// IMPL: void Test1Op::regionBuilder(ImplicitLocOpBuilder &b,
|
||||
// IMPL: Block &block) {
|
||||
// IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
|
||||
// IMPL: Value [[d:.*]] = b.create<MulFOp>([[a]], [[b]]);
|
||||
// IMPL: Value [[e:.*]] = b.create<AddFOp>([[c]], [[d]]);
|
||||
// IMPL: b.create<linalg::YieldOp>(ValueRange{ [[e]] });
|
||||
//
|
||||
ods_def<Test1Op> :
|
||||
def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) {
|
||||
C(m) = AddFOp<k>(C(m), MulFOp(A(m, k), B(k)));
|
||||
}
|
||||
|
||||
// ODS-LABEL: def Test2Op : LinalgStructuredBase_Op<"test2", [
|
||||
// ODS-NEXT: AttrSizedOperandSegments
|
||||
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
|
||||
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
|
||||
//
|
||||
// IMPL-LABEL: ArrayAttr Test2Op::iterator_types() {
|
||||
// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
|
||||
//
|
||||
// IMPL: ArrayAttr Test2Op::indexing_maps() {
|
||||
// IMPL: AffineMap::get(3, 3, {d0, d2}, context)
|
||||
// IMPL: AffineMap::get(3, 3, {d2, d1}, context)
|
||||
// IMPL: AffineMap::get(3, 3, {d0, d1}, context)
|
||||
//
|
||||
// IMPL: Test2Op::regionBuilder(ImplicitLocOpBuilder &b,
|
||||
// IMPL: Block &block) {
|
||||
// IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
|
||||
// IMPL: Value [[d:.*]] = b.create<MulFOp>([[a]], [[b]]);
|
||||
// IMPL: Value [[e:.*]] = b.create<AddFOp>([[c]], [[d]]);
|
||||
// IMPL: b.create<linalg::YieldOp>(ValueRange{ [[e]] });
|
||||
//
|
||||
ods_def<Test2Op> :
|
||||
def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) {
|
||||
C(m, n) = AddFOp<k>(C(m, n), MulFOp(A(m, k), B(k, n)));
|
||||
}
|
||||
|
||||
// ODS-LABEL: def Test3Op : LinalgStructuredBase_Op<"test3", [
|
||||
// ODS-NEXT: AttrSizedOperandSegments
|
||||
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
|
||||
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
|
||||
//
|
||||
// IMPL-LABEL: ArrayAttr Test3Op::iterator_types() {
|
||||
// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} }
|
||||
//
|
||||
// IMPL: ArrayAttr Test3Op::indexing_maps() {
|
||||
// IMPL: AffineMap::get(4, 4, {d0, d1, d3}, context)
|
||||
// IMPL: AffineMap::get(4, 4, {d3, d2}, context)
|
||||
// IMPL: AffineMap::get(4, 4, {d0, d1, d2}, context)
|
||||
//
|
||||
// IMPL: Test3Op::regionBuilder(ImplicitLocOpBuilder &b,
|
||||
// IMPL: Block &block) {
|
||||
// IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
|
||||
// IMPL: Value [[d:.*]] = b.create<MulFOp>([[a]], [[b]]);
|
||||
// IMPL: Value [[e:.*]] = b.create<AddFOp>([[c]], [[d]]);
|
||||
// IMPL: b.create<linalg::YieldOp>(ValueRange{ [[e]] });
|
||||
//
|
||||
ods_def<Test3Op> :
|
||||
def test3(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N)) {
|
||||
C(b, m, n) = AddFOp<k>(C(b, m, n), MulFOp(A(b, m, k), B(k, n)));
|
||||
}
|
||||
|
||||
// Test attribute definitions
|
||||
// ODS-LABEL: def Test4Op
|
||||
// ODS: F32ArrayAttr:$array_attr,
|
||||
// ODS: F32Attr:$f32_attr,
|
||||
// ODS: RankedF32ElementsAttr<[4]>:$fvec_attr,
|
||||
// ODS: I32Attr:$i32_attr,
|
||||
// ODS: I64Attr:$i64_attr,
|
||||
// ODS: RankedI32ElementsAttr<[5, 6]>:$ivec_attr,
|
||||
// ODS: OptionalAttr<F32Attr>:$optional_attr
|
||||
//
|
||||
// ODS: bool hasDynamicIndexingMaps();
|
||||
// ODS: LogicalResult verifyIndexingMapRequiredAttributes();
|
||||
//
|
||||
// IMPL: bool Test4Op::hasDynamicIndexingMaps() { return true; }
|
||||
// IMPL: LogicalResult Test4Op::verifyIndexingMapRequiredAttributes()
|
||||
// IMPL: op->getAttrOfType<ArrayAttr>("array_attr")
|
||||
// IMPL: op->getAttr("f32_attr")
|
||||
// IMPL: op->getAttrOfType<DenseElementsAttr>("fvec_attr")
|
||||
// IMPL: op->getAttr("i32_attr")
|
||||
// IMPL: op->getAttr("i64_attr")
|
||||
// IMPL: op->getAttrOfType<DenseElementsAttr>("ivec_attr")
|
||||
//
|
||||
ods_def<Test4Op> :
|
||||
def test4(A: f32(Batch, M, K), B: f32(K, N)) -> (C: f32(Batch, M, N))
|
||||
attr(
|
||||
f32_attr: f32,
|
||||
i32_attr: i32,
|
||||
i64_attr: i64,
|
||||
fvec_attr: 4xf32,
|
||||
ivec_attr: 5x6xi32,
|
||||
array_attr : f32[],
|
||||
optional_attr? : f32
|
||||
) {
|
||||
C(b, m, n) = AddFOp<k>(C(b, m, n), MulFOp(A(b, m, k), B(k, n)));
|
||||
}
|
||||
|
||||
// Test attribute usage in affine expressions
|
||||
// IMPL-LABEL: ArrayAttr Test5Op::indexing_maps() {
|
||||
// IMPL: auto cst0 = getAffineConstantExpr(strides().getValue<int>({ 0 }), context);
|
||||
// IMPL: auto cst1 = getAffineConstantExpr(strides().getValue<int>({ 1 }), context);
|
||||
// IMPL: auto map0 = AffineMap::get(7, 9, {d0, d1 * s7 + d4, d2 * s8 + d5, d6}, context);
|
||||
// IMPL: map0 = map0.replaceDimsAndSymbols({}, { s0, s1, s2, s3, s4, s5, s6, cst0, cst1 }, 7, 0);
|
||||
// IMPL: map0 = simplifyAffineMap(map0);
|
||||
// IMPL: auto map1 = AffineMap::get(7, 9, {d3, d4, d5, d6}, context);
|
||||
// IMPL: map1 = map1.replaceDimsAndSymbols({}, { s0, s1, s2, s3, s4, s5, s6, cst0, cst1 }, 7, 0);
|
||||
// IMPL: map1 = simplifyAffineMap(map1);
|
||||
// IMPL: auto map2 = AffineMap::get(7, 7, {d0, d1, d2, d3}, context);
|
||||
// IMPL: map2 = map2.replaceDimsAndSymbols({}, { s0, s1, s2, s3, s4, s5, s6, cst0, cst1 }, 7, 0);
|
||||
// IMPL: map2 = simplifyAffineMap(map2);
|
||||
// IMPL: return {{.+}}.getAffineMapArrayAttr({ map0, map1, map2 });
|
||||
//
|
||||
ods_def<Test5Op>:
|
||||
def test5(I: f32(N, H, W, C), K: f32(F, KH, KW, C)) -> (O: f32(N, H, W, F))
|
||||
attr(strides: 2xi32) {
|
||||
O(n, h, w, f) = AddFOp<kh, kw>(
|
||||
MulFOp(AddFOp(I(n, h * strides[0] + kh, w * strides[1] + kw, c),
|
||||
I(n, h * strides[0] + kh, w * strides[1] + kw, c)),
|
||||
K(f, kh, kw, c)));
|
||||
}
|
||||
|
||||
// Test documentation
|
||||
// ODS-LABEL: def Test6Op
|
||||
// ODS: let summary = [{ My magic op. }];
|
||||
// ODS-NEXT: let description = [{
|
||||
// ODS-NEXT: It has two inputs.
|
||||
// ODS-NEXT: It has one output.
|
||||
// ODS-NEXT: }];
|
||||
//
|
||||
ods_def<Test6Op>:
|
||||
def test6(A: f32(M, K), B: f32(K)) -> (C: f32(M))
|
||||
"""
|
||||
My magic op.
|
||||
|
||||
It has two inputs.
|
||||
It has one output.
|
||||
"""
|
||||
{
|
||||
C(m) = AddFOp<k>(C(m), MulFOp(A(m, k), B(k)));
|
||||
}
|
||||
|
||||
// Test attribute builder
|
||||
// ODS-LABEL: def Test7Op
|
||||
// ODS: OpBuilder<
|
||||
// ODS: (ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
|
||||
// ODS: "ValueRange":$outputs, "Attribute":$attr_a, "Attribute":$attr_b,
|
||||
// ODS: CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)
|
||||
// ODS: $_state.addAttribute("attr_a", attr_a);
|
||||
// ODS: $_state.addAttribute("attr_b", attr_b);
|
||||
//
|
||||
ods_def<Test7Op>:
|
||||
def test7(A: f32(M, K), B: f32(K)) -> (C: f32(M))
|
||||
attr(attr_a: f32, attr_b: 4xi32)
|
||||
{
|
||||
C(m) = AddFOp<k>(C(m), MulFOp(A(m, k), B(k)));
|
||||
}
|
||||
|
||||
// Test output arg order.
|
||||
// IMPL-LABEL: void Test8Op::regionBuilder(ImplicitLocOpBuilder &b,
|
||||
// IMPL: Block &block) {
|
||||
// IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]);
|
||||
// IMPL: Value [[d:.*]] = b.create<MulFOp>([[a]], [[b]]);
|
||||
// IMPL: Value [[e:.*]] = b.create<SubFOp>([[d]], [[c]]);
|
||||
// IMPL: b.create<linalg::YieldOp>(ValueRange{ [[e]] });
|
||||
ods_def<Test8Op>:
|
||||
def test8(A: f32(M, K), B: f32(K)) -> (C: f32(M))
|
||||
{
|
||||
C(m) = SubFOp<k>(MulFOp(A(m, k), B(k)), C(m));
|
||||
}
|
||||
|
||||
// Test shape-only operand.
|
||||
// IMPL-LABEL: ArrayAttr Test9Op::indexing_maps() {
|
||||
// IMPL: auto map0 = AffineMap::get(2, 2, {d0, d1}, context);
|
||||
// IMPL: auto map1 = AffineMap::get(2, 2, {d1}, context);
|
||||
// IMPL: auto map2 = AffineMap::get(2, 2, {d0}, context);
|
||||
// IMPL-LABEL: void Test9Op::regionBuilder(ImplicitLocOpBuilder &b,
|
||||
// IMPL: Block &block) {
|
||||
// IMPL: Value [[a:.*]](args[0]), [[c:.*]](args[2]);
|
||||
ods_def<Test9Op>:
|
||||
def test9(A: f32(M, K), B: f32(K)) -> (C: f32(M))
|
||||
{
|
||||
C(m) = AddFOp<k>(C(m), A(m, k));
|
||||
}
|
||||
|
|
@ -3,42 +3,6 @@ set(LLVM_LINK_COMPONENTS
|
|||
Support
|
||||
)
|
||||
|
||||
set(LLVM_OPTIONAL_SOURCES
|
||||
mlir-linalg-ods-gen.cpp
|
||||
mlir-linalg-ods-yaml-gen.cpp
|
||||
)
|
||||
|
||||
# Original mlir-linalg-ods-gen (to be replaced).
|
||||
add_llvm_tool(mlir-linalg-ods-gen
|
||||
mlir-linalg-ods-gen.cpp
|
||||
)
|
||||
llvm_update_compile_flags(mlir-linalg-ods-gen)
|
||||
target_link_libraries(mlir-linalg-ods-gen PRIVATE
|
||||
MLIRSupport
|
||||
MLIRIR
|
||||
)
|
||||
|
||||
set(MLIR_LINALG_ODS_GEN mlir-linalg-ods-gen CACHE
|
||||
STRING "Native mlir-linalg-ods-gen executable. Saves building one when cross-compiling.")
|
||||
|
||||
set(MLIR_LINALG_ODS_GEN_EXE ${MLIR_LINALG_ODS_GEN} PARENT_SCOPE)
|
||||
set(MLIR_LINALG_ODS_GEN_TARGET mlir-linalg-ods-gen PARENT_SCOPE)
|
||||
|
||||
if(LLVM_USE_HOST_TOOLS)
|
||||
if (${MLIR_LINALG_ODS_GEN} STREQUAL "mlir-linalg-ods-gen")
|
||||
build_native_tool(mlir-linalg-ods-gen MLIR_LINALG_ODS_GEN_EXE DEPENDS mlir-linalg-ods-gen)
|
||||
set(MLIR_LINALG_ODS_GEN_EXE ${MLIR_LINALG_ODS_GEN_EXE} PARENT_SCOPE)
|
||||
|
||||
add_custom_target(mlir-linalg-ods-gen-host DEPENDS ${MLIR_LINALG_ODS_GEN_EXE})
|
||||
set(MLIR_LINALG_ODS_GEN_TARGET mlir-linalg-ods-gen-host DEPENDS PARENT_SCOPE)
|
||||
|
||||
if(NOT LLVM_BUILD_UTILS)
|
||||
set_target_properties(mlir-linalg-ods-gen PROPERTIES EXCLUDE_FROM_ALL ON)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
# New mlir-linalg-ods-yaml-gen.
|
||||
add_llvm_tool(mlir-linalg-ods-yaml-gen
|
||||
mlir-linalg-ods-yaml-gen.cpp
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -5465,20 +5465,6 @@ cc_binary(
|
|||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "mlir-linalg-ods-gen",
|
||||
srcs = [
|
||||
"tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp",
|
||||
],
|
||||
deps = [
|
||||
":IR",
|
||||
":Support",
|
||||
"//llvm:Support",
|
||||
"//llvm:TableGen",
|
||||
"//llvm:config",
|
||||
],
|
||||
)
|
||||
|
||||
cc_binary(
|
||||
name = "mlir-linalg-ods-yaml-gen",
|
||||
srcs = [
|
||||
|
|
@ -5911,22 +5897,6 @@ gentbl_cc_library(
|
|||
deps = [":LinalgOpsTdFiles"],
|
||||
)
|
||||
|
||||
genlinalg(
|
||||
name = "LinalgNamedStructuredOpsTcIncGen",
|
||||
src = "include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc",
|
||||
linalg_outs = [
|
||||
(
|
||||
"-gen-impl -o=$@",
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.tcgen.cpp.inc",
|
||||
),
|
||||
(
|
||||
"-gen-ods-decl -o=$@",
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.tcgen.td",
|
||||
),
|
||||
],
|
||||
linalggen = ":mlir-linalg-ods-gen",
|
||||
)
|
||||
|
||||
genlinalg(
|
||||
name = "LinalgNamedStructuredOpsYamlIncGen",
|
||||
src = "include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml",
|
||||
|
|
@ -5947,7 +5917,6 @@ td_library(
|
|||
name = "LinalgStructuredOpsTdFiles",
|
||||
srcs = [
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td",
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.tcgen.td",
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yamlgen.td",
|
||||
"include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td",
|
||||
],
|
||||
|
|
@ -6123,7 +6092,6 @@ cc_library(
|
|||
":InferTypeOpInterface",
|
||||
":LinalgInterfaces",
|
||||
":LinalgInterfacesIncGen",
|
||||
":LinalgNamedStructuredOpsTcIncGen",
|
||||
":LinalgNamedStructuredOpsYamlIncGen",
|
||||
":LinalgOpsIncGen",
|
||||
":LinalgStructuredOpsIncGen",
|
||||
|
|
|
|||
Loading…
Reference in New Issue