Add ppcg-0.04 to lib/External
ppcg will be used to provide mapping decisions for GPU code generation. As we do not use C as input language, we do not include pet. However, we include pet.h from pet 82cacb71 plus a set of dummy functions to ensure ppcg links without problems. The version of ppcg committed is unmodified ppcg-0.04 which has been well tested in the context of LLVM. It does not provide an official library interface yet, which means that in upcoming commits we will add minor modifications to make necessary functionality accessible. We will aim to upstream these modifications after we gained enough experience with GPU generation support in Polly to propose a stable interface. Reviewers: Meinersbur Subscribers: pollydev, llvm-commits Differential Revision: http://reviews.llvm.org/D22033 llvm-svn: 275274
This commit is contained in:
parent
4cff2f8d49
commit
a041239bb7
|
|
@ -55,6 +55,10 @@ add_polly_library(Polly
|
|||
${POLLY_HEADER_FILES}
|
||||
)
|
||||
|
||||
# Link the ppcg library (PollyPPCG) into Polly only when GPU code
# generation has been enabled.
if (GPU_CODEGEN)
|
||||
target_link_libraries(Polly PollyPPCG)
|
||||
endif ()
|
||||
|
||||
target_link_libraries(Polly PollyISL)
|
||||
|
||||
if (BUILD_SHARED_LIBS)
|
||||
|
|
|
|||
|
|
@ -5,19 +5,19 @@ set(ISL_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/isl")
|
|||
# Determine version of isl
|
||||
# Resolve the isl revision string, trying in order:
#   1. a GIT_HEAD_ID file in the isl source directory,
#   2. a pre-generated gitversion.h in the isl source directory,
#   3. the literal "UNKNOWN" when neither file exists.
if (EXISTS "${ISL_SOURCE_DIR}/GIT_HEAD_ID")
|
||||
# The source comes from a 'make dist' archive
|
||||
file(READ "${ISL_SOURCE_DIR}/GIT_HEAD_ID" GIT_HEAD_ID)
|
||||
string(STRIP "${GIT_HEAD_ID}" GIT_HEAD_ID)
|
||||
file(READ "${ISL_SOURCE_DIR}/GIT_HEAD_ID" ISL_GIT_HEAD_ID)
|
||||
string(STRIP "${ISL_GIT_HEAD_ID}" ISL_GIT_HEAD_ID)
|
||||
elseif (EXISTS "${ISL_SOURCE_DIR}/gitversion.h")
|
||||
# The source directory is preconfigured
|
||||
file(READ "${ISL_SOURCE_DIR}/gitversion.h" GITVERSION_H)
|
||||
string(REGEX REPLACE ".*\\\"([^\\\"]*)\\\".*" "\\1" GIT_HEAD_ID "${GITVERSION_H}")
|
||||
string(REGEX REPLACE ".*\\\"([^\\\"]*)\\\".*" "\\1" ISL_GIT_HEAD_ID "${GITVERSION_H}")
|
||||
# A plain else() is required here: an elseif() with an empty condition
# evaluates to false, which would make this fallback unreachable.
else ()
|
||||
# Unknown revision
|
||||
# TODO: We could look for a .git and get the revision from HEAD
|
||||
set(GIT_HEAD_ID "UNKNOWN")
|
||||
set(ISL_GIT_HEAD_ID "UNKNOWN")
|
||||
endif ()
|
||||
|
||||
message(STATUS "ISL version: ${GIT_HEAD_ID}")
|
||||
message(STATUS "ISL version: ${ISL_GIT_HEAD_ID}")
|
||||
|
||||
# Enable small integer optimization and imath
|
||||
set(USE_GMP_FOR_MP OFF)
|
||||
|
|
@ -155,7 +155,7 @@ endif ()
|
|||
# Write configure result
|
||||
# configure_file(... COPYONLY) avoids that the time stamp changes if the file is identical
|
||||
file(WRITE "${ISL_BINARY_DIR}/gitversion.h.tmp"
|
||||
"#define GIT_HEAD_ID \"${GIT_HEAD_ID}\"")
|
||||
"#define GIT_HEAD_ID \"${ISL_GIT_HEAD_ID}\"")
|
||||
configure_file("${ISL_BINARY_DIR}/gitversion.h.tmp"
|
||||
"${ISL_BINARY_DIR}/gitversion.h" COPYONLY)
|
||||
|
||||
|
|
@ -278,3 +278,55 @@ if (NOT MSVC)
|
|||
COMPILE_FLAGS "-fvisibility=hidden -w"
|
||||
)
|
||||
endif ()
|
||||
|
||||
set(PET_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pet")
|
||||
set(PPCG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ppcg")
|
||||
set(PPCG_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/ppcg")
|
||||
|
||||
# Determine version of ppcg
#
# PPCG_GIT_HEAD_ID is resolved by trying, in order:
#   1. a GIT_HEAD_ID file in the ppcg source directory,
#   2. a pre-generated gitversion.h in the ppcg source directory
#      (the text between the double quotes is extracted),
#   3. the literal "UNKNOWN" when neither file exists.
|
||||
if (EXISTS "${PPCG_SOURCE_DIR}/GIT_HEAD_ID")
|
||||
# The source comes from a 'make dist' archive
|
||||
file(READ "${PPCG_SOURCE_DIR}/GIT_HEAD_ID" PPCG_GIT_HEAD_ID)
|
||||
string(STRIP "${PPCG_GIT_HEAD_ID}" PPCG_GIT_HEAD_ID)
|
||||
elseif (EXISTS "${PPCG_SOURCE_DIR}/gitversion.h")
|
||||
# The source directory is preconfigured
|
||||
file(READ "${PPCG_SOURCE_DIR}/gitversion.h" GITVERSION_H)
|
||||
string(REGEX REPLACE ".*\\\"([^\\\"]*)\\\".*" "\\1" PPCG_GIT_HEAD_ID "${GITVERSION_H}")
|
||||
# A plain else() is required here: an elseif() with an empty condition
# evaluates to false, which would leave PPCG_GIT_HEAD_ID unset and the
# status message below without a version.
else ()
|
||||
# Unknown revision
|
||||
# TODO: We could look for a .git and get the revision from HEAD
|
||||
set(PPCG_GIT_HEAD_ID "UNKNOWN")
|
||||
endif ()
|
||||
|
||||
message(STATUS "PPCG version: ${PPCG_GIT_HEAD_ID}")
|
||||
|
||||
# C sources of ppcg that are compiled into the PollyPPCG library below.
# NOTE(review): ppcg/external.c is presumably the set of pet dummy functions
# mentioned in the commit message -- confirm.
set (PPCG_FILES
|
||||
ppcg/cuda.c
|
||||
ppcg/cuda_common.c
|
||||
ppcg/gpu_array_tile.c
|
||||
ppcg/gpu.c
|
||||
ppcg/gpu_group.c
|
||||
ppcg/gpu_print.c
|
||||
ppcg/gpu_tree.c
|
||||
ppcg/ppcg.c
|
||||
ppcg/ppcg_options.c
|
||||
ppcg/schedule.c
|
||||
ppcg/external.c
|
||||
)
|
||||
|
||||
# Prepend the ppcg and pet header locations to the include path of this
# directory (BEFORE places them ahead of previously added directories).
# NOTE(review): this is directory-scoped, so it affects every target defined
# in this directory after this point, not only PollyPPCG.
# NOTE(review): ${PPCG_SOURCE_DIR}/imath looks carried over from the isl
# setup -- confirm that ppcg actually ships an imath directory.
# NOTE(review): nothing visible here writes a header into PPCG_BINARY_DIR --
# confirm whether that entry is needed.
include_directories(BEFORE
|
||||
${PPCG_BINARY_DIR}
|
||||
${PPCG_SOURCE_DIR}/imath
|
||||
${PPCG_SOURCE_DIR}/include
|
||||
${PET_SOURCE_DIR}/include
|
||||
)
|
||||
|
||||
# Define the PollyPPCG library from the ppcg sources listed in PPCG_FILES.
# FORCE_STATIC is passed through to add_polly_library (defined elsewhere);
# presumably it forces a static library regardless of BUILD_SHARED_LIBS --
# confirm against the macro definition.
add_polly_library(PollyPPCG FORCE_STATIC
|
||||
${PPCG_FILES}
|
||||
)
|
||||
|
||||
# For non-MSVC compilers, build ppcg with hidden symbol visibility and with
# all compiler warnings suppressed (-w).
if (NOT MSVC)
|
||||
set_target_properties(PollyPPCG PROPERTIES
|
||||
COMPILE_FLAGS "-fvisibility=hidden -w"
|
||||
)
|
||||
endif ()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,600 @@
|
|||
#ifndef PET_H
|
||||
#define PET_H
|
||||
|
||||
#include <isl/aff.h>
|
||||
#include <isl/arg.h>
|
||||
#include <isl/ast_build.h>
|
||||
#include <isl/set.h>
|
||||
#include <isl/map.h>
|
||||
#include <isl/union_map.h>
|
||||
#include <isl/printer.h>
|
||||
#include <isl/id_to_ast_expr.h>
|
||||
#include <isl/id_to_pw_aff.h>
|
||||
#include <isl/schedule.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct pet_options;
|
||||
ISL_ARG_DECL(pet_options, struct pet_options, pet_options_args)
|
||||
|
||||
/* If autodetect is set, any valid scop is extracted.
|
||||
* Otherwise, the scop needs to be delimited by pragmas.
|
||||
*/
|
||||
int pet_options_set_autodetect(isl_ctx *ctx, int val);
|
||||
int pet_options_get_autodetect(isl_ctx *ctx);
|
||||
|
||||
int pet_options_set_detect_conditional_assignment(isl_ctx *ctx, int val);
|
||||
int pet_options_get_detect_conditional_assignment(isl_ctx *ctx);
|
||||
|
||||
/* If encapsulate-dynamic-control is set, then any dynamic control
|
||||
* in the input program will be encapsulated in macro statements.
|
||||
* This means in particular that no statements with arguments
|
||||
* will be created.
|
||||
*/
|
||||
int pet_options_set_encapsulate_dynamic_control(isl_ctx *ctx, int val);
|
||||
int pet_options_get_encapsulate_dynamic_control(isl_ctx *ctx);
|
||||
|
||||
#define PET_OVERFLOW_AVOID 0
|
||||
#define PET_OVERFLOW_IGNORE 1
|
||||
int pet_options_set_signed_overflow(isl_ctx *ctx, int val);
|
||||
int pet_options_get_signed_overflow(isl_ctx *ctx);
|
||||
|
||||
struct pet_loc;
|
||||
typedef struct pet_loc pet_loc;
|
||||
|
||||
/* Return an additional reference to "loc". */
|
||||
__isl_give pet_loc *pet_loc_copy(__isl_keep pet_loc *loc);
|
||||
/* Free a reference to "loc". */
|
||||
pet_loc *pet_loc_free(__isl_take pet_loc *loc);
|
||||
|
||||
/* Return the offset in the input file of the start of "loc". */
|
||||
unsigned pet_loc_get_start(__isl_keep pet_loc *loc);
|
||||
/* Return the offset in the input file of the character after "loc". */
|
||||
unsigned pet_loc_get_end(__isl_keep pet_loc *loc);
|
||||
/* Return the line number of a line within the "loc" region. */
|
||||
int pet_loc_get_line(__isl_keep pet_loc *loc);
|
||||
/* Return the indentation of the "loc" region. */
|
||||
__isl_keep const char *pet_loc_get_indent(__isl_keep pet_loc *loc);
|
||||
|
||||
enum pet_expr_type {
|
||||
pet_expr_error = -1,
|
||||
pet_expr_access,
|
||||
pet_expr_call,
|
||||
pet_expr_cast,
|
||||
pet_expr_int,
|
||||
pet_expr_double,
|
||||
pet_expr_op
|
||||
};
|
||||
|
||||
enum pet_op_type {
|
||||
/* only compound assignment operators before assignment */
|
||||
pet_op_add_assign,
|
||||
pet_op_sub_assign,
|
||||
pet_op_mul_assign,
|
||||
pet_op_div_assign,
|
||||
pet_op_assign,
|
||||
pet_op_add,
|
||||
pet_op_sub,
|
||||
pet_op_mul,
|
||||
pet_op_div,
|
||||
pet_op_mod,
|
||||
pet_op_shl,
|
||||
pet_op_shr,
|
||||
pet_op_eq,
|
||||
pet_op_ne,
|
||||
pet_op_le,
|
||||
pet_op_ge,
|
||||
pet_op_lt,
|
||||
pet_op_gt,
|
||||
pet_op_minus,
|
||||
pet_op_post_inc,
|
||||
pet_op_post_dec,
|
||||
pet_op_pre_inc,
|
||||
pet_op_pre_dec,
|
||||
pet_op_address_of,
|
||||
pet_op_assume,
|
||||
pet_op_kill,
|
||||
pet_op_and,
|
||||
pet_op_xor,
|
||||
pet_op_or,
|
||||
pet_op_not,
|
||||
pet_op_land,
|
||||
pet_op_lor,
|
||||
pet_op_lnot,
|
||||
pet_op_cond,
|
||||
pet_op_last
|
||||
};
|
||||
|
||||
/* Index into the pet_expr->args array when pet_expr->type == pet_expr_unary
|
||||
*/
|
||||
enum pet_un_arg_type {
|
||||
pet_un_arg
|
||||
};
|
||||
|
||||
/* Indices into the pet_expr->args array when
|
||||
* pet_expr->type == pet_expr_binary
|
||||
*/
|
||||
enum pet_bin_arg_type {
|
||||
pet_bin_lhs,
|
||||
pet_bin_rhs
|
||||
};
|
||||
|
||||
/* Indices into the pet_expr->args array when
|
||||
* pet_expr->type == pet_expr_ternary
|
||||
*/
|
||||
enum pet_ter_arg_type {
|
||||
pet_ter_cond,
|
||||
pet_ter_true,
|
||||
pet_ter_false
|
||||
};
|
||||
|
||||
struct pet_expr;
|
||||
typedef struct pet_expr pet_expr;
|
||||
|
||||
/* Return an additional reference to "expr". */
|
||||
__isl_give pet_expr *pet_expr_copy(__isl_keep pet_expr *expr);
|
||||
/* Free a reference to "expr". */
|
||||
__isl_null pet_expr *pet_expr_free(__isl_take pet_expr *expr);
|
||||
|
||||
/* Return the isl_ctx in which "expr" was created. */
|
||||
isl_ctx *pet_expr_get_ctx(__isl_keep pet_expr *expr);
|
||||
|
||||
/* Return the type of "expr". */
|
||||
enum pet_expr_type pet_expr_get_type(__isl_keep pet_expr *expr);
|
||||
/* Return the number of arguments of "expr". */
|
||||
int pet_expr_get_n_arg(__isl_keep pet_expr *expr);
|
||||
/* Set the number of arguments of "expr" to "n". */
|
||||
__isl_give pet_expr *pet_expr_set_n_arg(__isl_take pet_expr *expr, int n);
|
||||
/* Return the argument of "expr" at position "pos". */
|
||||
__isl_give pet_expr *pet_expr_get_arg(__isl_keep pet_expr *expr, int pos);
|
||||
/* Replace the argument of "expr" at position "pos" by "arg". */
|
||||
__isl_give pet_expr *pet_expr_set_arg(__isl_take pet_expr *expr, int pos,
|
||||
__isl_take pet_expr *arg);
|
||||
|
||||
/* Return the operation type of operation expression "expr". */
|
||||
enum pet_op_type pet_expr_op_get_type(__isl_keep pet_expr *expr);
|
||||
/* Replace the operation type of operation expression "expr" by "type". */
|
||||
__isl_give pet_expr *pet_expr_op_set_type(__isl_take pet_expr *expr,
|
||||
enum pet_op_type type);
|
||||
|
||||
/* Construct a (read) access pet_expr from an index expression. */
|
||||
__isl_give pet_expr *pet_expr_from_index(__isl_take isl_multi_pw_aff *index);
|
||||
|
||||
/* Does "expr" represent an affine expression? */
|
||||
int pet_expr_is_affine(__isl_keep pet_expr *expr);
|
||||
/* Does the access expression "expr" read the accessed elements? */
|
||||
int pet_expr_access_is_read(__isl_keep pet_expr *expr);
|
||||
/* Does the access expression "expr" write to the accessed elements? */
|
||||
int pet_expr_access_is_write(__isl_keep pet_expr *expr);
|
||||
/* Mark "expr" as a read depending on "read". */
|
||||
__isl_give pet_expr *pet_expr_access_set_read(__isl_take pet_expr *expr,
|
||||
int read);
|
||||
/* Mark "expr" as a write depending on "write". */
|
||||
__isl_give pet_expr *pet_expr_access_set_write(__isl_take pet_expr *expr,
|
||||
int write);
|
||||
/* Mark "expr" as a kill depending on "kill". */
|
||||
__isl_give pet_expr *pet_expr_access_set_kill(__isl_take pet_expr *expr,
|
||||
int kill);
|
||||
/* Return the reference identifier of access expression "expr". */
|
||||
__isl_give isl_id *pet_expr_access_get_ref_id(__isl_keep pet_expr *expr);
|
||||
/* Replace the reference identifier of access expression "expr" by "ref_id". */
|
||||
__isl_give pet_expr *pet_expr_access_set_ref_id(__isl_take pet_expr *expr,
|
||||
__isl_take isl_id *ref_id);
|
||||
/* Return the identifier of the outer array accessed by "expr". */
|
||||
__isl_give isl_id *pet_expr_access_get_id(__isl_keep pet_expr *expr);
|
||||
/* Return the index expression of access expression "expr". */
|
||||
__isl_give isl_multi_pw_aff *pet_expr_access_get_index(
|
||||
__isl_keep pet_expr *expr);
|
||||
|
||||
/* Return the potential read access relation of access expression "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_may_read(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the potential write access relation of access expression "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_may_write(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the definite write access relation of access expression "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_must_write(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the argument dependent potential read access relation of "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_dependent_may_read(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the argument dependent potential write access relation of "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_dependent_may_write(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the argument dependent definite write access relation of "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_dependent_must_write(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the tagged potential read access relation of access "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_tagged_may_read(
|
||||
__isl_keep pet_expr *expr);
|
||||
/* Return the tagged potential write access relation of access "expr". */
|
||||
__isl_give isl_union_map *pet_expr_access_get_tagged_may_write(
|
||||
__isl_keep pet_expr *expr);
|
||||
|
||||
/* Return the name of the function called by "expr". */
|
||||
__isl_keep const char *pet_expr_call_get_name(__isl_keep pet_expr *expr);
|
||||
/* Replace the name of the function called by "expr" by "name". */
|
||||
__isl_give pet_expr *pet_expr_call_set_name(__isl_take pet_expr *expr,
|
||||
__isl_keep const char *name);
|
||||
|
||||
/* Create a pet_expr representing a cast of "arg" to "type_name". */
|
||||
__isl_give pet_expr *pet_expr_new_cast(const char *type_name,
|
||||
__isl_take pet_expr *arg);
|
||||
/* Replace the type of the cast performed by "expr" by "name". */
|
||||
__isl_give pet_expr *pet_expr_cast_set_type_name(__isl_take pet_expr *expr,
|
||||
__isl_keep const char *name);
|
||||
|
||||
/* Return the value of the integer represented by "expr". */
|
||||
__isl_give isl_val *pet_expr_int_get_val(__isl_keep pet_expr *expr);
|
||||
/* Replace the value of the integer represented by "expr" by "v". */
|
||||
__isl_give pet_expr *pet_expr_int_set_val(__isl_take pet_expr *expr,
|
||||
__isl_take isl_val *v);
|
||||
|
||||
/* Return a string representation of the double expression "expr". */
|
||||
__isl_give char *pet_expr_double_get_str(__isl_keep pet_expr *expr);
|
||||
/* Replace value and string representation of the double expression "expr" */
|
||||
__isl_give pet_expr *pet_expr_double_set(__isl_take pet_expr *expr,
|
||||
double d, __isl_keep const char *s);
|
||||
|
||||
/* Call "fn" on each of the subexpressions of "expr" of type pet_expr_access. */
|
||||
int pet_expr_foreach_access_expr(__isl_keep pet_expr *expr,
|
||||
int (*fn)(__isl_keep pet_expr *expr, void *user), void *user);
|
||||
/* Call "fn" on each of the subexpressions of "expr" of type pet_expr_call. */
|
||||
int pet_expr_foreach_call_expr(__isl_keep pet_expr *expr,
|
||||
int (*fn)(__isl_keep pet_expr *expr, void *user), void *user);
|
||||
|
||||
struct pet_context;
|
||||
typedef struct pet_context pet_context;
|
||||
|
||||
/* Create a context with the given domain. */
|
||||
__isl_give pet_context *pet_context_alloc(__isl_take isl_set *domain);
|
||||
/* Return an additional reference to "pc". */
|
||||
__isl_give pet_context *pet_context_copy(__isl_keep pet_context *pc);
|
||||
/* Free a reference to "pc". */
|
||||
__isl_null pet_context *pet_context_free(__isl_take pet_context *pc);
|
||||
|
||||
/* Return the isl_ctx in which "pc" was created. */
|
||||
isl_ctx *pet_context_get_ctx(__isl_keep pet_context *pc);
|
||||
|
||||
/* Extract an affine expression defined over the domain of "pc" from "expr"
|
||||
* or return NaN.
|
||||
*/
|
||||
__isl_give isl_pw_aff *pet_expr_extract_affine(__isl_keep pet_expr *expr,
|
||||
__isl_keep pet_context *pc);
|
||||
|
||||
void pet_expr_dump(__isl_keep pet_expr *expr);
|
||||
|
||||
enum pet_tree_type {
|
||||
pet_tree_error = -1,
|
||||
pet_tree_expr,
|
||||
pet_tree_block,
|
||||
pet_tree_break,
|
||||
pet_tree_continue,
|
||||
pet_tree_decl, /* A declaration without initialization */
|
||||
pet_tree_decl_init, /* A declaration with initialization */
|
||||
pet_tree_if, /* An if without an else branch */
|
||||
pet_tree_if_else, /* An if with an else branch */
|
||||
pet_tree_for,
|
||||
pet_tree_infinite_loop,
|
||||
pet_tree_while
|
||||
};
|
||||
|
||||
struct pet_tree;
|
||||
typedef struct pet_tree pet_tree;
|
||||
|
||||
/* Return the isl_ctx in which "tree" was created. */
|
||||
isl_ctx *pet_tree_get_ctx(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Return an additional reference to "tree". */
|
||||
__isl_give pet_tree *pet_tree_copy(__isl_keep pet_tree *tree);
|
||||
/* Free a reference to "tree". */
|
||||
__isl_null pet_tree *pet_tree_free(__isl_take pet_tree *tree);
|
||||
|
||||
/* Return the location of "tree". */
|
||||
__isl_give pet_loc *pet_tree_get_loc(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Return the type of "tree". */
|
||||
enum pet_tree_type pet_tree_get_type(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Return the expression of the expression tree "tree". */
|
||||
__isl_give pet_expr *pet_tree_expr_get_expr(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Return the number of children of the block tree "tree". */
|
||||
int pet_tree_block_n_child(__isl_keep pet_tree *tree);
|
||||
/* Return child "pos" of the block tree "tree". */
|
||||
__isl_give pet_tree *pet_tree_block_get_child(__isl_keep pet_tree *tree,
|
||||
int pos);
|
||||
|
||||
/* Is "tree" a declaration (with or without initialization)? */
|
||||
int pet_tree_is_decl(__isl_keep pet_tree *tree);
|
||||
/* Return the variable declared by the declaration tree "tree". */
|
||||
__isl_give pet_expr *pet_tree_decl_get_var(__isl_keep pet_tree *tree);
|
||||
/* Return the initial value of the pet_tree_decl_init tree "tree". */
|
||||
__isl_give pet_expr *pet_tree_decl_get_init(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Return the condition of the if tree "tree". */
|
||||
__isl_give pet_expr *pet_tree_if_get_cond(__isl_keep pet_tree *tree);
|
||||
/* Return the then branch of the if tree "tree". */
|
||||
__isl_give pet_tree *pet_tree_if_get_then(__isl_keep pet_tree *tree);
|
||||
/* Return the else branch of the if tree with else branch "tree". */
|
||||
__isl_give pet_tree *pet_tree_if_get_else(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Is "tree" a for loop, a while loop or an infinite loop? */
|
||||
int pet_tree_is_loop(__isl_keep pet_tree *tree);
|
||||
/* Return the induction variable of the for loop "tree" */
|
||||
__isl_give pet_expr *pet_tree_loop_get_var(__isl_keep pet_tree *tree);
|
||||
/* Return the initial value of the induction variable of the for loop "tree" */
|
||||
__isl_give pet_expr *pet_tree_loop_get_init(__isl_keep pet_tree *tree);
|
||||
/* Return the condition of the loop tree "tree" */
|
||||
__isl_give pet_expr *pet_tree_loop_get_cond(__isl_keep pet_tree *tree);
|
||||
/* Return the increment of the for loop "tree" */
|
||||
__isl_give pet_expr *pet_tree_loop_get_inc(__isl_keep pet_tree *tree);
|
||||
/* Return the body of the loop tree "tree" */
|
||||
__isl_give pet_tree *pet_tree_loop_get_body(__isl_keep pet_tree *tree);
|
||||
|
||||
/* Call "fn" on each top-level expression in the nodes of "tree" */
|
||||
int pet_tree_foreach_expr(__isl_keep pet_tree *tree,
|
||||
int (*fn)(__isl_keep pet_expr *expr, void *user), void *user);
|
||||
/* Call "fn" on each access subexpression in the nodes of "tree" */
|
||||
int pet_tree_foreach_access_expr(__isl_keep pet_tree *tree,
|
||||
int (*fn)(__isl_keep pet_expr *expr, void *user), void *user);
|
||||
/* Modify all call subexpressions in the nodes of "tree" through "fn". */
|
||||
__isl_give pet_tree *pet_tree_map_call_expr(__isl_take pet_tree *tree,
|
||||
__isl_give pet_expr *(*fn)(__isl_take pet_expr *expr, void *user),
|
||||
void *user);
|
||||
|
||||
void pet_tree_dump(__isl_keep pet_tree *tree);
|
||||
|
||||
/* "loc" represents the region of the source code that is represented
|
||||
* by this statement.
|
||||
*
|
||||
* If the statement has arguments, i.e., n_arg != 0, then
|
||||
* "domain" is a wrapped map, mapping the iteration domain
|
||||
* to the values of the arguments for which this statement
|
||||
* is executed.
|
||||
* Otherwise, it is simply the iteration domain.
|
||||
*
|
||||
* If one of the arguments is an access expression that accesses
|
||||
* more than one element for a given iteration, then the constraints
|
||||
* on the value of this argument (encoded in "domain") should be satisfied
|
||||
* for all of those accessed elements.
|
||||
*/
|
||||
struct pet_stmt {
|
||||
pet_loc *loc;
|
||||
isl_set *domain;
|
||||
pet_tree *body;
|
||||
|
||||
unsigned n_arg;
|
||||
pet_expr **args;
|
||||
};
|
||||
|
||||
/* Return the iteration space of "stmt". */
|
||||
__isl_give isl_space *pet_stmt_get_space(struct pet_stmt *stmt);
|
||||
|
||||
/* Is "stmt" an assignment statement? */
|
||||
int pet_stmt_is_assign(struct pet_stmt *stmt);
|
||||
/* Is "stmt" a kill statement? */
|
||||
int pet_stmt_is_kill(struct pet_stmt *stmt);
|
||||
|
||||
/* pet_stmt_build_ast_exprs is currently limited to only handle
|
||||
* some forms of data dependent accesses.
|
||||
* If pet_stmt_can_build_ast_exprs returns 1, then pet_stmt_build_ast_exprs
|
||||
* can safely be called on "stmt".
|
||||
*/
|
||||
int pet_stmt_can_build_ast_exprs(struct pet_stmt *stmt);
|
||||
/* Construct an associative array from reference identifiers of
|
||||
* access expressions in "stmt" to the corresponding isl_ast_expr.
|
||||
* Each index expression is first transformed through "fn_index"
|
||||
* (if not NULL). Then an AST expression is generated using "build".
|
||||
* Finally, the AST expression is transformed using "fn_expr"
|
||||
* (if not NULL).
|
||||
*/
|
||||
__isl_give isl_id_to_ast_expr *pet_stmt_build_ast_exprs(struct pet_stmt *stmt,
|
||||
__isl_keep isl_ast_build *build,
|
||||
__isl_give isl_multi_pw_aff *(*fn_index)(
|
||||
__isl_take isl_multi_pw_aff *mpa, __isl_keep isl_id *id,
|
||||
void *user), void *user_index,
|
||||
__isl_give isl_ast_expr *(*fn_expr)(__isl_take isl_ast_expr *expr,
|
||||
__isl_keep isl_id *id, void *user), void *user_expr);
|
||||
|
||||
/* Print "stmt" to "p".
|
||||
*
|
||||
* The access expressions in "stmt" are replaced by the isl_ast_expr
|
||||
* associated to its reference identifier in "ref2expr".
|
||||
*/
|
||||
__isl_give isl_printer *pet_stmt_print_body(struct pet_stmt *stmt,
|
||||
__isl_take isl_printer *p, __isl_keep isl_id_to_ast_expr *ref2expr);
|
||||
|
||||
/* This structure represents a defined type.
|
||||
* "name" is the name of the type, while "definition" is a string
|
||||
* representation of its definition.
|
||||
*/
|
||||
struct pet_type {
|
||||
char *name;
|
||||
char *definition;
|
||||
};
|
||||
|
||||
/* context holds constraints on the parameter that ensure that
|
||||
* this array has a valid (i.e., non-negative) size
|
||||
*
|
||||
* extent holds constraints on the indices
|
||||
*
|
||||
* value_bounds holds constraints on the elements of the array
|
||||
* and may be NULL if no such constraints were specified by the user
|
||||
*
|
||||
* element_size is the size in bytes of each array element
|
||||
* element_type is the type of the array elements.
|
||||
* element_is_record is set if this type is a record type.
|
||||
*
|
||||
* live_out is set if the array appears in a live-out pragma
|
||||
*
|
||||
* if uniquely_defined is set then the array is written by a single access
|
||||
* such that any element that is ever read
|
||||
* is known to be assigned exactly once before the read
|
||||
*
|
||||
* declared is set if the array was declared somewhere inside the scop.
|
||||
* exposed is set if the declared array is visible outside the scop.
|
||||
*/
|
||||
struct pet_array {
|
||||
isl_set *context;
|
||||
isl_set *extent;
|
||||
isl_set *value_bounds;
|
||||
char *element_type;
|
||||
int element_is_record;
|
||||
int element_size;
|
||||
int live_out;
|
||||
int uniquely_defined;
|
||||
int declared;
|
||||
int exposed;
|
||||
};
|
||||
|
||||
/* This structure represents an implication on a boolean filter.
|
||||
* In particular, if the filter value of an element in the domain
|
||||
* of "extension" is equal to "satisfied", then the filter values
|
||||
* of the corresponding images in "extension" are also equal
|
||||
* to "satisfied".
|
||||
*/
|
||||
struct pet_implication {
|
||||
int satisfied;
|
||||
isl_map *extension;
|
||||
};
|
||||
|
||||
/* This structure represents an independence implied by a for loop
|
||||
* that is marked as independent in the source code.
|
||||
* "filter" contains pairs of statement instances that are guaranteed
|
||||
* not to be dependent on each other based on the independent for loop,
|
||||
* assuming that no dependences carried by this loop are implied
|
||||
* by the variables in "local".
|
||||
* "local" contains the variables that are local to the loop that was
|
||||
* marked independent.
|
||||
*/
|
||||
struct pet_independence {
|
||||
isl_union_map *filter;
|
||||
isl_union_set *local;
|
||||
};
|
||||
|
||||
/* "loc" represents the region of the source code that is represented
|
||||
* by this scop.
|
||||
* If the scop was detected based on scop and endscop pragmas, then
|
||||
* the lines containing these pragmas are included in this region.
|
||||
* In the final result, the context describes the set of parameter values
|
||||
* for which the scop can be executed.
|
||||
* During the construction of the pet_scop, the context lives in a set space
|
||||
* where each dimension refers to an outer loop.
|
||||
* context_value describes assignments to the parameters (if any)
|
||||
* outside of the scop.
|
||||
*
|
||||
* "schedule" is the schedule of the statements in the scop.
|
||||
*
|
||||
* The n_type types define types that may be referenced by the arrays.
|
||||
*
|
||||
* The n_implication implications describe implications on boolean filters.
|
||||
*
|
||||
* The n_independence independences describe independences implied
|
||||
* by for loops that are marked independent in the source code.
|
||||
*/
|
||||
struct pet_scop {
|
||||
pet_loc *loc;
|
||||
|
||||
isl_set *context;
|
||||
isl_set *context_value;
|
||||
isl_schedule *schedule;
|
||||
|
||||
int n_type;
|
||||
struct pet_type **types;
|
||||
|
||||
int n_array;
|
||||
struct pet_array **arrays;
|
||||
|
||||
int n_stmt;
|
||||
struct pet_stmt **stmts;
|
||||
|
||||
int n_implication;
|
||||
struct pet_implication **implications;
|
||||
|
||||
int n_independence;
|
||||
struct pet_independence **independences;
|
||||
};
|
||||
|
||||
/* Return a textual representation of the operator. */
|
||||
const char *pet_op_str(enum pet_op_type op);
|
||||
int pet_op_is_inc_dec(enum pet_op_type op);
|
||||
|
||||
/* Extract a pet_scop from a C source file.
|
||||
* If function is not NULL, then the pet_scop is extracted from
|
||||
* a function with that name.
|
||||
*/
|
||||
struct pet_scop *pet_scop_extract_from_C_source(isl_ctx *ctx,
|
||||
const char *filename, const char *function);
|
||||
|
||||
/* Transform the C source file "input" by rewriting each scop.
|
||||
* When autodetecting scops, at most one scop per function is rewritten.
|
||||
* The transformed C code is written to "output".
|
||||
*/
|
||||
int pet_transform_C_source(isl_ctx *ctx, const char *input, FILE *output,
|
||||
__isl_give isl_printer *(*transform)(__isl_take isl_printer *p,
|
||||
struct pet_scop *scop, void *user), void *user);
|
||||
/* Given a scop and a printer passed to a pet_transform_C_source callback,
|
||||
* print the original corresponding code to the printer.
|
||||
*/
|
||||
__isl_give isl_printer *pet_scop_print_original(struct pet_scop *scop,
|
||||
__isl_take isl_printer *p);
|
||||
|
||||
/* Update all isl_sets and isl_maps such that they all have the same
|
||||
* parameters in the same order.
|
||||
*/
|
||||
struct pet_scop *pet_scop_align_params(struct pet_scop *scop);
|
||||
|
||||
/* Does "scop" contain any data dependent accesses? */
|
||||
int pet_scop_has_data_dependent_accesses(struct pet_scop *scop);
|
||||
/* Does "scop" contain any data dependent conditions? */
|
||||
int pet_scop_has_data_dependent_conditions(struct pet_scop *scop);
|
||||
/* pet_stmt_build_ast_exprs is currently limited to only handle
|
||||
* some forms of data dependent accesses.
|
||||
* If pet_scop_can_build_ast_exprs returns 1, then pet_stmt_build_ast_exprs
|
||||
* can safely be called on all statements in the scop.
|
||||
*/
|
||||
int pet_scop_can_build_ast_exprs(struct pet_scop *scop);
|
||||
|
||||
void pet_scop_dump(struct pet_scop *scop);
|
||||
struct pet_scop *pet_scop_free(struct pet_scop *scop);
|
||||
|
||||
__isl_give isl_union_set *pet_scop_collect_domains(struct pet_scop *scop);
|
||||
/* Collect all potential read access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_may_reads(struct pet_scop *scop);
|
||||
/* Collect all tagged potential read access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_tagged_may_reads(
|
||||
struct pet_scop *scop);
|
||||
/* Collect all potential write access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_may_writes(struct pet_scop *scop);
|
||||
/* Collect all definite write access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_must_writes(struct pet_scop *scop);
|
||||
/* Collect all tagged potential write access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_tagged_may_writes(
|
||||
struct pet_scop *scop);
|
||||
/* Collect all tagged definite write access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_tagged_must_writes(
|
||||
struct pet_scop *scop);
|
||||
/* Collect all definite kill access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_must_kills(struct pet_scop *scop);
|
||||
/* Collect all tagged definite kill access relations. */
|
||||
__isl_give isl_union_map *pet_scop_collect_tagged_must_kills(
|
||||
struct pet_scop *scop);
|
||||
|
||||
/* Compute a mapping from all outermost arrays (of structs) in scop
|
||||
* to their innermost members.
|
||||
*/
|
||||
__isl_give isl_union_map *pet_scop_compute_outer_to_inner(
|
||||
struct pet_scop *scop);
|
||||
/* Compute a mapping from all outermost arrays (of structs) in scop
|
||||
* to their members, including the outermost arrays themselves.
|
||||
*/
|
||||
__isl_give isl_union_map *pet_scop_compute_outer_to_any(struct pet_scop *scop);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
version: 0.04
|
||||
date: Wed Jun 17 10:52:58 CEST 2015
|
||||
changes:
|
||||
- use schedule trees
|
||||
- fix live-range reordering
|
||||
- improve generation of synchronization
|
||||
- exploit independences during dependence analysis
|
||||
|
|
@ -0,0 +1 @@
|
|||
ppcg-0.04
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
if BUNDLED_ISL
|
||||
MAYBE_ISL = isl
|
||||
ISL_LA = $(top_builddir)/isl/libisl.la
|
||||
LOCAL_ISL_LA = isl/libisl.la
|
||||
endif
|
||||
if BUNDLED_PET
|
||||
MAYBE_PET = pet
|
||||
PET_LA = $(top_builddir)/pet/libpet.la
|
||||
endif
|
||||
|
||||
SUBDIRS = $(MAYBE_ISL) $(MAYBE_PET) .
|
||||
|
||||
FORCE:
|
||||
isl/libisl.la: FORCE
|
||||
cd isl; $(MAKE) $(AM_MAKEFLAGS) libisl.la
|
||||
pet/libpet.la: FORCE
|
||||
cd pet; $(MAKE) $(AM_MAKEFLAGS) libpet.la
|
||||
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
LIB_ISL = $(ISL_LA) @ISL_LIBS@
|
||||
LIB_PET = $(PET_LA) @PET_LIBS@
|
||||
|
||||
AM_CPPFLAGS = @ISL_CFLAGS@ @PET_CFLAGS@
|
||||
LDADD = $(LIB_PET) $(LIB_ISL)
|
||||
|
||||
bin_PROGRAMS = ppcg
|
||||
ppcg_SOURCES = \
|
||||
cpu.c \
|
||||
cpu.h \
|
||||
cuda.c \
|
||||
cuda.h \
|
||||
opencl.c \
|
||||
opencl.h \
|
||||
cuda_common.h \
|
||||
cuda_common.c \
|
||||
gpu.c \
|
||||
gpu.h \
|
||||
gpu_array_tile.c \
|
||||
gpu_array_tile.h \
|
||||
gpu_group.c \
|
||||
gpu_group.h \
|
||||
gpu_print.c \
|
||||
gpu_print.h \
|
||||
gpu_tree.c \
|
||||
gpu_tree.h \
|
||||
schedule.c \
|
||||
schedule.h \
|
||||
ppcg_options.c \
|
||||
ppcg_options.h \
|
||||
ppcg.c \
|
||||
ppcg.h \
|
||||
print.c \
|
||||
print.h \
|
||||
util.h \
|
||||
version.c
|
||||
|
||||
TESTS = @extra_tests@
|
||||
EXTRA_TESTS = opencl_test.sh polybench_test.sh
|
||||
TEST_EXTENSIONS = .sh
|
||||
|
||||
EXTRA_DIST = \
|
||||
ocl_utilities.c \
|
||||
ocl_utilities.h \
|
||||
tests
|
||||
|
||||
dist-hook:
|
||||
echo @GIT_HEAD_VERSION@ > $(distdir)/GIT_HEAD_ID
|
||||
|
||||
gitversion.h: @GIT_HEAD@
|
||||
$(AM_V_GEN)echo '#define GIT_HEAD_ID "'@GIT_HEAD_VERSION@'"' > $@
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,226 @@
|
|||
Requirements:
|
||||
|
||||
- automake, autoconf, libtool
|
||||
(not needed when compiling a release)
|
||||
- pkg-config (http://www.freedesktop.org/wiki/Software/pkg-config)
|
||||
(not needed when compiling a release using the included isl and pet)
|
||||
- gmp (http://gmplib.org/)
|
||||
- libyaml (http://pyyaml.org/wiki/LibYAML)
|
||||
(only needed if you want to compile the pet executable)
|
||||
- LLVM/clang libraries, 2.9 or higher (http://clang.llvm.org/get_started.html)
|
||||
Unless you have some other reasons for wanting to use the svn version,
|
||||
it is best to install the latest release (3.6).
|
||||
For more details, see pet/README.
|
||||
|
||||
If you are installing on Ubuntu, then you can install the following packages:
|
||||
|
||||
automake autoconf libtool pkg-config libgmp3-dev libyaml-dev libclang-dev llvm
|
||||
|
||||
Note that you need at least version 3.2 of libclang-dev (ubuntu raring).
|
||||
Older versions of this package did not include the required libraries.
|
||||
If you are using an older version of ubuntu, then you need to compile and
|
||||
install LLVM/clang from source.
|
||||
|
||||
|
||||
Preparing:
|
||||
|
||||
Grab the latest release and extract it or get the source from
|
||||
the git repository as follows. This process requires autoconf,
|
||||
automake, libtool and pkg-config.
|
||||
|
||||
git clone git://repo.or.cz/ppcg.git
|
||||
cd ppcg
|
||||
git submodule init
|
||||
git submodule update
|
||||
./autogen.sh
|
||||
|
||||
|
||||
Compilation:
|
||||
|
||||
./configure
|
||||
make
|
||||
make check
|
||||
|
||||
If you have installed any of the required libraries in a non-standard
|
||||
location, then you may need to use the --with-gmp-prefix,
|
||||
--with-libyaml-prefix and/or --with-clang-prefix options
|
||||
when calling "./configure".
|
||||
|
||||
|
||||
Using PPCG to generate CUDA or OpenCL code
|
||||
|
||||
To convert a fragment of a C program to CUDA, insert a line containing
|
||||
|
||||
#pragma scop
|
||||
|
||||
before the fragment and add a line containing
|
||||
|
||||
#pragma endscop
|
||||
|
||||
after the fragment. To generate CUDA code run
|
||||
|
||||
ppcg --target=cuda file.c
|
||||
|
||||
where file.c is the file containing the fragment. The generated
|
||||
code is stored in file_host.cu and file_kernel.cu.
|
||||
|
||||
To generate OpenCL code run
|
||||
|
||||
ppcg --target=opencl file.c
|
||||
|
||||
where file.c is the file containing the fragment. The generated code
|
||||
is stored in file_host.c and file_kernel.cl.
|
||||
|
||||
|
||||
Specifying tile, grid and block sizes
|
||||
|
||||
The iteration space tile size, grid size and block size can
|
||||
be specified using the --sizes option. The argument is a union map
|
||||
in isl notation mapping kernels identified by their sequence number
|
||||
in a "kernel" space to singleton sets in the "tile", "grid" and "block"
|
||||
spaces. The sizes are specified outermost to innermost.
|
||||
|
||||
The dimension of the "tile" space indicates the (maximal) number of loop
|
||||
dimensions to tile. The elements of the single integer tuple
|
||||
specify the tile sizes in each dimension.
|
||||
|
||||
The dimension of the "grid" space indicates the (maximal) number of block
|
||||
dimensions in the grid. The elements of the single integer tuple
|
||||
specify the number of blocks in each dimension.
|
||||
|
||||
The dimension of the "block" space indicates the (maximal) number of thread
|
||||
dimensions in the block. The elements of the single integer tuple
|
||||
specify the number of threads in each dimension.
|
||||
|
||||
For example,
|
||||
|
||||
{ kernel[0] -> tile[64,64]; kernel[i] -> block[16] : i != 4 }
|
||||
|
||||
specifies that in kernel 0, two loops should be tiled with a tile
|
||||
size of 64 in both dimensions and that all kernels except kernel 4
|
||||
should be run using a block of 16 threads.
|
||||
|
||||
Since PPCG performs some scheduling, it can be difficult to predict
|
||||
what exactly will end up in a kernel. If you want to specify
|
||||
tile, grid or block sizes, you may want to run PPCG first with the defaults,
|
||||
examine the kernels and then run PPCG again with the desired sizes.
|
||||
Instead of examining the kernels, you can also specify the option
|
||||
--dump-sizes on the first run to obtain the effectively used default sizes.
|
||||
|
||||
|
||||
Compiling the generated CUDA code with nvcc
|
||||
|
||||
To get optimal performance from nvcc, it is important to choose --arch
|
||||
according to your target GPU. Specifically, use the flag "--arch sm_20"
|
||||
for Fermi, "--arch sm_30" for GK10x Kepler and "--arch sm_35" for
|
||||
GK110 Kepler. We discourage the use of older cards as we have seen
|
||||
correctness issues with compilation for older architectures.
|
||||
Note that in the absence of any --arch flag, nvcc defaults to
|
||||
"--arch sm_13". This will not only be slower, but can also cause
|
||||
correctness issues.
|
||||
If you want to obtain results that are identical to those obtained
|
||||
by the original code, then you may need to disable some optimizations
|
||||
by passing the "--fmad=false" option.
|
||||
|
||||
|
||||
Compiling the generated OpenCL code with gcc
|
||||
|
||||
To compile the host code you need to link against the file
|
||||
ocl_utilities.c which contains utility functions used by the generated
|
||||
OpenCL host code. To compile the host code with gcc, run
|
||||
|
||||
gcc -std=c99 file_host.c ocl_utilities.c -lOpenCL
|
||||
|
||||
Note that we have experienced the generated OpenCL code freezing
|
||||
on some inputs (e.g., the PolyBench symm benchmark) when using
|
||||
at least some versions of the Nvidia OpenCL library, while the
|
||||
corresponding CUDA code runs fine.
|
||||
We have experienced no such freezes when using AMD, ARM or Intel
|
||||
OpenCL libraries.
|
||||
|
||||
By default, the compiled executable will need the _kernel.cl file at
|
||||
run time. Alternatively, the option --opencl-embed-kernel-code may be
|
||||
given to place the kernel code in a string literal. The kernel code is
|
||||
then compiled into the host binary, such that the _kernel.cl file is no
|
||||
longer needed at run time. Any kernel include files, in particular
|
||||
those supplied using --opencl-include-file, will still be required at
|
||||
run time.
|
||||
|
||||
|
||||
Function calls
|
||||
|
||||
Function calls inside the analyzed fragment are reproduced
|
||||
in the CUDA or OpenCL code, but for now it is left to the user
|
||||
to make sure that the functions that are being called are
|
||||
available from the generated kernels.
|
||||
|
||||
In the case of OpenCL code, the --opencl-include-file option
|
||||
may be used to specify one or more files to be #include'd
|
||||
from the generated code. These files may then contain
|
||||
the definitions of the functions being called from the
|
||||
program fragment. If the pathnames of the included files
|
||||
are relative to the current directory, then you may need
|
||||
to additionally specify the --opencl-compiler-options=-I.
|
||||
to make sure that the files can be found by the OpenCL compiler.
|
||||
The included files may contain definitions of types used by the
|
||||
generated kernels. By default, PPCG generates definitions for
|
||||
types as needed, but these definitions may collide with those in
|
||||
the included files, as PPCG does not consider the contents of the
|
||||
included files. The --no-opencl-print-kernel-types option will prevent
|
||||
PPCG from generating type definitions.
|
||||
|
||||
|
||||
Processing PolyBench
|
||||
|
||||
When processing a PolyBench/C 3.2 benchmark, you should always specify
|
||||
-DPOLYBENCH_USE_C99_PROTO on the ppcg command line. Otherwise, the source
|
||||
files are inconsistent, having fixed size arrays but parametrically
|
||||
bounded loops iterating over them.
|
||||
However, you should not specify this define when compiling
|
||||
the PPCG generated code using nvcc since CUDA does not support VLAs.
|
||||
|
||||
|
||||
CUDA and function overloading
|
||||
|
||||
While CUDA supports function overloading based on the argument types,
|
||||
no such function overloading exists in the input language C. Since PPCG
|
||||
simply prints out the same function name as in the original code, this
|
||||
may result in a different function being called based on the types
|
||||
of the arguments. For example, if the original code contains a call
|
||||
to the function sqrt() with a float argument, then the argument will
|
||||
be promoted to a double and the sqrt() function will be called.
|
||||
In the transformed (CUDA) code, however, overloading will cause the
|
||||
function sqrtf() to be called. Until this issue has been resolved in PPCG,
|
||||
we recommend that users either explicitly call the function sqrtf() or
|
||||
explicitly cast the argument to double in the input code.
|
||||
|
||||
|
||||
Contact
|
||||
|
||||
For bug reports, feature requests and questions,
|
||||
contact http://groups.google.com/group/isl-development
|
||||
|
||||
|
||||
Citing PPCG
|
||||
|
||||
If you use PPCG for your research, you are invited to cite
|
||||
the following paper.
|
||||
|
||||
@article{Verdoolaege2013PPCG,
|
||||
author = {Verdoolaege, Sven and Juega, Juan Carlos and Cohen, Albert and
|
||||
G\'{o}mez, Jos{\'e} Ignacio and Tenllado, Christian and
|
||||
Catthoor, Francky},
|
||||
title = {Polyhedral parallel code generation for CUDA},
|
||||
journal = {ACM Trans. Archit. Code Optim.},
|
||||
issue_date = {January 2013},
|
||||
volume = {9},
|
||||
number = {4},
|
||||
month = jan,
|
||||
year = {2013},
|
||||
issn = {1544-3566},
|
||||
pages = {54:1--54:23},
|
||||
doi = {10.1145/2400682.2400713},
|
||||
acmid = {2400713},
|
||||
publisher = {ACM},
|
||||
address = {New York, NY, USA},
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,347 @@
|
|||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand '-c -o'.
|
||||
|
||||
scriptversion=2012-10-14.11; # UTC
|
||||
|
||||
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
nl='
|
||||
'
|
||||
|
||||
# We need space, tab and new line, in precisely that order. Quoting is
|
||||
# there to prevent tools from complaining about whitespace usage.
|
||||
IFS=" "" $nl"
|
||||
|
||||
file_conv=
|
||||
|
||||
# func_file_conv build_file lazy
|
||||
# Convert a $build file to $host form and store it in $file
|
||||
# Currently only supports Windows hosts. If the determined conversion
|
||||
# type is listed in (the comma separated) LAZY, no conversion will
|
||||
# take place.
|
||||
func_file_conv ()
|
||||
{
|
||||
file=$1
|
||||
case $file in
|
||||
/ | /[!/]*) # absolute file, and not a UNC file
|
||||
if test -z "$file_conv"; then
|
||||
# lazily determine how to convert abs files
|
||||
case `uname -s` in
|
||||
MINGW*)
|
||||
file_conv=mingw
|
||||
;;
|
||||
CYGWIN*)
|
||||
file_conv=cygwin
|
||||
;;
|
||||
*)
|
||||
file_conv=wine
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
case $file_conv/,$2, in
|
||||
*,$file_conv,*)
|
||||
;;
|
||||
mingw/*)
|
||||
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
|
||||
;;
|
||||
cygwin/*)
|
||||
file=`cygpath -m "$file" || echo "$file"`
|
||||
;;
|
||||
wine/*)
|
||||
file=`winepath -w "$file" || echo "$file"`
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# func_cl_dashL linkdir
|
||||
# Make cl look for libraries in LINKDIR
|
||||
func_cl_dashL ()
|
||||
{
|
||||
func_file_conv "$1"
|
||||
if test -z "$lib_path"; then
|
||||
lib_path=$file
|
||||
else
|
||||
lib_path="$lib_path;$file"
|
||||
fi
|
||||
linker_opts="$linker_opts -LIBPATH:$file"
|
||||
}
|
||||
|
||||
# func_cl_dashl library
|
||||
# Do a library search-path lookup for cl
|
||||
func_cl_dashl ()
|
||||
{
|
||||
lib=$1
|
||||
found=no
|
||||
save_IFS=$IFS
|
||||
IFS=';'
|
||||
for dir in $lib_path $LIB
|
||||
do
|
||||
IFS=$save_IFS
|
||||
if $shared && test -f "$dir/$lib.dll.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.dll.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/$lib.lib"; then
|
||||
found=yes
|
||||
lib=$dir/$lib.lib
|
||||
break
|
||||
fi
|
||||
if test -f "$dir/lib$lib.a"; then
|
||||
found=yes
|
||||
lib=$dir/lib$lib.a
|
||||
break
|
||||
fi
|
||||
done
|
||||
IFS=$save_IFS
|
||||
|
||||
if test "$found" != yes; then
|
||||
lib=$lib.lib
|
||||
fi
|
||||
}
|
||||
|
||||
# func_cl_wrapper cl arg...
|
||||
# Adjust compile command to suit cl
|
||||
func_cl_wrapper ()
|
||||
{
|
||||
# Assume a capable shell
|
||||
lib_path=
|
||||
shared=:
|
||||
linker_opts=
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.[oO][bB][jJ])
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fo"$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
func_file_conv "$2"
|
||||
set x "$@" -Fe"$file"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
-I)
|
||||
eat=1
|
||||
func_file_conv "$2" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-I*)
|
||||
func_file_conv "${1#-I}" mingw
|
||||
set x "$@" -I"$file"
|
||||
shift
|
||||
;;
|
||||
-l)
|
||||
eat=1
|
||||
func_cl_dashl "$2"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-l*)
|
||||
func_cl_dashl "${1#-l}"
|
||||
set x "$@" "$lib"
|
||||
shift
|
||||
;;
|
||||
-L)
|
||||
eat=1
|
||||
func_cl_dashL "$2"
|
||||
;;
|
||||
-L*)
|
||||
func_cl_dashL "${1#-L}"
|
||||
;;
|
||||
-static)
|
||||
shared=false
|
||||
;;
|
||||
-Wl,*)
|
||||
arg=${1#-Wl,}
|
||||
save_ifs="$IFS"; IFS=','
|
||||
for flag in $arg; do
|
||||
IFS="$save_ifs"
|
||||
linker_opts="$linker_opts $flag"
|
||||
done
|
||||
IFS="$save_ifs"
|
||||
;;
|
||||
-Xlinker)
|
||||
eat=1
|
||||
linker_opts="$linker_opts $2"
|
||||
;;
|
||||
-*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
|
||||
func_file_conv "$1"
|
||||
set x "$@" -Tp"$file"
|
||||
shift
|
||||
;;
|
||||
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
|
||||
func_file_conv "$1" mingw
|
||||
set x "$@" "$file"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
if test -n "$linker_opts"; then
|
||||
linker_opts="-link$linker_opts"
|
||||
fi
|
||||
exec "$@" $linker_opts
|
||||
exit 1
|
||||
}
|
||||
|
||||
eat=
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand '-c -o'.
|
||||
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file 'INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
|
||||
func_cl_wrapper "$@" # Doesn't return...
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as 'compile cc -o foo foo.c'.
|
||||
# So we strip '-o arg' only if arg is an object.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.obj)
|
||||
ofile=$2
|
||||
;;
|
||||
*)
|
||||
set x "$@" -o "$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*.c)
|
||||
cfile=$1
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no '-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# '.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
|
||||
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
|
||||
|
||||
# Create the lock directory.
|
||||
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
|
||||
# that we are using for the .o file. Also, base the name on the expected
|
||||
# object file name, since that is what matters with a parallel build.
|
||||
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
|
||||
while true; do
|
||||
if mkdir "$lockdir" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
# FIXME: race condition here if user kills between mkdir and trap.
|
||||
trap "rmdir '$lockdir'; exit 1" 1 2 15
|
||||
|
||||
# Run the compile.
|
||||
"$@"
|
||||
ret=$?
|
||||
|
||||
if test -f "$cofile"; then
|
||||
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
|
||||
elif test -f "${cofile}bj"; then
|
||||
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
|
||||
fi
|
||||
|
||||
rmdir "$lockdir"
|
||||
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,82 @@
|
|||
dnl Autoconf input for ppcg.
dnl
dnl isl and pet can each be taken from a bundled copy or from the system
dnl (isl additionally from an existing build directory); the choice is
dnl recorded in $with_isl / $with_pet by AX_SUBMODULE.
AC_INIT([ppcg], [0.04], [isl-development@googlegroups.com])
AC_CONFIG_AUX_DIR([.])
AC_CONFIG_MACRO_DIR([m4])
AM_INIT_AUTOMAKE([foreign])
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])

AC_PROG_CC
AC_PROG_LIBTOOL
PKG_PROG_PKG_CONFIG

AX_CHECK_OPENMP
AX_CHECK_OPENCL
dnl The variable is quoted (and x-prefixed) so that an unset or empty
dnl HAVE_OPENCL yields "false" instead of a "test" syntax error.
if test "x$HAVE_OPENCL" = "xyes"; then
	extra_tests="$extra_tests opencl_test.sh"
fi

AX_SUBMODULE(isl,build|bundled|system,bundled)
AM_CONDITIONAL([BUNDLED_ISL], [test "x$with_isl" = "xbundled"])

AC_SUBST(ISL_CFLAGS)
AC_SUBST(ISL_LIBS)
case "$with_isl" in
bundled)
	ISL_CFLAGS="-I\$(top_srcdir)/isl/include -I\$(top_builddir)/isl/include"
	ppcg_configure_args="$ppcg_configure_args --with-isl-builddir=../isl"
	ppcg_configure_args="$ppcg_configure_args --with-isl=build"
	;;
build)
	ISL_BUILDDIR=`echo @abs_builddir@ | $with_isl_builddir/config.status --file=-`
	ppcg_configure_args="$ppcg_configure_args --with-isl-builddir=$ISL_BUILDDIR"
	ISL_CFLAGS="-I$isl_srcdir/include -I$ISL_BUILDDIR/include"
	ISL_LIBS="$with_isl_builddir/libisl.la"
	;;
system)
	PKG_CHECK_MODULES([ISL], [isl])
	;;
esac

AX_SUBMODULE(pet,bundled|system,bundled)
AM_CONDITIONAL([BUNDLED_PET], [test "x$with_pet" = "xbundled"])

AC_SUBST(PET_CFLAGS)
AC_SUBST(PET_LIBS)
case "$with_pet" in
bundled)
	PET_CFLAGS="$PET_CFLAGS -I\$(top_srcdir)/pet/include"
	;;
system)
	PKG_CHECK_MODULES([PET], [pet])
	;;
esac

AC_SUBST(POLYBENCH_DIR)
AC_SUBST(extra_tests)
dnl The PolyBench test is only enabled when the given directory contains
dnl utilities/benchmark_list.
AC_ARG_WITH([polybench],
	[AS_HELP_STRING([--with-polybench=DIR], [PolyBench location])],
	[
	if test -f "$with_polybench/utilities/benchmark_list"; then
		POLYBENCH_DIR=$with_polybench
		extra_tests="$extra_tests polybench_test.sh"
	fi
	])

AX_DETECT_GIT_HEAD
echo '#define GIT_HEAD_ID "'$GIT_HEAD_ID'"' > gitversion.h

AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES([polybench_test.sh], [chmod +x polybench_test.sh])
AC_CONFIG_FILES([opencl_test.sh], [chmod +x opencl_test.sh])
if test "x$with_isl" = "xbundled"; then
	AC_CONFIG_SUBDIRS(isl)
fi
if test "x$with_pet" = "xbundled"; then
	AC_CONFIG_SUBDIRS(pet)
fi
AC_CONFIG_COMMANDS_POST([
	dnl pass on arguments to subdir configures, but don't
	dnl add them to config.status
	ac_configure_args="$ac_configure_args $ppcg_configure_args"
])
AC_OUTPUT
|
||||
|
|
@ -0,0 +1,552 @@
|
|||
/*
|
||||
* Copyright 2012 INRIA Paris-Rocquencourt
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Tobias Grosser, INRIA Paris-Rocquencourt,
|
||||
* Domaine de Voluceau, Rocquenqourt, B.P. 105,
|
||||
* 78153 Le Chesnay Cedex France
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <isl/aff.h>
|
||||
#include <isl/ctx.h>
|
||||
#include <isl/map.h>
|
||||
#include <isl/ast_build.h>
|
||||
#include <pet.h>
|
||||
|
||||
#include "ppcg.h"
|
||||
#include "ppcg_options.h"
|
||||
#include "cpu.h"
|
||||
#include "print.h"
|
||||
|
||||
/* Representation of a statement inside a generated AST.
|
||||
*
|
||||
* "stmt" refers to the original statement.
|
||||
* "ref2expr" maps the reference identifier of each access in
|
||||
* the statement to an AST expression that should be printed
|
||||
* at the place of the access.
|
||||
*/
|
||||
struct ppcg_stmt {
|
||||
struct pet_stmt *stmt;
|
||||
|
||||
isl_id_to_ast_expr *ref2expr;
|
||||
};
|
||||
|
||||
static void ppcg_stmt_free(void *user)
|
||||
{
|
||||
struct ppcg_stmt *stmt = user;
|
||||
int i;
|
||||
|
||||
if (!stmt)
|
||||
return;
|
||||
|
||||
isl_id_to_ast_expr_free(stmt->ref2expr);
|
||||
|
||||
free(stmt);
|
||||
}
|
||||
|
||||
/* Derive the output file name from the input file name.
|
||||
* 'input' is the entire path of the input file. The output
|
||||
* is the file name plus the additional extension.
|
||||
*
|
||||
* We will basically replace everything after the last point
|
||||
* with '.ppcg.c'. This means file.c becomes file.ppcg.c
|
||||
*/
|
||||
static FILE *get_output_file(const char *input, const char *output)
|
||||
{
|
||||
char name[PATH_MAX];
|
||||
const char *ext;
|
||||
const char ppcg_marker[] = ".ppcg";
|
||||
int len;
|
||||
FILE *file;
|
||||
|
||||
len = ppcg_extract_base_name(name, input);
|
||||
|
||||
strcpy(name + len, ppcg_marker);
|
||||
ext = strrchr(input, '.');
|
||||
strcpy(name + len + sizeof(ppcg_marker) - 1, ext ? ext : ".c");
|
||||
|
||||
if (!output)
|
||||
output = name;
|
||||
|
||||
file = fopen(output, "w");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Unable to open '%s' for writing\n", output);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
/* Per-node annotation data attached to "for" nodes in the AST.
 *
 * "is_openmp" is set if the for node is an openmp parallel for node.
 */
struct ast_node_userinfo {
	int is_openmp;
};
|
||||
|
||||
/* State that is carried along while the AST is being built.
 *
 * "scop" is the ppcg scop that is currently being processed.
 * "in_parallel_for" is set while we are inside a parallel for loop.
 */
struct ast_build_userinfo {
	struct ppcg_scop *scop;
	int in_parallel_for;
};
|
||||
|
||||
/* Check if the current scheduling dimension is parallel.
|
||||
*
|
||||
* We check for parallelism by verifying that the loop does not carry any
|
||||
* dependences.
|
||||
* If the live_range_reordering option is set, then this currently
|
||||
* includes the order dependences. In principle, non-zero order dependences
|
||||
* could be allowed, but this would require privatization and/or expansion.
|
||||
*
|
||||
* Parallelism test: if the distance is zero in all outer dimensions, then it
|
||||
* has to be zero in the current dimension as well.
|
||||
* Implementation: first, translate dependences into time space, then force
|
||||
* outer dimensions to be equal. If the distance is zero in the current
|
||||
* dimension, then the loop is parallel.
|
||||
* The distance is zero in the current dimension if it is a subset of a map
|
||||
* with equal values for the current dimension.
|
||||
*/
|
||||
static int ast_schedule_dim_is_parallel(__isl_keep isl_ast_build *build,
|
||||
struct ppcg_scop *scop)
|
||||
{
|
||||
isl_union_map *schedule_node, *schedule, *deps;
|
||||
isl_map *schedule_deps, *test;
|
||||
isl_space *schedule_space;
|
||||
unsigned i, dimension, is_parallel;
|
||||
|
||||
schedule = isl_ast_build_get_schedule(build);
|
||||
schedule_space = isl_ast_build_get_schedule_space(build);
|
||||
|
||||
dimension = isl_space_dim(schedule_space, isl_dim_out) - 1;
|
||||
|
||||
deps = isl_union_map_copy(scop->dep_flow);
|
||||
deps = isl_union_map_union(deps, isl_union_map_copy(scop->dep_false));
|
||||
if (scop->options->live_range_reordering) {
|
||||
isl_union_map *order = isl_union_map_copy(scop->dep_order);
|
||||
deps = isl_union_map_union(deps, order);
|
||||
}
|
||||
deps = isl_union_map_apply_range(deps, isl_union_map_copy(schedule));
|
||||
deps = isl_union_map_apply_domain(deps, schedule);
|
||||
|
||||
if (isl_union_map_is_empty(deps)) {
|
||||
isl_union_map_free(deps);
|
||||
isl_space_free(schedule_space);
|
||||
return 1;
|
||||
}
|
||||
|
||||
schedule_deps = isl_map_from_union_map(deps);
|
||||
|
||||
for (i = 0; i < dimension; i++)
|
||||
schedule_deps = isl_map_equate(schedule_deps, isl_dim_out, i,
|
||||
isl_dim_in, i);
|
||||
|
||||
test = isl_map_universe(isl_map_get_space(schedule_deps));
|
||||
test = isl_map_equate(test, isl_dim_out, dimension, isl_dim_in,
|
||||
dimension);
|
||||
is_parallel = isl_map_is_subset(schedule_deps, test);
|
||||
|
||||
isl_space_free(schedule_space);
|
||||
isl_map_free(test);
|
||||
isl_map_free(schedule_deps);
|
||||
|
||||
return is_parallel;
|
||||
}
|
||||
|
||||
/* Mark a for node openmp parallel, if it is the outermost parallel for node.
|
||||
*/
|
||||
static void mark_openmp_parallel(__isl_keep isl_ast_build *build,
|
||||
struct ast_build_userinfo *build_info,
|
||||
struct ast_node_userinfo *node_info)
|
||||
{
|
||||
if (build_info->in_parallel_for)
|
||||
return;
|
||||
|
||||
if (ast_schedule_dim_is_parallel(build, build_info->scop)) {
|
||||
build_info->in_parallel_for = 1;
|
||||
node_info->is_openmp = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate an ast_node_info structure and initialize it with default values.
|
||||
*/
|
||||
static struct ast_node_userinfo *allocate_ast_node_userinfo()
|
||||
{
|
||||
struct ast_node_userinfo *node_info;
|
||||
node_info = (struct ast_node_userinfo *)
|
||||
malloc(sizeof(struct ast_node_userinfo));
|
||||
node_info->is_openmp = 0;
|
||||
return node_info;
|
||||
}
|
||||
|
||||
/* Release an ast_node_userinfo structure passed as an untyped pointer.
 */
static void free_ast_node_userinfo(void *ptr)
{
	free(ptr);
}
|
||||
|
||||
/* This method is executed before the construction of a for node. It creates
|
||||
* an isl_id that is used to annotate the subsequently generated ast for nodes.
|
||||
*
|
||||
* In this function we also run the following analyses:
|
||||
*
|
||||
* - Detection of openmp parallel loops
|
||||
*/
|
||||
static __isl_give isl_id *ast_build_before_for(
|
||||
__isl_keep isl_ast_build *build, void *user)
|
||||
{
|
||||
isl_id *id;
|
||||
struct ast_build_userinfo *build_info;
|
||||
struct ast_node_userinfo *node_info;
|
||||
|
||||
build_info = (struct ast_build_userinfo *) user;
|
||||
node_info = allocate_ast_node_userinfo();
|
||||
id = isl_id_alloc(isl_ast_build_get_ctx(build), "", node_info);
|
||||
id = isl_id_set_free_user(id, free_ast_node_userinfo);
|
||||
|
||||
mark_openmp_parallel(build, build_info, node_info);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
/* This method is executed after the construction of a for node.
|
||||
*
|
||||
* It performs the following actions:
|
||||
*
|
||||
* - Reset the 'in_parallel_for' flag, as soon as we leave a for node,
|
||||
* that is marked as openmp parallel.
|
||||
*
|
||||
*/
|
||||
static __isl_give isl_ast_node *ast_build_after_for(__isl_take isl_ast_node *node,
|
||||
__isl_keep isl_ast_build *build, void *user) {
|
||||
isl_id *id;
|
||||
struct ast_build_userinfo *build_info;
|
||||
struct ast_node_userinfo *info;
|
||||
|
||||
id = isl_ast_node_get_annotation(node);
|
||||
info = isl_id_get_user(id);
|
||||
|
||||
if (info && info->is_openmp) {
|
||||
build_info = (struct ast_build_userinfo *) user;
|
||||
build_info->in_parallel_for = 0;
|
||||
}
|
||||
|
||||
isl_id_free(id);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Find the element in scop->stmts that has the given "id".
|
||||
*/
|
||||
static struct pet_stmt *find_stmt(struct ppcg_scop *scop, __isl_keep isl_id *id)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < scop->pet->n_stmt; ++i) {
|
||||
struct pet_stmt *stmt = scop->pet->stmts[i];
|
||||
isl_id *id_i;
|
||||
|
||||
id_i = isl_set_get_tuple_id(stmt->domain);
|
||||
isl_id_free(id_i);
|
||||
|
||||
if (id_i == id)
|
||||
return stmt;
|
||||
}
|
||||
|
||||
isl_die(isl_id_get_ctx(id), isl_error_internal,
|
||||
"statement not found", return NULL);
|
||||
}
|
||||
|
||||
/* Print a user statement in the generated AST.
|
||||
* The ppcg_stmt has been attached to the node in at_each_domain.
|
||||
*/
|
||||
static __isl_give isl_printer *print_user(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
struct ppcg_stmt *stmt;
|
||||
isl_id *id;
|
||||
|
||||
id = isl_ast_node_get_annotation(node);
|
||||
stmt = isl_id_get_user(id);
|
||||
isl_id_free(id);
|
||||
|
||||
p = pet_stmt_print_body(stmt->stmt, p, stmt->ref2expr);
|
||||
|
||||
isl_ast_print_options_free(print_options);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/* Print a for loop node as an openmp parallel loop.
|
||||
*
|
||||
* To print an openmp parallel loop we print a normal for loop, but add
|
||||
* "#pragma openmp parallel for" in front.
|
||||
*
|
||||
* Variables that are declared within the body of this for loop are
|
||||
* automatically openmp 'private'. Iterators declared outside of the
|
||||
* for loop are automatically openmp 'shared'. As ppcg declares all iterators
|
||||
* at the position where they are assigned, there is no need to explicitly mark
|
||||
* variables. Their automatically assigned type is already correct.
|
||||
*
|
||||
* This function only generates valid OpenMP code, if the ast was generated
|
||||
* with the 'atomic-bounds' option enabled.
|
||||
*
|
||||
*/
|
||||
static __isl_give isl_printer *print_for_with_openmp(
|
||||
__isl_keep isl_ast_node *node, __isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "#pragma omp parallel for");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_ast_node_for_print(node, p, print_options);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print a for node.
|
||||
*
|
||||
* Depending on how the node is annotated, we either print a normal
|
||||
* for node or an openmp parallel for node.
|
||||
*/
|
||||
static __isl_give isl_printer *print_for(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
struct ppcg_print_info *print_info;
|
||||
isl_id *id;
|
||||
int openmp;
|
||||
|
||||
openmp = 0;
|
||||
id = isl_ast_node_get_annotation(node);
|
||||
|
||||
if (id) {
|
||||
struct ast_node_userinfo *info;
|
||||
|
||||
info = (struct ast_node_userinfo *) isl_id_get_user(id);
|
||||
if (info && info->is_openmp)
|
||||
openmp = 1;
|
||||
}
|
||||
|
||||
if (openmp)
|
||||
p = print_for_with_openmp(node, p, print_options);
|
||||
else
|
||||
p = isl_ast_node_for_print(node, p, print_options);
|
||||
|
||||
isl_id_free(id);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Index transformation callback for pet_stmt_build_ast_exprs.
|
||||
*
|
||||
* "index" expresses the array indices in terms of statement iterators
|
||||
* "iterator_map" expresses the statement iterators in terms of
|
||||
* AST loop iterators.
|
||||
*
|
||||
* The result expresses the array indices in terms of
|
||||
* AST loop iterators.
|
||||
*/
|
||||
static __isl_give isl_multi_pw_aff *pullback_index(
|
||||
__isl_take isl_multi_pw_aff *index, __isl_keep isl_id *id, void *user)
|
||||
{
|
||||
isl_pw_multi_aff *iterator_map = user;
|
||||
|
||||
iterator_map = isl_pw_multi_aff_copy(iterator_map);
|
||||
return isl_multi_pw_aff_pullback_pw_multi_aff(index, iterator_map);
|
||||
}
|
||||
|
||||
/* Transform the accesses in the statement associated to the domain
|
||||
* called by "node" to refer to the AST loop iterators, construct
|
||||
* corresponding AST expressions using "build",
|
||||
* collect them in a ppcg_stmt and annotate the node with the ppcg_stmt.
|
||||
*/
|
||||
static __isl_give isl_ast_node *at_each_domain(__isl_take isl_ast_node *node,
|
||||
__isl_keep isl_ast_build *build, void *user)
|
||||
{
|
||||
struct ppcg_scop *scop = user;
|
||||
isl_ast_expr *expr, *arg;
|
||||
isl_ctx *ctx;
|
||||
isl_id *id;
|
||||
isl_map *map;
|
||||
isl_pw_multi_aff *iterator_map;
|
||||
struct ppcg_stmt *stmt;
|
||||
|
||||
ctx = isl_ast_node_get_ctx(node);
|
||||
stmt = isl_calloc_type(ctx, struct ppcg_stmt);
|
||||
if (!stmt)
|
||||
goto error;
|
||||
|
||||
expr = isl_ast_node_user_get_expr(node);
|
||||
arg = isl_ast_expr_get_op_arg(expr, 0);
|
||||
isl_ast_expr_free(expr);
|
||||
id = isl_ast_expr_get_id(arg);
|
||||
isl_ast_expr_free(arg);
|
||||
stmt->stmt = find_stmt(scop, id);
|
||||
isl_id_free(id);
|
||||
if (!stmt->stmt)
|
||||
goto error;
|
||||
|
||||
map = isl_map_from_union_map(isl_ast_build_get_schedule(build));
|
||||
map = isl_map_reverse(map);
|
||||
iterator_map = isl_pw_multi_aff_from_map(map);
|
||||
stmt->ref2expr = pet_stmt_build_ast_exprs(stmt->stmt, build,
|
||||
&pullback_index, iterator_map, NULL, NULL);
|
||||
isl_pw_multi_aff_free(iterator_map);
|
||||
|
||||
id = isl_id_alloc(isl_ast_node_get_ctx(node), NULL, stmt);
|
||||
id = isl_id_set_free_user(id, &ppcg_stmt_free);
|
||||
return isl_ast_node_set_annotation(node, id);
|
||||
error:
|
||||
ppcg_stmt_free(stmt);
|
||||
return isl_ast_node_free(node);
|
||||
}
|
||||
|
||||
/* Set *depth to the number of scheduling dimensions
|
||||
* for the schedule of the first domain.
|
||||
* We assume here that this number is the same for all domains.
|
||||
*/
|
||||
static isl_stat set_depth(__isl_take isl_map *map, void *user)
|
||||
{
|
||||
unsigned *depth = user;
|
||||
|
||||
*depth = isl_map_dim(map, isl_dim_out);
|
||||
|
||||
isl_map_free(map);
|
||||
return isl_stat_error;
|
||||
}
|
||||
|
||||
/* Code generate the scop 'scop' and print the corresponding C code to 'p'.
|
||||
*/
|
||||
static __isl_give isl_printer *print_scop(struct ppcg_scop *scop,
|
||||
__isl_take isl_printer *p, struct ppcg_options *options)
|
||||
{
|
||||
isl_ctx *ctx = isl_printer_get_ctx(p);
|
||||
isl_set *context;
|
||||
isl_union_set *domain_set;
|
||||
isl_union_map *schedule_map;
|
||||
isl_ast_build *build;
|
||||
isl_ast_print_options *print_options;
|
||||
isl_ast_node *tree;
|
||||
isl_id_list *iterators;
|
||||
struct ast_build_userinfo build_info;
|
||||
int depth;
|
||||
|
||||
context = isl_set_copy(scop->context);
|
||||
domain_set = isl_union_set_copy(scop->domain);
|
||||
schedule_map = isl_schedule_get_map(scop->schedule);
|
||||
schedule_map = isl_union_map_intersect_domain(schedule_map, domain_set);
|
||||
|
||||
isl_union_map_foreach_map(schedule_map, &set_depth, &depth);
|
||||
|
||||
build = isl_ast_build_from_context(context);
|
||||
iterators = ppcg_scop_generate_names(scop, depth, "c");
|
||||
build = isl_ast_build_set_iterators(build, iterators);
|
||||
build = isl_ast_build_set_at_each_domain(build, &at_each_domain, scop);
|
||||
|
||||
if (options->openmp) {
|
||||
build_info.scop = scop;
|
||||
build_info.in_parallel_for = 0;
|
||||
|
||||
build = isl_ast_build_set_before_each_for(build,
|
||||
&ast_build_before_for,
|
||||
&build_info);
|
||||
build = isl_ast_build_set_after_each_for(build,
|
||||
&ast_build_after_for,
|
||||
&build_info);
|
||||
}
|
||||
|
||||
tree = isl_ast_build_node_from_schedule_map(build, schedule_map);
|
||||
isl_ast_build_free(build);
|
||||
|
||||
print_options = isl_ast_print_options_alloc(ctx);
|
||||
print_options = isl_ast_print_options_set_print_user(print_options,
|
||||
&print_user, NULL);
|
||||
|
||||
print_options = isl_ast_print_options_set_print_for(print_options,
|
||||
&print_for, NULL);
|
||||
|
||||
p = ppcg_print_macros(p, tree);
|
||||
p = isl_ast_node_print(tree, p, print_options);
|
||||
|
||||
isl_ast_node_free(tree);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Generate CPU code for the scop "ps" and print the corresponding C code
|
||||
* to "p", including variable declarations.
|
||||
*/
|
||||
__isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
|
||||
struct ppcg_scop *ps, struct ppcg_options *options)
|
||||
{
|
||||
int hidden;
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "/* ppcg generated CPU code */");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
|
||||
p = ppcg_print_exposed_declarations(p, ps);
|
||||
hidden = ppcg_scop_any_hidden_declarations(ps);
|
||||
if (hidden) {
|
||||
p = ppcg_start_block(p);
|
||||
p = ppcg_print_hidden_declarations(p, ps);
|
||||
}
|
||||
if (options->debug->dump_final_schedule)
|
||||
isl_schedule_dump(ps->schedule);
|
||||
p = print_scop(ps, p, options);
|
||||
if (hidden)
|
||||
p = ppcg_end_block(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Wrapper around print_cpu for use as a ppcg_transform callback.
|
||||
*/
|
||||
static __isl_give isl_printer *print_cpu_wrap(__isl_take isl_printer *p,
|
||||
struct ppcg_scop *scop, void *user)
|
||||
{
|
||||
struct ppcg_options *options = user;
|
||||
|
||||
return print_cpu(p, scop, options);
|
||||
}
|
||||
|
||||
/* Transform the code in the file called "input" by replacing
|
||||
* all scops by corresponding CPU code and write the results to a file
|
||||
* called "output".
|
||||
*/
|
||||
int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
|
||||
const char *input, const char *output)
|
||||
{
|
||||
FILE *output_file;
|
||||
int r;
|
||||
|
||||
output_file = get_output_file(input, output);
|
||||
if (!output_file)
|
||||
return -1;
|
||||
|
||||
r = ppcg_transform(ctx, input, output_file, options,
|
||||
&print_cpu_wrap, options);
|
||||
|
||||
fclose(output_file);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
#ifndef _CPU_H
|
||||
#define _CPU_H
|
||||
|
||||
#include <isl/ctx.h>
|
||||
|
||||
#include "ppcg.h"
|
||||
|
||||
struct ppcg_options;
|
||||
|
||||
/* Generate CPU code for the scop "ps" and print the corresponding C code
 * to "p", including variable declarations.
 */
__isl_give isl_printer *print_cpu(__isl_take isl_printer *p,
|
||||
struct ppcg_scop *ps, struct ppcg_options *options);
|
||||
/* Transform the code in the file called "input" by replacing all scops
 * by corresponding CPU code and write the results to a file called "output".
 */
int generate_cpu(isl_ctx *ctx, struct ppcg_options *options,
|
||||
const char *input, const char *output);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,704 @@
|
|||
/*
|
||||
* Copyright 2012 Ecole Normale Superieure
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege,
|
||||
* Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
|
||||
*/
|
||||
|
||||
#include <isl/aff.h>
|
||||
#include <isl/ast.h>
|
||||
|
||||
#include "cuda_common.h"
|
||||
#include "cuda.h"
|
||||
#include "gpu.h"
|
||||
#include "gpu_print.h"
|
||||
#include "print.h"
|
||||
#include "util.h"
|
||||
|
||||
/* Print the definitions of the cudaCheckReturn and cudaCheckKernel
 * macros to "p".
 *
 * In the printed text, cudaCheckReturn reports a CUDA error on stderr
 * and asserts that its argument is cudaSuccess, while cudaCheckKernel
 * applies cudaCheckReturn to cudaGetLastError().
 */
static __isl_give isl_printer *print_cuda_macros(__isl_take isl_printer *p)
{
	static const char macros[] =
		"#define cudaCheckReturn(ret) \\\n"
		" do { \\\n"
		" cudaError_t cudaCheckReturn_e = (ret); \\\n"
		" if (cudaCheckReturn_e != cudaSuccess) { \\\n"
		" fprintf(stderr, \"CUDA error: %s\\n\", "
		"cudaGetErrorString(cudaCheckReturn_e)); \\\n"
		" fflush(stderr); \\\n"
		" } \\\n"
		" assert(cudaCheckReturn_e == cudaSuccess); \\\n"
		" } while(0)\n"
		"#define cudaCheckKernel() \\\n"
		" do { \\\n"
		" cudaCheckReturn(cudaGetLastError()); \\\n"
		" } while(0)\n\n";

	return isl_printer_print_str(p, macros);
}
|
||||
|
||||
/* Print a declaration for the device array corresponding to "array" on "p".
|
||||
*/
|
||||
static __isl_give isl_printer *declare_device_array(__isl_take isl_printer *p,
|
||||
struct gpu_array_info *array)
|
||||
{
|
||||
int i;
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, array->type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
if (!array->linearize && array->n_index > 1)
|
||||
p = isl_printer_print_str(p, "(");
|
||||
p = isl_printer_print_str(p, "*dev_");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
if (!array->linearize && array->n_index > 1) {
|
||||
p = isl_printer_print_str(p, ")");
|
||||
for (i = 1; i < array->n_index; i++) {
|
||||
p = isl_printer_print_str(p, "[");
|
||||
p = isl_printer_print_pw_aff(p, array->bound[i]);
|
||||
p = isl_printer_print_str(p, "]");
|
||||
}
|
||||
}
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print declarations on "p" for the device copies of all arrays of "prog"
 * that require device allocation, followed by an empty line.
 */
static __isl_give isl_printer *declare_device_arrays(__isl_take isl_printer *p,
	struct gpu_prog *prog)
{
	int pos;

	for (pos = 0; pos < prog->n_array; ++pos) {
		struct gpu_array_info *array = &prog->array[pos];

		if (gpu_array_requires_device_allocation(array))
			p = declare_device_array(p, array);
	}

	p = isl_printer_start_line(p);
	return isl_printer_end_line(p);
}
|
||||
|
||||
/* Print a checked cudaMalloc call on "p" for each array of "prog"
 * that requires device allocation, followed by an empty line.
 * The size argument is printed by gpu_array_info_print_size.
 */
static __isl_give isl_printer *allocate_device_arrays(
	__isl_take isl_printer *p, struct gpu_prog *prog)
{
	int pos;

	for (pos = 0; pos < prog->n_array; ++pos) {
		struct gpu_array_info *array = &prog->array[pos];

		if (!gpu_array_requires_device_allocation(array))
			continue;

		p = isl_printer_start_line(p);
		p = isl_printer_print_str(p,
			"cudaCheckReturn(cudaMalloc((void **) &dev_");
		p = isl_printer_print_str(p, array->name);
		p = isl_printer_print_str(p, ", ");
		p = gpu_array_info_print_size(p, array);
		p = isl_printer_print_str(p, "));");
		p = isl_printer_end_line(p);
	}

	p = isl_printer_start_line(p);
	return isl_printer_end_line(p);
}
|
||||
|
||||
/* Print code to "p" for copying "array" from the host to the device
|
||||
* in its entirety. The bounds on the extent of "array" have
|
||||
* been precomputed in extract_array_info and are used in
|
||||
* gpu_array_info_print_size.
|
||||
*/
|
||||
static __isl_give isl_printer *copy_array_to_device(__isl_take isl_printer *p,
|
||||
struct gpu_array_info *array)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(dev_");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
|
||||
if (gpu_array_is_scalar(array))
|
||||
p = isl_printer_print_str(p, "&");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
|
||||
p = gpu_array_info_print_size(p, array);
|
||||
p = isl_printer_print_str(p, ", cudaMemcpyHostToDevice));");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print code to "p" for copying "array" back from the device to the host
|
||||
* in its entirety. The bounds on the extent of "array" have
|
||||
* been precomputed in extract_array_info and are used in
|
||||
* gpu_array_info_print_size.
|
||||
*/
|
||||
static __isl_give isl_printer *copy_array_from_device(
|
||||
__isl_take isl_printer *p, struct gpu_array_info *array)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "cudaCheckReturn(cudaMemcpy(");
|
||||
if (gpu_array_is_scalar(array))
|
||||
p = isl_printer_print_str(p, "&");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
p = isl_printer_print_str(p, ", dev_");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
p = gpu_array_info_print_size(p, array);
|
||||
p = isl_printer_print_str(p, ", cudaMemcpyDeviceToHost));");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the "len" elements of "list" to "out" in reverse order,
 * separated by ", " and enclosed in parentheses.
 * Nothing at all is printed if "len" is zero.
 */
static void print_reverse_list(FILE *out, int len, int *list)
{
	int pos;

	if (len == 0)
		return;

	fprintf(out, "(");
	for (pos = len - 1; pos >= 0; --pos) {
		/* Every element except the first one printed gets a separator. */
		if (pos != len - 1)
			fprintf(out, ", ");
		fprintf(out, "%d", list[pos]);
	}
	fprintf(out, ")");
}
|
||||
|
||||
/* Print the effective grid size as a list of the sizes in each
|
||||
* dimension, from innermost to outermost.
|
||||
*/
|
||||
static __isl_give isl_printer *print_grid_size(__isl_take isl_printer *p,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int i;
|
||||
int dim;
|
||||
|
||||
dim = isl_multi_pw_aff_dim(kernel->grid_size, isl_dim_set);
|
||||
if (dim == 0)
|
||||
return p;
|
||||
|
||||
p = isl_printer_print_str(p, "(");
|
||||
for (i = dim - 1; i >= 0; --i) {
|
||||
isl_pw_aff *bound;
|
||||
|
||||
bound = isl_multi_pw_aff_get_pw_aff(kernel->grid_size, i);
|
||||
p = isl_printer_print_pw_aff(p, bound);
|
||||
isl_pw_aff_free(bound);
|
||||
|
||||
if (i > 0)
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
}
|
||||
|
||||
p = isl_printer_print_str(p, ")");
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the grid definition.
|
||||
*/
|
||||
static __isl_give isl_printer *print_grid(__isl_take isl_printer *p,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "dim3 k");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "_dimGrid");
|
||||
p = print_grid_size(p, kernel);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the arguments to a kernel declaration or call. If "types" is set,
|
||||
* then print a declaration (including the types of the arguments).
|
||||
*
|
||||
* The arguments are printed in the following order
|
||||
* - the arrays accessed by the kernel
|
||||
* - the parameters
|
||||
* - the host loop iterators
|
||||
*/
|
||||
static __isl_give isl_printer *print_kernel_arguments(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog, struct ppcg_kernel *kernel, int types)
|
||||
{
|
||||
int i, n;
|
||||
int first = 1;
|
||||
unsigned nparam;
|
||||
isl_space *space;
|
||||
const char *type;
|
||||
|
||||
for (i = 0; i < prog->n_array; ++i) {
|
||||
int required;
|
||||
|
||||
required = ppcg_kernel_requires_array_argument(kernel, i);
|
||||
if (required < 0)
|
||||
return isl_printer_free(p);
|
||||
if (!required)
|
||||
continue;
|
||||
|
||||
if (!first)
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
|
||||
if (types)
|
||||
p = gpu_array_info_print_declaration_argument(p,
|
||||
&prog->array[i], NULL);
|
||||
else
|
||||
p = gpu_array_info_print_call_argument(p,
|
||||
&prog->array[i]);
|
||||
|
||||
first = 0;
|
||||
}
|
||||
|
||||
space = isl_union_set_get_space(kernel->arrays);
|
||||
nparam = isl_space_dim(space, isl_dim_param);
|
||||
for (i = 0; i < nparam; ++i) {
|
||||
const char *name;
|
||||
|
||||
name = isl_space_get_dim_name(space, isl_dim_param, i);
|
||||
|
||||
if (!first)
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
if (types)
|
||||
p = isl_printer_print_str(p, "int ");
|
||||
p = isl_printer_print_str(p, name);
|
||||
|
||||
first = 0;
|
||||
}
|
||||
isl_space_free(space);
|
||||
|
||||
n = isl_space_dim(kernel->space, isl_dim_set);
|
||||
type = isl_options_get_ast_iterator_type(prog->ctx);
|
||||
for (i = 0; i < n; ++i) {
|
||||
const char *name;
|
||||
|
||||
if (!first)
|
||||
p = isl_printer_print_str(p, ", ");
|
||||
name = isl_space_get_dim_name(kernel->space, isl_dim_set, i);
|
||||
if (types) {
|
||||
p = isl_printer_print_str(p, type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
}
|
||||
p = isl_printer_print_str(p, name);
|
||||
|
||||
first = 0;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the header of the given kernel.
|
||||
*/
|
||||
static __isl_give isl_printer *print_kernel_header(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog, struct ppcg_kernel *kernel)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "__global__ void kernel");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "(");
|
||||
p = print_kernel_arguments(p, prog, kernel, 1);
|
||||
p = isl_printer_print_str(p, ")");
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the header of the given kernel to both gen->cuda.kernel_h
|
||||
* and gen->cuda.kernel_c.
|
||||
*/
|
||||
static void print_kernel_headers(struct gpu_prog *prog,
|
||||
struct ppcg_kernel *kernel, struct cuda_info *cuda)
|
||||
{
|
||||
isl_printer *p;
|
||||
|
||||
p = isl_printer_to_file(prog->ctx, cuda->kernel_h);
|
||||
p = isl_printer_set_output_format(p, ISL_FORMAT_C);
|
||||
p = print_kernel_header(p, prog, kernel);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
isl_printer_free(p);
|
||||
|
||||
p = isl_printer_to_file(prog->ctx, cuda->kernel_c);
|
||||
p = isl_printer_set_output_format(p, ISL_FORMAT_C);
|
||||
p = print_kernel_header(p, prog, kernel);
|
||||
p = isl_printer_end_line(p);
|
||||
isl_printer_free(p);
|
||||
}
|
||||
|
||||
/* Print an empty string padded to a field width of "indent" to "dst",
 * i.e., "indent" spaces for a non-negative "indent".
 */
static void print_indent(FILE *dst, int indent)
{
	const char *nothing = "";

	fprintf(dst, "%*s", indent, nothing);
}
|
||||
|
||||
/* Print a list of iterators of type "type" with names "ids" to "out".
|
||||
* Each iterator is assigned one of the cuda identifiers in cuda_dims.
|
||||
* In particular, the last iterator is assigned the x identifier
|
||||
* (the first in the list of cuda identifiers).
|
||||
*/
|
||||
static void print_iterators(FILE *out, const char *type,
|
||||
__isl_keep isl_id_list *ids, const char *cuda_dims[])
|
||||
{
|
||||
int i, n;
|
||||
|
||||
n = isl_id_list_n_id(ids);
|
||||
if (n <= 0)
|
||||
return;
|
||||
print_indent(out, 4);
|
||||
fprintf(out, "%s ", type);
|
||||
for (i = 0; i < n; ++i) {
|
||||
isl_id *id;
|
||||
|
||||
if (i)
|
||||
fprintf(out, ", ");
|
||||
id = isl_id_list_get_id(ids, i);
|
||||
fprintf(out, "%s = %s", isl_id_get_name(id),
|
||||
cuda_dims[n - 1 - i]);
|
||||
isl_id_free(id);
|
||||
}
|
||||
fprintf(out, ";\n");
|
||||
}
|
||||
|
||||
static void print_kernel_iterators(FILE *out, struct ppcg_kernel *kernel)
|
||||
{
|
||||
isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
|
||||
const char *type;
|
||||
const char *block_dims[] = { "blockIdx.x", "blockIdx.y" };
|
||||
const char *thread_dims[] = { "threadIdx.x", "threadIdx.y",
|
||||
"threadIdx.z" };
|
||||
|
||||
type = isl_options_get_ast_iterator_type(ctx);
|
||||
|
||||
print_iterators(out, type, kernel->block_ids, block_dims);
|
||||
print_iterators(out, type, kernel->thread_ids, thread_dims);
|
||||
}
|
||||
|
||||
/* Print a declaration of the kernel variable "var" on "p".
 *
 * The declaration consists of the element type of the corresponding
 * array, the name of the variable and one bracketed size
 * (taken from var->size) per index of the array.
 * It is prefixed by "__shared__ " if the variable is
 * of type ppcg_access_shared.
 */
static __isl_give isl_printer *print_kernel_var(__isl_take isl_printer *p,
	struct ppcg_kernel_var *var)
{
	int dim;

	p = isl_printer_start_line(p);
	if (var->type == ppcg_access_shared)
		p = isl_printer_print_str(p, "__shared__ ");
	p = isl_printer_print_str(p, var->array->type);
	p = isl_printer_print_str(p, " ");
	p = isl_printer_print_str(p, var->name);
	for (dim = 0; dim < var->array->n_index; ++dim) {
		isl_val *extent = isl_vec_get_element_val(var->size, dim);

		p = isl_printer_print_str(p, "[");
		p = isl_printer_print_val(p, extent);
		isl_val_free(extent);
		p = isl_printer_print_str(p, "]");
	}
	p = isl_printer_print_str(p, ";");

	return isl_printer_end_line(p);
}
|
||||
|
||||
/* Print declarations of all the variables of "kernel" on "p",
 * in the order in which they appear in kernel->var.
 */
static __isl_give isl_printer *print_kernel_vars(__isl_take isl_printer *p,
	struct ppcg_kernel *kernel)
{
	int pos = 0;

	while (pos < kernel->n_var) {
		p = print_kernel_var(p, &kernel->var[pos]);
		++pos;
	}

	return p;
}
|
||||
|
||||
/* Print a sync statement.
|
||||
*/
|
||||
static __isl_give isl_printer *print_sync(__isl_take isl_printer *p,
|
||||
struct ppcg_kernel_stmt *stmt)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "__syncthreads();");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* This function is called for each user statement in the AST,
|
||||
* i.e., for each kernel body statement, copy statement or sync statement.
|
||||
*/
|
||||
static __isl_give isl_printer *print_kernel_stmt(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
isl_id *id;
|
||||
struct ppcg_kernel_stmt *stmt;
|
||||
|
||||
id = isl_ast_node_get_annotation(node);
|
||||
stmt = isl_id_get_user(id);
|
||||
isl_id_free(id);
|
||||
|
||||
isl_ast_print_options_free(print_options);
|
||||
|
||||
switch (stmt->type) {
|
||||
case ppcg_kernel_copy:
|
||||
return ppcg_kernel_print_copy(p, stmt);
|
||||
case ppcg_kernel_sync:
|
||||
return print_sync(p, stmt);
|
||||
case ppcg_kernel_domain:
|
||||
return ppcg_kernel_print_domain(p, stmt);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static void print_kernel(struct gpu_prog *prog, struct ppcg_kernel *kernel,
|
||||
struct cuda_info *cuda)
|
||||
{
|
||||
isl_ctx *ctx = isl_ast_node_get_ctx(kernel->tree);
|
||||
isl_ast_print_options *print_options;
|
||||
isl_printer *p;
|
||||
|
||||
print_kernel_headers(prog, kernel, cuda);
|
||||
fprintf(cuda->kernel_c, "{\n");
|
||||
print_kernel_iterators(cuda->kernel_c, kernel);
|
||||
|
||||
p = isl_printer_to_file(ctx, cuda->kernel_c);
|
||||
p = isl_printer_set_output_format(p, ISL_FORMAT_C);
|
||||
p = isl_printer_indent(p, 4);
|
||||
|
||||
p = print_kernel_vars(p, kernel);
|
||||
p = isl_printer_end_line(p);
|
||||
p = isl_ast_op_type_print_macro(isl_ast_op_fdiv_q, p);
|
||||
p = ppcg_print_macros(p, kernel->tree);
|
||||
|
||||
print_options = isl_ast_print_options_alloc(ctx);
|
||||
print_options = isl_ast_print_options_set_print_user(print_options,
|
||||
&print_kernel_stmt, NULL);
|
||||
p = isl_ast_node_print(kernel->tree, p, print_options);
|
||||
isl_printer_free(p);
|
||||
|
||||
fprintf(cuda->kernel_c, "}\n");
|
||||
}
|
||||
|
||||
/* Print a statement for copying an array to or from the device.
|
||||
* The statement identifier is called "to_device_<array name>" or
|
||||
* "from_device_<array name>" and its user pointer points
|
||||
* to the gpu_array_info of the array that needs to be copied.
|
||||
*
|
||||
* Extract the array from the identifier and call
|
||||
* copy_array_to_device or copy_array_from_device.
|
||||
*/
|
||||
static __isl_give isl_printer *print_to_from_device(__isl_take isl_printer *p,
|
||||
__isl_keep isl_ast_node *node, struct gpu_prog *prog)
|
||||
{
|
||||
isl_ast_expr *expr, *arg;
|
||||
isl_id *id;
|
||||
const char *name;
|
||||
struct gpu_array_info *array;
|
||||
|
||||
expr = isl_ast_node_user_get_expr(node);
|
||||
arg = isl_ast_expr_get_op_arg(expr, 0);
|
||||
id = isl_ast_expr_get_id(arg);
|
||||
name = isl_id_get_name(id);
|
||||
array = isl_id_get_user(id);
|
||||
isl_id_free(id);
|
||||
isl_ast_expr_free(arg);
|
||||
isl_ast_expr_free(expr);
|
||||
|
||||
if (!name)
|
||||
array = NULL;
|
||||
if (!array)
|
||||
return isl_printer_free(p);
|
||||
|
||||
if (!prefixcmp(name, "to_device"))
|
||||
return copy_array_to_device(p, array);
|
||||
else
|
||||
return copy_array_from_device(p, array);
|
||||
}
|
||||
|
||||
/* User data passed to the print_host_user callback by print_host_code.
 * The order of the fields matters: print_host_code initializes
 * this structure positionally.
 */
struct print_host_user_data {
|
||||
struct cuda_info *cuda; /* passed on to print_kernel */
|
||||
struct gpu_prog *prog; /* program whose host code is being printed */
|
||||
};
|
||||
|
||||
/* Print the user statement of the host code to "p".
|
||||
*
|
||||
* The host code may contain original user statements, kernel launches and
|
||||
* statements that copy data to/from the device.
|
||||
* The original user statements and the kernel launches have
|
||||
* an associated annotation, while the data copy statements do not.
|
||||
* The latter are handled by print_to_from_device.
|
||||
* The annotation on the user statements is called "user".
|
||||
*
|
||||
* In case of a kernel launch, print a block of statements that
|
||||
* defines the grid and the block and then launches the kernel.
|
||||
*/
|
||||
static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *print_options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
isl_id *id;
|
||||
int is_user;
|
||||
struct ppcg_kernel *kernel;
|
||||
struct ppcg_kernel_stmt *stmt;
|
||||
struct print_host_user_data *data;
|
||||
|
||||
isl_ast_print_options_free(print_options);
|
||||
|
||||
data = (struct print_host_user_data *) user;
|
||||
|
||||
id = isl_ast_node_get_annotation(node);
|
||||
if (!id)
|
||||
return print_to_from_device(p, node, data->prog);
|
||||
|
||||
is_user = !strcmp(isl_id_get_name(id), "user");
|
||||
kernel = is_user ? NULL : isl_id_get_user(id);
|
||||
stmt = is_user ? isl_id_get_user(id) : NULL;
|
||||
isl_id_free(id);
|
||||
|
||||
if (is_user)
|
||||
return ppcg_kernel_print_domain(p, stmt);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "{");
|
||||
p = isl_printer_end_line(p);
|
||||
p = isl_printer_indent(p, 2);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "dim3 k");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "_dimBlock");
|
||||
print_reverse_list(isl_printer_get_file(p),
|
||||
kernel->n_block, kernel->block_dim);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = print_grid(p, kernel);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "kernel");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, " <<<k");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "_dimGrid, k");
|
||||
p = isl_printer_print_int(p, kernel->id);
|
||||
p = isl_printer_print_str(p, "_dimBlock>>> (");
|
||||
p = print_kernel_arguments(p, data->prog, kernel, 0);
|
||||
p = isl_printer_print_str(p, ");");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "cudaCheckKernel();");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_printer_indent(p, -2);
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, "}");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
print_kernel(data->prog, kernel, data->cuda);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the host code corresponding to "tree" on "p".
 *
 * The macro definitions needed by "tree" are printed first, followed by
 * the AST itself, with the user statements handled by print_host_user.
 * "cuda" and "prog" are made available to that callback.
 */
static __isl_give isl_printer *print_host_code(__isl_take isl_printer *p,
	struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
	struct cuda_info *cuda)
{
	struct print_host_user_data data;
	isl_ast_print_options *options;

	data.cuda = cuda;
	data.prog = prog;

	options = isl_ast_print_options_alloc(isl_ast_node_get_ctx(tree));
	options = isl_ast_print_options_set_print_user(options,
						&print_host_user, &data);

	p = ppcg_print_macros(p, tree);

	return isl_ast_node_print(tree, p, options);
}
|
||||
|
||||
/* Print a checked cudaFree call on "p" for the device copy of
 * each array of "prog" that requires device allocation.
 */
static __isl_give isl_printer *free_device_arrays(__isl_take isl_printer *p,
	struct gpu_prog *prog)
{
	int pos;

	for (pos = 0; pos < prog->n_array; ++pos) {
		struct gpu_array_info *array = &prog->array[pos];

		if (!gpu_array_requires_device_allocation(array))
			continue;

		p = isl_printer_start_line(p);
		p = isl_printer_print_str(p, "cudaCheckReturn(cudaFree(dev_");
		p = isl_printer_print_str(p, array->name);
		p = isl_printer_print_str(p, "));");
		p = isl_printer_end_line(p);
	}

	return p;
}
|
||||
|
||||
/* Given a gpu_prog "prog" and the corresponding transformed AST
|
||||
* "tree", print the entire CUDA code to "p".
|
||||
* "types" collects the types for which a definition has already
|
||||
* been printed.
|
||||
*/
|
||||
static __isl_give isl_printer *print_cuda(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
|
||||
struct gpu_types *types, void *user)
|
||||
{
|
||||
struct cuda_info *cuda = user;
|
||||
isl_printer *kernel;
|
||||
|
||||
kernel = isl_printer_to_file(isl_printer_get_ctx(p), cuda->kernel_c);
|
||||
kernel = isl_printer_set_output_format(kernel, ISL_FORMAT_C);
|
||||
kernel = gpu_print_types(kernel, types, prog);
|
||||
isl_printer_free(kernel);
|
||||
|
||||
if (!kernel)
|
||||
return isl_printer_free(p);
|
||||
|
||||
p = ppcg_start_block(p);
|
||||
|
||||
p = print_cuda_macros(p);
|
||||
|
||||
p = gpu_print_local_declarations(p, prog);
|
||||
p = declare_device_arrays(p, prog);
|
||||
p = allocate_device_arrays(p, prog);
|
||||
|
||||
p = print_host_code(p, prog, tree, cuda);
|
||||
|
||||
p = free_device_arrays(p, prog);
|
||||
|
||||
p = ppcg_end_block(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Transform the code in the file called "input" by replacing
|
||||
* all scops by corresponding CUDA code.
|
||||
* The names of the output files are derived from "input".
|
||||
*
|
||||
* We let generate_gpu do all the hard work and then let it call
|
||||
* us back for printing the AST in print_cuda.
|
||||
*
|
||||
* To prepare for this printing, we first open the output files
|
||||
* and we close them after generate_gpu has finished.
|
||||
*/
|
||||
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
|
||||
const char *input)
|
||||
{
|
||||
struct cuda_info cuda;
|
||||
int r;
|
||||
|
||||
cuda_open_files(&cuda, input);
|
||||
|
||||
r = generate_gpu(ctx, input, cuda.host_c, options, &print_cuda, &cuda);
|
||||
|
||||
cuda_close_files(&cuda);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef _CUDA_H
|
||||
#define _CUDA_H
|
||||
|
||||
#include "ppcg_options.h"
|
||||
#include "ppcg.h"
|
||||
|
||||
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
|
||||
const char *input);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright 2010 INRIA Saclay
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
|
||||
* Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
|
||||
* 91893 Orsay, France
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cuda_common.h"
|
||||
#include "ppcg.h"
|
||||
|
||||
/* Open the host .cu file and the kernel .hu and .cu files for writing.
|
||||
* Add the necessary includes.
|
||||
*/
|
||||
void cuda_open_files(struct cuda_info *info, const char *input)
|
||||
{
|
||||
char name[PATH_MAX];
|
||||
int len;
|
||||
|
||||
len = ppcg_extract_base_name(name, input);
|
||||
|
||||
strcpy(name + len, "_host.cu");
|
||||
info->host_c = fopen(name, "w");
|
||||
|
||||
strcpy(name + len, "_kernel.cu");
|
||||
info->kernel_c = fopen(name, "w");
|
||||
|
||||
strcpy(name + len, "_kernel.hu");
|
||||
info->kernel_h = fopen(name, "w");
|
||||
fprintf(info->host_c, "#include <assert.h>\n");
|
||||
fprintf(info->host_c, "#include <stdio.h>\n");
|
||||
fprintf(info->host_c, "#include \"%s\"\n", name);
|
||||
fprintf(info->kernel_c, "#include \"%s\"\n", name);
|
||||
fprintf(info->kernel_h, "#include \"cuda.h\"\n\n");
|
||||
}
|
||||
|
||||
/* Close all output files.
|
||||
*/
|
||||
void cuda_close_files(struct cuda_info *info)
|
||||
{
|
||||
fclose(info->kernel_c);
|
||||
fclose(info->kernel_h);
|
||||
fclose(info->host_c);
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
#ifndef _CUDA_COMMON_H_
|
||||
#define _CUDA_COMMON_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* The output files of the CUDA code generation.
 * They are opened by cuda_open_files and closed by cuda_close_files.
 */
struct cuda_info {
	/* The host file ("_host.cu" suffix). */
	FILE *host_c;
	/* The kernel source file ("_kernel.cu" suffix). */
	FILE *kernel_c;
	/* The kernel header file ("_kernel.hu" suffix). */
	FILE *kernel_h;
};
|
||||
|
||||
void cuda_open_files(struct cuda_info *info, const char *input);
|
||||
void cuda_close_files(struct cuda_info *info);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,708 @@
|
|||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2012-03-27.16; # UTC
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
|
||||
# 2011, 2012 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by 'PROGRAMS ARGS'.
|
||||
object Object file output by 'PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputting dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
# A tabulation character.
|
||||
tab=' '
|
||||
# A newline character.
|
||||
nl='
|
||||
'
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
if test "$depmode" = msvc7msys; then
|
||||
# This is just like msvc7 but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u='sed s,\\\\,/,g'
|
||||
depmode=msvc7
|
||||
fi
|
||||
|
||||
if test "$depmode" = xlc; then
|
||||
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||
gccflag=-qmakedep=gcc,-MF
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say).
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||
## The second -e expression handles DOS-style file names with drive letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the "deleted header file" problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
tr ' ' "$nl" < "$tmpdepfile" |
|
||||
## Some versions of gcc put a space before the ':'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||
## to the object. Take care to not repeat it in the output.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||
| sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
||||
tr "$nl" ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' "$nl" < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
xlc)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form 'foo.o: dependent.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
icc)
|
||||
# Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
|
||||
# However on
|
||||
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||
# ICC 7.0 will fill foo.d with something like
|
||||
# foo.o: sub/foo.c
|
||||
# foo.o: sub/foo.h
|
||||
# which is wrong. We want
|
||||
# sub/foo.o: sub/foo.c
|
||||
# sub/foo.o: sub/foo.h
|
||||
# sub/foo.c:
|
||||
# sub/foo.h:
|
||||
# ICC 7.1 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using '\':
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
# tcc 0.9.26 (FIXME still under development at the moment of writing)
|
||||
# will emit a similar output, but also prepend the continuation lines
|
||||
# with horizontal tabulation characters.
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form 'foo.o: dependent.h',
|
||||
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
|
||||
< "$tmpdepfile" > "$depfile"
|
||||
sed '
|
||||
s/[ '"$tab"'][ '"$tab"']*/ /g
|
||||
s/^ *//
|
||||
s/ *\\*$//
|
||||
s/^[^:]*: *//
|
||||
/^$/d
|
||||
/:$/d
|
||||
s/$/ :/
|
||||
' < "$tmpdepfile" >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add 'dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in 'foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# With Tru64 cc, shared objects can also be used to make a
|
||||
# static library. This mechanism is used in libtool 1.4 series to
|
||||
# handle both shared and static libraries in a single compilation.
|
||||
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
||||
#
|
||||
# With libtool 1.5 this exception was removed, and libtool now
|
||||
# generates 2 separate objects for the 2 libraries. These two
|
||||
# compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
||||
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
||||
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.o.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
tmpdepfile4=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7)
|
||||
if test "$libtool" = yes; then
|
||||
showIncludes=-Wc,-showIncludes
|
||||
else
|
||||
showIncludes=-showIncludes
|
||||
fi
|
||||
"$@" $showIncludes > "$tmpdepfile"
|
||||
stat=$?
|
||||
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||
if test "$stat" = 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
# The first sed program below extracts the file names and escapes
|
||||
# backslashes for cygpath. The second sed program outputs the file
|
||||
# name when reading, but also accumulates all include files in the
|
||||
# hold buffer in order to output them again at the end. This only
|
||||
# works with sed implementations that can handle large buffers.
|
||||
sed < "$tmpdepfile" -n '
|
||||
/^Note: including file: *\(.*\)/ {
|
||||
s//\1/
|
||||
s/\\/\\\\/g
|
||||
p
|
||||
}' | $cygpath_u | sort -u | sed -n '
|
||||
s/ /\\ /g
|
||||
s/\(.*\)/'"$tab"'\1 \\/p
|
||||
s/.\(.*\) \\/\1:/
|
||||
H
|
||||
$ {
|
||||
s/.*/'"$tab"'/
|
||||
G
|
||||
p
|
||||
}' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvc7msys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for ':'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
tr ' ' "$nl" < "$tmpdepfile" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
# makedepend may prepend the VPATH from the source file name to the object.
|
||||
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove '-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E |
|
||||
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
||||
sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||
echo "$tab" >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/* Dummy definitions of the pet functions and of the ppcg functions
 * that are referenced by the ppcg sources but whose implementation
 * is not included here.  They only exist to make ppcg link.
 *
 * NOTE(review): none of these definitions takes arguments or
 * returns a value, while callers such as "p = ppcg_start_block(p)"
 * expect otherwise; presumably they are never called at run time.
 */

/* ppcg printing helpers. */
void ppcg_start_block(void) {}
void ppcg_end_block(void) {}
void ppcg_print_macros(void) {}
void ppcg_print_guarded(void) {}
void ppcg_print_declaration(void) {}
void ppcg_print_exposed_declarations(void) {}

/* Other ppcg entry points. */
void ppcg_version(void) {}
void print_cpu(void) {}
void generate_cpu(void) {}
void generate_opencl(void) {}

/* pet_scop. */
void pet_scop_free(void) {}
void pet_scop_align_params(void) {}
void pet_scop_print_original(void) {}
void pet_scop_can_build_ast_exprs(void) {}
void pet_scop_has_data_dependent_conditions(void) {}
void pet_scop_compute_outer_to_any(void) {}
void pet_scop_compute_outer_to_inner(void) {}
void pet_scop_collect_may_reads(void) {}
void pet_scop_collect_may_writes(void) {}
void pet_scop_collect_must_writes(void) {}
void pet_scop_collect_tagged_may_reads(void) {}
void pet_scop_collect_tagged_may_writes(void) {}
void pet_scop_collect_tagged_must_writes(void) {}
void pet_scop_collect_tagged_must_kills(void) {}

/* pet_tree. */
void pet_tree_get_type(void) {}
void pet_tree_foreach_expr(void) {}
void pet_tree_foreach_access_expr(void) {}

/* pet_expr. */
void pet_expr_get_ctx(void) {}
void pet_expr_foreach_call_expr(void) {}
void pet_expr_access_is_read(void) {}
void pet_expr_access_is_write(void) {}
void pet_expr_access_get_index(void) {}
void pet_expr_access_get_ref_id(void) {}
void pet_expr_access_get_must_write(void) {}
void pet_expr_access_get_tagged_may_read(void) {}
void pet_expr_access_get_tagged_may_write(void) {}

/* pet_stmt and pet_loc. */
void pet_stmt_is_kill(void) {}
void pet_stmt_print_body(void) {}
void pet_stmt_build_ast_exprs(void) {}
void pet_loc_get_start(void) {}
void pet_loc_get_end(void) {}

/* pet options and source transformation. */
void pet_options_args(void) {}
void pet_options_set_encapsulate_dynamic_control(void) {}
void pet_transform_C_source(void) {}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,355 @@
|
|||
#ifndef _GPU_H
|
||||
#define _GPU_H
|
||||
|
||||
#include <isl/ast.h>
|
||||
#include <isl/id_to_ast_expr.h>
|
||||
|
||||
#include "ppcg.h"
|
||||
#include "ppcg_options.h"
|
||||
|
||||
/* Represents an outer array possibly accessed by a gpu_prog.
|
||||
*/
|
||||
struct gpu_array_info {
|
||||
/* The array data space. */
|
||||
isl_space *space;
|
||||
/* Element type. */
|
||||
char *type;
|
||||
/* Element size. */
|
||||
int size;
|
||||
/* Name of the array. */
|
||||
char *name;
|
||||
/* Extent of the array that needs to be copied. */
|
||||
isl_set *extent;
|
||||
/* Number of indices. */
|
||||
unsigned n_index;
|
||||
/* For each index, a bound on "extent" in that direction. */
|
||||
isl_pw_aff **bound;
|
||||
|
||||
/* All references to this array; point to elements of a linked list. */
|
||||
int n_ref;
|
||||
struct gpu_stmt_access **refs;
|
||||
|
||||
/* Is this array accessed at all by the program? */
|
||||
int accessed;
|
||||
|
||||
/* Is this a scalar that is read-only within the entire program? */
|
||||
int read_only_scalar;
|
||||
|
||||
/* Are the elements of the array structures? */
|
||||
int has_compound_element;
|
||||
|
||||
/* Is the array local to the scop? */
|
||||
int local;
|
||||
/* Is the array local and should it be declared on the host? */
|
||||
int declare_local;
|
||||
|
||||
/* Is the corresponding global device memory accessed in any way? */
|
||||
int global;
|
||||
|
||||
/* Should the array be linearized? */
|
||||
int linearize;
|
||||
|
||||
/* Order dependences on this array.
|
||||
* Only used if live_range_reordering option is set.
|
||||
* It is set to NULL otherwise.
|
||||
*/
|
||||
isl_union_map *dep_order;
|
||||
};
|
||||
|
||||
/* Represents an outer array accessed by a ppcg_kernel, localized
|
||||
* to the context of this kernel.
|
||||
*
|
||||
* "array" points to the corresponding array in the gpu_prog.
|
||||
* The "n_group" "groups" are the reference groups associated to the array.
|
||||
* If "force_private" is set, then the array (in practice a scalar)
|
||||
* must be mapped to a register.
|
||||
* "global" is set if the global device memory corresponding
|
||||
* to this array is accessed by the kernel.
|
||||
* For each index i with 0 <= i < n_index,
|
||||
* bound[i] is equal to array->bound[i] specialized to the current kernel.
|
||||
*/
|
||||
struct gpu_local_array_info {
|
||||
struct gpu_array_info *array;
|
||||
|
||||
int n_group;
|
||||
struct gpu_array_ref_group **groups;
|
||||
|
||||
int force_private;
|
||||
int global;
|
||||
|
||||
unsigned n_index;
|
||||
isl_pw_aff_list *bound;
|
||||
};
|
||||
|
||||
/* Defined elsewhere.  Takes ownership of "expr" and returns
 * an expression owned by the caller.
 * NOTE(review): presumably the result is the linearized form of
 * the index expression "expr" for "array" -- confirm against the definition.
 */
__isl_give isl_ast_expr *gpu_local_array_info_linearize_index(
	struct gpu_local_array_info *array,
	__isl_take isl_ast_expr *expr);
|
||||
|
||||
/* A list of type names.
 */
struct gpu_types {
	/* Number of entries in "name". */
	int n;
	/* The "n" type names. */
	char **name;
};
|
||||
|
||||
/* "read" and "write" contain the original access relations, possibly
|
||||
* involving member accesses.
|
||||
*
|
||||
* The elements of "array", as well as the ranges of "copy_in" and "copy_out"
|
||||
* only refer to the outer arrays of any possible member accesses.
|
||||
*/
|
||||
struct gpu_prog {
|
||||
isl_ctx *ctx;
|
||||
|
||||
struct ppcg_scop *scop;
|
||||
|
||||
/* Set of parameter values */
|
||||
isl_set *context;
|
||||
|
||||
/* All potential read accesses in the entire program */
|
||||
isl_union_map *read;
|
||||
|
||||
/* All potential write accesses in the entire program */
|
||||
isl_union_map *may_write;
|
||||
/* All definite write accesses in the entire program */
|
||||
isl_union_map *must_write;
|
||||
/* All tagged definite kills in the entire program */
|
||||
isl_union_map *tagged_must_kill;
|
||||
|
||||
/* The set of inner array elements that may be preserved. */
|
||||
isl_union_set *may_persist;
|
||||
|
||||
/* A mapping from all innermost arrays to their outer arrays. */
|
||||
isl_union_map *to_outer;
|
||||
/* A mapping from the outer arrays to all corresponding inner arrays. */
|
||||
isl_union_map *to_inner;
|
||||
/* A mapping from all intermediate arrays to their outer arrays,
|
||||
* including an identity mapping from the anoymous 1D space to itself.
|
||||
*/
|
||||
isl_union_map *any_to_outer;
|
||||
|
||||
/* Order dependences on non-scalars. */
|
||||
isl_union_map *array_order;
|
||||
|
||||
/* Array of statements */
|
||||
int n_stmts;
|
||||
struct gpu_stmt *stmts;
|
||||
|
||||
int n_array;
|
||||
struct gpu_array_info *array;
|
||||
};
|
||||
|
||||
struct gpu_gen {
|
||||
isl_ctx *ctx;
|
||||
struct ppcg_options *options;
|
||||
|
||||
/* Callback for printing of AST in appropriate format. */
|
||||
__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
|
||||
struct gpu_types *types, void *user);
|
||||
void *print_user;
|
||||
|
||||
struct gpu_prog *prog;
|
||||
/* The generated AST. */
|
||||
isl_ast_node *tree;
|
||||
|
||||
/* The sequence of types for which a definition has been printed. */
|
||||
struct gpu_types types;
|
||||
|
||||
/* User specified tile, grid and block sizes for each kernel */
|
||||
isl_union_map *sizes;
|
||||
|
||||
/* Effectively used tile, grid and block sizes for each kernel */
|
||||
isl_union_map *used_sizes;
|
||||
|
||||
/* Identifier of the next kernel. */
|
||||
int kernel_id;
|
||||
};
|
||||
|
||||
/* Kind of memory through which a kernel variable is accessed.
 * The enumerators keep their implicit values 0, 1 and 2.
 */
enum ppcg_kernel_access_type {
	ppcg_access_global = 0,
	ppcg_access_shared = 1,
	ppcg_access_private = 2
};
|
||||
|
||||
/* Kind of statement inside a kernel: a copy statement,
 * a domain (input) statement or a synchronization statement.
 * The enumerators keep their implicit values 0, 1 and 2.
 */
enum ppcg_kernel_stmt_type {
	ppcg_kernel_copy = 0,
	ppcg_kernel_domain = 1,
	ppcg_kernel_sync = 2
};
|
||||
|
||||
/* Representation of special statements, in particular copy statements
|
||||
* and __syncthreads statements, inside a kernel.
|
||||
*
|
||||
* type represents the kind of statement
|
||||
*
|
||||
*
|
||||
* for ppcg_kernel_copy statements we have
|
||||
*
|
||||
* read is set if the statement should copy data from global memory
|
||||
* to shared memory or registers.
|
||||
*
|
||||
* index expresses an access to the array element that needs to be copied
|
||||
* local_index expresses the corresponding element in the tile
|
||||
*
|
||||
* array refers to the original array being copied
|
||||
* local_array is a pointer to the appropriate element in the "array"
|
||||
* array of the ppcg_kernel to which this copy access belongs
|
||||
*
|
||||
*
|
||||
* for ppcg_kernel_domain statements we have
|
||||
*
|
||||
* stmt is the corresponding input statement
|
||||
*
|
||||
* n_access is the number of accesses in stmt
|
||||
* access is an array of local information about the accesses
|
||||
*/
|
||||
struct ppcg_kernel_stmt {
|
||||
enum ppcg_kernel_stmt_type type;
|
||||
|
||||
union {
|
||||
struct {
|
||||
int read;
|
||||
isl_ast_expr *index;
|
||||
isl_ast_expr *local_index;
|
||||
struct gpu_array_info *array;
|
||||
struct gpu_local_array_info *local_array;
|
||||
} c;
|
||||
struct {
|
||||
struct gpu_stmt *stmt;
|
||||
isl_id_to_ast_expr *ref2expr;
|
||||
} d;
|
||||
} u;
|
||||
};
|
||||
|
||||
/* Representation of a local variable in a kernel.
|
||||
*/
|
||||
struct ppcg_kernel_var {
|
||||
struct gpu_array_info *array;
|
||||
enum ppcg_kernel_access_type type;
|
||||
char *name;
|
||||
isl_vec *size;
|
||||
};
|
||||
|
||||
/* Representation of a kernel.
|
||||
*
|
||||
* prog describes the original code from which the kernel is extracted.
|
||||
*
|
||||
* id is the sequence number of the kernel.
|
||||
*
|
||||
* block_ids contains the list of block identifiers for this kernel.
|
||||
* thread_ids contains the list of thread identifiers for this kernel.
|
||||
*
|
||||
* the first n_grid elements of grid_dim represent the specified size
|
||||
* of the grid.
|
||||
* the first n_block elements of block_dim represent the specified or
|
||||
* effective size of the block.
|
||||
* Note that in the input file, the sizes of the grid and the blocks
|
||||
* are specified in the order x, y, z, but internally, the sizes
|
||||
* are stored in reverse order, so that the last element always
|
||||
* refers to the x dimension.
|
||||
*
|
||||
* grid_size reflects the effective grid size.
|
||||
*
|
||||
* context contains the values of the parameters and outer schedule dimensions
|
||||
* for which any statement instance in this kernel needs to be executed.
|
||||
*
|
||||
* n_sync is the number of synchronization operations that have
|
||||
* been introduced in the schedule tree corresponding to this kernel (so far).
|
||||
*
|
||||
* core contains the spaces of the statement domains that form
|
||||
* the core computation of the kernel. It is used to navigate
|
||||
* the tree during the construction of the device part of the schedule
|
||||
* tree in create_kernel.
|
||||
*
|
||||
* arrays is the set of possibly accessed outer array elements.
|
||||
*
|
||||
* space is the schedule space of the AST context. That is, it represents
|
||||
* the loops of the generated host code containing the kernel launch.
|
||||
*
|
||||
* n_array is the total number of arrays in the input program and also
|
||||
* the number of element in the array array.
|
||||
* array contains information about each array that is local
|
||||
* to the current kernel. If an array is not used in a kernel,
|
||||
* then the corresponding entry does not contain any information.
|
||||
*
|
||||
* any_force_private is set if any array in the kernel is marked force_private
|
||||
*
|
||||
* block_filter contains constraints on the domain elements in the kernel
|
||||
* that encode the mapping to block identifiers, where the block identifiers
|
||||
* are represented by "n_grid" parameters with as names the elements
|
||||
* of "block_ids".
|
||||
*
|
||||
* thread_filter contains constraints on the domain elements in the kernel
|
||||
* that encode the mapping to thread identifiers, where the thread identifiers
|
||||
* are represented by "n_block" parameters with as names the elements
|
||||
* of "thread_ids".
|
||||
*
|
||||
* shared_schedule corresponds to the schedule dimensions of
|
||||
* the (tiled) schedule for this kernel that have been taken into account
|
||||
* for computing private/shared memory tiles.
|
||||
* shared_schedule_dim is the dimension of this schedule.
|
||||
*
|
||||
* sync_writes contains write references that require synchronization.
|
||||
* Each reference is represented by a universe set in a space [S[i,j] -> R[]]
|
||||
* with S[i,j] the statement instance space and R[] the array reference.
|
||||
*/
|
||||
struct ppcg_kernel {
|
||||
isl_ctx *ctx;
|
||||
struct ppcg_options *options;
|
||||
|
||||
struct gpu_prog *prog;
|
||||
|
||||
int id;
|
||||
|
||||
isl_id_list *block_ids;
|
||||
isl_id_list *thread_ids;
|
||||
|
||||
int n_grid;
|
||||
int n_block;
|
||||
int grid_dim[2];
|
||||
int block_dim[3];
|
||||
|
||||
isl_multi_pw_aff *grid_size;
|
||||
isl_set *context;
|
||||
|
||||
int n_sync;
|
||||
isl_union_set *core;
|
||||
isl_union_set *arrays;
|
||||
|
||||
isl_space *space;
|
||||
|
||||
int n_array;
|
||||
struct gpu_local_array_info *array;
|
||||
|
||||
int n_var;
|
||||
struct ppcg_kernel_var *var;
|
||||
|
||||
int any_force_private;
|
||||
|
||||
isl_union_set *block_filter;
|
||||
isl_union_set *thread_filter;
|
||||
isl_union_pw_multi_aff *shared_schedule;
|
||||
int shared_schedule_dim;
|
||||
|
||||
isl_union_set *sync_writes;
|
||||
|
||||
isl_ast_node *tree;
|
||||
};
|
||||
|
||||
int gpu_array_is_scalar(struct gpu_array_info *array);
|
||||
int gpu_array_is_read_only_scalar(struct gpu_array_info *array);
|
||||
int gpu_array_requires_device_allocation(struct gpu_array_info *array);
|
||||
__isl_give isl_set *gpu_array_positive_size_guard(struct gpu_array_info *array);
|
||||
|
||||
struct gpu_prog *gpu_prog_alloc(isl_ctx *ctx, struct ppcg_scop *scop);
|
||||
void *gpu_prog_free(struct gpu_prog *prog);
|
||||
|
||||
int ppcg_kernel_requires_array_argument(struct ppcg_kernel *kernel, int i);
|
||||
|
||||
int generate_gpu(isl_ctx *ctx, const char *input, FILE *out,
|
||||
struct ppcg_options *options,
|
||||
__isl_give isl_printer *(*print)(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
|
||||
struct gpu_types *types, void *user), void *user);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
#include <isl/aff.h>
|
||||
#include <isl/map.h>
|
||||
|
||||
#include "gpu_array_tile.h"
|
||||
|
||||
struct gpu_array_tile *gpu_array_tile_free(struct gpu_array_tile *tile)
|
||||
{
|
||||
int j;
|
||||
|
||||
if (!tile)
|
||||
return NULL;
|
||||
|
||||
for (j = 0; j < tile->n; ++j) {
|
||||
isl_val_free(tile->bound[j].size);
|
||||
isl_val_free(tile->bound[j].stride);
|
||||
isl_aff_free(tile->bound[j].lb);
|
||||
isl_aff_free(tile->bound[j].shift);
|
||||
}
|
||||
free(tile->bound);
|
||||
isl_multi_aff_free(tile->tiling);
|
||||
free(tile);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create a gpu_array_tile for an array of dimension "n_index".
|
||||
*/
|
||||
struct gpu_array_tile *gpu_array_tile_create(isl_ctx *ctx, int n_index)
|
||||
{
|
||||
int i;
|
||||
struct gpu_array_tile *tile;
|
||||
|
||||
tile = isl_calloc_type(ctx, struct gpu_array_tile);
|
||||
if (!tile)
|
||||
return NULL;
|
||||
|
||||
tile->ctx = ctx;
|
||||
tile->bound = isl_alloc_array(ctx, struct gpu_array_bound, n_index);
|
||||
if (!tile->bound)
|
||||
return gpu_array_tile_free(tile);
|
||||
|
||||
tile->n = n_index;
|
||||
|
||||
for (i = 0; i < n_index; ++i) {
|
||||
tile->bound[i].size = NULL;
|
||||
tile->bound[i].lb = NULL;
|
||||
tile->bound[i].stride = NULL;
|
||||
tile->bound[i].shift = NULL;
|
||||
}
|
||||
|
||||
return tile;
|
||||
}
|
||||
|
||||
/* Compute the size of the tile specified by "tile"
|
||||
* in number of elements and return the result.
|
||||
*/
|
||||
__isl_give isl_val *gpu_array_tile_size(struct gpu_array_tile *tile)
|
||||
{
|
||||
int i;
|
||||
isl_val *size;
|
||||
|
||||
if (!tile)
|
||||
return NULL;
|
||||
|
||||
size = isl_val_one(tile->ctx);
|
||||
|
||||
for (i = 0; i < tile->n; ++i)
|
||||
size = isl_val_mul(size, isl_val_copy(tile->bound[i].size));
|
||||
|
||||
return size;
|
||||
}
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
#ifndef GPU_ARRAY_TILE_H
|
||||
#define GPU_ARRAY_TILE_H
|
||||
|
||||
#include <isl/aff_type.h>
|
||||
#include <isl/map_type.h>
|
||||
#include <isl/val.h>
|
||||
|
||||
/* The fields stride and shift only contain valid information
|
||||
* if shift != NULL.
|
||||
* If so, they express that current index is such that if you add shift,
|
||||
* then the result is always a multiple of stride.
|
||||
* Let D represent the initial group->depth dimensions of the computed schedule.
|
||||
* The spaces of "lb" and "shift" are of the form
|
||||
*
|
||||
* D -> [b]
|
||||
*/
|
||||
struct gpu_array_bound {
|
||||
isl_val *size;
|
||||
isl_aff *lb;
|
||||
|
||||
isl_val *stride;
|
||||
isl_aff *shift;
|
||||
};
|
||||
|
||||
/* A tile of an array.
|
||||
*
|
||||
* requires_unroll is set if the schedule dimensions that are mapped
|
||||
* to threads need to be unrolled for this (private) tile to be used.
|
||||
*
|
||||
* n is the dimension of the array.
|
||||
* bound is an array of size "n" representing the lower bound
|
||||
* and size for each index.
|
||||
*
|
||||
* tiling maps a tile in the global array to the corresponding
|
||||
* shared/private memory tile and is of the form
|
||||
*
|
||||
* { [D[i] -> A[a]] -> T[(a + shift(i))/stride - lb(i)] }
|
||||
*
|
||||
* where D represents the initial group->depth dimensions
|
||||
* of the computed schedule.
|
||||
*/
|
||||
struct gpu_array_tile {
|
||||
isl_ctx *ctx;
|
||||
int requires_unroll;
|
||||
int n;
|
||||
struct gpu_array_bound *bound;
|
||||
isl_multi_aff *tiling;
|
||||
};
|
||||
|
||||
struct gpu_array_tile *gpu_array_tile_create(isl_ctx *ctx, int n_index);
|
||||
struct gpu_array_tile *gpu_array_tile_free(struct gpu_array_tile *tile);
|
||||
|
||||
__isl_give isl_val *gpu_array_tile_size(struct gpu_array_tile *tile);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,67 @@
|
|||
#ifndef GPU_GROUP_H
|
||||
#define GPU_GROUP_H
|
||||
|
||||
#include <isl/schedule_node.h>
|
||||
#include "gpu.h"
|
||||
|
||||
/* A group of array references in a kernel that should be handled together.
|
||||
* If private_tile is not NULL, then it is mapped to registers.
|
||||
* Otherwise, if shared_tile is not NULL, it is mapped to shared memory.
|
||||
* Otherwise, it is accessed from global memory.
|
||||
* Note that if both private_tile and shared_tile are set, then shared_tile
|
||||
* is only used inside group_common_shared_memory_tile.
|
||||
* "depth" reflects the number of schedule dimensions that affect the tile
|
||||
* (private_tile if set; shared_tile if shared_tile is set and private_tile
|
||||
* is not). The copying into and/or out of the tile is performed at that
|
||||
* depth.
|
||||
*/
|
||||
struct gpu_array_ref_group {
|
||||
/* The references in this group access this local array. */
|
||||
struct gpu_local_array_info *local_array;
|
||||
/* This is the corresponding array. */
|
||||
struct gpu_array_info *array;
|
||||
/* Position of this group in the list of reference groups of array. */
|
||||
int nr;
|
||||
|
||||
/* The following fields are use during the construction of the groups.
|
||||
* access is the combined access relation relative to the shared
|
||||
* memory tiling. In particular, the domain of the map corresponds
|
||||
* to the first shared_schedule_dim dimensions of the kernel schedule.
|
||||
* write is set if any access in the group is a write.
|
||||
* exact_write is set if all writes are definite writes.
|
||||
* slice is set if there is at least one access in the group
|
||||
* that refers to more than one element
|
||||
*/
|
||||
isl_map *access;
|
||||
int write;
|
||||
int exact_write;
|
||||
int slice;
|
||||
|
||||
/* The shared memory tile, NULL if none. */
|
||||
struct gpu_array_tile *shared_tile;
|
||||
|
||||
/* The private memory tile, NULL if none. */
|
||||
struct gpu_array_tile *private_tile;
|
||||
|
||||
int depth;
|
||||
|
||||
/* References in this group; point to elements of a linked list. */
|
||||
int n_ref;
|
||||
struct gpu_stmt_access **refs;
|
||||
};
|
||||
|
||||
int gpu_group_references(struct ppcg_kernel *kernel,
|
||||
__isl_keep isl_schedule_node *node);
|
||||
|
||||
__isl_give isl_printer *gpu_array_ref_group_print_name(
|
||||
struct gpu_array_ref_group *group, __isl_take isl_printer *p);
|
||||
void gpu_array_ref_group_compute_tiling(struct gpu_array_ref_group *group);
|
||||
__isl_give isl_union_map *gpu_array_ref_group_access_relation(
|
||||
struct gpu_array_ref_group *group, int read, int write);
|
||||
int gpu_array_ref_group_requires_unroll(struct gpu_array_ref_group *group);
|
||||
struct gpu_array_tile *gpu_array_ref_group_tile(
|
||||
struct gpu_array_ref_group *group);
|
||||
struct gpu_array_ref_group *gpu_array_ref_group_free(
|
||||
struct gpu_array_ref_group *group);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,255 @@
|
|||
/*
|
||||
* Copyright 2012 Ecole Normale Superieure
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege,
|
||||
* Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <isl/aff.h>
|
||||
|
||||
#include "gpu_print.h"
|
||||
#include "print.h"
|
||||
#include "schedule.h"
|
||||
|
||||
/* Print declarations to "p" for arrays that are local to "prog"
|
||||
* but that are used on the host and therefore require a declaration.
|
||||
*/
|
||||
__isl_give isl_printer *gpu_print_local_declarations(__isl_take isl_printer *p,
|
||||
struct gpu_prog *prog)
|
||||
{
|
||||
int i;
|
||||
isl_ast_build *build;
|
||||
|
||||
if (!prog)
|
||||
return isl_printer_free(p);
|
||||
|
||||
build = isl_ast_build_from_context(isl_set_copy(prog->scop->context));
|
||||
for (i = 0; i < prog->n_array; ++i) {
|
||||
if (!prog->array[i].declare_local)
|
||||
continue;
|
||||
p = ppcg_print_declaration(p, prog->scop->pet->arrays[i],
|
||||
build);
|
||||
}
|
||||
isl_ast_build_free(build);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print an expression for the size of "array" in bytes.
|
||||
*/
|
||||
__isl_give isl_printer *gpu_array_info_print_size(__isl_take isl_printer *prn,
|
||||
struct gpu_array_info *array)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < array->n_index; ++i) {
|
||||
prn = isl_printer_print_str(prn, "(");
|
||||
prn = isl_printer_print_pw_aff(prn, array->bound[i]);
|
||||
prn = isl_printer_print_str(prn, ") * ");
|
||||
}
|
||||
prn = isl_printer_print_str(prn, "sizeof(");
|
||||
prn = isl_printer_print_str(prn, array->type);
|
||||
prn = isl_printer_print_str(prn, ")");
|
||||
|
||||
return prn;
|
||||
}
|
||||
|
||||
/* Print the declaration of a non-linearized array argument.
|
||||
*/
|
||||
static __isl_give isl_printer *print_non_linearized_declaration_argument(
|
||||
__isl_take isl_printer *p, struct gpu_array_info *array)
|
||||
{
|
||||
int i;
|
||||
|
||||
p = isl_printer_print_str(p, array->type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
|
||||
for (i = 0; i < array->n_index; i++) {
|
||||
p = isl_printer_print_str(p, "[");
|
||||
p = isl_printer_print_pw_aff(p, array->bound[i]);
|
||||
p = isl_printer_print_str(p, "]");
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the declaration of an array argument.
|
||||
* "memory_space" allows to specify a memory space prefix.
|
||||
*/
|
||||
__isl_give isl_printer *gpu_array_info_print_declaration_argument(
|
||||
__isl_take isl_printer *p, struct gpu_array_info *array,
|
||||
const char *memory_space)
|
||||
{
|
||||
if (gpu_array_is_read_only_scalar(array)) {
|
||||
p = isl_printer_print_str(p, array->type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
return p;
|
||||
}
|
||||
|
||||
if (memory_space) {
|
||||
p = isl_printer_print_str(p, memory_space);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
}
|
||||
|
||||
if (array->n_index != 0 && !array->linearize)
|
||||
return print_non_linearized_declaration_argument(p, array);
|
||||
|
||||
p = isl_printer_print_str(p, array->type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
p = isl_printer_print_str(p, "*");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the call of an array argument.
|
||||
*/
|
||||
__isl_give isl_printer *gpu_array_info_print_call_argument(
|
||||
__isl_take isl_printer *p, struct gpu_array_info *array)
|
||||
{
|
||||
if (gpu_array_is_read_only_scalar(array))
|
||||
return isl_printer_print_str(p, array->name);
|
||||
|
||||
p = isl_printer_print_str(p, "dev_");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print an access to the element in the private/shared memory copy
|
||||
* described by "stmt". The index of the copy is recorded in
|
||||
* stmt->local_index as an access to the array.
|
||||
*/
|
||||
static __isl_give isl_printer *stmt_print_local_index(__isl_take isl_printer *p,
|
||||
struct ppcg_kernel_stmt *stmt)
|
||||
{
|
||||
return isl_printer_print_ast_expr(p, stmt->u.c.local_index);
|
||||
}
|
||||
|
||||
/* Print an access to the element in the global memory copy
|
||||
* described by "stmt". The index of the copy is recorded in
|
||||
* stmt->index as an access to the array.
|
||||
*
|
||||
* The copy in global memory has been linearized, so we need to take
|
||||
* the array size into account.
|
||||
*/
|
||||
static __isl_give isl_printer *stmt_print_global_index(
|
||||
__isl_take isl_printer *p, struct ppcg_kernel_stmt *stmt)
|
||||
{
|
||||
int i;
|
||||
struct gpu_array_info *array = stmt->u.c.array;
|
||||
struct gpu_local_array_info *local = stmt->u.c.local_array;
|
||||
isl_ast_expr *index;
|
||||
|
||||
if (gpu_array_is_scalar(array)) {
|
||||
if (!gpu_array_is_read_only_scalar(array))
|
||||
p = isl_printer_print_str(p, "*");
|
||||
p = isl_printer_print_str(p, array->name);
|
||||
return p;
|
||||
}
|
||||
|
||||
index = isl_ast_expr_copy(stmt->u.c.index);
|
||||
if (array->linearize)
|
||||
index = gpu_local_array_info_linearize_index(local, index);
|
||||
|
||||
p = isl_printer_print_ast_expr(p, index);
|
||||
isl_ast_expr_free(index);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print a copy statement.
|
||||
*
|
||||
* A read copy statement is printed as
|
||||
*
|
||||
* local = global;
|
||||
*
|
||||
* while a write copy statement is printed as
|
||||
*
|
||||
* global = local;
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_kernel_print_copy(__isl_take isl_printer *p,
|
||||
struct ppcg_kernel_stmt *stmt)
|
||||
{
|
||||
p = isl_printer_start_line(p);
|
||||
if (stmt->u.c.read) {
|
||||
p = stmt_print_local_index(p, stmt);
|
||||
p = isl_printer_print_str(p, " = ");
|
||||
p = stmt_print_global_index(p, stmt);
|
||||
} else {
|
||||
p = stmt_print_global_index(p, stmt);
|
||||
p = isl_printer_print_str(p, " = ");
|
||||
p = stmt_print_local_index(p, stmt);
|
||||
}
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print the domain statement "stmt" by handing the body of
 * the underlying pet statement, together with stmt->u.d.ref2expr,
 * to pet_stmt_print_body.
 */
__isl_give isl_printer *ppcg_kernel_print_domain(__isl_take isl_printer *p,
	struct ppcg_kernel_stmt *stmt)
{
	struct gpu_stmt *input = stmt->u.d.stmt;

	p = pet_stmt_print_body(input->stmt, p, stmt->u.d.ref2expr);
	return p;
}
|
||||
|
||||
/* Was the definition of "type" printed before?
|
||||
* That is, does its name appear in the list of printed types "types"?
|
||||
*/
|
||||
static int already_printed(struct gpu_types *types,
|
||||
struct pet_type *type)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < types->n; ++i)
|
||||
if (!strcmp(types->name[i], type->name))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Print the definitions of all types prog->scop that have not been
|
||||
* printed before (according to "types") on "p".
|
||||
* Extend the list of printed types "types" with the newly printed types.
|
||||
*/
|
||||
__isl_give isl_printer *gpu_print_types(__isl_take isl_printer *p,
|
||||
struct gpu_types *types, struct gpu_prog *prog)
|
||||
{
|
||||
int i, n;
|
||||
isl_ctx *ctx;
|
||||
char **name;
|
||||
|
||||
n = prog->scop->pet->n_type;
|
||||
|
||||
if (n == 0)
|
||||
return p;
|
||||
|
||||
ctx = isl_printer_get_ctx(p);
|
||||
name = isl_realloc_array(ctx, types->name, char *, types->n + n);
|
||||
if (!name)
|
||||
return isl_printer_free(p);
|
||||
types->name = name;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
struct pet_type *type = prog->scop->pet->types[i];
|
||||
|
||||
if (already_printed(types, type))
|
||||
continue;
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, type->definition);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
types->name[types->n++] = strdup(type->name);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#ifndef GPU_PRINT_H
|
||||
#define GPU_PRINT_H
|
||||
|
||||
#include "gpu.h"
|
||||
|
||||
/* Printing of program-level declarations and type definitions. */
__isl_give isl_printer *gpu_print_local_declarations(
	__isl_take isl_printer *p, struct gpu_prog *prog);

__isl_give isl_printer *gpu_print_types(
	__isl_take isl_printer *p,
	struct gpu_types *types, struct gpu_prog *prog);

/* Printing of the size of an array and of an array
 * as a function argument.
 */
__isl_give isl_printer *gpu_array_info_print_size(
	__isl_take isl_printer *prn, struct gpu_array_info *array);
__isl_give isl_printer *gpu_array_info_print_declaration_argument(
	__isl_take isl_printer *p,
	struct gpu_array_info *array, const char *memory_space);
__isl_give isl_printer *gpu_array_info_print_call_argument(
	__isl_take isl_printer *p,
	struct gpu_array_info *array);

/* Printing of kernel statements. */
__isl_give isl_printer *ppcg_kernel_print_copy(
	__isl_take isl_printer *p, struct ppcg_kernel_stmt *stmt);
__isl_give isl_printer *ppcg_kernel_print_domain(
	__isl_take isl_printer *p, struct ppcg_kernel_stmt *stmt);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,542 @@
|
|||
/*
|
||||
* Copyright 2013 Ecole Normale Superieure
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege,
|
||||
* Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <isl/set.h>
|
||||
#include <isl/union_set.h>
|
||||
|
||||
#include "gpu_tree.h"
|
||||
|
||||
/* The functions in this file are used to navigate part of a schedule tree
|
||||
* that is mapped to blocks. Initially, this part consists of a linear
|
||||
* branch segment with a mark node with name "kernel" on the outer end
|
||||
* and a mark node with name "thread" on the inner end.
|
||||
* During the mapping to blocks, branching may be introduced, but only
|
||||
* one of the elements in each sequence contains the "thread" mark.
|
||||
* The filter of this element (and only this filter) contains
|
||||
* domain elements identified by the "core" argument of the functions
|
||||
* that move down this tree.
|
||||
*
|
||||
* Synchronization statements have a name that starts with "sync" and
|
||||
* a user pointer pointing to the kernel that contains the synchronization.
|
||||
* The functions inserting or detecting synchronizations take a ppcg_kernel
|
||||
* argument to be able to create or identify such statements.
|
||||
* They may also use two fields in this structure, the "core" field
|
||||
* to move around in the tree and the "n_sync" field to make sure that
|
||||
* each synchronization has a different name (within the kernel).
|
||||
*/
|
||||
|
||||
/* Is "node" a mark node with an identifier called "name"?
|
||||
*/
|
||||
static int is_marked(__isl_keep isl_schedule_node *node, const char *name)
|
||||
{
|
||||
isl_id *mark;
|
||||
int has_name;
|
||||
|
||||
if (!node)
|
||||
return -1;
|
||||
|
||||
if (isl_schedule_node_get_type(node) != isl_schedule_node_mark)
|
||||
return 0;
|
||||
|
||||
mark = isl_schedule_node_mark_get_id(node);
|
||||
if (!mark)
|
||||
return -1;
|
||||
|
||||
has_name = !strcmp(isl_id_get_name(mark), name);
|
||||
isl_id_free(mark);
|
||||
|
||||
return has_name;
|
||||
}
|
||||
|
||||
/* Is "node" a mark node with an identifier called "kernel"?
|
||||
*/
|
||||
int gpu_tree_node_is_kernel(__isl_keep isl_schedule_node *node)
|
||||
{
|
||||
return is_marked(node, "kernel");
|
||||
}
|
||||
|
||||
/* Is "node" a mark node with an identifier called "thread"?
|
||||
*/
|
||||
static int node_is_thread(__isl_keep isl_schedule_node *node)
|
||||
{
|
||||
return is_marked(node, "thread");
|
||||
}
|
||||
|
||||
/* Assuming "node" is a filter node, does it correspond to the branch
|
||||
* that contains the "thread" mark, i.e., does it contain any elements
|
||||
* in "core"?
|
||||
*/
|
||||
static int node_is_core(__isl_keep isl_schedule_node *node,
|
||||
__isl_keep isl_union_set *core)
|
||||
{
|
||||
int disjoint;
|
||||
isl_union_set *filter;
|
||||
|
||||
filter = isl_schedule_node_filter_get_filter(node);
|
||||
disjoint = isl_union_set_is_disjoint(filter, core);
|
||||
isl_union_set_free(filter);
|
||||
if (disjoint < 0)
|
||||
return -1;
|
||||
|
||||
return !disjoint;
|
||||
}
|
||||
|
||||
/* Move to the only child of "node" that has the "thread" mark as descendant,
|
||||
* where the branch containing this mark is identified by the domain elements
|
||||
* in "core".
|
||||
*
|
||||
* If "node" is not a sequence, then it only has one child and we move
|
||||
* to that single child.
|
||||
* Otherwise, we check each of the filters in the children, pick
|
||||
* the one that corresponds to "core" and return a pointer to the child
|
||||
* of the filter node.
|
||||
*/
|
||||
static __isl_give isl_schedule_node *core_child(
|
||||
__isl_take isl_schedule_node *node, __isl_keep isl_union_set *core)
|
||||
{
|
||||
int i, n;
|
||||
|
||||
if (isl_schedule_node_get_type(node) != isl_schedule_node_sequence)
|
||||
return isl_schedule_node_child(node, 0);
|
||||
|
||||
n = isl_schedule_node_n_children(node);
|
||||
for (i = 0; i < n; ++i) {
|
||||
int is_core;
|
||||
|
||||
node = isl_schedule_node_child(node, i);
|
||||
is_core = node_is_core(node, core);
|
||||
|
||||
if (is_core < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (is_core)
|
||||
return isl_schedule_node_child(node, 0);
|
||||
|
||||
node = isl_schedule_node_parent(node);
|
||||
}
|
||||
|
||||
isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
|
||||
"core child not found", return isl_schedule_node_free(node));
|
||||
}
|
||||
|
||||
/* Move down the branch between "kernel" and "thread" until
|
||||
* the "thread" mark is reached, where the branch containing the "thread"
|
||||
* mark is identified by the domain elements in "core".
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_down_to_thread(
|
||||
__isl_take isl_schedule_node *node, __isl_keep isl_union_set *core)
|
||||
{
|
||||
int is_thread;
|
||||
|
||||
while ((is_thread = node_is_thread(node)) == 0)
|
||||
node = core_child(node, core);
|
||||
if (is_thread < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Move up the tree underneath the "thread" mark until
|
||||
* the "thread" mark is reached.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_up_to_thread(
|
||||
__isl_take isl_schedule_node *node)
|
||||
{
|
||||
int is_thread;
|
||||
|
||||
while ((is_thread = node_is_thread(node)) == 0)
|
||||
node = isl_schedule_node_parent(node);
|
||||
if (is_thread < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Move up the tree underneath the "kernel" mark until
|
||||
* the "kernel" mark is reached.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_up_to_kernel(
|
||||
__isl_take isl_schedule_node *node)
|
||||
{
|
||||
int is_kernel;
|
||||
|
||||
while ((is_kernel = gpu_tree_node_is_kernel(node)) == 0)
|
||||
node = isl_schedule_node_parent(node);
|
||||
if (is_kernel < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Move down from the "kernel" mark (or at least a node with schedule
|
||||
* depth smaller than or equal to "depth") to a band node at schedule
|
||||
* depth "depth". The "thread" mark is assumed to have a schedule
|
||||
* depth greater than or equal to "depth". The branch containing the
|
||||
* "thread" mark is identified by the domain elements in "core".
|
||||
*
|
||||
* If the desired schedule depth is in the middle of band node,
|
||||
* then the band node is split into two pieces, the second piece
|
||||
* at the desired schedule depth.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_down_to_depth(
|
||||
__isl_take isl_schedule_node *node, int depth,
|
||||
__isl_keep isl_union_set *core)
|
||||
{
|
||||
int is_thread;
|
||||
|
||||
while (node && isl_schedule_node_get_schedule_depth(node) < depth) {
|
||||
if (isl_schedule_node_get_type(node) ==
|
||||
isl_schedule_node_band) {
|
||||
int node_depth, node_dim;
|
||||
node_depth = isl_schedule_node_get_schedule_depth(node);
|
||||
node_dim = isl_schedule_node_band_n_member(node);
|
||||
if (node_depth + node_dim > depth)
|
||||
node = isl_schedule_node_band_split(node,
|
||||
depth - node_depth);
|
||||
}
|
||||
node = core_child(node, core);
|
||||
}
|
||||
while ((is_thread = node_is_thread(node)) == 0 &&
|
||||
isl_schedule_node_get_type(node) != isl_schedule_node_band)
|
||||
node = core_child(node, core);
|
||||
if (is_thread < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Create a union set containing a single set with a tuple identifier
|
||||
* called "syncX" and user pointer equal to "kernel".
|
||||
*/
|
||||
static __isl_give isl_union_set *create_sync_domain(struct ppcg_kernel *kernel)
|
||||
{
|
||||
isl_space *space;
|
||||
isl_id *id;
|
||||
char name[40];
|
||||
|
||||
space = isl_space_set_alloc(kernel->ctx, 0, 0);
|
||||
snprintf(name, sizeof(name), "sync%d", kernel->n_sync++);
|
||||
id = isl_id_alloc(kernel->ctx, name, kernel);
|
||||
space = isl_space_set_tuple_id(space, isl_dim_set, id);
|
||||
return isl_union_set_from_set(isl_set_universe(space));
|
||||
}
|
||||
|
||||
/* Is "id" the identifier of a synchronization statement inside "kernel"?
|
||||
* That is, does its name start with "sync" and does it point to "kernel"?
|
||||
*/
|
||||
int gpu_tree_id_is_sync(__isl_keep isl_id *id, struct ppcg_kernel *kernel)
|
||||
{
|
||||
const char *name;
|
||||
|
||||
name = isl_id_get_name(id);
|
||||
if (!name)
|
||||
return 0;
|
||||
else if (strncmp(name, "sync", 4))
|
||||
return 0;
|
||||
return isl_id_get_user(id) == kernel;
|
||||
}
|
||||
|
||||
/* Does "domain" consist of a single set with a tuple identifier
|
||||
* corresponding to a synchronization for "kernel"?
|
||||
*/
|
||||
static int domain_is_sync(__isl_keep isl_union_set *domain,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int is_sync;
|
||||
isl_id *id;
|
||||
isl_set *set;
|
||||
|
||||
if (isl_union_set_n_set(domain) != 1)
|
||||
return 0;
|
||||
set = isl_set_from_union_set(isl_union_set_copy(domain));
|
||||
id = isl_set_get_tuple_id(set);
|
||||
is_sync = gpu_tree_id_is_sync(id, kernel);
|
||||
isl_id_free(id);
|
||||
isl_set_free(set);
|
||||
|
||||
return is_sync;
|
||||
}
|
||||
|
||||
/* Does "node" point to a filter selecting a synchronization statement
|
||||
* for "kernel"?
|
||||
*/
|
||||
static int node_is_sync_filter(__isl_keep isl_schedule_node *node,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int is_sync;
|
||||
enum isl_schedule_node_type type;
|
||||
isl_union_set *domain;
|
||||
|
||||
if (!node)
|
||||
return -1;
|
||||
type = isl_schedule_node_get_type(node);
|
||||
if (type != isl_schedule_node_filter)
|
||||
return 0;
|
||||
domain = isl_schedule_node_filter_get_filter(node);
|
||||
is_sync = domain_is_sync(domain, kernel);
|
||||
isl_union_set_free(domain);
|
||||
|
||||
return is_sync;
|
||||
}
|
||||
|
||||
/* Is "node" part of a sequence with a previous synchronization statement
|
||||
* for "kernel"?
|
||||
* That is, is the parent of "node" a filter such that there is
|
||||
* a previous filter that picks out exactly such a synchronization statement?
|
||||
*/
|
||||
static int has_preceding_sync(__isl_keep isl_schedule_node *node,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int found = 0;
|
||||
|
||||
node = isl_schedule_node_copy(node);
|
||||
node = isl_schedule_node_parent(node);
|
||||
while (!found && isl_schedule_node_has_previous_sibling(node)) {
|
||||
node = isl_schedule_node_previous_sibling(node);
|
||||
if (!node)
|
||||
break;
|
||||
found = node_is_sync_filter(node, kernel);
|
||||
}
|
||||
if (!node)
|
||||
found = -1;
|
||||
isl_schedule_node_free(node);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/* Is "node" part of a sequence with a subsequent synchronization statement
|
||||
* for "kernel"?
|
||||
* That is, is the parent of "node" a filter such that there is
|
||||
* a subsequent filter that picks out exactly such a synchronization statement?
|
||||
*/
|
||||
static int has_following_sync(__isl_keep isl_schedule_node *node,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int found = 0;
|
||||
|
||||
node = isl_schedule_node_copy(node);
|
||||
node = isl_schedule_node_parent(node);
|
||||
while (!found && isl_schedule_node_has_next_sibling(node)) {
|
||||
node = isl_schedule_node_next_sibling(node);
|
||||
if (!node)
|
||||
break;
|
||||
found = node_is_sync_filter(node, kernel);
|
||||
}
|
||||
if (!node)
|
||||
found = -1;
|
||||
isl_schedule_node_free(node);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/* Does the subtree rooted at "node" (which is a band node) contain
|
||||
* any synchronization statement for "kernel" that precedes
|
||||
* the core computation of "kernel" (identified by the elements
|
||||
* in kernel->core)?
|
||||
*/
|
||||
static int has_sync_before_core(__isl_keep isl_schedule_node *node,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync = 0;
|
||||
int is_thread;
|
||||
|
||||
node = isl_schedule_node_copy(node);
|
||||
while ((is_thread = node_is_thread(node)) == 0) {
|
||||
node = core_child(node, kernel->core);
|
||||
has_sync = has_preceding_sync(node, kernel);
|
||||
if (has_sync < 0 || has_sync)
|
||||
break;
|
||||
}
|
||||
if (is_thread < 0 || !node)
|
||||
has_sync = -1;
|
||||
isl_schedule_node_free(node);
|
||||
|
||||
return has_sync;
|
||||
}
|
||||
|
||||
/* Does the subtree rooted at "node" (which is a band node) contain
|
||||
* any synchronization statement for "kernel" that follows
|
||||
* the core computation of "kernel" (identified by the elements
|
||||
* in kernel->core)?
|
||||
*/
|
||||
static int has_sync_after_core(__isl_keep isl_schedule_node *node,
|
||||
struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync = 0;
|
||||
int is_thread;
|
||||
|
||||
node = isl_schedule_node_copy(node);
|
||||
while ((is_thread = node_is_thread(node)) == 0) {
|
||||
node = core_child(node, kernel->core);
|
||||
has_sync = has_following_sync(node, kernel);
|
||||
if (has_sync < 0 || has_sync)
|
||||
break;
|
||||
}
|
||||
if (is_thread < 0 || !node)
|
||||
has_sync = -1;
|
||||
isl_schedule_node_free(node);
|
||||
|
||||
return has_sync;
|
||||
}
|
||||
|
||||
/* Insert (or extend) an extension on top of "node" that puts
|
||||
* a synchronization node for "kernel" before "node".
|
||||
* Return a pointer to the original node in the updated schedule tree.
|
||||
*/
|
||||
static __isl_give isl_schedule_node *insert_sync_before(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
isl_union_set *domain;
|
||||
isl_schedule_node *graft;
|
||||
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
domain = create_sync_domain(kernel);
|
||||
graft = isl_schedule_node_from_domain(domain);
|
||||
node = isl_schedule_node_graft_before(node, graft);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Insert (or extend) an extension on top of "node" that puts
|
||||
* a synchronization node for "kernel" afater "node".
|
||||
* Return a pointer to the original node in the updated schedule tree.
|
||||
*/
|
||||
static __isl_give isl_schedule_node *insert_sync_after(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
isl_union_set *domain;
|
||||
isl_schedule_node *graft;
|
||||
|
||||
if (!node)
|
||||
return NULL;
|
||||
|
||||
domain = create_sync_domain(kernel);
|
||||
graft = isl_schedule_node_from_domain(domain);
|
||||
node = isl_schedule_node_graft_after(node, graft);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Insert an extension on top of "node" that puts a synchronization node
|
||||
* for "kernel" before "node" unless there already is
|
||||
* such a synchronization node.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_ensure_preceding_sync(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync;
|
||||
|
||||
has_sync = has_preceding_sync(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
return insert_sync_before(node, kernel);
|
||||
}
|
||||
|
||||
/* Insert an extension on top of "node" that puts a synchronization node
|
||||
* for "kernel" after "node" unless there already is
|
||||
* such a synchronization node.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_ensure_following_sync(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync;
|
||||
|
||||
has_sync = has_following_sync(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
return insert_sync_after(node, kernel);
|
||||
}
|
||||
|
||||
/* Insert an extension on top of "node" that puts a synchronization node
|
||||
* for "kernel" after "node" unless there already is such a sync node or
|
||||
* "node" itself already * contains a synchronization node following
|
||||
* the core computation of "kernel".
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_ensure_sync_after_core(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync;
|
||||
|
||||
has_sync = has_sync_after_core(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
has_sync = has_following_sync(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
return insert_sync_after(node, kernel);
|
||||
}
|
||||
|
||||
/* Move left in the sequence on top of "node" to a synchronization node
|
||||
* for "kernel".
|
||||
* If "node" itself contains a synchronization node preceding
|
||||
* the core computation of "kernel", then return "node" itself.
|
||||
* Otherwise, if "node" does not have a preceding synchronization node,
|
||||
* then create one first.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_left_to_sync(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync;
|
||||
int is_sync;
|
||||
|
||||
has_sync = has_sync_before_core(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
node = gpu_tree_ensure_preceding_sync(node, kernel);
|
||||
node = isl_schedule_node_parent(node);
|
||||
while ((is_sync = node_is_sync_filter(node, kernel)) == 0)
|
||||
node = isl_schedule_node_previous_sibling(node);
|
||||
if (is_sync < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
node = isl_schedule_node_child(node, 0);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Move right in the sequence on top of "node" to a synchronization node
|
||||
* for "kernel".
|
||||
* If "node" itself contains a synchronization node following
|
||||
* the core computation of "kernel", then return "node" itself.
|
||||
* Otherwise, if "node" does not have a following synchronization node,
|
||||
* then create one first.
|
||||
*/
|
||||
__isl_give isl_schedule_node *gpu_tree_move_right_to_sync(
|
||||
__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel)
|
||||
{
|
||||
int has_sync;
|
||||
int is_sync;
|
||||
|
||||
has_sync = has_sync_after_core(node, kernel);
|
||||
if (has_sync < 0)
|
||||
return isl_schedule_node_free(node);
|
||||
if (has_sync)
|
||||
return node;
|
||||
node = gpu_tree_ensure_following_sync(node, kernel);
|
||||
node = isl_schedule_node_parent(node);
|
||||
while ((is_sync = node_is_sync_filter(node, kernel)) == 0)
|
||||
node = isl_schedule_node_next_sibling(node);
|
||||
if (is_sync < 0)
|
||||
node = isl_schedule_node_free(node);
|
||||
node = isl_schedule_node_child(node, 0);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef GPU_TREE_H
#define GPU_TREE_H

#include <isl/schedule_node.h>

#include "gpu.h"

/* Navigation between the "kernel" mark, the "thread" mark and
 * band nodes at a given schedule depth.  The "core" argument
 * identifies the branch to follow when moving down.
 */
int gpu_tree_node_is_kernel(__isl_keep isl_schedule_node *node);
__isl_give isl_schedule_node *gpu_tree_move_up_to_thread(
	__isl_take isl_schedule_node *node);
__isl_give isl_schedule_node *gpu_tree_move_down_to_thread(
	__isl_take isl_schedule_node *node, __isl_keep isl_union_set *core);
__isl_give isl_schedule_node *gpu_tree_move_up_to_kernel(
	__isl_take isl_schedule_node *node);
__isl_give isl_schedule_node *gpu_tree_move_down_to_depth(
	__isl_take isl_schedule_node *node, int depth,
	__isl_keep isl_union_set *core);

/* Detection, insertion and lookup of synchronization statements
 * belonging to a kernel.
 */
int gpu_tree_id_is_sync(__isl_keep isl_id *id, struct ppcg_kernel *kernel);
__isl_give isl_schedule_node *gpu_tree_ensure_sync_after_core(
	__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel);
/* gpu_tree_ensure_preceding_sync has external linkage in gpu_tree.c,
 * so it is declared here next to its "following" counterpart.
 */
__isl_give isl_schedule_node *gpu_tree_ensure_preceding_sync(
	__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel);
__isl_give isl_schedule_node *gpu_tree_ensure_following_sync(
	__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel);
__isl_give isl_schedule_node *gpu_tree_move_left_to_sync(
	__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel);
__isl_give isl_schedule_node *gpu_tree_move_right_to_sync(
	__isl_take isl_schedule_node *node, struct ppcg_kernel *kernel);

#endif
|
||||
|
|
@ -0,0 +1,527 @@
|
|||
#!/bin/sh
# install - install a program, script, or datafile

scriptversion=2011-01-19.21; # UTC

# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.

# Field separators: space, tab and newline.
nl='
'
IFS=" ""	$nl"

# set DOITPROG to echo to test this script

# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
if test -z "$doit"; then
  doit_exec=exec
else
  doit_exec=$doit
fi

# Put in absolute file names if you don't have them in your path;
# or use environment vars.

chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}

# posix_glob is '?' until initialize_posix_glob has been evaluated;
# afterwards it is empty if "set -f" works and ":" (a no-op) otherwise,
# so that "$posix_glob set -f" only disables globbing where supported.
posix_glob='?'
initialize_posix_glob='
  test "$posix_glob" != "?" || {
    if (set -f) 2>/dev/null; then
      posix_glob=
    else
      posix_glob=:
    fi
  }
'

posix_mkdir=

# Desired mode of installed file.
mode=0755

chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=

src=
dst=
dir_arg=
dst_arg=

copy_on_change=false
no_target_directory=

usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...

In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.

Options:
--help display this help and exit.
--version display version info and exit.

-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.

Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"

# Parse the command-line options.
while test $# -ne 0; do
  case $1 in
    -c) ;;

    -C) copy_on_change=true;;

    -d) dir_arg=true;;

    -g) chgrpcmd="$chgrpprog $2"
        shift;;

    --help) echo "$usage"; exit $?;;

    -m) mode=$2
        # Reject modes containing a space, a tab, a newline
        # or a glob character.
        case $mode in
          *' '* | *'	'* | *'
'* | *'*'* | *'?'* | *'['*)
            echo "$0: invalid mode: $mode" >&2
            exit 1;;
        esac
        shift;;

    -o) chowncmd="$chownprog $2"
        shift;;

    -s) stripcmd=$stripprog;;

    -t) dst_arg=$2
        # Protect names problematic for `test' and other utilities.
        case $dst_arg in
          -* | [=\(\)!]) dst_arg=./$dst_arg;;
        esac
        shift;;

    -T) no_target_directory=true;;

    --version) echo "$0 $scriptversion"; exit $?;;

    --) shift
        break;;

    -*) echo "$0: invalid option: $1" >&2
        exit 1;;

    *)  break;;
  esac
  shift
done

if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
  # When -d is used, all remaining arguments are directories to create.
  # When -t is used, the destination is already specified.
  # Otherwise, the last argument is the destination.  Remove it from $@.
  for arg
  do
    if test -n "$dst_arg"; then
      # $@ is not empty: it contains at least $arg.
      set fnord "$@" "$dst_arg"
      shift # fnord
    fi
    shift # arg
    dst_arg=$arg
    # Protect names problematic for `test' and other utilities.
    case $dst_arg in
      -* | [=\(\)!]) dst_arg=./$dst_arg;;
    esac
  done
fi

if test $# -eq 0; then
  if test -z "$dir_arg"; then
    echo "$0: no input file specified." >&2
    exit 1
  fi
  # It's OK to call `install-sh -d' without argument.
  # This can happen when creating conditional directories.
  exit 0
fi

if test -z "$dir_arg"; then
  do_exit='(exit $ret); exit $ret'
  trap "ret=129; $do_exit" 1
  trap "ret=130; $do_exit" 2
  trap "ret=141; $do_exit" 13
  trap "ret=143; $do_exit" 15

  # Set umask so as not to create temps with too-generous modes.
  # However, 'strip' requires both read and write access to temps.
  case $mode in
    # Optimize common cases.
    *644) cp_umask=133;;
    *755) cp_umask=22;;

    *[0-7])
      if test -z "$stripcmd"; then
        u_plus_rw=
      else
        u_plus_rw='% 200'
      fi
      cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
    *)
      if test -z "$stripcmd"; then
        u_plus_rw=
      else
        u_plus_rw=,u+rw
      fi
      cp_umask=$mode$u_plus_rw;;
  esac
fi

for src
do
  # Protect names problematic for `test' and other utilities.
  case $src in
    -* | [=\(\)!]) src=./$src;;
  esac

  if test -n "$dir_arg"; then
    dst=$src
    dstdir=$dst
    test -d "$dstdir"
    dstdir_status=$?
  else

    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
    # might cause directories to be created, which would be especially bad
    # if $src (and thus $dsttmp) contains '*'.
    if test ! -f "$src" && test ! -d "$src"; then
      echo "$0: $src does not exist." >&2
      exit 1
    fi

    if test -z "$dst_arg"; then
      echo "$0: no destination specified." >&2
      exit 1
    fi
    dst=$dst_arg

    # If destination is a directory, append the input filename; won't work
    # if double slashes aren't ignored.
    if test -d "$dst"; then
      if test -n "$no_target_directory"; then
        echo "$0: $dst_arg: Is a directory" >&2
        exit 1
      fi
      dstdir=$dst
      dst=$dstdir/`basename "$src"`
      dstdir_status=0
    else
      # Prefer dirname, but fall back on a substitute if dirname fails.
      dstdir=`
        (dirname "$dst") 2>/dev/null ||
        expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
             X"$dst" : 'X\(//\)[^/]' \| \
             X"$dst" : 'X\(//\)$' \| \
             X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
        echo X"$dst" |
            sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
                   s//\1/
                   q
                 }
                 /^X\(\/\/\)[^/].*/{
                   s//\1/
                   q
                 }
                 /^X\(\/\/\)$/{
                   s//\1/
                   q
                 }
                 /^X\(\/\).*/{
                   s//\1/
                   q
                 }
                 s/.*/./; q'
      `

      test -d "$dstdir"
      dstdir_status=$?
    fi
  fi

  obsolete_mkdir_used=false

  if test $dstdir_status != 0; then
    case $posix_mkdir in
      '')
        # Create intermediate dirs using mode 755 as modified by the umask.
        # This is like FreeBSD 'install' as of 1997-10-28.
        umask=`umask`
        case $stripcmd.$umask in
          # Optimize common cases.
          *[2367][2367]) mkdir_umask=$umask;;
          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;

          *[0-7])
            mkdir_umask=`expr $umask + 22 \
              - $umask % 100 % 40 + $umask % 20 \
              - $umask % 10 % 4 + $umask % 2
            `;;
          *) mkdir_umask=$umask,go-w;;
        esac

        # With -d, create the new directory with the user-specified mode.
        # Otherwise, rely on $mkdir_umask.
        if test -n "$dir_arg"; then
          mkdir_mode=-m$mode
        else
          mkdir_mode=
        fi

        posix_mkdir=false
        case $umask in
          *[123567][0-7][0-7])
            # POSIX mkdir -p sets u+wx bits regardless of umask, which
            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
            ;;
          *)
            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
            trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0

            if (umask $mkdir_umask &&
                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
            then
              if test -z "$dir_arg" || {
                   # Check for POSIX incompatibilities with -m.
                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
                   # other-writeable bit of parent directory when it shouldn't.
                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
                   ls_ld_tmpdir=`ls -ld "$tmpdir"`
                   case $ls_ld_tmpdir in
                     d????-?r-*) different_mode=700;;
                     d????-?--*) different_mode=755;;
                     *) false;;
                   esac &&
                   $mkdirprog -m$different_mode -p -- "$tmpdir" && {
                     ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
                   }
                 }
              then posix_mkdir=:
              fi
              rmdir "$tmpdir/d" "$tmpdir"
            else
              # Remove any dirs left behind by ancient mkdir implementations.
              rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
            fi
            trap '' 0;;
        esac;;
    esac

    if
      $posix_mkdir && (
        umask $mkdir_umask &&
        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
      )
    then :
    else

      # The umask is ridiculous, or mkdir does not conform to POSIX,
      # or it failed possibly due to a race condition.  Create the
      # directory the slow way, step by step, checking for races as we go.

      case $dstdir in
        /*) prefix='/';;
        [-=\(\)!]*) prefix='./';;
        *)  prefix='';;
      esac

      eval "$initialize_posix_glob"

      # Split $dstdir into its components, with globbing disabled.
      oIFS=$IFS
      IFS=/
      $posix_glob set -f
      set fnord $dstdir
      shift
      $posix_glob set +f
      IFS=$oIFS

      prefixes=

      for d
      do
        test X"$d" = X && continue

        prefix=$prefix$d
        if test -d "$prefix"; then
          prefixes=
        else
          if $posix_mkdir; then
            # Invoke the umask command here, as at every other call site;
            # a "umask=..." assignment would leave the process umask
            # untouched in this subshell.
            (umask $mkdir_umask &&
             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
            # Don't fail if two instances are running concurrently.
            test -d "$prefix" || exit 1
          else
            case $prefix in
              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
              *) qprefix=$prefix;;
            esac
            prefixes="$prefixes '$qprefix'"
          fi
        fi
        prefix=$prefix/
      done

      if test -n "$prefixes"; then
        # Don't fail if two instances are running concurrently.
        (umask $mkdir_umask &&
         eval "\$doit_exec \$mkdirprog $prefixes") ||
          test -d "$dstdir" || exit 1
        obsolete_mkdir_used=true
      fi
    fi
  fi

  if test -n "$dir_arg"; then
    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
  else

    # Make a couple of temp file names in the proper directory.
    dsttmp=$dstdir/_inst.$$_
    rmtmp=$dstdir/_rm.$$_

    # Trap to clean up those temp files at exit.
    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0

    # Copy the file name to the temp name.
    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&

    # and set any options; do chmod last to preserve setuid bits.
    #
    # If any of these fail, we abort the whole thing.  If we want to
    # ignore errors from any of these, just make sure not to ignore
    # errors from the above "$doit $cpprog $src $dsttmp" command.
    #
    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&

    # If -C, don't bother to copy if it wouldn't change the file.
    if $copy_on_change &&
       old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
       new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&

       eval "$initialize_posix_glob" &&
       $posix_glob set -f &&
       set X $old && old=:$2:$4:$5:$6 &&
       set X $new && new=:$2:$4:$5:$6 &&
       $posix_glob set +f &&

       test "$old" = "$new" &&
       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
    then
      rm -f "$dsttmp"
    else
      # Rename the file to the real destination.
      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||

      # The rename failed, perhaps because mv can't rename something else
      # to itself, or perhaps because mv is so ancient that it does not
      # support -f.
      {
        # Now remove or move aside any old file at destination location.
        # We try this two ways since rm can't unlink itself on some
        # systems and the destination file might be busy for other
        # reasons.  In this case, the final cleanup might fail but the new
        # file should still install successfully.
        {
          test ! -f "$dst" ||
          $doit $rmcmd -f "$dst" 2>/dev/null ||
          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
          } ||
          { echo "$0: cannot unlink or rename $dst" >&2
            (exit 1); exit 1
          }
        } &&

        # Now rename the file to the real destination.
        $doit $mvcmd "$dsttmp" "$dst"
      }
    fi || exit 1

    trap '' 0
  fi
done

# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,25 @@
|
|||
# Check if OpenCL is available and that it supports a CPU device.
# The check for a CPU device is the same check that is performed
# by opencl_create_device in ocl_utilities.c
#
# Sets and substitutes HAVE_OPENCL to "yes" or "no".
# The CPU device test has to run a program, so when cross compiling
# HAVE_OPENCL is conservatively left at "no" instead of aborting configure.
AC_DEFUN([AX_CHECK_OPENCL], [
	AC_SUBST(HAVE_OPENCL)
	HAVE_OPENCL=no
	AC_CHECK_HEADER([CL/opencl.h], [
		AC_CHECK_LIB([OpenCL], [clGetPlatformIDs], [
			SAVE_LIBS=$LIBS
			LIBS="$LIBS -lOpenCL"
			AC_MSG_CHECKING([for OpenCL CPU device])
			AC_RUN_IFELSE([AC_LANG_PROGRAM(
				[[#include <CL/opencl.h>]], [[
	cl_platform_id platform;
	cl_device_id dev;

	if (clGetPlatformIDs(1, &platform, NULL) < 0)
		return 1;
	if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL) < 0)
		return 1;
			]])], [HAVE_OPENCL=yes], [HAVE_OPENCL=no],
			[HAVE_OPENCL=no])
			AC_MSG_RESULT([$HAVE_OPENCL])
			LIBS=$SAVE_LIBS
		])])
])
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
# Check if $CC supports openmp.
#
# Sets and substitutes HAVE_OPENMP to "yes" if $CC accepts an empty
# C translation unit with -fopenmp and -Werror, and to "no" otherwise.
AC_DEFUN([AX_CHECK_OPENMP], [
	AC_SUBST(HAVE_OPENMP)
	HAVE_OPENMP=no
	AC_MSG_CHECKING([for OpenMP support by $CC])
	echo | $CC -x c - -fsyntax-only -fopenmp -Werror >/dev/null 2>/dev/null
	if test $? -eq 0; then
		HAVE_OPENMP=yes
	fi
	AC_MSG_RESULT([$HAVE_OPENMP])
])
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
# AX_DETECT_GIT_HEAD
# ------------------
# Determine an identifier for the sources being configured and
# substitute GIT_HEAD_ID, GIT_HEAD and GIT_HEAD_VERSION.
#
# Three cases are tried in this order:
#  - $srcdir/.git/HEAD exists: GIT_HEAD_ID is the output of
#    "git describe --always" and GIT_HEAD is the path of the git index
#    file.  GIT_HEAD_VERSION is set to the backquoted git command
#    itself rather than to its output.
#  - $srcdir/GIT_HEAD_ID exists: its contents become GIT_HEAD_ID.
#  - otherwise the name of the source directory is inspected: what
#    remains after removing everything up to its last "-" is used when
#    it is made up of 40 characters from 0-9a-f, which is intended to
#    match a full commit id.  Failing that, GIT_HEAD_ID is "UNKNOWN".
# When GIT_REPO is empty, GIT_HEAD_VERSION is a copy of GIT_HEAD_ID.
AC_DEFUN([AX_DETECT_GIT_HEAD], [
	AC_SUBST(GIT_HEAD_ID)
	AC_SUBST(GIT_HEAD)
	AC_SUBST(GIT_HEAD_VERSION)
	if test -f $srcdir/.git/HEAD; then
		GIT_HEAD="$srcdir/.git/index"
		GIT_REPO="$srcdir/.git"
		GIT_HEAD_ID=`GIT_DIR=$GIT_REPO git describe --always`
	elif test -f $srcdir/GIT_HEAD_ID; then
		GIT_HEAD_ID=`cat $srcdir/GIT_HEAD_ID`
	else
		mysrcdir=`(cd $srcdir; pwd)`
		head=`basename $mysrcdir | sed -e 's/.*-//'`
		dnl The bracket expression is quoted twice because m4 removes
		dnl one level of quotes when this macro is expanded.
		head2=`echo $head | sed -e 's/[[^0-9a-f]]//'`
		head3=`echo $head2 | sed -e 's/........................................//'`
		if test "x$head3" = "x" -a "x$head" = "x$head2"; then
			GIT_HEAD_ID="$head"
		else
			GIT_HEAD_ID="UNKNOWN"
		fi
	fi
	if test -z "$GIT_REPO" ; then
		GIT_HEAD_VERSION="$GIT_HEAD_ID"
	else
		GIT_HEAD_VERSION="\`GIT_DIR=$GIT_REPO git describe --always\`"
	fi
])
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
# _AX_SUBMODULE(NAME, VARNAME, CHOICES, DEFAULT)
# ----------------------------------------------
# Decide which copy of the package NAME to use, store the decision in
# the shell variable with_VARNAME and report it.
#
# CHOICES is used in the help text and as a shell case pattern.
# If it contains a "|", the option --with-NAME is declared.
# If it matches "system", --with-NAME-prefix and --with-NAME-exec-prefix
# are declared.  If it contains "build", --with-NAME-builddir is declared.
#
# Giving a prefix or exec prefix forces the choice "system" and puts the
# lib/pkgconfig directory below the exec prefix in front of
# PKG_CONFIG_PATH.  Giving a builddir forces the choice "build" and sets
# VARNAME_srcdir by asking that build directory's config.status for its
# absolute source directory.  Combining either with a different explicit
# choice is an error.
#
# If the resulting value is not one of CHOICES, DEFAULT is used.
# A DEFAULT of "bundled" is only kept if NAME/configure exists in the
# source tree; otherwise "system" is chosen if CHOICES allows it and
# "no" if not.
AC_DEFUN([_AX_SUBMODULE],
[

m4_if(m4_bregexp($3,|,choice),choice,
	[AC_ARG_WITH($2,
		[AS_HELP_STRING([--with-$1=$3],
				[Which $1 to use [default=$4]])])])
case "system" in
$3)
	AC_ARG_WITH($2_prefix,
		    [AS_HELP_STRING([--with-$1-prefix=DIR],
				    [Prefix of $1 installation])])
	AC_ARG_WITH($2_exec_prefix,
		    [AS_HELP_STRING([--with-$1-exec-prefix=DIR],
				    [Exec prefix of $1 installation])])
esac
m4_if(m4_bregexp($3,build,build),build,
	[AC_ARG_WITH($2_builddir,
		[AS_HELP_STRING([--with-$1-builddir=DIR],
				[Location of $1 builddir])])])
# A prefix without an exec prefix also serves as the exec prefix.
if test "x$with_$2_prefix" != "x" -a "x$with_$2_exec_prefix" = "x"; then
	with_$2_exec_prefix=$with_$2_prefix
fi
if test "x$with_$2_prefix" != "x" -o "x$with_$2_exec_prefix" != "x"; then
	if test "x$with_$2" != "x" -a "x$with_$2" != "xsystem"; then
		AC_MSG_ERROR([Setting --with-$1-prefix or --with-$1-exec-prefix implies use of system $1])
	fi
	with_$2="system"
fi
if test "x$with_$2_builddir" != "x"; then
	if test "x$with_$2" != "x" -a "x$with_$2" != "xbuild"; then
		AC_MSG_ERROR([Setting --with-$1-builddir implies use of build $1])
	fi
	with_$2="build"
	$2_srcdir=`echo @abs_srcdir@ | $with_$2_builddir/config.status --file=-`
	AC_MSG_NOTICE($1 sources in $$2_srcdir)
fi
if test "x$with_$2_exec_prefix" != "x"; then
	# Assign first and export afterwards because not every shell
	# accepts an assignment as part of the export command.
	PKG_CONFIG_PATH="$with_$2_exec_prefix/lib/pkgconfig${PKG_CONFIG_PATH+:$PKG_CONFIG_PATH}"
	export PKG_CONFIG_PATH
fi
case "$with_$2" in
$3)
	;;
*)
	case "$4" in
	bundled)
		# Only ask git about the submodule inside a git checkout
		# that has a directory for it.
		if test -d $srcdir/.git && \
		   test -d $srcdir/$1 && \
		   test "`cd $srcdir; git submodule status $1 | cut -c1`" = '-'; then
			AC_MSG_WARN([git repo detected, but submodule $1 not initialized])
			AC_MSG_WARN([You may want to run])
			AC_MSG_WARN([ git submodule init])
			AC_MSG_WARN([ git submodule update])
			AC_MSG_WARN([ sh autogen.sh])
		fi
		if test -f $srcdir/$1/configure; then
			with_$2="bundled"
		else
			case "system" in
			$3)
				with_$2="system"
				;;
			*)
				with_$2="no"
				;;
			esac
		fi
		;;
	*)
		with_$2="$4"
		;;
	esac
	;;
esac
AC_MSG_CHECKING([which $1 to use])
AC_MSG_RESULT($with_$2)

])
|
||||
|
||||
# AX_SUBMODULE(NAME, CHOICES, DEFAULT)
# ------------------------------------
# Public entry point.  Forwards to _AX_SUBMODULE, passing NAME twice:
# once unchanged and once with every character other than an
# underscore, a lowercase ASCII letter or a digit replaced by an
# underscore.  CHOICES and DEFAULT are passed through unchanged.
AC_DEFUN([AX_SUBMODULE], [
|
||||
_AX_SUBMODULE($1, m4_bpatsubst([$1],
|
||||
[[^_abcdefghijklmnopqrstuvwxyz0123456789]],[_]), $2, $3)
|
||||
])
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,384 @@
|
|||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
|
||||
# Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 7 ltoptions.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
|
||||
|
||||
|
||||
# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ------------------------------------------
|
||||
m4_define([_LT_MANGLE_OPTION],
|
||||
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ---------------------------------------
|
||||
# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
|
||||
# matching handler defined, dispatch to it. Other OPTION-NAMEs are
|
||||
# saved as a flag.
|
||||
m4_define([_LT_SET_OPTION],
|
||||
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
|
||||
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
[m4_warning([Unknown $1 option `$2'])])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
|
||||
# ------------------------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
m4_define([_LT_IF_OPTION],
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
|
||||
|
||||
|
||||
# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
|
||||
# -------------------------------------------------------
|
||||
# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
|
||||
# are set.
|
||||
m4_define([_LT_UNLESS_OPTIONS],
|
||||
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
|
||||
[m4_define([$0_found])])])[]dnl
|
||||
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
|
||||
])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
|
||||
# ----------------------------------------
|
||||
# OPTION-LIST is a space-separated list of Libtool options associated
|
||||
# with MACRO-NAME. If any OPTION has a matching handler declared with
|
||||
# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
|
||||
# the unknown option and exit.
|
||||
m4_defun([_LT_SET_OPTIONS],
|
||||
[# Set options
|
||||
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[_LT_SET_OPTION([$1], _LT_Option)])
|
||||
|
||||
m4_if([$1],[LT_INIT],[
|
||||
dnl
|
||||
dnl Simply set some default values (i.e. off) if boolean options were not
|
||||
dnl specified:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
|
||||
])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
|
||||
])
|
||||
dnl
|
||||
dnl If no reference was made to various pairs of opposing options, then
|
||||
dnl we run the default mode handler for the pair. For example, if neither
|
||||
dnl `shared' nor `disable-shared' was passed, we enable building of shared
|
||||
dnl archives by default:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
|
||||
[_LT_ENABLE_FAST_INSTALL])
|
||||
])
|
||||
])# _LT_SET_OPTIONS
|
||||
|
||||
|
||||
## --------------------------------- ##
|
||||
## Macros to handle LT_INIT options. ##
|
||||
## --------------------------------- ##
|
||||
|
||||
# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
|
||||
# -----------------------------------------
|
||||
m4_define([_LT_MANGLE_DEFUN],
|
||||
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
|
||||
# -----------------------------------------------
|
||||
m4_define([LT_OPTION_DEFINE],
|
||||
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
|
||||
])# LT_OPTION_DEFINE
|
||||
|
||||
|
||||
# dlopen
|
||||
# ------
|
||||
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_DLOPEN],
|
||||
[_LT_SET_OPTION([LT_INIT], [dlopen])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `dlopen' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
|
||||
|
||||
|
||||
# win32-dll
|
||||
# ---------
|
||||
# Declare package support for building win32 dll's.
|
||||
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
|
||||
[enable_win32_dll=yes
|
||||
|
||||
case $host in
|
||||
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
|
||||
AC_CHECK_TOOL(AS, as, false)
|
||||
AC_CHECK_TOOL(DLLTOOL, dlltool, false)
|
||||
AC_CHECK_TOOL(OBJDUMP, objdump, false)
|
||||
;;
|
||||
esac
|
||||
|
||||
test -z "$AS" && AS=as
|
||||
_LT_DECL([], [AS], [1], [Assembler program])dnl
|
||||
|
||||
test -z "$DLLTOOL" && DLLTOOL=dlltool
|
||||
_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
|
||||
|
||||
test -z "$OBJDUMP" && OBJDUMP=objdump
|
||||
_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
|
||||
])# win32-dll
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
|
||||
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
|
||||
_LT_SET_OPTION([LT_INIT], [win32-dll])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `win32-dll' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
|
||||
|
||||
|
||||
# _LT_ENABLE_SHARED([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-shared flag, and support the `shared' and
|
||||
# `disable-shared' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_SHARED],
|
||||
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([shared],
|
||||
[AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
|
||||
[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_shared=yes ;;
|
||||
no) enable_shared=no ;;
|
||||
*)
|
||||
enable_shared=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_shared=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
|
||||
|
||||
_LT_DECL([build_libtool_libs], [enable_shared], [0],
|
||||
[Whether or not to build shared libraries])
|
||||
])# _LT_ENABLE_SHARED
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-shared])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
|
||||
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_SHARED], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_STATIC([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-static flag, and support the `static' and
|
||||
# `disable-static' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_STATIC],
|
||||
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([static],
|
||||
[AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
|
||||
[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_static=yes ;;
|
||||
no) enable_static=no ;;
|
||||
*)
|
||||
enable_static=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_static=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
|
||||
|
||||
_LT_DECL([build_old_libs], [enable_static], [0],
|
||||
[Whether or not to build static libraries])
|
||||
])# _LT_ENABLE_STATIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-static])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
|
||||
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_STATIC], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
|
||||
# ----------------------------------
|
||||
# implement the --enable-fast-install flag, and support the `fast-install'
|
||||
# and `disable-fast-install' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_FAST_INSTALL],
|
||||
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([fast-install],
|
||||
[AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
|
||||
[optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_fast_install=yes ;;
|
||||
no) enable_fast_install=no ;;
|
||||
*)
|
||||
enable_fast_install=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_fast_install=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
|
||||
|
||||
_LT_DECL([fast_install], [enable_fast_install], [0],
|
||||
[Whether or not to optimize for fast installation])dnl
|
||||
])# _LT_ENABLE_FAST_INSTALL
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
|
||||
|
||||
# Old names:
|
||||
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the `fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the `disable-fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
|
||||
dnl AC_DEFUN([AC_DISABLE_FAST_INSTALL], [])
|
||||
|
||||
|
||||
# _LT_WITH_PIC([MODE])
|
||||
# --------------------
|
||||
# implement the --with-pic flag, and support the `pic-only' and `no-pic'
|
||||
# LT_INIT options.
|
||||
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
|
||||
m4_define([_LT_WITH_PIC],
|
||||
[AC_ARG_WITH([pic],
|
||||
[AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
|
||||
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
|
||||
[lt_p=${PACKAGE-default}
|
||||
case $withval in
|
||||
yes|no) pic_mode=$withval ;;
|
||||
*)
|
||||
pic_mode=default
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for lt_pkg in $withval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$lt_pkg" = "X$lt_p"; then
|
||||
pic_mode=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[pic_mode=default])
|
||||
|
||||
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
|
||||
|
||||
_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
|
||||
])# _LT_WITH_PIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
|
||||
|
||||
# Old name:
|
||||
AU_DEFUN([AC_LIBTOOL_PICMODE],
|
||||
[_LT_SET_OPTION([LT_INIT], [pic-only])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `pic-only' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
|
||||
|
||||
## ----------------- ##
|
||||
## LTDL_INIT Options ##
|
||||
## ----------------- ##
|
||||
|
||||
m4_define([_LTDL_MODE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
|
||||
[m4_define([_LTDL_MODE], [nonrecursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
|
||||
[m4_define([_LTDL_MODE], [recursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
|
||||
[m4_define([_LTDL_MODE], [subproject])])
|
||||
|
||||
m4_define([_LTDL_TYPE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [installable],
|
||||
[m4_define([_LTDL_TYPE], [installable])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
|
||||
[m4_define([_LTDL_TYPE], [convenience])])
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6 ltsugar.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
|
||||
|
||||
|
||||
# lt_join(SEP, ARG1, [ARG2...])
|
||||
# -----------------------------
|
||||
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
|
||||
# associated separator.
|
||||
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
|
||||
# versions in m4sugar had bugs.
|
||||
m4_define([lt_join],
|
||||
[m4_if([$#], [1], [],
|
||||
[$#], [2], [[$2]],
|
||||
[m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
m4_define([_lt_join],
|
||||
[m4_if([$#$2], [2], [],
|
||||
[m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
|
||||
|
||||
# lt_car(LIST)
|
||||
# lt_cdr(LIST)
|
||||
# ------------
|
||||
# Manipulate m4 lists.
|
||||
# These macros are necessary as long as we still need to support
|
||||
# Autoconf-2.59 which quotes differently.
|
||||
m4_define([lt_car], [[$1]])
|
||||
m4_define([lt_cdr],
|
||||
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
|
||||
[$#], 1, [],
|
||||
[m4_dquote(m4_shift($@))])])
|
||||
# lt_unquote(ARG)
# ---------------
# Expand to the first argument as is, so that m4 rescans it with one
# level of quoting removed.
m4_define([lt_unquote], $1)
|
||||
|
||||
|
||||
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
|
||||
# ------------------------------------------
|
||||
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
|
||||
# Note that neither SEPARATOR nor STRING are expanded; they are appended
|
||||
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
|
||||
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
|
||||
# than defined and empty).
|
||||
#
|
||||
# This macro is needed until we can rely on Autoconf 2.62, since earlier
|
||||
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
|
||||
m4_define([lt_append],
|
||||
[m4_define([$1],
|
||||
m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
|
||||
|
||||
|
||||
|
||||
# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
|
||||
# ----------------------------------------------------------
|
||||
# Produce a SEP delimited list of all paired combinations of elements of
|
||||
# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
|
||||
# has the form PREFIXmINFIXSUFFIXn.
|
||||
# Needed until we can rely on m4_combine added in Autoconf 2.62.
|
||||
m4_define([lt_combine],
|
||||
[m4_if(m4_eval([$# > 3]), [1],
|
||||
[m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
|
||||
[[m4_foreach([_Lt_prefix], [$2],
|
||||
[m4_foreach([_Lt_suffix],
|
||||
]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
|
||||
[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
|
||||
|
||||
|
||||
# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
|
||||
# -----------------------------------------------------------------------
|
||||
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
|
||||
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
|
||||
m4_define([lt_if_append_uniq],
|
||||
[m4_ifdef([$1],
|
||||
[m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
|
||||
[lt_append([$1], [$2], [$3])$4],
|
||||
[$5])],
|
||||
[lt_append([$1], [$2], [$3])$4])])
|
||||
|
||||
|
||||
# lt_dict_add(DICT, KEY, VALUE)
|
||||
# -----------------------------
|
||||
m4_define([lt_dict_add],
|
||||
[m4_define([$1($2)], [$3])])
|
||||
|
||||
|
||||
# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
|
||||
# --------------------------------------------
|
||||
m4_define([lt_dict_add_subkey],
|
||||
[m4_define([$1($2:$3)], [$4])])
|
||||
|
||||
|
||||
# lt_dict_fetch(DICT, KEY, [SUBKEY])
|
||||
# ----------------------------------
|
||||
m4_define([lt_dict_fetch],
|
||||
[m4_ifval([$3],
|
||||
m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
|
||||
m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
|
||||
|
||||
|
||||
# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
|
||||
# -----------------------------------------------------------------
|
||||
m4_define([lt_if_dict_fetch],
|
||||
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
|
||||
[$5],
|
||||
[$6])])
|
||||
|
||||
|
||||
# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
|
||||
# --------------------------------------------------------------
|
||||
m4_define([lt_dict_filter],
|
||||
[m4_if([$5], [], [],
|
||||
[lt_join(m4_quote(m4_default([$4], [[, ]])),
|
||||
lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
|
||||
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
|
||||
])
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# ltversion.m4 -- version numbers -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# @configure_input@
|
||||
|
||||
# serial 3337 ltversion.m4
|
||||
# This file is part of GNU Libtool
|
||||
|
||||
m4_define([LT_PACKAGE_VERSION], [2.4.2])
|
||||
m4_define([LT_PACKAGE_REVISION], [1.3337])
|
||||
|
||||
AC_DEFUN([LTVERSION_VERSION],
|
||||
[macro_version='2.4.2'
|
||||
macro_revision='1.3337'
|
||||
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
|
||||
_LT_DECL(, macro_revision, 0)
|
||||
])
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 5 lt~obsolete.m4
|
||||
|
||||
# These exist entirely to fool aclocal when bootstrapping libtool.
|
||||
#
|
||||
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
|
||||
# which have later been changed to m4_define as they aren't part of the
|
||||
# exported API, or moved to Autoconf or Automake where they belong.
|
||||
#
|
||||
# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
|
||||
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
|
||||
# using a macro with the same name in our local m4/libtool.m4 it'll
|
||||
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
|
||||
# and doesn't know about Autoconf macros at all.)
|
||||
#
|
||||
# So we provide this file, which has a silly filename so it's always
|
||||
# included after everything else. This provides aclocal with the
|
||||
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
|
||||
# because those macros already exist, or will be overwritten later.
|
||||
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
|
||||
#
|
||||
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
|
||||
# Yes, that means every name once taken will need to remain here until
|
||||
# we give up compatibility with versions before 1.7, at which point
|
||||
# we need to keep only those names which we still refer to.
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
|
||||
|
||||
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
|
||||
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
|
||||
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
|
||||
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
|
||||
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
|
||||
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
|
||||
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
|
||||
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
|
||||
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
|
||||
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
|
||||
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
|
||||
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
|
||||
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
|
||||
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
|
||||
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
|
||||
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
|
||||
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
|
||||
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
|
||||
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
|
||||
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
|
||||
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
|
||||
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
|
||||
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
|
||||
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
|
||||
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
|
||||
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
|
||||
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
|
||||
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
|
||||
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
|
||||
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
|
||||
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
|
||||
m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
|
||||
m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
|
||||
m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
|
||||
m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
|
||||
m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
|
||||
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
|
||||
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
|
||||
|
|
@ -0,0 +1,331 @@
|
|||
#! /bin/sh
# Common stub for a few missing GNU programs while installing.

scriptversion=2012-01-06.13; # UTC

# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
# 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.

if test $# -eq 0; then
  echo 1>&2 "Try \`$0 --help' for more information"
  exit 1
fi

# "run" stays ':' unless --run is given; it is emptied once the requested
# program has actually been tried (see the --run case below).
run=:
# sed programs extracting the output file name from a flattened command
# line: the argument of "--output"/"--output=", respectively of "-o".
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'

# In the cases where this matters, `missing' is being run in the
# srcdir already.
if test -f configure.ac; then
  configure_ac=configure.ac
else
  configure_ac=configure.in
fi

msg="missing on your system"

case $1 in
--run)
  # Try to run requested program, and just exit if it succeeds.
  run=
  shift
  "$@" && exit 0
  # Exit code 63 means version mismatch. This often happens
  # when the user tries to use an ancient version of a tool on
  # a file that requires a minimum version. In this case we
  # should proceed as if the program had been absent, or
  # as if --run hadn't been passed.
  if test $? = 63; then
    run=:
    msg="probably too old"
  fi
  ;;

  -h|--h|--he|--hel|--help)
    echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...

Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
error status if there is no known handling for PROGRAM.

Options:
-h, --help display this help and exit
-v, --version output version information and exit
--run try to run the given command, and emulate it if it fails

Supported PROGRAM values:
aclocal touch file \`aclocal.m4'
autoconf touch file \`configure'
autoheader touch file \`config.h.in'
autom4te touch the output file, or create a stub one
automake touch all \`Makefile.in' files
bison create \`y.tab.[ch]', if possible, from existing .[ch]
flex create \`lex.yy.c', if possible, from existing .c
help2man touch the output file
lex create \`lex.yy.c', if possible, from existing .c
makeinfo touch the output file
yacc create \`y.tab.[ch]', if possible, from existing .[ch]

Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
\`g' are ignored when checking the name.

Send bug reports to <bug-automake@gnu.org>."
    exit $?
    ;;

  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
    echo "missing $scriptversion (GNU Automake)"
    exit $?
    ;;

  -*)
    echo 1>&2 "$0: Unknown \`$1' option"
    echo 1>&2 "Try \`$0 --help' for more information"
    exit 1
    ;;

esac

# normalize program name to check for.
program=`echo "$1" | sed '
s/^gnu-//; t
s/^gnu//; t
s/^g//; t'`

# Now exit if we have it, but it failed. Also exit now if we
# don't have it and --version was passed (most likely to detect
# the program). This is about non-GNU programs, so use $1 not
# $program.
case $1 in
  lex*|yacc*)
    # Not GNU programs, they don't have --version.
    ;;

  *)
    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
      # We have it, but it failed.
      exit 1
    elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
      # Could not run --version or --help. This is probably someone
      # running `$TOOL --version' or `$TOOL --help' to check whether
      # $TOOL exists and not knowing $TOOL uses missing.
      exit 1
    fi
    ;;
esac

# If it does not exist, or fails to run (possibly an outdated version),
# try to emulate it.
case $program in
  aclocal*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
to install the \`Automake' and \`Perl' packages. Grab them from
any GNU archive site."
    touch aclocal.m4
    ;;

  autoconf*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`${configure_ac}'. You might want to install the
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
archive site."
    touch configure
    ;;

  autoheader*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`acconfig.h' or \`${configure_ac}'. You might want
to install the \`Autoconf' and \`GNU m4' packages. Grab them
from any GNU archive site."
    files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' "${configure_ac}"`
    test -z "$files" && files="config.h"
    touch_files=
    for f in $files; do
      case $f in
      *:*) touch_files="$touch_files "`echo "$f" |
           sed -e 's/^[^:]*://' -e 's/:.*//'`;;
      *) touch_files="$touch_files $f.in";;
      esac
    done
    # Deliberately unquoted: $touch_files is a blank-separated list.
    touch $touch_files
    ;;

  automake*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
You might want to install the \`Automake' and \`Perl' packages.
Grab them from any GNU archive site."
    find . -type f -name Makefile.am -print |
      sed 's/\.am$/.in/' |
      while read f; do touch "$f"; done
    ;;

  autom4te*)
    echo 1>&2 "\
WARNING: \`$1' is needed, but is $msg.
You might have modified some files without having the
proper tools for further handling them.
You can get \`$1' as part of \`Autoconf' from any GNU
archive site."

    file=`echo "$*" | sed -n "$sed_output"`
    test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
    if test -f "$file"; then
      touch "$file"
    else
      # No existing output: write a stub script to the requested output
      # file (or to stdout when none was given) and report failure.
      test -z "$file" || exec >"$file"
      echo "#! /bin/sh"
      echo "# Created by GNU Automake missing as a replacement of"
      echo "# $ $@"
      echo "exit 0"
      chmod +x "$file"
      exit 1
    fi
    ;;

  bison*|yacc*)
    echo 1>&2 "\
WARNING: \`$1' $msg. You should only need it if
you modified a \`.y' file. You may need the \`Bison' package
in order for those modifications to take effect. You can get
\`Bison' from any GNU archive site."
    rm -f y.tab.c y.tab.h
    if test $# -ne 1; then
      # Fetch the last positional parameter.
      eval LASTARG=\${$#}
      case $LASTARG in
      *.y)
        SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
        if test -f "$SRCFILE"; then
          cp "$SRCFILE" y.tab.c
        fi
        SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
        if test -f "$SRCFILE"; then
          cp "$SRCFILE" y.tab.h
        fi
        ;;
      esac
    fi
    if test ! -f y.tab.h; then
      echo >y.tab.h
    fi
    if test ! -f y.tab.c; then
      echo 'main() { return 0; }' >y.tab.c
    fi
    ;;

  lex*|flex*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.l' file. You may need the \`Flex' package
in order for those modifications to take effect. You can get
\`Flex' from any GNU archive site."
    rm -f lex.yy.c
    if test $# -ne 1; then
      # Fetch the last positional parameter.
      eval LASTARG=\${$#}
      case $LASTARG in
      *.l)
        SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
        if test -f "$SRCFILE"; then
          cp "$SRCFILE" lex.yy.c
        fi
        ;;
      esac
    fi
    if test ! -f lex.yy.c; then
      echo 'main() { return 0; }' >lex.yy.c
    fi
    ;;

  help2man*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a dependency of a manual page. You may need the
\`Help2man' package in order for those modifications to take
effect. You can get \`Help2man' from any GNU archive site."

    file=`echo "$*" | sed -n "$sed_output"`
    test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
    if test -f "$file"; then
      touch "$file"
    else
      test -z "$file" || exec >"$file"
      echo ".ab help2man is required to generate this page"
      exit $?
    fi
    ;;

  makeinfo*)
    echo 1>&2 "\
WARNING: \`$1' is $msg. You should only need it if
you modified a \`.texi' or \`.texinfo' file, or any other file
indirectly affecting the aspect of the manual. The spurious
call might also be the consequence of using a buggy \`make' (AIX,
DU, IRIX). You might want to install the \`Texinfo' package or
the \`GNU make' package. Grab either from any GNU archive site."
    # The file to touch is that specified with -o ...
    file=`echo "$*" | sed -n "$sed_output"`
    test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
    if test -z "$file"; then
      # ... or it is the one specified with @setfilename ...
      infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
      file=`sed -n '
/^@setfilename/{
s/.* \([^ ]*\) *$/\1/
p
q
}' "$infile"`
      # ... or it is derived from the source name (dir/f.texi becomes f.info)
      test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
    fi
    # If the file does not exist, the user really needs makeinfo;
    # let's fail without touching anything.
    test -f "$file" || exit 1
    touch "$file"
    ;;

  *)
    echo 1>&2 "\
WARNING: \`$1' is needed, and is $msg.
You might have modified some files without having the
proper tools for further handling them. Check the \`README' file,
it often tells you about the needed prerequisites for installing
this package. You may also peek at any GNU archive site, in case
some other package would contain this missing \`$1' program."
    exit 1
    ;;
esac

exit 0

# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "ocl_utilities.h"
|
||||
|
||||
/* Return the OpenCL error string for a given error number.
|
||||
*/
|
||||
const char *opencl_error_string(cl_int error)
|
||||
{
|
||||
int errorCount;
|
||||
int index;
|
||||
|
||||
static const char *errorString[] = {
|
||||
[CL_SUCCESS] = "CL_SUCCESS",
|
||||
[-CL_DEVICE_NOT_FOUND] = "CL_DEVICE_NOT_FOUND",
|
||||
[-CL_DEVICE_NOT_AVAILABLE] = "CL_DEVICE_NOT_AVAILABLE",
|
||||
[-CL_COMPILER_NOT_AVAILABLE] = "CL_COMPILER_NOT_AVAILABLE",
|
||||
[-CL_MEM_OBJECT_ALLOCATION_FAILURE] =
|
||||
"CL_MEM_OBJECT_ALLOCATION_FAILURE",
|
||||
[-CL_OUT_OF_RESOURCES] = "CL_OUT_OF_RESOURCES",
|
||||
[-CL_OUT_OF_HOST_MEMORY] = "CL_OUT_OF_HOST_MEMORY",
|
||||
[-CL_PROFILING_INFO_NOT_AVAILABLE] =
|
||||
"CL_PROFILING_INFO_NOT_AVAILABLE",
|
||||
[-CL_MEM_COPY_OVERLAP] = "CL_MEM_COPY_OVERLAP",
|
||||
[-CL_IMAGE_FORMAT_MISMATCH] = "CL_IMAGE_FORMAT_MISMATCH",
|
||||
[-CL_IMAGE_FORMAT_NOT_SUPPORTED] =
|
||||
"CL_IMAGE_FORMAT_NOT_SUPPORTED",
|
||||
[-CL_BUILD_PROGRAM_FAILURE] = "CL_BUILD_PROGRAM_FAILURE",
|
||||
[-CL_MAP_FAILURE] = "CL_MAP_FAILURE",
|
||||
[-CL_INVALID_VALUE] = "CL_INVALID_VALUE",
|
||||
[-CL_INVALID_DEVICE_TYPE] = "CL_INVALID_DEVICE_TYPE",
|
||||
[-CL_INVALID_PLATFORM] = "CL_INVALID_PLATFORM",
|
||||
[-CL_INVALID_DEVICE] = "CL_INVALID_DEVICE",
|
||||
[-CL_INVALID_CONTEXT] = "CL_INVALID_CONTEXT",
|
||||
[-CL_INVALID_QUEUE_PROPERTIES] = "CL_INVALID_QUEUE_PROPERTIES",
|
||||
[-CL_INVALID_COMMAND_QUEUE] = "CL_INVALID_COMMAND_QUEUE",
|
||||
[-CL_INVALID_HOST_PTR] = "CL_INVALID_HOST_PTR",
|
||||
[-CL_INVALID_MEM_OBJECT] = "CL_INVALID_MEM_OBJECT",
|
||||
[-CL_INVALID_IMAGE_FORMAT_DESCRIPTOR] =
|
||||
"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
|
||||
[-CL_INVALID_IMAGE_SIZE] = "CL_INVALID_IMAGE_SIZE",
|
||||
[-CL_INVALID_SAMPLER] = "CL_INVALID_SAMPLER",
|
||||
[-CL_INVALID_BINARY] = "CL_INVALID_BINARY",
|
||||
[-CL_INVALID_BUILD_OPTIONS] = "CL_INVALID_BUILD_OPTIONS",
|
||||
[-CL_INVALID_PROGRAM] = "CL_INVALID_PROGRAM",
|
||||
[-CL_INVALID_PROGRAM_EXECUTABLE] =
|
||||
"CL_INVALID_PROGRAM_EXECUTABLE",
|
||||
[-CL_INVALID_KERNEL_NAME] = "CL_INVALID_KERNEL_NAME",
|
||||
[-CL_INVALID_KERNEL_DEFINITION] =
|
||||
"CL_INVALID_KERNEL_DEFINITION",
|
||||
[-CL_INVALID_KERNEL] = "CL_INVALID_KERNEL",
|
||||
[-CL_INVALID_ARG_INDEX] = "CL_INVALID_ARG_INDEX",
|
||||
[-CL_INVALID_ARG_VALUE] = "CL_INVALID_ARG_VALUE",
|
||||
[-CL_INVALID_ARG_SIZE] = "CL_INVALID_ARG_SIZE",
|
||||
[-CL_INVALID_KERNEL_ARGS] = "CL_INVALID_KERNEL_ARGS",
|
||||
[-CL_INVALID_WORK_DIMENSION] = "CL_INVALID_WORK_DIMENSION",
|
||||
[-CL_INVALID_WORK_GROUP_SIZE] = "CL_INVALID_WORK_GROUP_SIZE",
|
||||
[-CL_INVALID_WORK_ITEM_SIZE] = "CL_INVALID_WORK_ITEM_SIZE",
|
||||
[-CL_INVALID_GLOBAL_OFFSET] = "CL_INVALID_GLOBAL_OFFSET",
|
||||
[-CL_INVALID_EVENT_WAIT_LIST] = "CL_INVALID_EVENT_WAIT_LIST",
|
||||
[-CL_INVALID_EVENT] = "CL_INVALID_EVENT",
|
||||
[-CL_INVALID_OPERATION] = "CL_INVALID_OPERATION",
|
||||
[-CL_INVALID_GL_OBJECT] = "CL_INVALID_GL_OBJECT",
|
||||
[-CL_INVALID_BUFFER_SIZE] = "CL_INVALID_BUFFER_SIZE",
|
||||
[-CL_INVALID_MIP_LEVEL] = "CL_INVALID_MIP_LEVEL",
|
||||
[-CL_INVALID_GLOBAL_WORK_SIZE] = "CL_INVALID_GLOBAL_WORK_SIZE",
|
||||
[-CL_INVALID_PROPERTY] = "CL_INVALID_PROPERTY"
|
||||
};
|
||||
|
||||
errorCount = sizeof(errorString) / sizeof(errorString[0]);
|
||||
index = -error;
|
||||
|
||||
return (index >= 0 && index < errorCount) ?
|
||||
errorString[index] : "Unspecified Error";
|
||||
}
|
||||
|
||||
/* Find a GPU or a CPU associated with the first available platform.
|
||||
* If use_gpu is set, then this function first tries to look for a GPU
|
||||
* in the first available platform.
|
||||
* If this fails or if use_gpu is not set, then it tries to use the CPU.
|
||||
*/
|
||||
cl_device_id opencl_create_device(int use_gpu)
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id dev;
|
||||
int err;
|
||||
|
||||
err = clGetPlatformIDs(1, &platform, NULL);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Error %s while looking for a platform.\n",
|
||||
opencl_error_string(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
err = CL_DEVICE_NOT_FOUND;
|
||||
if (use_gpu)
|
||||
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev,
|
||||
NULL);
|
||||
if (err == CL_DEVICE_NOT_FOUND)
|
||||
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev,
|
||||
NULL);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Error %s while looking for a device.\n",
|
||||
opencl_error_string(err));
|
||||
exit(1);
|
||||
}
|
||||
return dev;
|
||||
}
|
||||
|
||||
/* Create an OpenCL program from a string and compile it.
|
||||
*/
|
||||
cl_program opencl_build_program_from_string(cl_context ctx, cl_device_id dev,
|
||||
const char *program_source, size_t program_size,
|
||||
const char *opencl_options)
|
||||
{
|
||||
int err;
|
||||
cl_program program;
|
||||
char *program_log;
|
||||
size_t log_size;
|
||||
|
||||
program = clCreateProgramWithSource(ctx, 1,
|
||||
&program_source, &program_size, &err);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Could not create the program\n");
|
||||
exit(1);
|
||||
}
|
||||
err = clBuildProgram(program, 0, NULL, opencl_options, NULL, NULL);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "Could not build the program.\n");
|
||||
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG, 0,
|
||||
NULL, &log_size);
|
||||
program_log = (char *) malloc(log_size + 1);
|
||||
program_log[log_size] = '\0';
|
||||
clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
|
||||
log_size + 1, program_log, NULL);
|
||||
fprintf(stderr, "%s\n", program_log);
|
||||
free(program_log);
|
||||
exit(1);
|
||||
}
|
||||
return program;
|
||||
}
|
||||
|
||||
/* Create an OpenCL program from a source file and compile it.
|
||||
*/
|
||||
cl_program opencl_build_program_from_file(cl_context ctx, cl_device_id dev,
|
||||
const char* filename, const char* opencl_options)
|
||||
{
|
||||
cl_program program;
|
||||
FILE *program_file;
|
||||
char *program_source;
|
||||
size_t program_size, read;
|
||||
|
||||
program_file = fopen(filename, "r");
|
||||
if (program_file == NULL) {
|
||||
fprintf(stderr, "Could not find the source file.\n");
|
||||
exit(1);
|
||||
}
|
||||
fseek(program_file, 0, SEEK_END);
|
||||
program_size = ftell(program_file);
|
||||
rewind(program_file);
|
||||
program_source = (char *) malloc(program_size + 1);
|
||||
program_source[program_size] = '\0';
|
||||
read = fread(program_source, sizeof(char), program_size, program_file);
|
||||
if (read != program_size) {
|
||||
fprintf(stderr, "Error while reading the kernel.\n");
|
||||
exit(1);
|
||||
}
|
||||
fclose(program_file);
|
||||
|
||||
program = opencl_build_program_from_string(ctx, dev, program_source,
|
||||
program_size, opencl_options);
|
||||
free(program_source);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
#ifndef OCL_UTILITIES_H
#define OCL_UTILITIES_H

/* The OpenCL umbrella header lives in a different directory on Apple
 * platforms.
 */
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif

/* Map an OpenCL error number to the name of the corresponding
 * CL_* error constant.
 */
const char *opencl_error_string(cl_int error);

/* Select a device from the first available platform.
 * A GPU is tried first when "use_gpu" is set; if that fails
 * or if "use_gpu" is not set, a CPU device is used instead.
 */
cl_device_id opencl_create_device(int use_gpu);

/* Build an OpenCL program from "program_size" bytes of source text
 * in "program_source", compiling it with "opencl_options".
 */
cl_program opencl_build_program_from_string(cl_context ctx, cl_device_id dev,
	const char *program_source, size_t program_size,
	const char *opencl_options);

/* Build an OpenCL program from the source file "filename",
 * compiling it with "opencl_options".
 */
cl_program opencl_build_program_from_file(cl_context ctx, cl_device_id dev,
	const char *filename, const char *opencl_options);

#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,11 @@
|
|||
/* Include guard.  The name avoids the "_OPENCL_H" spelling because
 * identifiers starting with an underscore followed by an uppercase letter
 * are reserved for the implementation.
 */
#ifndef PPCG_OPENCL_H
#define PPCG_OPENCL_H

#include <pet.h>
#include "ppcg_options.h"
#include "ppcg.h"

/* Entry point of the OpenCL backend.
 * NOTE(review): presumably reads the program named by "input" and writes
 * the generated OpenCL host/kernel code under the name "output", returning
 * a status code -- the definition is not visible here; confirm there.
 */
int generate_opencl(isl_ctx *ctx, struct ppcg_options *options,
	const char *input, const char *output);

#endif
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
#!/bin/sh
# Translate every C file in $srcdir/tests to OpenCL with ppcg, compile the
# generated host code together with ocl_utilities.c and run the result.
# The first failing step aborts the script with a non-zero status.
#
# Options:
#   --keep   put the generated files in ./opencl_test.<version> and keep
#            them; otherwise a temporary directory is used and removed
#            after a successful run.
#
# The @NAME@ placeholders are presumably substituted when this template
# is configured.

keep=no

for option; do
	case "$option" in
		--keep)
			keep=yes
			;;
	esac
done

EXEEXT=@EXEEXT@
VERSION=@GIT_HEAD_VERSION@
CC="@CC@"
CFLAGS="--std=gnu99"
srcdir="@srcdir@"

if [ "$keep" = "yes" ]; then
	OUTDIR="opencl_test.$VERSION"
	mkdir "$OUTDIR" || exit 1
else
	if test "x$TMPDIR" = "x"; then
		TMPDIR=/tmp
	fi
	OUTDIR=`mktemp -d "$TMPDIR/ppcg.XXXXXXXXXX"` || exit 1
fi

# run_tests SUBDIR [PPCG_OPTIONS]
#
# Run all tests with the extra ppcg options PPCG_OPTIONS, placing the
# generated files in $OUTDIR/SUBDIR.
run_tests () {
	subdir=$1
	ppcg_options=$2

	echo "Test with PPCG options '$ppcg_options'"
	mkdir "${OUTDIR}/${subdir}" || exit 1
	for i in "$srcdir"/tests/*.c; do
		echo "$i"
		name=`basename "$i"`
		name="${name%.c}"
		out_c="${OUTDIR}/${subdir}/$name.ppcg.c"
		out="${OUTDIR}/${subdir}/$name.ppcg$EXEEXT"
		options="--target=opencl --opencl-no-use-gpu $ppcg_options"
		functions="$srcdir/tests/${name}_opencl_functions.cl"
		if test -f "$functions"; then
			options="$options --opencl-include-file=$functions"
			options="$options --opencl-compiler-options=-I."
		fi
		# $options, $CC and $CFLAGS are deliberately unquoted: each
		# holds a blank-separated list of words.
		"./ppcg$EXEEXT" $options "$i" -o "$out_c" || exit
		# Libraries go after the sources that reference them, since
		# the linker resolves symbols in command-line order.
		$CC $CFLAGS -I "$srcdir" -I. "$srcdir/ocl_utilities.c" \
			"$out_c" -o "$out" -lOpenCL || exit
		"$out" || exit
	done
}

run_tests default
run_tests embed --opencl-embed-kernel-code

if [ "$keep" = "no" ]; then
	rm -r "${OUTDIR}"
fi
|
||||
|
|
@ -0,0 +1,109 @@
|
|||
#!/bin/sh
# Run ppcg on the benchmarks listed in $DIR/utilities/benchmark_list and
# compare the stderr dump of each transformed program with that of the
# untransformed original.
#
# Options:
#   --keep      put the generated files in ./out.<version> and keep them;
#               otherwise a temporary directory is used and removed after
#               a successful run.
#   --verbose   print the ppcg and compiler command lines.
#
# The @NAME@ placeholders are presumably substituted when this template
# is configured.

keep=no
verbose=no

for option; do
	case "$option" in
		--keep)
			keep=yes
			;;
		--verbose)
			verbose=yes
			;;
	esac
done

EXEEXT=@EXEEXT@
DIR="@POLYBENCH_DIR@"
VERSION=@GIT_HEAD_VERSION@
SIZE=-DMINI_DATASET
CC="@CC@"
HAVE_OPENCL=@HAVE_OPENCL@
HAVE_OPENMP=@HAVE_OPENMP@
srcdir="@srcdir@"
if [ "$keep" = "yes" ]; then
	OUTDIR="out.$VERSION"
	mkdir "$OUTDIR" || exit 1
else
	if test "x$TMPDIR" = "x"; then
		TMPDIR=/tmp
	fi
	OUTDIR=`mktemp -d "$TMPDIR/ppcg.XXXXXXXXXX"` || exit 1
fi
CPPFLAGS="-DPOLYBENCH_USE_C99_PROTO -DPOLYBENCH_DUMP_ARRAYS"
CPPFLAGS="$CPPFLAGS $SIZE -I $DIR/utilities"
CFLAGS="-lm --std=gnu99"

echo "Running tests in folder ${OUTDIR}"

# run_tests EXT [PPCG_OPTIONS [CC_OPTIONS]]
#
# For every benchmark: generate $name.EXT.c with ppcg using PPCG_OPTIONS,
# build both the original and the generated source (the latter with the
# additional CC_OPTIONS), run both and compare what they print on stderr.
#
# $CC, $CPPFLAGS, $CFLAGS, $ppcg_options and $cc_options are deliberately
# left unquoted: each holds a blank-separated list of words.
run_tests () {
	ext=$1

	ppcg_options=$2
	cc_options=$3

	if [ "x$ppcg_options" = "x" ]; then
		ppcg_option_str="none"
	else
		ppcg_option_str=$ppcg_options
	fi

	if [ "x$cc_options" = "x" ]; then
		cc_option_str="none"
	else
		cc_option_str=$cc_options
	fi

	echo Test: $ext, ppcg options: $ppcg_option_str, CC options: $cc_option_str
	for i in `cat "$DIR/utilities/benchmark_list"`; do
		echo $i
		name=`basename "$i"`
		name=${name%.c}
		source_opt="${OUTDIR}/$name.$ext.c"
		prog_orig="${OUTDIR}/$name.orig${EXEEXT}"
		prog_opt="${OUTDIR}/$name.$ext${EXEEXT}"
		output_orig="${OUTDIR}/$name.orig.out"
		output_opt="${OUTDIR}/$name.$ext.out"
		dir=`dirname "$i"`
		if [ "$verbose" = "yes" ]; then
			echo ./ppcg$EXEEXT -I $DIR/$dir $DIR/$i \
				$CPPFLAGS -o $source_opt $ppcg_options
		fi
		"./ppcg$EXEEXT" -I "$DIR/$dir" "$DIR/$i" $CPPFLAGS \
			-o "$source_opt" $ppcg_options || exit
		# Without a reference binary the comparison below would be
		# meaningless, so stop right away if this build fails.
		$CC -I "$DIR/$dir" $CPPFLAGS "$DIR/$i" -o "$prog_orig" \
			"$DIR/utilities/polybench.c" $CFLAGS || exit
		"$prog_orig" 2> "$output_orig"
		if [ "$verbose" = "yes" ]; then
			echo $CC -I $DIR/$dir $CPPFLAGS $source_opt \
				-o $prog_opt $DIR/utilities/polybench.c \
				$CFLAGS $cc_options
		fi
		$CC -I "$DIR/$dir" $CPPFLAGS "$source_opt" -o "$prog_opt" \
			"$DIR/utilities/polybench.c" $CFLAGS $cc_options || exit

		"$prog_opt" 2> "$output_opt"
		cmp "$output_orig" "$output_opt" || exit
	done
}

run_tests ppcg --target=c
run_tests ppcg_live "--target=c --no-live-range-reordering"

# Test OpenMP code, if compiler supports openmp
if [ "$HAVE_OPENMP" = "yes" ]; then
	run_tests ppcg_omp "--target=c --openmp" -fopenmp
	echo Introduced `grep -R 'omp parallel' "${OUTDIR}" | wc -l` '"pragma omp parallel for"'
else
	echo Compiler does not support OpenMP. Skipping OpenMP tests.
fi

if [ "$HAVE_OPENCL" = "yes" ]; then
	run_tests ppcg_opencl "--target=opencl --opencl-no-use-gpu" \
		"-I $srcdir $srcdir/ocl_utilities.c -lOpenCL"
fi

if [ "$keep" = "no" ]; then
	rm -r "${OUTDIR}"
fi
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,117 @@
|
|||
#ifndef PPCG_H
#define PPCG_H

#include <isl/schedule.h>
#include <isl/set.h>
#include <isl/union_set.h>
#include <isl/union_map.h>
#include <isl/id_to_ast_expr.h>
#include <pet.h>

#include "ppcg_options.h"

const char *ppcg_base_name(const char *filename);
int ppcg_extract_base_name(char *name, const char *input);

/* The scop as PPCG represents it internally.
 *
 * "options" holds the user-specified options; some of the fields below
 * may depend on some of these options.
 *
 * Position in the input:
 *   "start", "end"	file offsets of the corresponding program text.
 *
 * Domains:
 *   "context"		constraints on the parameters.
 *   "domain"		union of all iteration domains.
 *   "call"		iteration domains of the statements with
 *			a call expression.
 *
 * Accesses:
 *   "reads"		all potential read accesses.
 *   "tagged_reads"	same as "reads", except that the domain is
 *			a wrapped relation mapping an iteration domain
 *			to a reference identifier.
 *   "live_in"		the potential read accesses that potentially
 *			have no corresponding writes in the scop.
 *   "may_writes"	all potential write accesses.
 *   "tagged_may_writes"
 *			same as "may_writes", except that the domain is
 *			a wrapped relation mapping an iteration domain
 *			to a reference identifier.
 *   "must_writes"	all definite write accesses.
 *   "tagged_must_writes"
 *			same as "must_writes", except that the domain is
 *			a wrapped relation mapping an iteration domain
 *			to a reference identifier.
 *   "live_out"		the potential write accesses that are potentially
 *			not killed by any kills or any other writes.
 *   "tagged_must_kills"
 *			all definite kill accesses with a reference
 *			identifier in the domain.
 *
 * "tagger" maps tagged iteration domains to the corresponding untagged
 * iteration domain.
 *
 * "independence" is the union of all independence filters.
 *
 * Dependences and schedule:
 *   "dep_flow"		the potential flow dependences.
 *   "tagged_dep_flow"	same as "dep_flow", except that both domain and
 *			range are wrapped relations mapping an iteration
 *			domain to a reference identifier.
 *			May be NULL if not computed.
 *   "dep_false"	the potential false (anti and output) dependences.
 *			If "dep_order" is used, then "dep_false" only
 *			contains a limited set of anti and output
 *			dependences.
 *   "dep_forced"	the validity constraints that should be enforced
 *			even when live-range reordering is used.
 *			In particular, these constraints ensure that all
 *			live-in accesses remain live-in, that all live-out
 *			accesses remain live-out and that multiple
 *			potential sources for the same read are executed
 *			in the original order.
 *   "dep_order", "tagged_dep_order"
 *			the order dependences between the live range
 *			intervals in "dep_flow"/"tagged_dep_flow".
 *			Only used if the live_range_reordering option
 *			is set.  Otherwise NULL.
 *   "schedule"		the (original) schedule.
 *
 * "names" contains all variable names that are in use by the scop.
 * The names are mapped to a dummy value.
 *
 * "pet" is the original pet_scop.
 */
struct ppcg_scop {
	struct ppcg_options *options;

	unsigned start;
	unsigned end;

	isl_set *context;
	isl_union_set *domain;
	isl_union_set *call;
	isl_union_map *tagged_reads;
	isl_union_map *reads;
	isl_union_map *live_in;
	isl_union_map *tagged_may_writes;
	isl_union_map *may_writes;
	isl_union_map *tagged_must_writes;
	isl_union_map *must_writes;
	isl_union_map *live_out;
	isl_union_map *tagged_must_kills;

	isl_union_pw_multi_aff *tagger;

	isl_union_map *independence;

	isl_union_map *dep_flow;
	isl_union_map *tagged_dep_flow;
	isl_union_map *dep_false;
	isl_union_map *dep_forced;
	isl_union_map *dep_order;
	isl_union_map *tagged_dep_order;
	isl_schedule *schedule;

	isl_id_to_ast_expr *names;

	struct pet_scop *pet;
};

int ppcg_scop_any_hidden_declarations(struct ppcg_scop *scop);
__isl_give isl_id_list *ppcg_scop_generate_names(struct ppcg_scop *scop,
	int n, const char *prefix);

int ppcg_transform(isl_ctx *ctx, const char *input, FILE *out,
	struct ppcg_options *options,
	__isl_give isl_printer *(*fn)(__isl_take isl_printer *p,
		struct ppcg_scop *scop, void *user), void *user);

#endif
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright 2010-2011 INRIA Saclay
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
|
||||
* Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
|
||||
* 91893 Orsay, France
|
||||
*/
|
||||
|
||||
#include "ppcg_options.h"
|
||||
|
||||
static struct isl_arg_choice target[] = {
|
||||
{"c", PPCG_TARGET_C},
|
||||
{"cuda", PPCG_TARGET_CUDA},
|
||||
{"opencl", PPCG_TARGET_OPENCL},
|
||||
{0}
|
||||
};
|
||||
|
||||
/* Command line description of the fields of struct ppcg_debug_options.
 * All of them are boolean flags that are off by default.
 */
ISL_ARGS_START(struct ppcg_debug_options, ppcg_debug_options_args)
ISL_ARG_BOOL(struct ppcg_debug_options, dump_schedule_constraints,
	0, "dump-schedule-constraints", 0,
	"dump schedule constraints")
ISL_ARG_BOOL(struct ppcg_debug_options, dump_schedule,
	0, "dump-schedule", 0,
	"dump isl computed schedule")
ISL_ARG_BOOL(struct ppcg_debug_options, dump_final_schedule,
	0, "dump-final-schedule", 0,
	"dump PPCG computed schedule")
ISL_ARG_BOOL(struct ppcg_debug_options, dump_sizes,
	0, "dump-sizes", 0,
	"dump effectively used per kernel tile, grid and block sizes")
ISL_ARG_BOOL(struct ppcg_debug_options, verbose,
	'v', "verbose", 0, NULL)
ISL_ARGS_END
|
||||
|
||||
/* Command line description of the OpenCL specific fields
 * of struct ppcg_options.
 */
ISL_ARGS_START(struct ppcg_options, ppcg_opencl_options_args)
ISL_ARG_STR(struct ppcg_options, opencl_compiler_options,
	0, "compiler-options", "options", NULL,
	"options to pass to the OpenCL compiler")
ISL_ARG_BOOL(struct ppcg_options, opencl_use_gpu,
	0, "use-gpu", 1,
	"use GPU device (if available)")
ISL_ARG_STR_LIST(struct ppcg_options, opencl_n_include_file,
	opencl_include_files,
	0, "include-file", "filename",
	"file to #include in generated OpenCL code")
ISL_ARG_BOOL(struct ppcg_options, opencl_print_kernel_types,
	0, "print-kernel-types", 1,
	"print definitions of types in the kernel file")
ISL_ARG_BOOL(struct ppcg_options, opencl_embed_kernel_code,
	0, "embed-kernel-code", 0,
	"embed kernel code into host code")
ISL_ARGS_END
|
||||
|
||||
/* Command line description of struct ppcg_options.
 *
 * The debugging options are stored in a child structure, while
 * the OpenCL options live in the same structure and are only
 * grouped together for presentation purposes.
 */
ISL_ARGS_START(struct ppcg_options, ppcg_options_args)
ISL_ARG_CHILD(struct ppcg_options, debug, NULL, &ppcg_debug_options_args,
	"debugging options")
ISL_ARG_BOOL(struct ppcg_options, reschedule, 0, "reschedule", 1,
	"replace original schedule by isl computed schedule (except C target)")
ISL_ARG_BOOL(struct ppcg_options, scale_tile_loops, 0,
	"scale-tile-loops", 1, NULL)
ISL_ARG_BOOL(struct ppcg_options, wrap, 0, "wrap", 1, NULL)
ISL_ARG_BOOL(struct ppcg_options, use_shared_memory, 0, "shared-memory", 1,
	"use shared memory in kernel code")
ISL_ARG_BOOL(struct ppcg_options, use_private_memory, 0, "private-memory", 1,
	"use private memory in kernel code")
ISL_ARG_STR(struct ppcg_options, ctx, 0, "ctx", "context", NULL,
	"Constraints on parameters")
/* The help text used to end in an unbalanced ")". */
ISL_ARG_BOOL(struct ppcg_options, non_negative_parameters, 0,
	"assume-non-negative-parameters", 0,
	"assume all parameters are non-negative")
ISL_ARG_INT(struct ppcg_options, tile_size, 'S', "tile-size", "size", 32, NULL)
ISL_ARG_STR(struct ppcg_options, sizes, 0, "sizes", "sizes", NULL,
	"Per kernel tile, grid and block sizes")
ISL_ARG_INT(struct ppcg_options, max_shared_memory, 0,
	"max-shared-memory", "size", 8192, "maximal amount of shared memory")
ISL_ARG_BOOL(struct ppcg_options, openmp, 0, "openmp", 0,
	"Generate OpenMP macros (only for C target)")
ISL_ARG_CHOICE(struct ppcg_options, target, 0, "target", target,
	PPCG_TARGET_CUDA, "the target to generate code for")
ISL_ARG_BOOL(struct ppcg_options, linearize_device_arrays, 0,
	"linearize-device-arrays", 1,
	"linearize all device arrays, even those of fixed size")
ISL_ARG_BOOL(struct ppcg_options, live_range_reordering, 0,
	"live-range-reordering", 1,
	"allow successive live ranges on the same memory element "
	"to be reordered")
ISL_ARG_GROUP("opencl", &ppcg_opencl_options_args, "OpenCL options")
ISL_ARG_STR(struct ppcg_options, save_schedule_file, 0, "save-schedule",
	"file", NULL, "save isl computed schedule to <file>")
ISL_ARG_STR(struct ppcg_options, load_schedule_file, 0, "load-schedule",
	"file", NULL, "load schedule from <file>, "
	"using it instead of an isl computed schedule")
ISL_ARGS_END
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
#ifndef PPCG_OPTIONS_H
|
||||
#define PPCG_OPTIONS_H
|
||||
|
||||
#include <isl/arg.h>
|
||||
|
||||
/* Debugging flags; each field is an integer used as a boolean. */
struct ppcg_debug_options {
	/* Dump the schedule constraints. */
	int dump_schedule_constraints;
	/* Dump the schedule computed by isl. */
	int dump_schedule;
	/* Dump the schedule computed by PPCG. */
	int dump_final_schedule;
	/* Dump the effectively used per kernel tile, grid and block sizes. */
	int dump_sizes;
	/* Verbose output requested (-v/--verbose). */
	int verbose;
};
|
||||
|
||||
/* Options controlling the behavior of ppcg. */
struct ppcg_options {
	/* Debugging options, kept in a separate structure. */
	struct ppcg_debug_options *debug;

	/* Use isl to compute a schedule replacing the original schedule. */
	int reschedule;
	int scale_tile_loops;
	int wrap;

	/* Assume all parameters are non-negative. */
	int non_negative_parameters;
	/* Constraints on the parameters, as a string, or NULL. */
	char *ctx;
	/* Per kernel tile, grid and block sizes, as a string, or NULL. */
	char *sizes;

	/* Tile size (set through -S/--tile-size). */
	int tile_size;

	/* Take advantage of private memory. */
	int use_private_memory;

	/* Take advantage of shared memory. */
	int use_shared_memory;

	/* Maximal amount of shared memory. */
	int max_shared_memory;

	/* The target we generate code for. */
	int target;

	/* Generate OpenMP macros (C target only). */
	int openmp;

	/* Linearize all device arrays. */
	int linearize_device_arrays;

	/* Allow live range to be reordered. */
	int live_range_reordering;

	/* Options to pass to the OpenCL compiler. */
	char *opencl_compiler_options;
	/* Prefer GPU device over CPU. */
	int opencl_use_gpu;
	/* Number of files to include. */
	int opencl_n_include_file;
	/* Files to include. */
	const char **opencl_include_files;
	/* Print definitions of types in kernels. */
	int opencl_print_kernel_types;
	/* Embed OpenCL kernel code in host code. */
	int opencl_embed_kernel_code;

	/* Name of file for saving isl computed schedule or NULL. */
	char *save_schedule_file;
	/* Name of file for loading schedule or NULL. */
	char *load_schedule_file;
};
|
||||
|
||||
ISL_ARG_DECL(ppcg_debug_options, struct ppcg_debug_options,
|
||||
ppcg_debug_options_args)
|
||||
ISL_ARG_DECL(ppcg_options, struct ppcg_options, ppcg_options_args)
|
||||
|
||||
#define PPCG_TARGET_C 0
|
||||
#define PPCG_TARGET_CUDA 1
|
||||
#define PPCG_TARGET_OPENCL 2
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* Copyright 2012-2013 Ecole Normale Superieure
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege,
|
||||
* Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
|
||||
*/
|
||||
|
||||
#include <isl/aff.h>
|
||||
#include <isl/ast_build.h>
|
||||
|
||||
#include "print.h"
|
||||
|
||||
/* Open a block on "p": print "{" on a line of its own and
 * increase the indentation by two positions.
 */
__isl_give isl_printer *ppcg_start_block(__isl_take isl_printer *p)
{
	p = isl_printer_print_str(isl_printer_start_line(p), "{");
	return isl_printer_indent(isl_printer_end_line(p), 2);
}
|
||||
|
||||
/* Close a block on "p": decrease the indentation by two positions and
 * print "}" on a line of its own.
 */
__isl_give isl_printer *ppcg_end_block(__isl_take isl_printer *p)
{
	p = isl_printer_start_line(isl_printer_indent(p, -2));
	p = isl_printer_print_str(p, "}");
	return isl_printer_end_line(p);
}
|
||||
|
||||
static int print_macro(enum isl_ast_op_type type, void *user)
|
||||
{
|
||||
isl_printer **p = user;
|
||||
|
||||
if (type == isl_ast_op_fdiv_q)
|
||||
return 0;
|
||||
|
||||
*p = isl_ast_op_type_print_macro(type, *p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Print the required macros for "node", except one for floord.
|
||||
* The caller is assumed to have printed a macro for floord already
|
||||
* as it may also appear in the declarations and the statements.
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_print_macros(__isl_take isl_printer *p,
|
||||
__isl_keep isl_ast_node *node)
|
||||
{
|
||||
if (isl_ast_node_foreach_ast_op_type(node, &print_macro, &p) < 0)
|
||||
return isl_printer_free(p);
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print "extent" as a sequence of
|
||||
*
|
||||
* [1 + maximal_value]
|
||||
*
|
||||
* one for each dimension.
|
||||
* "build" is used to simplify the size expressions, if any.
|
||||
*/
|
||||
static __isl_give isl_printer *print_extent(__isl_take isl_printer *p,
|
||||
__isl_keep isl_set *extent, __isl_keep isl_ast_build *build)
|
||||
{
|
||||
int i, n;
|
||||
|
||||
n = isl_set_dim(extent, isl_dim_set);
|
||||
if (n == 0)
|
||||
return p;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
isl_set *dom;
|
||||
isl_local_space *ls;
|
||||
isl_aff *one;
|
||||
isl_pw_aff *bound;
|
||||
isl_ast_expr *expr;
|
||||
|
||||
bound = isl_set_dim_max(isl_set_copy(extent), i);
|
||||
dom = isl_pw_aff_domain(isl_pw_aff_copy(bound));
|
||||
ls = isl_local_space_from_space(isl_set_get_space(dom));
|
||||
one = isl_aff_zero_on_domain(ls);
|
||||
one = isl_aff_add_constant_si(one, 1);
|
||||
bound = isl_pw_aff_add(bound, isl_pw_aff_alloc(dom, one));
|
||||
|
||||
p = isl_printer_print_str(p, "[");
|
||||
expr = isl_ast_build_expr_from_pw_aff(build, bound);
|
||||
p = isl_printer_print_ast_expr(p, expr);
|
||||
p = isl_printer_print_str(p, "]");
|
||||
|
||||
isl_ast_expr_free(expr);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print a declaration for array "array" to "p", using "build"
|
||||
* to simplify any size expressions.
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_print_declaration(__isl_take isl_printer *p,
|
||||
struct pet_array *array, __isl_keep isl_ast_build *build)
|
||||
{
|
||||
const char *name;
|
||||
|
||||
if (!array)
|
||||
return isl_printer_free(p);
|
||||
|
||||
name = isl_set_get_tuple_name(array->extent);
|
||||
|
||||
p = isl_printer_start_line(p);
|
||||
p = isl_printer_print_str(p, array->element_type);
|
||||
p = isl_printer_print_str(p, " ");
|
||||
p = isl_printer_print_str(p, name);
|
||||
p = print_extent(p, array->extent, build);
|
||||
p = isl_printer_print_str(p, ";");
|
||||
p = isl_printer_end_line(p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print declarations for the arrays in "scop" that are declared
|
||||
* and that are exposed (if exposed == 1) or not exposed (if exposed == 0).
|
||||
*/
|
||||
static __isl_give isl_printer *print_declarations(__isl_take isl_printer *p,
|
||||
struct ppcg_scop *scop, int exposed)
|
||||
{
|
||||
int i;
|
||||
isl_ast_build *build;
|
||||
|
||||
if (!scop)
|
||||
return isl_printer_free(p);
|
||||
|
||||
build = isl_ast_build_from_context(isl_set_copy(scop->context));
|
||||
for (i = 0; i < scop->pet->n_array; ++i) {
|
||||
struct pet_array *array = scop->pet->arrays[i];
|
||||
|
||||
if (!array->declared)
|
||||
continue;
|
||||
if (array->exposed != exposed)
|
||||
continue;
|
||||
|
||||
p = ppcg_print_declaration(p, array, build);
|
||||
}
|
||||
isl_ast_build_free(build);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print declarations for the arrays in "scop" that are declared
|
||||
* and exposed to the code after the scop.
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_print_exposed_declarations(
|
||||
__isl_take isl_printer *p, struct ppcg_scop *scop)
|
||||
{
|
||||
return print_declarations(p, scop, 1);
|
||||
}
|
||||
|
||||
/* Print declarations for the arrays in "scop" that are declared,
|
||||
* but not exposed to the code after the scop.
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_print_hidden_declarations(
|
||||
__isl_take isl_printer *p, struct ppcg_scop *scop)
|
||||
{
|
||||
return print_declarations(p, scop, 0);
|
||||
}
|
||||
|
||||
/* Internal data structure for print_guarded_user.
 * It carries the callback handed to ppcg_print_guarded down to
 * the point where the body of the guard is printed.
 *
 * fn is the function that should be called to print the body.
 * user is the argument that should be passed to this function.
 */
struct ppcg_print_guarded_data {
	__isl_give isl_printer *(*fn)(__isl_take isl_printer *p, void *user);
	void *user;
};
|
||||
|
||||
/* Print the body of the if statement expressing the guard passed
|
||||
* to "ppcg_print_guarded" by calling data->fn.
|
||||
*/
|
||||
static __isl_give isl_printer *print_guarded_user(__isl_take isl_printer *p,
|
||||
__isl_take isl_ast_print_options *options,
|
||||
__isl_keep isl_ast_node *node, void *user)
|
||||
{
|
||||
struct ppcg_print_guarded_data *data = user;
|
||||
|
||||
p = data->fn(p, data->user);
|
||||
|
||||
isl_ast_print_options_free(options);
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Print a condition for the given "guard" within the given "context"
|
||||
* on "p", calling "fn" with "user" to print the body of the if statement.
|
||||
* If the guard is implied by the context, then no if statement is printed
|
||||
* and the body is printed directly to "p".
|
||||
*
|
||||
* Both "guard" and "context" are assumed to be parameter sets.
|
||||
*
|
||||
* We slightly abuse the AST generator to print this guard.
|
||||
* In particular, we create a trivial schedule for an iteration
|
||||
* domain with a single instance, restricted by the guard.
|
||||
*/
|
||||
__isl_give isl_printer *ppcg_print_guarded(__isl_take isl_printer *p,
|
||||
__isl_take isl_set *guard, __isl_take isl_set *context,
|
||||
__isl_give isl_printer *(*fn)(__isl_take isl_printer *p, void *user),
|
||||
void *user)
|
||||
{
|
||||
struct ppcg_print_guarded_data data = { fn, user };
|
||||
isl_ctx *ctx;
|
||||
isl_union_map *schedule;
|
||||
isl_ast_build *build;
|
||||
isl_ast_node *tree;
|
||||
isl_ast_print_options *options;
|
||||
|
||||
ctx = isl_printer_get_ctx(p);
|
||||
guard = isl_set_from_params(guard);
|
||||
schedule = isl_union_map_from_map(isl_map_from_domain(guard));
|
||||
build = isl_ast_build_from_context(context);
|
||||
tree = isl_ast_build_node_from_schedule_map(build, schedule);
|
||||
isl_ast_build_free(build);
|
||||
|
||||
options = isl_ast_print_options_alloc(ctx);
|
||||
options = isl_ast_print_options_set_print_user(options,
|
||||
&print_guarded_user, &data);
|
||||
p = isl_ast_node_print(tree, p, options);
|
||||
isl_ast_node_free(tree);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef PRINT_H
|
||||
#define PRINT_H
|
||||
|
||||
#include <isl/ast.h>
|
||||
|
||||
#include "ppcg.h"
|
||||
|
||||
__isl_give isl_printer *ppcg_start_block(__isl_take isl_printer *p);
|
||||
__isl_give isl_printer *ppcg_end_block(__isl_take isl_printer *p);
|
||||
|
||||
__isl_give isl_printer *ppcg_print_macros(__isl_take isl_printer *p,
|
||||
__isl_keep isl_ast_node *node);
|
||||
|
||||
__isl_give isl_printer *ppcg_print_declaration(__isl_take isl_printer *p,
|
||||
struct pet_array *array, __isl_keep isl_ast_build *build);
|
||||
__isl_give isl_printer *ppcg_print_exposed_declarations(
|
||||
__isl_take isl_printer *p, struct ppcg_scop *scop);
|
||||
__isl_give isl_printer *ppcg_print_hidden_declarations(
|
||||
__isl_take isl_printer *p, struct ppcg_scop *scop);
|
||||
|
||||
__isl_give isl_printer *ppcg_print_guarded(__isl_take isl_printer *p,
|
||||
__isl_take isl_set *guard, __isl_take isl_set *context,
|
||||
__isl_give isl_printer *(*fn)(__isl_take isl_printer *p, void *user),
|
||||
void *user);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
/*
|
||||
* Copyright 2010-2011 INRIA Saclay
|
||||
*
|
||||
* Use of this software is governed by the MIT license
|
||||
*
|
||||
* Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
|
||||
* Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
|
||||
* 91893 Orsay, France
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <isl/set.h>
|
||||
#include <isl/map.h>
|
||||
#include <isl/constraint.h>
|
||||
|
||||
#include "schedule.h"
|
||||
|
||||
/* Construct a map from a len-dimensional domain to
|
||||
* a (len-n)-dimensional domain that projects out the n coordinates
|
||||
* starting at first.
|
||||
* "dim" prescribes the parameters.
|
||||
*/
|
||||
__isl_give isl_map *project_out(__isl_take isl_space *dim,
|
||||
int len, int first, int n)
|
||||
{
|
||||
int i, j;
|
||||
isl_basic_map *bmap;
|
||||
|
||||
dim = isl_space_add_dims(dim, isl_dim_in, len);
|
||||
dim = isl_space_add_dims(dim, isl_dim_out, len - n);
|
||||
bmap = isl_basic_map_universe(dim);
|
||||
|
||||
for (i = 0, j = 0; i < len; ++i) {
|
||||
if (i >= first && i < first + n)
|
||||
continue;
|
||||
bmap = isl_basic_map_equate(bmap, isl_dim_in, i, isl_dim_out, j);
|
||||
++j;
|
||||
}
|
||||
|
||||
return isl_map_from_basic_map(bmap);
|
||||
}
|
||||
|
||||
/* Construct a projection that maps a src_len dimensional domain
|
||||
* to its first dst_len coordinates.
|
||||
* "dim" prescribes the parameters.
|
||||
*/
|
||||
__isl_give isl_map *projection(__isl_take isl_space *dim,
|
||||
int src_len, int dst_len)
|
||||
{
|
||||
return project_out(dim, src_len, dst_len, src_len - dst_len);
|
||||
}
|
||||
|
||||
/* Add parameters with identifiers "ids" to "set".
|
||||
*/
|
||||
static __isl_give isl_set *add_params(__isl_take isl_set *set,
|
||||
__isl_keep isl_id_list *ids)
|
||||
{
|
||||
int i, n;
|
||||
unsigned nparam;
|
||||
|
||||
n = isl_id_list_n_id(ids);
|
||||
|
||||
nparam = isl_set_dim(set, isl_dim_param);
|
||||
set = isl_set_add_dims(set, isl_dim_param, n);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
isl_id *id;
|
||||
|
||||
id = isl_id_list_get_id(ids, i);
|
||||
set = isl_set_set_dim_id(set, isl_dim_param, nparam + i, id);
|
||||
}
|
||||
|
||||
return set;
|
||||
}
|
||||
|
||||
/* Equate the dimensions of "set" starting at "first" to
|
||||
* freshly created parameters with identifiers "ids".
|
||||
* The number of equated dimensions is equal to the number of elements in "ids".
|
||||
*/
|
||||
static __isl_give isl_set *parametrize(__isl_take isl_set *set,
|
||||
int first, __isl_keep isl_id_list *ids)
|
||||
{
|
||||
int i, n;
|
||||
unsigned nparam;
|
||||
|
||||
nparam = isl_set_dim(set, isl_dim_param);
|
||||
|
||||
set = add_params(set, ids);
|
||||
|
||||
n = isl_id_list_n_id(ids);
|
||||
for (i = 0; i < n; ++i)
|
||||
set = isl_set_equate(set, isl_dim_param, nparam + i,
|
||||
isl_dim_set, first + i);
|
||||
|
||||
return set;
|
||||
}
|
||||
|
||||
/* Given a parameter space "space", create a set of dimension "len"
|
||||
* of which the dimensions starting at "first" are equated to
|
||||
* freshly created parameters with identifiers "ids".
|
||||
*/
|
||||
__isl_give isl_set *parametrization(__isl_take isl_space *space,
|
||||
int len, int first, __isl_keep isl_id_list *ids)
|
||||
{
|
||||
isl_set *set;
|
||||
|
||||
space = isl_space_set_from_params(space);
|
||||
space = isl_space_add_dims(space, isl_dim_set, len);
|
||||
set = isl_set_universe(space);
|
||||
|
||||
return parametrize(set, first, ids);
|
||||
}
|
||||
|
||||
/* Extend "set" with unconstrained coordinates to a total length of "dst_len".
|
||||
*/
|
||||
__isl_give isl_set *extend(__isl_take isl_set *set, int dst_len)
|
||||
{
|
||||
int n_set;
|
||||
isl_space *dim;
|
||||
isl_map *map;
|
||||
|
||||
dim = isl_set_get_space(set);
|
||||
n_set = isl_space_dim(dim, isl_dim_set);
|
||||
dim = isl_space_drop_dims(dim, isl_dim_set, 0, n_set);
|
||||
map = projection(dim, dst_len, n_set);
|
||||
map = isl_map_reverse(map);
|
||||
|
||||
return isl_set_apply(set, map);
|
||||
}
|
||||
|
||||
/* Set max_out to the maximal number of output dimensions over
|
||||
* all maps.
|
||||
*/
|
||||
static isl_stat update_max_out(__isl_take isl_map *map, void *user)
|
||||
{
|
||||
int *max_out = user;
|
||||
int n_out = isl_map_dim(map, isl_dim_out);
|
||||
|
||||
if (n_out > *max_out)
|
||||
*max_out = n_out;
|
||||
|
||||
isl_map_free(map);
|
||||
return isl_stat_ok;
|
||||
}
|
||||
|
||||
/* Internal data structure for align_range.
 *
 * max_out is the maximal number of output dimensions over all maps.
 * res collects the maps after their ranges have been extended.
 */
struct align_range_data {
	int max_out;
	isl_union_map *res;
};
|
||||
|
||||
/* Extend the dimension of the range of the given map to data->max_out and
|
||||
* then add the result to data->res.
|
||||
*/
|
||||
static isl_stat map_align_range(__isl_take isl_map *map, void *user)
|
||||
{
|
||||
struct align_range_data *data = user;
|
||||
int i;
|
||||
isl_space *dim;
|
||||
isl_map *proj;
|
||||
int n_out = isl_map_dim(map, isl_dim_out);
|
||||
|
||||
dim = isl_union_map_get_space(data->res);
|
||||
proj = isl_map_reverse(projection(dim, data->max_out, n_out));
|
||||
for (i = n_out; i < data->max_out; ++i)
|
||||
proj = isl_map_fix_si(proj, isl_dim_out, i, 0);
|
||||
|
||||
map = isl_map_apply_range(map, proj);
|
||||
|
||||
data->res = isl_union_map_add_map(data->res, map);
|
||||
|
||||
return isl_stat_ok;
|
||||
}
|
||||
|
||||
/* Extend the ranges of the maps in the union map such they all have
|
||||
* the same dimension.
|
||||
*/
|
||||
__isl_give isl_union_map *align_range(__isl_take isl_union_map *umap)
|
||||
{
|
||||
struct align_range_data data;
|
||||
|
||||
data.max_out = 0;
|
||||
isl_union_map_foreach_map(umap, &update_max_out, &data.max_out);
|
||||
|
||||
data.res = isl_union_map_empty(isl_union_map_get_space(umap));
|
||||
isl_union_map_foreach_map(umap, &map_align_range, &data);
|
||||
|
||||
isl_union_map_free(umap);
|
||||
return data.res;
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
#ifndef _SCHEDULE_H
|
||||
#define _SCHEDULE_H
|
||||
|
||||
#include <isl/id.h>
|
||||
#include <isl/set_type.h>
|
||||
#include <isl/map_type.h>
|
||||
#include <isl/union_map_type.h>
|
||||
|
||||
#include <pet.h>
|
||||
|
||||
/* An access to an outer array element or an iterator.
 * Accesses to iterators have an access relation that maps to an unnamed space.
 * An access may be both read and write.
 * If the access relation is empty, then the output dimension may
 * not be equal to the dimension of the corresponding array.
 *
 * The accesses of a statement are chained through the "next" field.
 */
struct gpu_stmt_access {
	/* Access reads elements */
	int read;
	/* Access writes elements */
	int write;
	/* All writes are definite writes. */
	int exact_write;
	/* The number of index expressions specified in the access. */
	int n_index;

	/* May access relation */
	isl_map *access;
	/* May access relation with as domain a mapping from iteration domain
	 * to a reference identifier.
	 */
	isl_map *tagged_access;
	/* The reference id of the corresponding pet_expr. */
	isl_id *ref_id;

	/* Next access in the linked list (presumably NULL at the end). */
	struct gpu_stmt_access *next;
};
|
||||
|
||||
/* A statement together with the list of accesses performed by it. */
struct gpu_stmt {
	/* Identifier of the statement. */
	isl_id *id;
	/* The pet statement this structure refers to. */
	struct pet_stmt *stmt;

	/* Linked list of accesses. */
	struct gpu_stmt_access *accesses;
};
|
||||
|
||||
__isl_give isl_map *project_out(__isl_take isl_space *dim,
|
||||
int len, int first, int n);
|
||||
__isl_give isl_map *projection(__isl_take isl_space *dim,
|
||||
int src_len, int dst_len);
|
||||
__isl_give isl_set *parametrization(__isl_take isl_space *space,
|
||||
int len, int first, __isl_keep isl_id_list *names);
|
||||
__isl_give isl_set *extend(__isl_take isl_set *set, int dst_len);
|
||||
__isl_give isl_union_map *align_range(__isl_take isl_union_map *umap);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
#! /bin/sh
|
||||
# test-driver - basic testsuite driver script.
|
||||
|
||||
scriptversion=2013-07-13.22; # UTC
|
||||
|
||||
# Copyright (C) 2011-2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
# Make unconditional expansion of undefined variables an error. This
|
||||
# helps a lot in preventing typo-related bugs.
|
||||
set -u
|
||||
|
||||
# Report a usage error: print the script name followed by the given
# message and the usage text on standard error, then exit with status 2.
usage_error ()
{
  echo "$0: $*" >&2
  print_usage >&2
  exit 2
}
|
||||
|
||||
# Print the usage text on standard output.
print_usage ()
{
  cat <<END
Usage:
test-driver --test-name=NAME --log-file=PATH --trs-file=PATH
[--expect-failure={yes|no}] [--color-tests={yes|no}]
[--enable-hard-errors={yes|no}] [--]
TEST-SCRIPT [TEST-SCRIPT-ARGUMENTS]
The '--test-name', '--log-file' and '--trs-file' options are mandatory.
END
}
|
||||
|
||||
# Option parsing.  Every option that takes a value reads it from the
# NEXT argument ("--test-name NAME"); the value is consumed by the
# extra "shift" in its case arm.
# NOTE(review): the usage text advertises the "--opt=VALUE" spelling, but
# such an argument matches the "-*" arm below and is rejected as an
# invalid option -- confirm which form the callers actually use.
test_name= # Used for reporting.
log_file= # Where to save the output of the test script.
trs_file= # Where to save the metadata of the test run.
expect_failure=no
color_tests=no
enable_hard_errors=yes
while test $# -gt 0; do
  case $1 in
  --help) print_usage; exit $?;;
  --version) echo "test-driver $scriptversion"; exit $?;;
  --test-name) test_name=$2; shift;;
  --log-file) log_file=$2; shift;;
  --trs-file) trs_file=$2; shift;;
  --color-tests) color_tests=$2; shift;;
  --expect-failure) expect_failure=$2; shift;;
  --enable-hard-errors) enable_hard_errors=$2; shift;;
  --) shift; break;;
  -*) usage_error "invalid option: '$1'";;
  *) break;;
  esac
  shift
done

# Collect the names of all mandatory options that were not given and
# report them in a single error message.
missing_opts=
test x"$test_name" = x && missing_opts="$missing_opts --test-name"
test x"$log_file" = x && missing_opts="$missing_opts --log-file"
test x"$trs_file" = x && missing_opts="$missing_opts --trs-file"
if test x"$missing_opts" != x; then
  usage_error "the following mandatory options are missing:$missing_opts"
fi

# What remains on the command line is the test script and its arguments;
# at least the script itself must be present.
if test $# -eq 0; then
  usage_error "missing argument"
fi
|
||||
|
||||
# Set up the terminal colour sequences used when reporting the result,
# or leave them empty when colours are disabled.
# The ESC byte that starts every sequence is produced with printf, so the
# script does not depend on a raw control character surviving editors,
# mail or diff viewers (without it the sequences are printed literally).
if test "$color_tests" = yes; then
  # Keep this in sync with 'lib/am/check.am:$(am__tty_colors)'.
  esc=`printf '\033'`
  red="${esc}[0;31m" # Red.
  grn="${esc}[0;32m" # Green.
  lgn="${esc}[1;32m" # Light green.
  blu="${esc}[1;34m" # Blue.
  mgn="${esc}[0;35m" # Magenta.
  std="${esc}[m"     # No color.
else
  red= grn= lgn= blu= mgn= std=
fi
|
||||
|
||||
# On a fatal signal, remove the (partial) log and result files and exit
# with status 128 + signal number.  The file names are quoted inside
# $do_exit: the text is only evaluated when the trap fires, so the quotes
# protect paths containing whitespace or glob characters.
do_exit='rm -f "$log_file" "$trs_file"; (exit $st); exit $st'
trap "st=129; $do_exit" 1
trap "st=130; $do_exit" 2
trap "st=141; $do_exit" 13
trap "st=143; $do_exit" 15

# Test script is run here.  Both its standard output and its standard
# error end up in the log file.
"$@" >"$log_file" 2>&1
estatus=$?
# Exit status 99 denotes a hard error; treat it as an ordinary failure
# when hard errors are disabled.
if test "$enable_hard_errors" = no && test $estatus -eq 99; then
  estatus=1
fi

# Map the exit status and the expectation onto the result, its colour,
# and whether the test must be rechecked and copied into the global log.
# The first matching pattern wins, so the order of the arms matters.
case $estatus:$expect_failure in
  0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
  0:*)   col=$grn res=PASS  recheck=no  gcopy=no;;
  77:*)  col=$blu res=SKIP  recheck=no  gcopy=yes;;
  99:*)  col=$mgn res=ERROR recheck=yes gcopy=yes;;
  *:yes) col=$lgn res=XFAIL recheck=no  gcopy=yes;;
  *:*)   col=$red res=FAIL  recheck=yes gcopy=yes;;
esac

# Report outcome to console.
echo "${col}${res}${std}: $test_name"

# Register the test result, and other relevant metadata.
echo ":test-result: $res" > "$trs_file"
echo ":global-test-result: $res" >> "$trs_file"
echo ":recheck: $recheck" >> "$trs_file"
echo ":copy-in-global-log: $gcopy" >> "$trs_file"
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Test input: inside the scop, elements of B[1] are copied (transposed)
 * into A[1], but only for those j with j % 8 in {0, 1, 2, 6, 7}.
 * NOTE(review): B is never initialized in the visible code, yet the check
 * after the scop compares A[1][0][i] against i -- confirm how this
 * program is meant to be used by the test suite.
 */
int main()
{
	int A[2][1000][1000];
	int B[2][1000][1000];

#pragma scop
	{
		for (int i = 0; i < 256; ++i)
			for (int j = 0; j < 256; ++j)
				if (j % 8 <= 2 || j % 8 >= 6)
					A[1][i][j] = B[1][j][i];
	}
#pragma endscop

/*

When compiled with:

./ppcg tests/allow-sparse-copy-in.c --no-linearize-device-arrays
	--on-error=abort --sizes='{kernel[i]->tile[8,8]; kernel[i]->block[1,8]}'
	--max-shared-memory=-1 --unroll-copy-shared

this originally resulted in the following copy-in code:

      shared_B[0][0][t1] = B[1][8 * b1][8 * b0 + t1];
      shared_B[0][1][t1] = B[1][8 * b1 + 1][8 * b0 + t1];
      shared_B[0][2][t1] = B[1][8 * b1 + 2][8 * b0 + t1];
      shared_B[0][3][t1] = B[1][8 * b1 + 3][8 * b0 + t1];
      shared_B[0][4][t1] = B[1][8 * b1 + 4][8 * b0 + t1];
      shared_B[0][5][t1] = B[1][8 * b1 + 5][8 * b0 + t1];
      shared_B[0][6][t1] = B[1][8 * b1 + 6][8 * b0 + t1];
      shared_B[0][7][t1] = B[1][8 * b1 + 7][8 * b0 + t1];

whereas we only want to perform the copies that are actually needed:

      shared_B[0][0][t1] = B[1][8 * b1][8 * b0 + t1];
      shared_B[0][1][t1] = B[1][8 * b1 + 1][8 * b0 + t1];
      shared_B[0][2][t1] = B[1][8 * b1 + 2][8 * b0 + t1];
      shared_B[0][6][t1] = B[1][8 * b1 + 6][8 * b0 + t1];
      shared_B[0][7][t1] = B[1][8 * b1 + 7][8 * b0 + t1];
*/
	for (int i = 0; i < 100; ++i)
		if (A[1][0][i] != i)
			return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Summary of the accesses performed by copy(), to which it is attached
 * through the pencil_access attribute below: element "pos" of "b"
 * is written and element "pos" of "a" is read.
 * The values involved are irrelevant; "c" only exists to express the read.
 */
void copy_summary(int b[1000], int a[1000], int pos)
{
	b[pos] = 0;
	int c = a[pos];
}
|
||||
|
||||
#ifdef pencil_access
|
||||
__attribute__((pencil_access(copy_summary)))
|
||||
#endif
|
||||
void copy(int b[1000], int a[1000], int pos);
|
||||
|
||||
/* Fill "a" with 0..999, copy it to "b" element by element through calls
 * to copy() inside the scop and check afterwards that "b" equals "a".
 */
int main()
{
	int a[1000], b[1000];

	for (int i = 0; i < 1000; ++i)
		a[i] = i;
#pragma scop
	for (int i = 0; i < 1000; ++i)
		copy(b, a, i);
#pragma endscop
	for (int i = 0; i < 1000; ++i)
		if (b[i] != a[i])
			return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Summary of the accesses performed by copy(), to which it is attached
 * through the pencil_access attribute below: element "pos" of "b"
 * is written and element "pos" of "a" is read.
 * The values involved are irrelevant; "c" only exists to express the read.
 */
void copy_summary(int b[1000], int a[1000], int pos)
{
	b[pos] = 0;
	int c = a[pos];
}
|
||||
|
||||
#ifdef pencil_access
|
||||
__attribute__((pencil_access(copy_summary)))
|
||||
#endif
|
||||
void copy(int b[1000], int a[1000], int pos);
|
||||
|
||||
/* Variant in which source and destination are two rows of the same
 * two-dimensional array: row 0 is filled with 0..999, copied to row 1
 * through calls to copy() inside the scop and the rows are then compared.
 */
int main()
{
	int a[2][1000];

	for (int i = 0; i < 1000; ++i)
		a[0][i] = i;
#pragma scop
	for (int i = 0; i < 1000; ++i)
		copy(a[1], a[0], i);
#pragma endscop
	for (int i = 0; i < 1000; ++i)
		if (a[1][i] != a[0][i])
			return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/* OpenCL C definition of copy(): copy element "pos" of "a" to "b". */
void copy(__global int b[1000], __global int a[1000], int pos)
{
	b[pos] = a[pos];
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Summary of the accesses performed by copy(), to which it is attached
 * through the pencil_access attribute below: all 100 elements of "b"
 * are written and all 100 elements of "a" are read.
 * The values involved are irrelevant; "c" only exists to express the read.
 */
void copy_summary(int b[100], int a[100])
{
	for (int i = 0; i < 100; ++i) {
		b[i] = 0;
		int c = a[i];
	}
}
|
||||
|
||||
#ifdef pencil_access
|
||||
__attribute__((pencil_access(copy_summary)))
|
||||
#endif
|
||||
void copy(int b[100], int a[100]);
|
||||
|
||||
/* Fill "B" with 0..99, copy it into every row of "A" through calls to
 * copy() inside the scop and check that each row of "A" equals "B".
 */
int main()
{
	int A[100][100], B[100];

	for (int i = 0; i < 100; ++i)
		B[i] = i;
#pragma scop
	for (int i = 0; i < 100; ++i)
		copy(A[i], B);
#pragma endscop
	/* Here "j" selects the row of A and "i" the element within it. */
	for (int i = 0; i < 100; ++i)
		for (int j = 0; j < 100; ++j)
			if (A[j][i] != B[i])
				return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
/* Copy all 100 elements of a into b, one element per loop iteration.
 * Both arrays carry the __global qualifier.
 * NOTE(review): presumably the device-side definition matching the
 * two-argument copy() prototype of a host test program -- confirm.
 */
void copy(__global int b[100], __global int a[100])
|
||||
{
|
||||
for (int i = 0; i < 100; ++i)
|
||||
b[i] = a[i];
|
||||
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
/* Store a[pos] into b[pos].  Both arrays carry the __global qualifier.
 * NOTE(review): presumably the device-side definition matching a
 * three-argument copy() prototype in a host test program -- confirm.
 */
void copy(__global int b[1000], __global int a[1000], int pos)
|
||||
{
|
||||
b[pos] = a[pos];
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Copy a[] to b[] through a loop-local scalar inside the scop region.
 * Each iteration declares c and d, loads a[i] into c, assigns c to d and
 * stores c into b[i]; d is never read after it is assigned.
 * Returns EXIT_FAILURE if any b[i] differs from a[i], EXIT_SUCCESS
 * otherwise.
 */
int main()
|
||||
{
|
||||
int a[1000], b[1000];
|
||||
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
a[i] = i;
|
||||
#pragma scop
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
int c;
|
||||
int d;
|
||||
c = a[i];
|
||||
d = c;
|
||||
b[i] = c;
|
||||
}
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
if (b[i] != a[i])
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Fill a[] with 0..999, copy it element by element into b[] inside the
 * scop region and compare the two arrays afterwards.
 * Returns EXIT_FAILURE on the first mismatch, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
int a[1000], b[1000];
|
||||
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
a[i] = i;
|
||||
#pragma scop
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
b[i] = a[i];
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
if (b[i] != a[i])
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Store 0 into b[pos] and load a[pos] into an otherwise unused local.
 * The fourth parameter, c, is accepted but never referenced in the body.
 * The pencil_access attribute on the prototype of copy() further down
 * refers to this function by name.
 * NOTE(review): presumably an access summary stating that copy() does
 * not touch c -- confirm.
 */
void copy_summary(int b[1000], int a[1000], int pos, int c[1000])
|
||||
{
|
||||
b[pos] = 0;
|
||||
int d = a[pos];
|
||||
}
|
||||
|
||||
/* Prototype of the four-argument copy(); no body is given here.
 * The pencil_access(copy_summary) attribute is only present when the
 * macro pencil_access is defined.
 */
#ifdef pencil_access
|
||||
__attribute__((pencil_access(copy_summary)))
|
||||
#endif
|
||||
void copy(int b[1000], int a[1000], int pos, int c[1000]);
|
||||
|
||||
/* Fill a[] with 0..999, call copy(b, a, i, c) for every i inside the scop
 * region and compare b[] against a[] afterwards.
 * c[] is passed to copy() but is neither initialized nor inspected in
 * this function.
 * Returns EXIT_FAILURE on the first mismatch, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
int a[1000], b[1000], c[1000];
|
||||
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
a[i] = i;
|
||||
#pragma scop
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
copy(b, a, i, c);
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 1000; ++i)
|
||||
if (b[i] != a[i])
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
/* Store a[pos] into b[pos].  The third array, c, is part of the
 * signature but is never referenced in the body.
 * All three arrays carry the __global qualifier.
 */
void copy(__global int b[1000], __global int a[1000], int pos,
|
||||
__global int c[1000])
|
||||
{
|
||||
b[pos] = a[pos];
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Assign 1 to the scalar a inside the scop region and check afterwards
 * that the value is visible outside of it.
 * Returns EXIT_FAILURE if a is not 1, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
int a;
|
||||
#pragma scop
|
||||
a = 1;
|
||||
#pragma endscop
|
||||
if (a != 1)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Check that the sources of live ranges with the same sink
 * are executed in order.
 *
 * Inside the scop, "set" is written by its initializer and, conditionally,
 * by the assignment of 1; both writes reach the later test of "set".
 * Returns EXIT_SUCCESS if A[0] ends up equal to 2, EXIT_FAILURE otherwise.
 */
int main()
{
	int A[128];
	int n = 128;

	/* The scop reads A[i] for every i in [0, n), although only A[0]
	 * is checked afterwards.  Give every element a defined value so
	 * that no indeterminate value is ever read.
	 */
	for (int i = 0; i < n; ++i)
		A[i] = 0;
#pragma scop
	for (int i = 0; i < n; ++i) {
		int set = 0;
		if (A[i] < 2)
			set = 1;
		if (set)
			A[i] = 2;
	}
#pragma endscop
	if (A[0] != 2)
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Element type used by main() below: a struct whose only member is a
 * 10x10 array of int.
 */
struct s {
|
||||
int c[10][10];
|
||||
};
|
||||
|
||||
/* Exercise writes to an array member of a struct stored in an array.
 * a[i][j].c[k][l] is set to i + j + k + l outside the scop region, the
 * same value is stored into b[i][j].c[k][l] inside the scop region, and
 * the two are compared element by element afterwards.
 * Returns EXIT_FAILURE on the first mismatch, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
struct s a[10][10], b[10][10];
|
||||
|
||||
/* Reference values, computed outside the scop region. */
for (int i = 0; i < 10; ++i)
|
||||
for (int j = 0; j < 10; ++j)
|
||||
for (int k = 0; k < 10; ++k)
|
||||
for (int l = 0; l < 10; ++l)
|
||||
a[i][j].c[k][l] = i + j + k + l;
|
||||
#pragma scop
|
||||
for (int i = 0; i < 10; ++i)
|
||||
for (int j = 0; j < 10; ++j)
|
||||
for (int k = 0; k < 10; ++k)
|
||||
for (int l = 0; l < 10; ++l)
|
||||
b[i][j].c[k][l] = i + j + k + l;
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 10; ++i)
|
||||
for (int j = 0; j < 10; ++j)
|
||||
for (int k = 0; k < 10; ++k)
|
||||
for (int l = 0; l < 10; ++l)
|
||||
if (b[i][j].c[k][l] != a[i][j].c[k][l])
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Struct with a single int member, used by main() below. */
struct s {
|
||||
int a;
|
||||
};
|
||||
|
||||
/* Exercise reads and writes of a struct member inside the scop region.
 * The scop first sets a.a to 42 and then copies a.a into b[i].a for
 * every i in [0, 10).
 * Returns EXIT_FAILURE if any b[i].a is not 42, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
struct s a, b[10];
|
||||
|
||||
#pragma scop
|
||||
a.a = 42;
|
||||
for (int i = 0; i < 10; ++i)
|
||||
b[i].a = a.a;
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 10; ++i)
|
||||
if (b[i].a != 42)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
/* Struct with two int members, used by main() below. */
struct s {
|
||||
int a;
|
||||
int b;
|
||||
};
|
||||
|
||||
/* Exercise whole-struct assignment inside the scop region.
 * a.b is set to 57 before the scop; inside the scop a.a is set to 42 and
 * the complete struct a is assigned to b[i] for every i in [0, 10).
 * Afterwards the function checks that every b[i].a is 42 and that a.b
 * still holds 57.
 * Returns EXIT_FAILURE if either check fails, EXIT_SUCCESS otherwise.
 */
int main()
|
||||
{
|
||||
struct s a, b[10];
|
||||
|
||||
/* Member written outside the scop; only a.a is written inside it. */
a.b = 57;
|
||||
#pragma scop
|
||||
a.a = 42;
|
||||
for (int i = 0; i < 10; ++i)
|
||||
b[i] = a;
|
||||
#pragma endscop
|
||||
for (int i = 0; i < 10; ++i)
|
||||
if (b[i].a != 42)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
if (a.b != 57)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef UTIL_H
|
||||
#define UTIL_H
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* Test whether the string "s" begins with the string "prefix".
 *
 * Only the first strlen(prefix) characters of "s" take part in the
 * comparison.  The result follows the strncmp() convention: zero if "s"
 * starts with "prefix" (in particular for an empty "prefix"), and a
 * negative or positive value according to the first differing character
 * otherwise.
 */
static inline int prefixcmp(const char *s, const char *prefix)
{
	size_t prefix_len = strlen(prefix);

	return strncmp(s, prefix, prefix_len);
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
#include "gitversion.h"
|
||||
|
||||
const char *ppcg_version(void)
|
||||
{
|
||||
return GIT_HEAD_ID"\n";
|
||||
}
|
||||
Loading…
Reference in New Issue