/* * config.h * * Created on: 18/09/2014 * Author: pedro */ #ifndef SRC_CONFIG_H_ #define SRC_CONFIG_H_ #define MAX_(x, y) (((x) > (y)) ? (x) : (y)) #define MIN_(x, y) (((x) < (y)) ? (x) : (y)) // Host configuration #ifndef __OPENCL_VERSION__ #include #include #include #include "CL/cl.h" #include "CL/cl_platform.h" #include "domains.h" #include "kernels/cl_variables.h" // Compiling options #define PRE_FILTER 1 // set to 1 to enable pre-filtering previous to starting exploration, or to 0 to disable it. Disabling it, may induce errors #define SS_MULTIPLIER 1 // set to 1 to enable a sub-search space multiplier that may be applied to increase the number of // sub-search spaces inside each device when blocks are small, or to 0 to disable it #define USE_BOOLEAN_VS 1 // set to 1 to distinguish boolean variables in the CSP #define USE_CS_IGNORE 1 // set to 1 to when a constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations // only for bitmap domains (not for intervals) #define CL_COMP_OPT 1 // set to 1 to compile the OpenCL with optimization, or to 0 for not #ifndef COMPILE_FZN #define COMPILE_FZN 1 // set to 0 if the flatzinc/minizinc interpreter is not needed or mzn2fzn, flex or bison is not available #endif #define CHECK_SOL_N_VALS 1 // set to 1 if the solution (1 solution or best) must be checked for the number of values on the domain of each variable #define VERIFY_SOL 1 // set to 1 to verify if the optimal or the single solution found is valid. Will give error for variables with more than 1 value // that are marked for labeling, even if they are part of that solution #define SORT_BY_LABEL 1 // set to 1 to sort variables by the ones that may be labeled. Disabling it, may induce errors #define SORT_BY_LABEL_LESS_VALS 0 // set to 1 to sort variables by the ones that may be labeled and that have less values on their domains #define SORT_BY_LABEL_MORE_VALS 0 // set to 1 to sort variables by the ones that may be labeled and that have more values on their domains #define SORT_BY_MOST_USED_CONSTR 0 // set to 1 to sort constraints on each variable by the constraint that is more common on the CSP #define USE_LOCAL_MEM 2 // set to 0 if devices should use only global memory, to 1 if they should use also local memory, or to 2 to decide by default #define USE_CONSTANT_MEM 0 // set to 1 to use OpenCL constant memory. Disabled due to causing errors in Nvidia GPUs #define USE_MORE_BUFFERS 1 // set to 0 to use only one buffer for backtracking, or to 1 to use more buffers (more buffers allow to use buffer times more global memory) #define USE_MORE_BUFFERS_P 0.8 // % of each allowed max buffer size to use on each of the backtracking buffers #define SHARED_SS 0 // BETA // set to 1 to enable work-sharing inside each device, or to 0 to disable it #define PRE_LABELING 0 // BETA // set to 0, 1 or 2. 1 to propagate the last labeled variable with all the remaining values before backtracking it, 0 to not, // and 2 to use if any propagator is capable of propagating variables with more than one value in its domain // for kernel debugging purposes #define DEBUG 0 // set to 1 to enable debug inside kernel (Only for Intel CPUs) #define RUN_IN_CUDA 0 // Run kernel with CUDA. Only for an Nvidia GPU #define DEBUG_IN_CUDA 0 // Requires RUN_IN_CUDA to be set to 1. Allows debugging the kernel in Nvidia GPUs #define CL_CHECK_ERRORS 0 // set to 1 to compile kernel with code for some error checking, or 0 for none (only for bitmaps) #define COMPILE_ALL_CS 0 // set to 1 to compile all propagators, including the ones for reification #define CL_VERIFY_SOLS 0 // set to 1 to verify if all the variables are assigned when a solution is found in the kernel (done on the host for // optimization and 1 solution when CHECK_SOL_N_VALS and VERIFY_SOL are set). #define USE_TTL 0 // set to 1 to define a time to live to terminate a kernel #define TTL_LIMIT 10000 // maximum value of the TTL to terminate the kernel #define MAX_CSP_DIM_ARGS 10 // maximum number of command line arguments indicating the dimensions of the CSP to solve #define TO_LABEL_THRESHOLD 0 // Maximum number of variables that may need to be labeled beyond the ones marked for labeling // load balancing parameters #define GPU_DEFAULT_N_WG 512 // default number of work-groups to create for a GPU #define GPU_DEFAULT_N_WI 128 // default number of work-items per work-group to create for a GPU #define GPU_CUTOFF 8 // Approximated number of times that a GPU core (SM) is slower than a CPU core #define ACC_CUTOFF 4 // Approximated number of times that a ACC core is slower than a CPU core #define CNT_INIT_PERC 0.20 // Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when // counting all the solutions #define OPT_ONE_INIT_PERC 0.01 // Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when // optimizing or finding the first solution #define MAX_SS 1000000 // Maximum number of SS to create #define SS_GPU 500000 // Number of SS to create if only one GPU is to be used #define SS_ACC 250000 // Number of SS to create if only one ACC is to be used #define SS_CPU 5000 // Number of SS to create per compute unit, if only one CPU #define N_FIRST_BLOCKS 3 // Number of blocks explored before calculating RANK #define MS_HALF_FIRST_BLOCKS 1000.0 // Minimum milliseconds taken to solve the previous block (of the N_FIRST_BLOCKS-1) to reduce the size of the next block to half #define MS_TAKE_ALL 2000.0 // Milliseconds estimated for a device to solve all the remaining sub-search spaces, to make it take all of them in the next block #define PERCENT_REM_SS_DOUBLE 0.2 // Percentage of the remaining SS to be considered for the calculation of the size of the next block for the device that // finished N_FIRST_BLOCKS first #define PERCENT_REM_SS_RANK_GPU 0.3 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for GPUs #define PERCENT_REM_SS_RANK_CPU 0.8 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for CPUs #define PERCENT_REM_SS_RANK_ACC 0.6 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for ACCs #define FAST_BLOCKS_MS_OPT 2000.0 // Time taken to solve a block during optimization to be considered exceptionally easy to explore #define FAST_BLOCKS_MS_ONE 2000.0 // Time taken to solve a block when looking for one solution to be considered exceptionally easy to explore #define N_FAST_BLOCKS_OPT 3 // Number of blocks in a row that took less than EMPTY_BLOCKS_MS_OPT s to solve (each), to double the size of the next block #define PERCENT_BLOCKS_ADD 0.2 // percentage of the last block to add to the next block when the previous N_EMPTY_BLOCKS_OPT were EMPTY_BLOCKS_MS_OPT #define N_EMPTY_BLOCKS 1 // Number of blocks with 0 SS that a device would receive to be terminated #define TIMES_USED_TRESHOLD 10 // Number of times that a device must be used in order to double the number of sub-search spaces on the next block, when OPT or ONE until SS_REM_PERC_TRESHOLD #define SS_REM_PERC_TRESHOLD 0.3 // When to stop doubling the size of the block when TIMES_USED_TRESHOLD for OPT and ONE #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) typedef long int __time_t; typedef long __suseconds_t; #endif typedef struct statistics { cl_ulong nodes_fail; cl_ulong nodes_expl; cl_ulong backtracks; cl_ulong labels; cl_ulong pruning; cl_ulong props_ok; cl_ulong max_depth; cl_ulong n_solutions; cl_ulong solve_time; cl_ulong search_spaces; } statistics; // Heuristic for selecting the variable for labeling and for propagating typedef enum { // just "BITMAP" conflicts with windows libraries BITMAP_, INTERVAL, DEFAULT } d_type; // Type of work to be done typedef enum { ONE, // find one solution CNT, // count all solutions OPT, // optimization DEFAULT_W } work; // Type of optimization to be done typedef enum { DECREASE, // decrease value INCREASE, // increase value NONE // initial value } opt; // Heuristic for selecting the variable for labeling typedef enum { FIRST_FAIL, // less values INPUT_ORDER, // first created OCCURRENCE, // more constrained MAX_REGRET, // largest difference between the two smallest values SMALLEST, // with the smallest value in its domain DEFAULT_L } label_heur; // Heuristic for selecting the value of the variable to assign typedef enum { MIN_VAL, // minimum value MAX_VAL, // maximum value SPLIT_VALS, // splits the domain about the mean between minimum and maximum value DEFAULT_A } assign_heur; // to be used when modeling the CSP for printing results extern bool OPTIMIZING; // true if optimizing extern bool FINDING_ONE_SOLUTION; // true if finding one solution extern bool COUNTING_SOLUTIONS; // true if counting all the solutions extern bool PRINT_STATS; // True if statistic data should be collected and printed extern statistics STATS; // statistics data on host extern bool ALL_DEVS; // argument if all devices should be used extern bool VERBOSE; // argument indicating if the Solver must print more info extern bool QUIET; // argument indicating if the Solver must print only the number of solutions that were found, or the solution or the best solution extern char* CSP; // argument indicating the name of the CSP to solve extern char* FZN_FILE_NAME; // argument indicating the name of the flatzinc file extern char* MZN_FILE_NAME; // argument indicating the name of the minizinc file extern char* DZN_FILE_NAME; // argument indicating the name of the minizinc file containing the CSP dimensions extern unsigned int D_MAX; // maximum domain value on variables CSP extern unsigned int D_MIN; // minimum domain value on variables CSP extern unsigned int CL_BITS_; // number of bits to use on devices, calculated after creating all the CSP variables extern unsigned int CL_WORD_; // number of bits in one word of the bitmap use on devices, calculated after creating all the CSP variables in // init_csp_and_d_bits() in config.c extern unsigned int CL_N_WORDS_; // number of words that compose one bitmap on the devices, calculated after creating all the CSP variables in //init_csp_and_d_bits() in config.c extern int* CONST_VS_ID; // to store the id of the variables that have only one value on the domain extern unsigned int N_VS; // number of CSP variables extern unsigned int V_ID_CNTR; // Unique identifier of each CSP variable extern unsigned int N_CS; // number of CSP constraints extern unsigned int C_ID_CNTR; // Unique identifier of each CSP constraint extern unsigned int N_DEVS; // number of devices to use extern unsigned int N_GPUs; // requested number of GPUs to use extern unsigned int N_CPUs; // requested number of CPUs to use extern unsigned int N_ACCs; // requested number of ACCs to use extern unsigned int N_SS; // optional number of sub-search spaces to create extern cl_uint VAL_TO_OPT; // current minimum or maximum value to optimize (not yet found) extern unsigned int VAR_ID_TO_OPT; // ID of the variable to optimize extern unsigned int* EXP_VALUES; // Number of values expanded to achieve the required number of sub-search spaces extern bool PRINT_SOLUTIONS; // set to 1 to print all solutions (the value of each CSP variable) when using only one work-item extern bool PRINT_CSP; // set to 1 to print all CSP variables with their domains, and all the constraints with the respective variables identified extern bool MZN2FZN_ONLY; // if the MZN must be converted to FZN, but without solving the CSP extern bool REV; // set to 1 to propagate the last labeled variable with all the remaining values before backtracking it, or 0 to not extern int N_VS_TO_LABEL; // Number of variables marked for labeling extern bool BOOLEAN_VS; // 1 if the CSP use boolean variables extern unsigned int N_VS_ORIGINAL; // number of CSP variables in the model (before filtering) extern unsigned int N_CS_ORIGINAL; // number of CSP constraints in the model (before filtering) extern __time_t init_sec; // Seconds when the Solver started extern __suseconds_t init_usec; // Microseconds when the Solver started extern d_type DOMAIN_TYPE; // domain representation to use extern size_t DOMAIN_SIZE; // size of the domain type used on this device extern label_heur LABEL_MODE_D; // argument indicating the default mode for selecting the variable to label extern assign_heur ASSIGN_MODE_D; // argument indicating the default mode for selecting the value to assign to the variable for labeling extern label_heur LABEL_MODE; // argument indicating the mode for selecting the variable to label extern assign_heur ASSIGN_MODE; // argument indicating the mode for selecting the value to assign to the variable for labeling extern label_heur LABEL_MODE_COM; // argument indicating the mode for selecting the variable to label inserted in command line extern assign_heur ASSIGN_MODE_COM; // argument indicating the mode for selecting the value to assign to the variable for labeling inserted in command line extern work WORK; // argument indicating if one solution, all solutions or optimization is wanted extern work WORK_D; // argument indicating if one solution, all solutions or optimization is default extern opt OPT_MODE; // if optimization is to reduce a value, or to increase it extern bool CS_IGNORE; // 1 to when constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations // only for bitmap domains (not for intervals) extern bool CAN_USE_INTERVALS; // true if variable domains are contiguous void print_help(); void load_args(int *argc, char **argv[], int* csp_dims, char* csp_inst_name); void parse_args(int *argc, char *argv[], int* csp_dims, char* csp_inst_name); void init_csp_and_d_bits(); void print_CSP(); void clear_csp(); char* get_label_heur(); char* get_assign_heur(); int get_int_len(int x); bool can_run_command(const char *cmd); #define FUNC_PREFIX static inline // never used. Just for compilation and syntax correction #define CL_WORK CL_OPT #define VARS cl_var_bitmap #define VARS_PROP cl_var_p_bitmap #define __global #define __constant #define __local #define CL_INTS_MEM #define CL_MEMORY #define CL_CS_MEM #define CL_VS_MEM #define CL_B_DS_MEM #define CL_ONE 0 #define CL_CNT 1 #define CL_OPT 2 #define CL_FIRST_FAIL 0 #define CL_INPUT_ORDER 1 #define CL_OCCURRENCE 2 #define CL_MAX_REGRET 3 #define CL_SMALLEST 4 #define CL_LABEL_M CL_INPUT_ORDER #define CL_MIN_VAL 0 #define CL_MAX_VAL 1 #define CL_SPLIT_VALS 2 #define CL_ASSIGN_M MIN_VAL #define CL_SPLIT_VALUES_EXT 1 #define CL_DECREASE 0 #define CL_INCREASE 1 #define CL_OPT_M CL_DECREASE #define CL_VAR_ID_TO_OPT 0 #define CL_USE_GLOBAL_MEM 0 #define CL_USE_LOCAL_MEM 1 #define CL_MEM CL_USE_LOCAL_MEM #define CL_MEMORY #define CL_LIN_TERMS 0 #define CL_LIN_V_TERMS 0 #define CL_SUM_TERMS 0 #define CL_SUM_VAR_TERMS 0 #define CL_N_VS 10 #define CL_N_CS 10 #define CL_N_VS_TO_LABEL 0 #define CL_TO_LABEL_THRESHOLD 0 #define CL_N_VS_CS 0 #define CL_N_CS_VS 0 #define CL_INTS_CONST 1 #define CL_B_DS_CONST 1 #define CL_VS_CONST 1 #define CL_CS_CONST 1 #define CL_N_DEVS 2 #define CL_STATS 1 #define CL_PRE_LABELING 0 #define CL_N_SHARED_SS 0 #define CL_SEQ_SEARCH 1 #define CL_CS_IGNORE USE_CS_IGNORE #define CL_FILTERING PRE_FILTER #define CL_BOOLEAN_VS USE_BOOLEAN_VS #define CL_USE_N_BUFFERS 1 #define CL_N_TERMS 0 // Device configuration (used during kernel compilation) #else #define FUNC_PREFIX #if CUDA_VERSION == 0 #define CUDA_FUNC #endif #ifndef CL_PRE_LABELING #define CL_PRE_LABELING 0 #endif // type of exploration objective #define CL_ONE 0 #define CL_CNT 1 #define CL_OPT 2 #ifndef CL_WORK #define CL_WORK CL_CNT #endif // Heuristic for selecting the variable for labeling #define CL_FIRST_FAIL 0 // less values #define CL_INPUT_ORDER 1 // first on the array #define CL_OCCURRENCE 2 // more constrained #define CL_MAX_REGRET 3 // largest difference between the two smallest values #define CL_SMALLEST 4 // // with the smallest value in its domain #ifndef CL_LABEL_M #define CL_LABEL_M CL_INPUT_ORDER #endif // Heuristic for selecting the value of the variable for assign on labeling #define CL_MIN_VAL 0 // minimum value #define CL_MAX_VAL 1 // maximum value #define CL_SPLIT_VALS 2 // Splits the domain about the mean between minimum and maximum value #ifndef CL_ASSIGN_M #define CL_ASSIGN_M CL_MIN_VAL #endif // Value to extend some buffers when CL_SPLIT_VALS heuristic is used #ifndef CL_SPLIT_VALUES_EXT #define CL_SPLIT_VALUES_EXT 1 #endif // Type of optimization to be done #define CL_DECREASE 0 #define CL_INCREASE 1 #ifndef CL_OPT_M #define CL_OPT_M CL_DECREASE #endif // ID of the variable to optimize #ifndef CL_VAR_ID_TO_OPT #define CL_VAR_ID_TO_OPT 0 #endif // if kernel should use also local or only global memory #define CL_USE_GLOBAL_MEM 0 #define CL_USE_LOCAL_MEM 1 #ifndef CL_MEM #define CL_MEM CL_USE_LOCAL_MEM #endif #if CL_MEM == CL_USE_LOCAL_MEM #define CL_MEMORY __local #else #define CL_MEMORY __global #endif // if each kernel constant argument fits constant buffers #if CL_INTS_CONST == 1 #define CL_INTS_MEM __constant #else #define CL_INTS_MEM __global #endif #if CL_B_DS_CONST == 1 #define CL_B_DS_MEM __constant #else #define CL_B_DS_MEM __global #endif #if CL_VS_CONST == 1 #define CL_VS_MEM __constant #else #define CL_VS_MEM __global #endif #if CL_CS_CONST == 1 #define CL_CS_MEM __constant #else #define CL_CS_MEM __global #endif // size of auxiliary array (for linear..., sum and sum_var constraints) #ifndef CL_LIN_LT_TERMS #define CL_LIN_LT_TERMS 0 #endif #ifndef CL_LIN_NE_TERMS #define CL_LIN_NE_TERMS 0 #endif #ifndef CL_LIN_TERMS #define CL_LIN_TERMS 0 #endif #ifndef CL_LIN_V_TERMS #define CL_LIN_V_TERMS 0 #endif #ifndef CL_SUM_TERMS #define CL_SUM_TERMS 0 #endif #ifndef CL_SUM_VAR_TERMS #define CL_SUM_VAR_TERMS 0 #endif // for type usage on device #define cl_uint uint #define cl_int int #define cl_ulong ulong #define cl_char char #define cl_ushort ushort // Sequential search #ifndef CL_SEQ_SEARCH #define CL_SEQ_SEARCH 0 #endif // Number of stores for work-sharing #ifndef CL_N_SHARED_SS #define CL_N_SHARED_SS 0 #endif #ifndef CL_BOOLEAN_VS #define CL_BOOLEAN_VS 0 #endif #ifndef CL_CS_IGNORE #define CL_CS_IGNORE 1 #endif #ifndef CL_USE_N_BUFFERS #define CL_USE_N_BUFFERS 1 #endif #ifndef CL_N_TERMS #define CL_N_TERMS 0 #endif #endif #endif /* SRC_CONFIG_H_ */