config.h 18.3 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * config.h
 *
 *  Created on: 18/09/2014
 *      Author: pedro
 */

#ifndef SRC_CONFIG_H_
#define SRC_CONFIG_H_

#define MAX_(x, y) (((x) > (y)) ? (x) : (y))
#define MIN_(x, y) (((x) < (y)) ? (x) : (y))

// Host configuration
#ifndef __OPENCL_VERSION__

#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>

#include "CL/cl.h"
#include "CL/cl_platform.h"

#include "domains.h"
#include "kernels/cl_variables.h"

// Compiling options

#define PRE_FILTER 1		// set to 1 to enable pre-filtering previous to starting exploration, or to 0 to disable it. Disabling it, may induce errors
#define SS_MULTIPLIER 1		// set to 1 to enable a sub-search space multiplier that may be applied to increase the number of
								// sub-search spaces inside each device when blocks are small, or to 0 to disable it
#define USE_BOOLEAN_VS 1	// set to 1 to distinguish boolean variables in the CSP
#define USE_CS_IGNORE 1			// set to 1 to when a constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations
								// only for bitmap domains (not for intervals)

#define CL_COMP_OPT 1		// set to 1 to compile the OpenCL with optimization, or to 0 for not

#ifndef COMPILE_FZN
#define COMPILE_FZN 1	// set to 0 if the flatzinc/minizinc interpreter is not needed or mzn2fzn, flex or bison is not available
#endif

#define CHECK_SOL_N_VALS 1	// set to 1 if the solution (1 solution or best) must be checked for the number of values on the domain of each variable
#define VERIFY_SOL 1		// set to 1 to verify if the optimal or the single solution found is valid. Will give error for variables with more than 1 value
								// that are marked for labeling, even if they are part of that solution

#define SORT_BY_LABEL 1			// set to 1 to sort variables by the ones that may be labeled. Disabling it, may induce errors
#define SORT_BY_LABEL_LESS_VALS 0	// set to 1 to sort variables by the ones that may be labeled and that have less values on their domains
#define SORT_BY_LABEL_MORE_VALS 0	// set to 1 to sort variables by the ones that may be labeled and that have more values on their domains
#define SORT_BY_MOST_USED_CONSTR 0	// set to 1 to sort constraints on each variable by the constraint that is more common on the CSP

#define USE_LOCAL_MEM 2		// set to 0 if devices should use only global memory, to 1 if they should use also local memory, or to 2 to decide by default
#define USE_CONSTANT_MEM 0	// set to 1 to use OpenCL constant memory. Disabled due to causing errors in Nvidia GPUs

#define USE_MORE_BUFFERS 1			// set to 0 to use only one buffer for backtracking, or to 1 to use more buffers (more buffers allow to use buffer times more global memory)
#define USE_MORE_BUFFERS_P	0.8		// % of each allowed max buffer size to use on each of the backtracking buffers

#define SHARED_SS 0			// BETA // set to 1 to enable work-sharing inside each device, or to 0 to disable it
#define PRE_LABELING 0		// BETA // set to 0, 1 or 2. 1 to propagate the last labeled variable with all the remaining values before backtracking it, 0 to not,
								// and 2 to use if any propagator is capable of propagating variables with more than one value in its domain

// for kernel debugging purposes
#define DEBUG 0 			// set to 1 to enable debug inside kernel (Only for Intel CPUs)
#define RUN_IN_CUDA 0		// Run kernel with CUDA. Only for an Nvidia GPU
#define DEBUG_IN_CUDA 0		// Requires RUN_IN_CUDA to be set to 1. Allows debugging the kernel in Nvidia GPUs
#define CL_CHECK_ERRORS 0	// set to 1 to compile kernel with code for some error checking, or 0 for none (only for bitmaps)
#define COMPILE_ALL_CS 0	// set to 1 to compile all propagators, including the ones for reification
#define CL_VERIFY_SOLS 0	// set to 1 to verify if all the variables are assigned when a solution is found in the kernel (done on the host for
								// optimization and 1 solution when CHECK_SOL_N_VALS and VERIFY_SOL are set).
#define USE_TTL 0			// set to 1 to define a time to live to terminate a kernel
#define TTL_LIMIT 10000		// maximum value of the TTL to terminate the kernel

#define MAX_CSP_DIM_ARGS 10	// maximum number of command line arguments indicating the dimensions of the CSP to solve

#define TO_LABEL_THRESHOLD 0	// Maximum number of variables that may need to be labeled beyond the ones marked for labeling

// load balancing parameters
#define GPU_DEFAULT_N_WG 512	// default number of work-groups to create for a GPU
#define GPU_DEFAULT_N_WI 128	// default number of work-items per work-group to create for a GPU
#define GPU_CUTOFF 8	// Approximated number of times that a GPU core (SM) is slower than a CPU core
#define ACC_CUTOFF 4	// Approximated number of times that a ACC core is slower than a CPU core
#define CNT_INIT_PERC 0.20		// Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when
									// counting all the solutions
#define OPT_ONE_INIT_PERC 0.01	// Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when
									// optimizing or finding the first solution

#define MAX_SS 1000000	// Maximum number of SS to create
#define SS_GPU 500000	// Number of SS to create if only one GPU is to be used
#define SS_ACC 250000	// Number of SS to create if only one ACC is to be used
#define SS_CPU 5000	// Number of SS to create per compute unit, if only one CPU
#define N_FIRST_BLOCKS 3	// Number of blocks explored before calculating RANK
#define MS_HALF_FIRST_BLOCKS 1000.0		// Minimum milliseconds taken to solve the previous block (of the N_FIRST_BLOCKS-1) to reduce the size of the next block to half
#define MS_TAKE_ALL 2000.0	// Milliseconds estimated for a device to solve all the remaining sub-search spaces, to make it take all of them in the next block
#define PERCENT_REM_SS_DOUBLE	0.2 // Percentage of the remaining SS to be considered for the calculation of the size of the next block for the device that
										// finished N_FIRST_BLOCKS first
#define PERCENT_REM_SS_RANK_GPU	0.3 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for GPUs
#define PERCENT_REM_SS_RANK_CPU	0.8 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for CPUs
#define PERCENT_REM_SS_RANK_ACC	0.6 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for ACCs
#define FAST_BLOCKS_MS_OPT 2000.0	// Time taken to solve a block during optimization to be considered exceptionally easy to explore
#define FAST_BLOCKS_MS_ONE 2000.0	// Time taken to solve a block when looking for one solution to be considered exceptionally easy to explore
#define N_FAST_BLOCKS_OPT 3		// Number of blocks in a row that took less than EMPTY_BLOCKS_MS_OPT s to solve (each), to double the size of the next block
#define PERCENT_BLOCKS_ADD 0.2		// percentage of the last block to add to the next block when the previous N_EMPTY_BLOCKS_OPT were EMPTY_BLOCKS_MS_OPT
#define N_EMPTY_BLOCKS 1		// Number of blocks with 0 SS that a device would receive to be terminated
#define TIMES_USED_TRESHOLD	10	// Number of times that a device must be used in order to double the number of sub-search spaces on the next block, when OPT or ONE until SS_REM_PERC_TRESHOLD
#define SS_REM_PERC_TRESHOLD 0.3	// When to stop doubling the size of the block when TIMES_USED_TRESHOLD for OPT and ONE

#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
typedef long int __time_t;
typedef long __suseconds_t;
#endif

typedef struct statistics {
	cl_ulong nodes_fail;
	cl_ulong nodes_expl;
	cl_ulong backtracks;
	cl_ulong labels;
	cl_ulong pruning;
	cl_ulong props_ok;
	cl_ulong max_depth;
	cl_ulong n_solutions;
	cl_ulong solve_time;
	cl_ulong search_spaces;
} statistics;

// Heuristic for selecting the variable for labeling and for propagating
typedef enum {
	// just "BITMAP" conflicts with windows libraries
	BITMAP_,
	INTERVAL,
	DEFAULT
} d_type;

// Type of work to be done
typedef enum {
	ONE,	// find one solution
	CNT,	// count all solutions
	OPT,	// optimization
	DEFAULT_W
} work;

// Type of optimization to be done
typedef enum {
	DECREASE,	// decrease value
	INCREASE, 	// increase value
	NONE		// initial value
} opt;

// Heuristic for selecting the variable for labeling
typedef enum {
	FIRST_FAIL,		// less values
	INPUT_ORDER,	// first created
	OCCURRENCE,		// more constrained
	MAX_REGRET,		// largest difference between the two smallest values
	SMALLEST,		// with the smallest value in its domain
	DEFAULT_L
} label_heur;

// Heuristic for selecting the value of the variable to assign
typedef enum {
	MIN_VAL,	// minimum value
	MAX_VAL,	// maximum value
	SPLIT_VALS,	// splits the domain about the mean between minimum and maximum value
	DEFAULT_A
} assign_heur;

// to be used when modeling the CSP for printing results
extern bool OPTIMIZING;			// true if optimizing
extern bool FINDING_ONE_SOLUTION;	// true if finding one solution
extern bool COUNTING_SOLUTIONS;		// true if counting all the solutions
extern bool PRINT_STATS;	// True if statistic data should be collected and printed
extern statistics STATS;	// statistics data on host
extern bool ALL_DEVS;	// argument if all devices should be used
extern bool VERBOSE;	// argument indicating if the Solver must print more info
extern bool QUIET;	// argument indicating if the Solver must print only the number of solutions that were found, or the solution or the best solution
extern char* CSP;	// argument indicating the name of the CSP to solve
extern char* FZN_FILE_NAME;	// argument indicating the name of the flatzinc file
extern char* MZN_FILE_NAME;	// argument indicating the name of the minizinc file
extern char* DZN_FILE_NAME;	// argument indicating the name of the minizinc file containing the CSP dimensions
extern unsigned int D_MAX;	// maximum domain value on variables CSP
extern unsigned int D_MIN;	// minimum domain value on variables CSP
extern unsigned int CL_BITS_;	// number of bits to use on devices, calculated after creating all the CSP variables
extern unsigned int CL_WORD_;	// number of bits in one word of the bitmap use on devices, calculated after creating all the CSP variables in
									// init_csp_and_d_bits() in config.c
extern unsigned int CL_N_WORDS_;	// number of words that compose one bitmap on the devices, calculated after creating all the CSP variables in
										//init_csp_and_d_bits() in config.c
extern int* CONST_VS_ID;	// to store the id of the variables that have only one value on the domain
extern unsigned int N_VS;	// number of CSP variables
extern unsigned int V_ID_CNTR;	// Unique identifier of each CSP variable
extern unsigned int N_CS;		// number of CSP constraints
extern unsigned int C_ID_CNTR;	// Unique identifier of each CSP constraint
extern unsigned int N_DEVS;	// number of devices to use
extern unsigned int N_GPUs;	// requested number of GPUs to use
extern unsigned int N_CPUs;	// requested number of CPUs to use
extern unsigned int N_ACCs;	// requested number of ACCs to use
extern unsigned int N_SS;	// optional number of sub-search spaces to create
extern cl_uint VAL_TO_OPT;		// current minimum or maximum value to optimize (not yet found)
extern unsigned int VAR_ID_TO_OPT;	// ID of the variable to optimize
extern unsigned int* EXP_VALUES;	// Number of values expanded to achieve the required number of sub-search spaces
extern bool PRINT_SOLUTIONS;	// set to 1 to print all solutions (the value of each CSP variable) when using only one work-item
extern bool PRINT_CSP;		// set to 1 to print all CSP variables with their domains, and all the constraints with the respective variables identified
extern bool MZN2FZN_ONLY;	// if the MZN must be converted to FZN, but without solving the CSP
extern bool REV;		// set to 1 to propagate the last labeled variable with all the remaining values before backtracking it, or 0 to not
extern int N_VS_TO_LABEL;	// Number of variables marked for labeling
extern bool BOOLEAN_VS;	// 1 if the CSP use boolean variables
extern unsigned int N_VS_ORIGINAL;	// number of CSP variables in the model (before filtering)
extern unsigned int N_CS_ORIGINAL;	// number of CSP constraints in the model (before filtering)
extern __time_t init_sec;		// Seconds when the Solver started
extern __suseconds_t init_usec;	// Microseconds when the Solver started
extern d_type DOMAIN_TYPE;	// domain representation to use
extern size_t DOMAIN_SIZE;	// size of the domain type used on this device
extern label_heur LABEL_MODE_D;	// argument indicating the default mode for selecting the variable to label
extern assign_heur ASSIGN_MODE_D;	// argument indicating the default mode for selecting the value to assign to the variable for labeling
extern label_heur LABEL_MODE;	// argument indicating the mode for selecting the variable to label
extern assign_heur ASSIGN_MODE;	// argument indicating the mode for selecting the value to assign to the variable for labeling
extern label_heur LABEL_MODE_COM;	// argument indicating the mode for selecting the variable to label inserted in command line
extern assign_heur ASSIGN_MODE_COM;	// argument indicating the mode for selecting the value to assign to the variable for labeling inserted in command line
extern work WORK;	// argument indicating if one solution, all solutions or optimization is wanted
extern work WORK_D;	// argument indicating if one solution, all solutions or optimization is default
extern opt OPT_MODE;	// if optimization is to reduce a value, or to increase it
extern bool CS_IGNORE;	// 1 to when constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations
							// only for bitmap domains (not for intervals)
extern bool CAN_USE_INTERVALS;	// true if variable domains are contiguous

void print_help();
void load_args(int *argc, char **argv[], int* csp_dims, char* csp_inst_name);
void parse_args(int *argc, char *argv[], int* csp_dims, char* csp_inst_name);

void init_csp_and_d_bits();
void print_CSP();
void clear_csp();

char* get_label_heur();
char* get_assign_heur();

int get_int_len(int x);

bool can_run_command(const char *cmd);

#define FUNC_PREFIX	static inline

// never used. Just for compilation and syntax correction
#define CL_WORK CL_OPT
#define VARS cl_var_bitmap
#define VARS_PROP cl_var_p_bitmap
#define __global
#define __constant
#define __local
#define CL_INTS_MEM
#define CL_MEMORY
#define CL_CS_MEM
#define CL_VS_MEM
#define CL_B_DS_MEM
#define CL_ONE 0
#define CL_CNT 1
#define CL_OPT 2
#define CL_FIRST_FAIL 0
#define	CL_INPUT_ORDER 1
#define	CL_OCCURRENCE 2
#define	CL_MAX_REGRET 3
#define CL_SMALLEST 4
#define CL_LABEL_M CL_INPUT_ORDER
#define CL_MIN_VAL 0
#define	CL_MAX_VAL 1
#define	CL_SPLIT_VALS 2
#define CL_ASSIGN_M MIN_VAL
#define CL_SPLIT_VALUES_EXT 1
#define CL_DECREASE 0
#define CL_INCREASE 1
#define CL_OPT_M CL_DECREASE
#define CL_VAR_ID_TO_OPT 0
#define CL_USE_GLOBAL_MEM 0
#define CL_USE_LOCAL_MEM 1
#define CL_MEM CL_USE_LOCAL_MEM
#define CL_MEMORY
#define CL_LIN_TERMS 0
#define CL_LIN_V_TERMS 0
#define CL_SUM_TERMS 0
#define CL_SUM_VAR_TERMS 0
#define CL_N_VS 10
#define CL_N_CS 10
#define CL_N_VS_TO_LABEL 0
#define CL_TO_LABEL_THRESHOLD 0
#define CL_N_VS_CS 0
#define CL_N_CS_VS 0
#define CL_INTS_CONST 1
#define CL_B_DS_CONST 1
#define CL_VS_CONST 1
#define CL_CS_CONST 1
#define CL_N_DEVS 2
#define CL_STATS 1
#define CL_PRE_LABELING 0
#define CL_N_SHARED_SS 0
#define CL_SEQ_SEARCH 1
#define CL_CS_IGNORE USE_CS_IGNORE
#define CL_FILTERING PRE_FILTER
#define CL_BOOLEAN_VS USE_BOOLEAN_VS
#define CL_USE_N_BUFFERS 1
#define CL_N_TERMS 0


// Device configuration (used during kernel compilation)
#else

#define FUNC_PREFIX

#if CUDA_VERSION == 0
#define CUDA_FUNC
#endif

#ifndef CL_PRE_LABELING
#define CL_PRE_LABELING 0
#endif

// type of exploration objective
#define CL_ONE 0
#define CL_CNT 1
#define CL_OPT 2
#ifndef CL_WORK
#define CL_WORK CL_CNT
#endif

// Heuristic for selecting the variable for labeling
#define CL_FIRST_FAIL 0	// less values
#define	CL_INPUT_ORDER 1	// first on the array
#define	CL_OCCURRENCE 2	// more constrained
#define	CL_MAX_REGRET 3	// largest difference between the two smallest values
#define	CL_SMALLEST 4	// // with the smallest value in its domain
#ifndef CL_LABEL_M
#define CL_LABEL_M CL_INPUT_ORDER
#endif

// Heuristic for selecting the value of the variable for assign on labeling
#define CL_MIN_VAL 0	// minimum value
#define	CL_MAX_VAL 1	// maximum value
#define	CL_SPLIT_VALS 2	// Splits the domain about the mean between minimum and maximum value
#ifndef CL_ASSIGN_M
#define CL_ASSIGN_M CL_MIN_VAL
#endif

// Value to extend some buffers when CL_SPLIT_VALS heuristic is used
#ifndef CL_SPLIT_VALUES_EXT
#define CL_SPLIT_VALUES_EXT 1
#endif

// Type of optimization to be done
#define CL_DECREASE 0
#define CL_INCREASE 1
#ifndef CL_OPT_M
#define CL_OPT_M CL_DECREASE
#endif

// ID of the variable to optimize
#ifndef CL_VAR_ID_TO_OPT
#define CL_VAR_ID_TO_OPT 0
#endif

// if kernel should use also local or only global memory
#define CL_USE_GLOBAL_MEM 0
#define CL_USE_LOCAL_MEM 1
#ifndef CL_MEM
#define CL_MEM CL_USE_LOCAL_MEM
#endif

#if CL_MEM == CL_USE_LOCAL_MEM
#define CL_MEMORY __local
#else
#define CL_MEMORY __global
#endif

// if each kernel constant argument fits constant buffers
#if CL_INTS_CONST == 1
#define CL_INTS_MEM __constant
#else
#define CL_INTS_MEM __global
#endif

#if CL_B_DS_CONST == 1
#define CL_B_DS_MEM __constant
#else
#define CL_B_DS_MEM __global
#endif

#if CL_VS_CONST == 1
#define CL_VS_MEM __constant
#else
#define CL_VS_MEM __global
#endif

#if CL_CS_CONST == 1
#define CL_CS_MEM __constant
#else
#define CL_CS_MEM __global
#endif

// size of auxiliary array (for linear..., sum and sum_var constraints)
#ifndef CL_LIN_LT_TERMS
#define CL_LIN_LT_TERMS 0
#endif
#ifndef CL_LIN_NE_TERMS
#define CL_LIN_NE_TERMS 0
#endif
#ifndef CL_LIN_TERMS
#define CL_LIN_TERMS 0
#endif
#ifndef CL_LIN_V_TERMS
#define CL_LIN_V_TERMS 0
#endif
#ifndef CL_SUM_TERMS
#define CL_SUM_TERMS 0
#endif
#ifndef CL_SUM_VAR_TERMS
#define CL_SUM_VAR_TERMS 0
#endif

// for type usage on device
#define cl_uint uint
#define cl_int int
#define cl_ulong ulong
#define cl_char char
#define cl_ushort ushort

// Sequential search
#ifndef CL_SEQ_SEARCH
#define CL_SEQ_SEARCH 0
#endif

// Number of stores for work-sharing
#ifndef CL_N_SHARED_SS
#define CL_N_SHARED_SS 0
#endif

#ifndef CL_BOOLEAN_VS
#define CL_BOOLEAN_VS 0
#endif

#ifndef CL_CS_IGNORE
#define CL_CS_IGNORE 1
#endif

#ifndef CL_USE_N_BUFFERS
#define CL_USE_N_BUFFERS 1
#endif

#ifndef CL_N_TERMS
#define CL_N_TERMS 0
#endif

#endif

#endif /* SRC_CONFIG_H_ */