config.h
18.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/*
* config.h
*
* Created on: 18/09/2014
* Author: pedro
*/
#ifndef SRC_CONFIG_H_
#define SRC_CONFIG_H_
#define MAX_(x, y) (((x) > (y)) ? (x) : (y))
#define MIN_(x, y) (((x) < (y)) ? (x) : (y))
// Host configuration
#ifndef __OPENCL_VERSION__
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>
#include "CL/cl.h"
#include "CL/cl_platform.h"
#include "domains.h"
#include "kernels/cl_variables.h"
// Compiling options
#define PRE_FILTER 1 // set to 1 to enable pre-filtering previous to starting exploration, or to 0 to disable it. Disabling it, may induce errors
#define SS_MULTIPLIER 1 // set to 1 to enable a sub-search space multiplier that may be applied to increase the number of
// sub-search spaces inside each device when blocks are small, or to 0 to disable it
#define USE_BOOLEAN_VS 1 // set to 1 to distinguish boolean variables in the CSP
#define USE_CS_IGNORE 1 // set to 1 to when a constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations
// only for bitmap domains (not for intervals)
#define CL_COMP_OPT 1 // set to 1 to compile the OpenCL with optimization, or to 0 for not
#ifndef COMPILE_FZN
#define COMPILE_FZN 1 // set to 0 if the flatzinc/minizinc interpreter is not needed or mzn2fzn, flex or bison is not available
#endif
#define CHECK_SOL_N_VALS 1 // set to 1 if the solution (1 solution or best) must be checked for the number of values on the domain of each variable
#define VERIFY_SOL 1 // set to 1 to verify if the optimal or the single solution found is valid. Will give error for variables with more than 1 value
// that are marked for labeling, even if they are part of that solution
#define SORT_BY_LABEL 1 // set to 1 to sort variables by the ones that may be labeled. Disabling it, may induce errors
#define SORT_BY_LABEL_LESS_VALS 0 // set to 1 to sort variables by the ones that may be labeled and that have less values on their domains
#define SORT_BY_LABEL_MORE_VALS 0 // set to 1 to sort variables by the ones that may be labeled and that have more values on their domains
#define SORT_BY_MOST_USED_CONSTR 0 // set to 1 to sort constraints on each variable by the constraint that is more common on the CSP
#define USE_LOCAL_MEM 2 // set to 0 if devices should use only global memory, to 1 if they should use also local memory, or to 2 to decide by default
#define USE_CONSTANT_MEM 0 // set to 1 to use OpenCL constant memory. Disabled due to causing errors in Nvidia GPUs
#define USE_MORE_BUFFERS 1 // set to 0 to use only one buffer for backtracking, or to 1 to use more buffers (more buffers allow to use buffer times more global memory)
#define USE_MORE_BUFFERS_P 0.8 // % of each allowed max buffer size to use on each of the backtracking buffers
#define SHARED_SS 0 // BETA // set to 1 to enable work-sharing inside each device, or to 0 to disable it
#define PRE_LABELING 0 // BETA // set to 0, 1 or 2. 1 to propagate the last labeled variable with all the remaining values before backtracking it, 0 to not,
// and 2 to use if any propagator is capable of propagating variables with more than one value in its domain
// for kernel debugging purposes
#define DEBUG 0 // set to 1 to enable debug inside kernel (Only for Intel CPUs)
#define RUN_IN_CUDA 0 // Run kernel with CUDA. Only for an Nvidia GPU
#define DEBUG_IN_CUDA 0 // Requires RUN_IN_CUDA to be set to 1. Allows debugging the kernel in Nvidia GPUs
#define CL_CHECK_ERRORS 0 // set to 1 to compile kernel with code for some error checking, or 0 for none (only for bitmaps)
#define COMPILE_ALL_CS 0 // set to 1 to compile all propagators, including the ones for reification
#define CL_VERIFY_SOLS 0 // set to 1 to verify if all the variables are assigned when a solution is found in the kernel (done on the host for
// optimization and 1 solution when CHECK_SOL_N_VALS and VERIFY_SOL are set).
#define USE_TTL 0 // set to 1 to define a time to live to terminate a kernel
#define TTL_LIMIT 10000 // maximum value of the TTL to terminate the kernel
#define MAX_CSP_DIM_ARGS 10 // maximum number of command line arguments indicating the dimensions of the CSP to solve
#define TO_LABEL_THRESHOLD 0 // Maximum number of variables that may need to be labeled beyond the ones marked for labeling
// load balancing parameters
#define GPU_DEFAULT_N_WG 512 // default number of work-groups to create for a GPU
#define GPU_DEFAULT_N_WI 128 // default number of work-items per work-group to create for a GPU
#define GPU_CUTOFF 8 // Approximated number of times that a GPU core (SM) is slower than a CPU core
#define ACC_CUTOFF 4 // Approximated number of times that a ACC core is slower than a CPU core
#define CNT_INIT_PERC 0.20 // Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when
// counting all the solutions
#define OPT_ONE_INIT_PERC 0.01 // Percentage of the total number of SS created for calculation of the size of the first block to be sent to each device when
// optimizing or finding the first solution
#define MAX_SS 1000000 // Maximum number of SS to create
#define SS_GPU 500000 // Number of SS to create if only one GPU is to be used
#define SS_ACC 250000 // Number of SS to create if only one ACC is to be used
#define SS_CPU 5000 // Number of SS to create per compute unit, if only one CPU
#define N_FIRST_BLOCKS 3 // Number of blocks explored before calculating RANK
#define MS_HALF_FIRST_BLOCKS 1000.0 // Minimum milliseconds taken to solve the previous block (of the N_FIRST_BLOCKS-1) to reduce the size of the next block to half
#define MS_TAKE_ALL 2000.0 // Milliseconds estimated for a device to solve all the remaining sub-search spaces, to make it take all of them in the next block
#define PERCENT_REM_SS_DOUBLE 0.2 // Percentage of the remaining SS to be considered for the calculation of the size of the next block for the device that
// finished N_FIRST_BLOCKS first
#define PERCENT_REM_SS_RANK_GPU 0.3 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for GPUs
#define PERCENT_REM_SS_RANK_CPU 0.8 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for CPUs
#define PERCENT_REM_SS_RANK_ACC 0.6 // Percentage of the remaining SS to be considered for the calculation of the size of the next block with RANK for ACCs
#define FAST_BLOCKS_MS_OPT 2000.0 // Time taken to solve a block during optimization to be considered exceptionally easy to explore
#define FAST_BLOCKS_MS_ONE 2000.0 // Time taken to solve a block when looking for one solution to be considered exceptionally easy to explore
#define N_FAST_BLOCKS_OPT 3 // Number of blocks in a row that took less than EMPTY_BLOCKS_MS_OPT s to solve (each), to double the size of the next block
#define PERCENT_BLOCKS_ADD 0.2 // percentage of the last block to add to the next block when the previous N_EMPTY_BLOCKS_OPT were EMPTY_BLOCKS_MS_OPT
#define N_EMPTY_BLOCKS 1 // Number of blocks with 0 SS that a device would receive to be terminated
#define TIMES_USED_TRESHOLD 10 // Number of times that a device must be used in order to double the number of sub-search spaces on the next block, when OPT or ONE until SS_REM_PERC_TRESHOLD
#define SS_REM_PERC_TRESHOLD 0.3 // When to stop doubling the size of the block when TIMES_USED_TRESHOLD for OPT and ONE
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
typedef long int __time_t;
typedef long __suseconds_t;
#endif
typedef struct statistics {
cl_ulong nodes_fail;
cl_ulong nodes_expl;
cl_ulong backtracks;
cl_ulong labels;
cl_ulong pruning;
cl_ulong props_ok;
cl_ulong max_depth;
cl_ulong n_solutions;
cl_ulong solve_time;
cl_ulong search_spaces;
} statistics;
// Heuristic for selecting the variable for labeling and for propagating
typedef enum {
// just "BITMAP" conflicts with windows libraries
BITMAP_,
INTERVAL,
DEFAULT
} d_type;
// Type of work to be done
typedef enum {
ONE, // find one solution
CNT, // count all solutions
OPT, // optimization
DEFAULT_W
} work;
// Type of optimization to be done
typedef enum {
DECREASE, // decrease value
INCREASE, // increase value
NONE // initial value
} opt;
// Heuristic for selecting the variable for labeling
typedef enum {
FIRST_FAIL, // less values
INPUT_ORDER, // first created
OCCURRENCE, // more constrained
MAX_REGRET, // largest difference between the two smallest values
SMALLEST, // with the smallest value in its domain
DEFAULT_L
} label_heur;
// Heuristic for selecting the value of the variable to assign
typedef enum {
MIN_VAL, // minimum value
MAX_VAL, // maximum value
SPLIT_VALS, // splits the domain about the mean between minimum and maximum value
DEFAULT_A
} assign_heur;
// to be used when modeling the CSP for printing results
extern bool OPTIMIZING; // true if optimizing
extern bool FINDING_ONE_SOLUTION; // true if finding one solution
extern bool COUNTING_SOLUTIONS; // true if counting all the solutions
extern bool PRINT_STATS; // True if statistic data should be collected and printed
extern statistics STATS; // statistics data on host
extern bool ALL_DEVS; // argument if all devices should be used
extern bool VERBOSE; // argument indicating if the Solver must print more info
extern bool QUIET; // argument indicating if the Solver must print only the number of solutions that were found, or the solution or the best solution
extern char* CSP; // argument indicating the name of the CSP to solve
extern char* FZN_FILE_NAME; // argument indicating the name of the flatzinc file
extern char* MZN_FILE_NAME; // argument indicating the name of the minizinc file
extern char* DZN_FILE_NAME; // argument indicating the name of the minizinc file containing the CSP dimensions
extern unsigned int D_MAX; // maximum domain value on variables CSP
extern unsigned int D_MIN; // minimum domain value on variables CSP
extern unsigned int CL_BITS_; // number of bits to use on devices, calculated after creating all the CSP variables
extern unsigned int CL_WORD_; // number of bits in one word of the bitmap use on devices, calculated after creating all the CSP variables in
// init_csp_and_d_bits() in config.c
extern unsigned int CL_N_WORDS_; // number of words that compose one bitmap on the devices, calculated after creating all the CSP variables in
//init_csp_and_d_bits() in config.c
extern int* CONST_VS_ID; // to store the id of the variables that have only one value on the domain
extern unsigned int N_VS; // number of CSP variables
extern unsigned int V_ID_CNTR; // Unique identifier of each CSP variable
extern unsigned int N_CS; // number of CSP constraints
extern unsigned int C_ID_CNTR; // Unique identifier of each CSP constraint
extern unsigned int N_DEVS; // number of devices to use
extern unsigned int N_GPUs; // requested number of GPUs to use
extern unsigned int N_CPUs; // requested number of CPUs to use
extern unsigned int N_ACCs; // requested number of ACCs to use
extern unsigned int N_SS; // optional number of sub-search spaces to create
extern cl_uint VAL_TO_OPT; // current minimum or maximum value to optimize (not yet found)
extern unsigned int VAR_ID_TO_OPT; // ID of the variable to optimize
extern unsigned int* EXP_VALUES; // Number of values expanded to achieve the required number of sub-search spaces
extern bool PRINT_SOLUTIONS; // set to 1 to print all solutions (the value of each CSP variable) when using only one work-item
extern bool PRINT_CSP; // set to 1 to print all CSP variables with their domains, and all the constraints with the respective variables identified
extern bool MZN2FZN_ONLY; // if the MZN must be converted to FZN, but without solving the CSP
extern bool REV; // set to 1 to propagate the last labeled variable with all the remaining values before backtracking it, or 0 to not
extern int N_VS_TO_LABEL; // Number of variables marked for labeling
extern bool BOOLEAN_VS; // 1 if the CSP use boolean variables
extern unsigned int N_VS_ORIGINAL; // number of CSP variables in the model (before filtering)
extern unsigned int N_CS_ORIGINAL; // number of CSP constraints in the model (before filtering)
extern __time_t init_sec; // Seconds when the Solver started
extern __suseconds_t init_usec; // Microseconds when the Solver started
extern d_type DOMAIN_TYPE; // domain representation to use
extern size_t DOMAIN_SIZE; // size of the domain type used on this device
extern label_heur LABEL_MODE_D; // argument indicating the default mode for selecting the variable to label
extern assign_heur ASSIGN_MODE_D; // argument indicating the default mode for selecting the value to assign to the variable for labeling
extern label_heur LABEL_MODE; // argument indicating the mode for selecting the variable to label
extern assign_heur ASSIGN_MODE; // argument indicating the mode for selecting the value to assign to the variable for labeling
extern label_heur LABEL_MODE_COM; // argument indicating the mode for selecting the variable to label inserted in command line
extern assign_heur ASSIGN_MODE_COM; // argument indicating the mode for selecting the value to assign to the variable for labeling inserted in command line
extern work WORK; // argument indicating if one solution, all solutions or optimization is wanted
extern work WORK_D; // argument indicating if one solution, all solutions or optimization is default
extern opt OPT_MODE; // if optimization is to reduce a value, or to increase it
extern bool CS_IGNORE; // 1 to when constraint is fixed or its propagator is unable to propagate more, ignore it in the following propagations
// only for bitmap domains (not for intervals)
extern bool CAN_USE_INTERVALS; // true if variable domains are contiguous
void print_help();
void load_args(int *argc, char **argv[], int* csp_dims, char* csp_inst_name);
void parse_args(int *argc, char *argv[], int* csp_dims, char* csp_inst_name);
void init_csp_and_d_bits();
void print_CSP();
void clear_csp();
char* get_label_heur();
char* get_assign_heur();
int get_int_len(int x);
bool can_run_command(const char *cmd);
#define FUNC_PREFIX static inline
// never used. Just for compilation and syntax correction
#define CL_WORK CL_OPT
#define VARS cl_var_bitmap
#define VARS_PROP cl_var_p_bitmap
#define __global
#define __constant
#define __local
#define CL_INTS_MEM
#define CL_MEMORY
#define CL_CS_MEM
#define CL_VS_MEM
#define CL_B_DS_MEM
#define CL_ONE 0
#define CL_CNT 1
#define CL_OPT 2
#define CL_FIRST_FAIL 0
#define CL_INPUT_ORDER 1
#define CL_OCCURRENCE 2
#define CL_MAX_REGRET 3
#define CL_SMALLEST 4
#define CL_LABEL_M CL_INPUT_ORDER
#define CL_MIN_VAL 0
#define CL_MAX_VAL 1
#define CL_SPLIT_VALS 2
#define CL_ASSIGN_M MIN_VAL
#define CL_SPLIT_VALUES_EXT 1
#define CL_DECREASE 0
#define CL_INCREASE 1
#define CL_OPT_M CL_DECREASE
#define CL_VAR_ID_TO_OPT 0
#define CL_USE_GLOBAL_MEM 0
#define CL_USE_LOCAL_MEM 1
#define CL_MEM CL_USE_LOCAL_MEM
#define CL_MEMORY
#define CL_LIN_TERMS 0
#define CL_LIN_V_TERMS 0
#define CL_SUM_TERMS 0
#define CL_SUM_VAR_TERMS 0
#define CL_N_VS 10
#define CL_N_CS 10
#define CL_N_VS_TO_LABEL 0
#define CL_TO_LABEL_THRESHOLD 0
#define CL_N_VS_CS 0
#define CL_N_CS_VS 0
#define CL_INTS_CONST 1
#define CL_B_DS_CONST 1
#define CL_VS_CONST 1
#define CL_CS_CONST 1
#define CL_N_DEVS 2
#define CL_STATS 1
#define CL_PRE_LABELING 0
#define CL_N_SHARED_SS 0
#define CL_SEQ_SEARCH 1
#define CL_CS_IGNORE USE_CS_IGNORE
#define CL_FILTERING PRE_FILTER
#define CL_BOOLEAN_VS USE_BOOLEAN_VS
#define CL_USE_N_BUFFERS 1
#define CL_N_TERMS 0
// Device configuration (used during kernel compilation)
#else
#define FUNC_PREFIX
#if CUDA_VERSION == 0
#define CUDA_FUNC
#endif
#ifndef CL_PRE_LABELING
#define CL_PRE_LABELING 0
#endif
// type of exploration objective
#define CL_ONE 0
#define CL_CNT 1
#define CL_OPT 2
#ifndef CL_WORK
#define CL_WORK CL_CNT
#endif
// Heuristic for selecting the variable for labeling
#define CL_FIRST_FAIL 0 // less values
#define CL_INPUT_ORDER 1 // first on the array
#define CL_OCCURRENCE 2 // more constrained
#define CL_MAX_REGRET 3 // largest difference between the two smallest values
#define CL_SMALLEST 4 // // with the smallest value in its domain
#ifndef CL_LABEL_M
#define CL_LABEL_M CL_INPUT_ORDER
#endif
// Heuristic for selecting the value of the variable for assign on labeling
#define CL_MIN_VAL 0 // minimum value
#define CL_MAX_VAL 1 // maximum value
#define CL_SPLIT_VALS 2 // Splits the domain about the mean between minimum and maximum value
#ifndef CL_ASSIGN_M
#define CL_ASSIGN_M CL_MIN_VAL
#endif
// Value to extend some buffers when CL_SPLIT_VALS heuristic is used
#ifndef CL_SPLIT_VALUES_EXT
#define CL_SPLIT_VALUES_EXT 1
#endif
// Type of optimization to be done
#define CL_DECREASE 0
#define CL_INCREASE 1
#ifndef CL_OPT_M
#define CL_OPT_M CL_DECREASE
#endif
// ID of the variable to optimize
#ifndef CL_VAR_ID_TO_OPT
#define CL_VAR_ID_TO_OPT 0
#endif
// if kernel should use also local or only global memory
#define CL_USE_GLOBAL_MEM 0
#define CL_USE_LOCAL_MEM 1
#ifndef CL_MEM
#define CL_MEM CL_USE_LOCAL_MEM
#endif
#if CL_MEM == CL_USE_LOCAL_MEM
#define CL_MEMORY __local
#else
#define CL_MEMORY __global
#endif
// if each kernel constant argument fits constant buffers
#if CL_INTS_CONST == 1
#define CL_INTS_MEM __constant
#else
#define CL_INTS_MEM __global
#endif
#if CL_B_DS_CONST == 1
#define CL_B_DS_MEM __constant
#else
#define CL_B_DS_MEM __global
#endif
#if CL_VS_CONST == 1
#define CL_VS_MEM __constant
#else
#define CL_VS_MEM __global
#endif
#if CL_CS_CONST == 1
#define CL_CS_MEM __constant
#else
#define CL_CS_MEM __global
#endif
// size of auxiliary array (for linear..., sum and sum_var constraints)
#ifndef CL_LIN_LT_TERMS
#define CL_LIN_LT_TERMS 0
#endif
#ifndef CL_LIN_NE_TERMS
#define CL_LIN_NE_TERMS 0
#endif
#ifndef CL_LIN_TERMS
#define CL_LIN_TERMS 0
#endif
#ifndef CL_LIN_V_TERMS
#define CL_LIN_V_TERMS 0
#endif
#ifndef CL_SUM_TERMS
#define CL_SUM_TERMS 0
#endif
#ifndef CL_SUM_VAR_TERMS
#define CL_SUM_VAR_TERMS 0
#endif
// for type usage on device
#define cl_uint uint
#define cl_int int
#define cl_ulong ulong
#define cl_char char
#define cl_ushort ushort
// Sequential search
#ifndef CL_SEQ_SEARCH
#define CL_SEQ_SEARCH 0
#endif
// Number of stores for work-sharing
#ifndef CL_N_SHARED_SS
#define CL_N_SHARED_SS 0
#endif
#ifndef CL_BOOLEAN_VS
#define CL_BOOLEAN_VS 0
#endif
#ifndef CL_CS_IGNORE
#define CL_CS_IGNORE 1
#endif
#ifndef CL_USE_N_BUFFERS
#define CL_USE_N_BUFFERS 1
#endif
#ifndef CL_N_TERMS
#define CL_N_TERMS 0
#endif
#endif
#endif /* SRC_CONFIG_H_ */