devices.h
11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/*
* devices.h
*
* Created on: 04/01/2017
* Author: Pedro
*/
#ifndef SRC_DEVICES_H_
#define SRC_DEVICES_H_
#include <stdbool.h>
#include <stddef.h>
#include "CL/cl.h"
#include "CL/cl_platform.h"
#include "config.h"
#include "kernels/cl_constraints.h"
#include "kernels/cl_variables.h"
#if RUN_IN_CUDA
#include <cuda.h>
#include <builtin_types.h>
#endif
// Upper bound on the devices handled at once; it is also the length of the
// DEVICES_INFO and DEVICES_ARGS arrays declared at the end of this header.
#define MAX_DEVS 10 // maximum number of devices on a single machine
/*
 * device_info
 *
 * Description of one compute device together with the bookkeeping kept for it:
 * OpenCL identification handles, work-group configuration, memory limits,
 * per-block timing/propagation statistics and ranking state.
 * One entry per device is stored in DEVICES_INFO.
 */
typedef struct device_info {
cl_platform_id platform_id; // platform ID
cl_device_id device_id; // device ID
cl_context context; // context for running the kernel
cl_program prog; // compiled program for full exploration on device
int dev_type_n; // number of the device of this type to use
cl_device_type type; // device type: GPU, CPU or MIC
size_t def_n_wg; // default number of work-groups to use with this device
size_t def_n_wi_wg; // default number of work-items per work-group to use with this device
size_t n_wg; // number of work-groups to use with this device
size_t n_wi_wg; // number of work-items per work-group to use with this device
cl_ulong global_mem_max_alloc; // size of the global memory available to use in each OpenCL buffer
cl_ulong global_mem_size; // size of the global memory
cl_ulong local_mem_max_alloc; // size of the local memory available to use with OpenCL
cl_ulong global_mem_used; // size of the global memory of this device, in bytes (NOTE(review): the name suggests memory in use rather than total size - confirm)
cl_ulong constant_mem_max_alloc; // size of the constant memory available to use on each constant buffer
bool use_local_mem; // true if local memory should be used
char *dev_name; // name of this device
cl_uint compute_units; // number of compute units on this device
cl_uint max_freq; // maximum core frequency in MHz
float rel_speed_expect; // expected speed when comparing the hardware of all the used devices. From 0 to 1, where 1 is the fastest.
unsigned int stores_explored; // number of stores explored by this device
unsigned int block_size; // number of stores sent to each device at a time
unsigned int first_block_size; // number of stores sent to each device in the first block
unsigned int first_store; // number of the first store to explore
unsigned int last_store; // number of the last store to explore
unsigned int times_used; // number of times the kernel was executed by this device
unsigned int n_ss_mult; // number used to multiply the number of sub-search spaces sent to this device
float last_1ss_solv_time; // milliseconds needed to solve one sub-search space on the last run
float avg_1ss_solv_time; // average milliseconds needed to solve one sub-search space
float max_1ss_solv_time; // maximum milliseconds needed to solve one sub-search space
double last_explor_time; // time taken to explore the last block
unsigned int n_fast_blocks; // number of consecutive times that this device took less than 1s to solve a block
float rank; // relative speed of this device to the other devices. From 0 to 1, where 1 is the fastest.
bool ranked; // true if this device was already ranked
bool first_time_ranked; // true if this is the first time this device will get a block with all the devices already ranked
bool working; // true if this device was not released yet
cl_ulong ms_finish_time; // time in milliseconds when the device finished all (NOTE(review): original comment is truncated; presumably "all its work" - confirm)
cl_ulong ms_solve_time; // total time that the kernel was executed
cl_ulong props_total; // total propagations made
cl_ulong last_props; // propagations made in the last block
float last_time_prop; // last block time per 1000 propagations
float avg_time_prop; // average time per 1000 propagations
float max_time_prop; // maximum time per 1000 propagations
unsigned int n_ss_mult_max; // maximum ss multiplier available for the current CSP with the current n_ss
cl_ulong sols_found; // total number of solutions found
unsigned int n_buffers; // number of buffers used for backtracking history
int n_terms; // maximum terms needed on a propagator for generic_mem
unsigned int *exp_values; // number of values expanded to achieve the required number of sub-search spaces
unsigned int n_empty_blocks; // number of times the device would get a block with 0 stores
} device_info;
/*
 * device_args
 *
 * Execution resources for one device: the kernel and command queue, the
 * OpenCL buffer handles (cl_mem), the matching host-side pointers and the
 * size recorded for each buffer. Several buffers also carry a *_const flag
 * telling whether they fit in constant memory.
 *
 * When USE_MORE_BUFFERS is enabled, three extra backtracking buffers are
 * present. When RUN_IN_CUDA is enabled, the struct additionally holds the
 * CUDA driver handles and one CUdeviceptr (suffix _cu) per device buffer.
 * One entry per device is stored in DEVICES_ARGS.
 */
typedef struct device_args {
cl_kernel kernel; // kernel to execute
cl_command_queue cq; // queue for buffering device operations
size_t wi_local; // number of work-items per work-group to create
size_t wi_total; // total number of work-items to create
unsigned int n_vs_to_label; // number of variables that must be labeled
unsigned int split_values_ext; // number of times the buffer for backtracking history has to be extended to allow split_values heuristic
unsigned int n_vs_cs; // number of all variables in all constraints
unsigned int n_cs_vs; // number of all constraints in all variables
unsigned int n_const_cs; // number of all constant values in all constraints with more than one constant value
cl_mem b_ds_mem; // to send domains as bitmaps to the device with the minimum size needed
void **b_ds; // to send domains as bitmaps to the device with the minimum size needed (presumably the host-side counterpart of b_ds_mem - confirm)
size_t b_ds_size; // size of b_ds and b_ds_mem (NOTE(review): comment previously named bitmaps_aux/bitmaps_aux_mem, which are not fields here - confirm)
bool b_ds_const; // true if b_ds_mem fits in constant memory
cl_var_bitmap *cl_vs_bitmaps; // structure with information about the CSP variables, when working with bitmap domains
size_t domains_size; // size of domains_mem buffer
void **bitmaps; // Buffer for variables of bitmap type on host
cl_mem ints_mem; // Buffer for variables of int type on device
cl_int *ints; // Buffer for variables of int type on host
size_t ints_size; // size of ints buffer
bool ints_const; // true if this buffer fits in constant memory
cl_mem domains_mem; // Buffer for variables of bitmap or interval type on device
interval *intervals; // Buffer for variables of interval type on host
cl_mem cl_cs_mem; // Buffer for variables of cl_constr type on device
size_t cl_cs_size; // size of cl_cs_mem buffer
bool cl_cs_const; // true if this buffer fits in constant memory
cl_mem atoms_mem; // Buffer for variables changed with atomic operations on device
cl_uint *atoms; // Buffer for variables changed with atomic operations on host
size_t atoms_size; // size of atoms buffer
cl_mem cl_vs_mem; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
size_t cl_vs_size; // size of cl_vs_mem buffer
bool cl_vs_const; // true if this buffer fits in constant memory
cl_var_interval *cl_vs_intervals; // Buffer for variables of cl_var_interval1 type on host
cl_constr *cl_cs; // Buffer for variables of cl_constr type on host (NOTE(review): comment previously said cl_constr_expl, which does not match the declared type - confirm)
cl_mem vs_id_to_prop_mem; // Buffer for vs_id_to_prop on device
cl_ushort *vs_id_to_prop; // Buffer for vs_id_to_prop on host
size_t vs_id_to_prop_size; // size of vs_id_to_prop/vs_id_to_prop_mem buffer
cl_mem cl_vs_prop_mem; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
size_t cl_vs_prop_size; // size of cl_vs_prop_mem buffer
cl_var_p_interval *cl_vs_intervals_prop; // Buffer for variables of cl_var_p_interval1 type on host
cl_mem stats_mem; // buffer for statistics data on device
cl_ulong *stats; // buffer for statistics data on host
size_t stats_size; // size of stats buffer
cl_mem props_mem; // Buffer for counting propagations done on device
cl_ulong *props; // Buffer for counting propagations done on host
size_t props_size; // size of props buffer
cl_mem shared_stores_mem; // Buffer for work-sharing on device
size_t shared_stores_size; // size of shared_stores_mem buffer (NOTE(review): comment previously said "props buffer", presumably a copy-paste slip - confirm)
cl_mem shared_stores_flag_mem; // Buffer for signaling the state of each work-sharing store on device
cl_uint n_shared_stores; // number of stores for work-sharing
cl_int *shared_stores_flag; // Buffer for signaling the state of each work-sharing store on host
size_t shared_stores_flag_size; // size of shared_stores_flag buffer (NOTE(review): comment previously said "props buffer", presumably a copy-paste slip - confirm)
cl_mem filt_domains_mem; // Buffer for variables of bitmap or interval type on device for the filtering result
size_t filt_domains_size; // size of filt_domains_mem buffer for the filtering result
interval *filt_intervals; // Buffer for variables of interval type on host for the filtering result
void **filt_bitmaps; // Buffer for variables of bitmap type on host for the filtering result
cl_mem filt_cs_mem; // Buffer for constraint ignore flag on device while filtering
size_t filt_cs_size; // size of filt_cs_mem buffer for the filtering
cl_char *filt_cs; // Buffer for constraint ignore flag on host after filtering
cl_mem backtrack_mem1; // Buffer for backtracking history on device
#if USE_MORE_BUFFERS
cl_mem backtrack_mem2; // Buffer for backtracking history on device
cl_mem backtrack_mem3; // Buffer for backtracking history on device
cl_mem backtrack_mem4; // Buffer for backtracking history on device
#endif
size_t backtrack_size; // Size of the buffer for backtracking history on device
cl_mem generic_mem; // Buffer for backtracking indexes and generic integer usages on device
cl_int *generic; // Buffer for backtracking indexes and generic integer usages on host
size_t generic_size; // Size of the generic buffer
cl_mem cs_ignore_mem; // Buffer for storing the flags that indicate if a constraint can prune more on device
size_t cs_ignore_size; // Size of the cs_ignore_mem buffer on device
#if RUN_IN_CUDA
// CUDA driver API handles; the *_mem_cu fields below carry the same descriptions as the cl_mem fields of the same name above.
CUdevice device_cu; // CUDA device
CUcontext context_cu; // CUDA context
CUmodule module_cu; // CUDA module
CUfunction function_cu; // CUDA function
void** kernel_args_cu; // buffer for CUDA kernel arguments
size_t shared_memory_size_cu; // shared memory needed for kernel
CUdeviceptr b_ds_mem_cu; // to send domains as bitmaps to the device with the minimum size needed
CUdeviceptr ints_mem_cu; // Buffer for variables of int type on device
CUdeviceptr domains_mem_cu; // Buffer for variables of bitmap or interval type on device
CUdeviceptr cl_cs_mem_cu; // Buffer for variables of cl_constr type on device
CUdeviceptr atoms_mem_cu; // Buffer for variables changed with atomic operations on device
CUdeviceptr cl_vs_mem_cu; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
CUdeviceptr vs_id_to_prop_mem_cu; // Buffer for vs_id_to_prop on device
CUdeviceptr cl_vs_prop_mem_cu; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
CUdeviceptr stats_mem_cu; // buffer for statistics data on device
CUdeviceptr props_mem_cu; // Buffer for counting propagations done on device
CUdeviceptr shared_stores_mem_cu; // Buffer for work-sharing on device
CUdeviceptr shared_stores_flag_mem_cu; // Buffer for signaling the state of each work-sharing store on device
CUdeviceptr filt_domains_mem_cu; // Buffer for variables of bitmap or interval type on device for the filtering result
CUdeviceptr filt_cs_mem_cu; // Buffer for constraint ignore flag on device while filtering
CUdeviceptr backtrack_mem1_cu; // Buffer for backtracking history on device
#if USE_MORE_BUFFERS
CUdeviceptr backtrack_mem2_cu; // Buffer for backtracking history on device
CUdeviceptr backtrack_mem3_cu; // Buffer for backtracking history on device
CUdeviceptr backtrack_mem4_cu; // Buffer for backtracking history on device
#endif
CUdeviceptr generic_mem_cu; // Buffer for backtracking indexes and generic integer usages on device (NOTE(review): comment previously said "backtracking history"; aligned with generic_mem - confirm)
CUdeviceptr cs_ignore_mem_cu; // Buffer for storing the flags that indicate if a constraint can prune more on device
#endif
} device_args;
// Global per-device tables, MAX_DEVS entries each. These are declarations only;
// the arrays themselves are defined outside this header.
extern device_info DEVICES_INFO[MAX_DEVS]; // Information of the devices to use
extern device_args DEVICES_ARGS[MAX_DEVS]; // Device arguments (buffers, etc.)
// NOTE(review): implementation not visible from this header. Presumably fills in
// dev_info->rel_speed_expect from the device's hardware characteristics - confirm.
void calculate_rel_expect_speed(device_info *dev_info);
// NOTE(review): implementation not visible from this header. Presumably computes the
// *_size fields of dev_args for the given device, with `filtering` selecting the
// filtering-only buffer layout - confirm against the definition.
void set_buffs_size(device_args *dev_args, device_info *dev_info, bool filtering);
#endif /* SRC_DEVICES_H_ */