94b2b13d
Pedro Roque
PHACT source
|
1
2
3
4
|
/*
* devices.h
*
* Created on: 04/01/2017
|
4d26a735
Pedro Roque
Increased recogni...
|
5
|
* Author: pedro
|
94b2b13d
Pedro Roque
PHACT source
|
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
*/
#ifndef SRC_DEVICES_H_
#define SRC_DEVICES_H_
#include <stdbool.h>
#include <stddef.h>
#include "CL/cl.h"
#include "CL/cl_platform.h"
#include "config.h"
#include "kernels/cl_constraints.h"
#include "kernels/cl_variables.h"
#if RUN_IN_CUDA
#include <cuda.h>
#include <builtin_types.h>
#endif
// Dimension of the DEVICES_INFO and DEVICES_ARGS arrays declared at the end of this header.
#define MAX_DEVS 10 // maximum number of devices on a single machine
// Description of one compute device plus the bookkeeping kept for it:
// OpenCL platform/device/context/program handles, work-group configuration,
// memory limits, and the timing, ranking and propagation counters.
// One entry per device is held in the DEVICES_INFO array.
typedef struct device_info {
cl_platform_id platform_id; // platform ID
cl_device_id device_id; // device ID
|
4d26a735
Pedro Roque
Increased recogni...
|
31
32
33
|
cl_context context; // Context for running the kernel
cl_program prog; // Compiled program for full exploration on device
int dev_type_n; // number of the device of this type to use
|
94b2b13d
Pedro Roque
PHACT source
|
34
35
36
37
38
39
|
cl_device_type type; // GPU, CPU or MIC
size_t def_n_wg; // default number of work-groups to use with this device
size_t def_n_wi_wg; // default number of work-items per work-group to use with this device
size_t n_wg; // number of work-groups to use with this device
size_t n_wi_wg; // number of work-items per work-group to use with this device
cl_ulong global_mem_max_alloc; // size of the global memory available to use in each OpenCL buffer
|
4d26a735
Pedro Roque
Increased recogni...
|
40
|
cl_ulong global_mem_size; // size of the global memory
|
94b2b13d
Pedro Roque
PHACT source
|
41
|
cl_ulong local_mem_max_alloc; // size of the local memory available to use with OpenCL
|
4d26a735
Pedro Roque
Increased recogni...
|
42
|
cl_ulong global_mem_used; // size of the global memory of this device, in bytes -- NOTE(review): the name suggests the amount in use, while global_mem_size holds the total; confirm
|
94b2b13d
Pedro Roque
PHACT source
|
43
|
cl_ulong constant_mem_max_alloc; // size of the constant memory available to use on each constant buffer
|
4d26a735
Pedro Roque
Increased recogni...
|
44
45
|
bool use_local_mem; // true if local memory should be used
char* dev_name; // name of this device
|
94b2b13d
Pedro Roque
PHACT source
|
46
|
cl_uint compute_units; // number of compute units on this device
|
4d26a735
Pedro Roque
Increased recogni...
|
47
|
cl_uint max_freq; // maximum cores frequency in MHz
|
94b2b13d
Pedro Roque
PHACT source
|
48
49
|
float rel_speed_expect; // expected speed when comparing the hardware of all the used devices. From 0 to 1, where 1 is the fastest.
unsigned int stores_explored; // Number of stores explored by this device
|
4d26a735
Pedro Roque
Increased recogni...
|
50
|
unsigned int block_size; // Number of stores sent to each device at a time
|
94b2b13d
Pedro Roque
PHACT source
|
51
52
53
54
55
56
57
58
|
unsigned int first_block_size; // Number of stores sent to each device in the first block
unsigned int first_store; // Number of the first store to explore
unsigned int last_store; // Number of the last store to explore
unsigned int times_used; // number of times the kernel was executed by this device
unsigned int n_ss_mult; // number used to multiply the number of sub-search spaces sent to this device
float last_1ss_solv_time; // milliseconds needed to solve one sub-search space on the last run
float avg_1ss_solv_time; // average milliseconds needed to solve one sub-search space
float max_1ss_solv_time; // maximum milliseconds needed to solve one sub-search space
|
4d26a735
Pedro Roque
Increased recogni...
|
59
|
double last_explor_time; // time taken to explore the last block
|
94b2b13d
Pedro Roque
PHACT source
|
60
61
62
63
|
unsigned int n_fast_blocks; // Number of sequential times that this device took less than 1s to solve a block
float rank; // relative speed of this device to the other devices. From 0 to 1, where 1 is the fastest.
bool ranked; // true if this device was already ranked
bool first_time_ranked; // true if this is the first time this device will get a block with all the devices already ranked
|
4d26a735
Pedro Roque
Increased recogni...
|
64
|
bool working; // true if this device was not released yet
|
94b2b13d
Pedro Roque
PHACT source
|
65
66
67
68
69
70
71
72
|
cl_ulong ms_finish_time; // time in milliseconds when the device finished all (NOTE(review): original comment is truncated; presumably "all its work" -- confirm)
cl_ulong ms_solve_time; // total time that kernel was executed
cl_ulong props_total; // total propagations made
cl_ulong last_props; // last block propagations made
float last_time_prop; // last block time per 1000 propagations
float avg_time_prop; // average time per 1000 propagations
float max_time_prop; // maximum time per 1000 propagations
unsigned int n_ss_mult_max; // maximum ss multiplier available for the current CSP with the current n_ss
|
4d26a735
Pedro Roque
Increased recogni...
|
73
|
cl_ulong sols_found; // total number of solutions found
|
94b2b13d
Pedro Roque
PHACT source
|
74
|
unsigned int n_buffers; // number of buffers used for backtracking history
|
4d26a735
Pedro Roque
Increased recogni...
|
75
76
|
int n_terms; // maximum terms needed on a propagator for generic_mem
unsigned int* exp_values; // Number of values expanded to achieve the required number of sub-search spaces
|
94b2b13d
Pedro Roque
PHACT source
|
77
78
79
80
81
82
83
84
85
|
unsigned int n_empty_blocks; // Number of times the device would get a block with 0 stores
} device_info;
// Execution state for one device: the kernel and command queue, work-item
// counts, and the buffers exchanged with the device together with their sizes.
// Fields typed cl_mem are OpenCL memory objects; the pointer fields documented
// as "on host" are their host-side counterparts.
// When RUN_IN_CUDA is enabled the struct additionally carries CUDA handles and
// one CUdeviceptr per cl_mem buffer (fields suffixed _cu).
// One entry per device is held in the DEVICES_ARGS array.
typedef struct device_args {
cl_kernel kernel; // kernel to execute
cl_command_queue cq; // Queue for buffering devices operations
size_t wi_local; // number of work-items per work-group to create
size_t wi_total; // total number of work-items to create
|
4d26a735
Pedro Roque
Increased recogni...
|
86
|
unsigned int n_vs_to_label; // number of variables that must be labeled
|
94b2b13d
Pedro Roque
PHACT source
|
87
88
89
90
91
|
unsigned int split_values_ext; // number of times the buffer for backtracking history has to be extended to allow split_values heuristic
unsigned int n_vs_cs; // number of all variables in all constraints
unsigned int n_cs_vs; // number of all constraints in all variables
unsigned int n_const_cs; // number of all constant values in all constraints with more than one constant value
cl_mem b_ds_mem; // to send domains as bitmaps to the device with the minimum size needed
|
4d26a735
Pedro Roque
Increased recogni...
|
92
|
void** b_ds; // to send domains as bitmaps to the device with the minimum size needed (NOTE(review): presumably the host-side data behind b_ds_mem -- confirm)
|
94b2b13d
Pedro Roque
PHACT source
|
93
94
|
size_t b_ds_size; // size of b_ds and b_ds_mem (NOTE(review): original comment named bitmaps_aux and bitmaps_aux_mem, which are not fields of this struct; presumably older names -- confirm)
bool b_ds_const; // true if b_ds_mem fits in constant memory
|
4d26a735
Pedro Roque
Increased recogni...
|
95
|
cl_var_bitmap* cl_vs_bitmaps; // structure with information about the CSP variables, when working with bitmap domains
|
94b2b13d
Pedro Roque
PHACT source
|
96
|
size_t domains_size; // size of domains_mem buffer
|
4d26a735
Pedro Roque
Increased recogni...
|
97
|
void** bitmaps; // Buffer for variables of bitmap type on host
|
94b2b13d
Pedro Roque
PHACT source
|
98
|
cl_mem ints_mem; // Buffer for variables of int type on device
|
4d26a735
Pedro Roque
Increased recogni...
|
99
|
cl_int* ints; // Buffer for variables of int type on host
|
94b2b13d
Pedro Roque
PHACT source
|
100
101
102
|
size_t ints_size; // size of ints buffer
bool ints_const; // true if this buffer fits in constant memory
cl_mem domains_mem; // Buffer for variables of bitmap or interval type on device
|
4d26a735
Pedro Roque
Increased recogni...
|
103
|
interval* intervals; // Buffer for variables of interval type on host
|
94b2b13d
Pedro Roque
PHACT source
|
104
105
106
|
cl_mem cl_cs_mem; // Buffer for variables of cl_constr type on device
size_t cl_cs_size; // size of cl_cs_mem buffer
bool cl_cs_const; // true if this buffer fits in constant memory
|
4d26a735
Pedro Roque
Increased recogni...
|
107
108
|
cl_mem atoms_mem; // Buffer for variables changed with atomic operations on device
cl_uint* atoms; // Buffer for variables changed with atomic operations on host
|
94b2b13d
Pedro Roque
PHACT source
|
109
|
size_t atoms_size; // size of atoms buffer
|
4d26a735
Pedro Roque
Increased recogni...
|
110
|
cl_mem cl_vs_mem; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
|
94b2b13d
Pedro Roque
PHACT source
|
111
112
|
size_t cl_vs_size; // size of cl_vs_mem buffer
bool cl_vs_const; // true if this buffer fits in constant memory
|
4d26a735
Pedro Roque
Increased recogni...
|
113
114
|
cl_var_interval* cl_vs_intervals; // Buffer for variables of cl_var_interval type on host (original comment said cl_var_interval1)
cl_constr* cl_cs; // Buffer for variables of cl_constr type on host (original comment said cl_constr_expl)
|
94b2b13d
Pedro Roque
PHACT source
|
115
|
cl_mem vs_id_to_prop_mem; // Buffer for vs_id_to_prop on device
|
4d26a735
Pedro Roque
Increased recogni...
|
116
|
cl_ushort* vs_id_to_prop; // Buffer for vs_id_to_prop on host
|
94b2b13d
Pedro Roque
PHACT source
|
117
118
119
|
size_t vs_id_to_prop_size; // size of vs_id_to_prop/vs_id_to_prop_mem buffer
cl_mem cl_vs_prop_mem; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
size_t cl_vs_prop_size; // size of cl_vs_prop_mem buffer
|
4d26a735
Pedro Roque
Increased recogni...
|
120
|
cl_var_p_interval* cl_vs_intervals_prop; // Buffer for variables of cl_var_p_interval type on host (original comment said cl_var_p_interval1)
|
94b2b13d
Pedro Roque
PHACT source
|
121
|
cl_mem stats_mem; // buffer for statistics data on device
|
4d26a735
Pedro Roque
Increased recogni...
|
122
|
cl_ulong* stats; // buffer for statistics data on host
|
94b2b13d
Pedro Roque
PHACT source
|
123
124
|
size_t stats_size; // size of stats buffer
cl_mem props_mem; // Buffer for counting propagations done on device
|
4d26a735
Pedro Roque
Increased recogni...
|
125
|
cl_ulong* props; // Buffer for counting propagations done on host
|
94b2b13d
Pedro Roque
PHACT source
|
126
127
128
129
|
size_t props_size; // size of props buffer
cl_mem shared_stores_mem; // Buffer for work-sharing on device
size_t shared_stores_size; // size of shared_stores_mem buffer (NOTE(review): original comment said "props buffer", presumably a copy-paste slip -- confirm)
cl_mem shared_stores_flag_mem; // Buffer for signaling the state of each work-sharing store on device
|
4d26a735
Pedro Roque
Increased recogni...
|
130
131
|
cl_uint n_shared_stores; // number of stores for work-sharing
cl_int* shared_stores_flag; // Buffer for signaling the state of each work-sharing store on host
|
94b2b13d
Pedro Roque
PHACT source
|
132
133
134
|
size_t shared_stores_flag_size; // size of shared_stores_flag buffer (NOTE(review): original comment said "props buffer", presumably a copy-paste slip -- confirm)
cl_mem filt_domains_mem; // Buffer for variables of bitmap or interval type on device for the filtering result
size_t filt_domains_size; // size of filt_domains_mem buffer for the filtering result (NOTE(review): original comment named domains_mem -- confirm)
|
4d26a735
Pedro Roque
Increased recogni...
|
135
136
|
interval* filt_intervals; // Buffer for variables of interval type on host for the filtering result
void** filt_bitmaps; // Buffer for variables of bitmap type on host for the filtering result
|
94b2b13d
Pedro Roque
PHACT source
|
137
138
|
cl_mem filt_cs_mem; // Buffer for constraint ignore flag on device while filtering
size_t filt_cs_size; // size of filt_cs_mem buffer for the filtering
|
4d26a735
Pedro Roque
Increased recogni...
|
139
|
cl_char* filt_cs; // Buffer for constraint ignore flag on host after filtering
|
94b2b13d
Pedro Roque
PHACT source
|
140
141
142
143
144
145
146
|
cl_mem backtrack_mem1; // Buffer for backtracking history on device
// Additional backtracking-history buffers, compiled in only when USE_MORE_BUFFERS is set
#if USE_MORE_BUFFERS
cl_mem backtrack_mem2; // Buffer for backtracking history on device
cl_mem backtrack_mem3; // Buffer for backtracking history on device
cl_mem backtrack_mem4; // Buffer for backtracking history on device
#endif
size_t backtrack_size; // Size of the buffer for backtracking history on device
|
4d26a735
Pedro Roque
Increased recogni...
|
147
148
149
|
cl_mem generic_mem; // Buffer for backtracking history on device (NOTE(review): same text as backtrack_mem1; device_info.n_terms mentions generic_mem in connection with propagator terms -- confirm actual purpose)
size_t generic_size; // Size of the generic_mem buffer on device (NOTE(review): original comment repeated the backtracking-history wording -- confirm)
cl_mem cs_ignore_mem; // Buffer for storing the flags that indicate if a constraint can prune more on device
|
94b2b13d
Pedro Roque
PHACT source
|
150
151
152
153
|
size_t cs_ignore_size; // Size of the cs_ignore_mem buffer on device
// CUDA-only members: handles plus one CUdeviceptr per cl_mem buffer declared above
#if RUN_IN_CUDA
CUdevice device_cu; // CUDA device handle
|
4d26a735
Pedro Roque
Increased recogni...
|
154
155
156
157
158
|
CUcontext context_cu; // CUDA context handle
CUmodule module_cu; // CUDA module handle
CUfunction function_cu; // CUDA function handle (presumably the kernel entry point -- confirm)
void** kernel_args_cu; // buffer for cuda kernel arguments
size_t shared_memory_size_cu; // shared memory needed for kernel
|
94b2b13d
Pedro Roque
PHACT source
|
159
|
CUdeviceptr b_ds_mem_cu; // to send domains as bitmaps to the device with the minimum size needed
|
4d26a735
Pedro Roque
Increased recogni...
|
160
161
|
CUdeviceptr ints_mem_cu; // Buffer for variables of int type on device
CUdeviceptr domains_mem_cu; // Buffer for variables of bitmap or interval type on device
|
94b2b13d
Pedro Roque
PHACT source
|
162
163
164
165
166
167
|
CUdeviceptr cl_cs_mem_cu; // Buffer for variables of cl_constr type on device
CUdeviceptr atoms_mem_cu; // Buffer for variables changed with atomic operations on device
CUdeviceptr cl_vs_mem_cu; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
CUdeviceptr vs_id_to_prop_mem_cu; // Buffer for vs_id_to_prop on device
CUdeviceptr cl_vs_prop_mem_cu; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
CUdeviceptr stats_mem_cu; // buffer for statistics data on device
|
4d26a735
Pedro Roque
Increased recogni...
|
168
169
170
|
CUdeviceptr props_mem_cu; // Buffer for counting propagations done on device
CUdeviceptr shared_stores_mem_cu; // Buffer for work-sharing on device
CUdeviceptr shared_stores_flag_mem_cu; // Buffer for signaling the state of each work-sharing store on device
|
94b2b13d
Pedro Roque
PHACT source
|
171
|
CUdeviceptr filt_domains_mem_cu; // Buffer for variables of bitmap or interval type on device for the filtering result
|
4d26a735
Pedro Roque
Increased recogni...
|
172
|
CUdeviceptr filt_cs_mem_cu; // Buffer for constraint ignore flag on device while filtering
|
94b2b13d
Pedro Roque
PHACT source
|
173
174
175
176
177
178
179
|
CUdeviceptr backtrack_mem1_cu; // Buffer for backtracking history on device
#if USE_MORE_BUFFERS
CUdeviceptr backtrack_mem2_cu; // Buffer for backtracking history on device
CUdeviceptr backtrack_mem3_cu; // Buffer for backtracking history on device
CUdeviceptr backtrack_mem4_cu; // Buffer for backtracking history on device
#endif
CUdeviceptr generic_mem_cu; // Buffer for backtracking history on device (NOTE(review): same wording as backtrack_mem1_cu -- confirm actual purpose)
|
4d26a735
Pedro Roque
Increased recogni...
|
180
|
CUdeviceptr cs_ignore_mem_cu; // Buffer for storing the flags that indicate if a constraint can prune more on device
|
94b2b13d
Pedro Roque
PHACT source
|
181
182
183
184
185
186
187
188
|
#endif
} device_args;
// Global per-device tables, each with MAX_DEVS slots. Declared extern here, so
// the definitions live in another translation unit.
extern device_info DEVICES_INFO[MAX_DEVS]; // Information of the devices to use
extern device_args DEVICES_ARGS[MAX_DEVS]; // Device arguments (buffers, etc.)
// Takes a pointer to device_info and returns nothing.
// NOTE(review): presumably fills in the rel_speed_expect field(s) from the
// hardware data; whether dev_info is a single entry or an array is not visible
// from this header -- confirm against the definition.
void calculate_rel_expect_speed(device_info* dev_info);
|
4d26a735
Pedro Roque
Increased recogni...
|
189
190
|
// Takes the argument and info records of a device plus a filtering flag; returns nothing.
// NOTE(review): presumably computes the buffer size fields of dev_args, with
// `filtering` selecting the filtering variant -- confirm against the definition,
// which is not visible from this header.
void set_buffs_size(device_args* dev_args, device_info* dev_info, bool filtering);
|
94b2b13d
Pedro Roque
PHACT source
|
191
|
#endif /* SRC_DEVICES_H_ */
|