/* * devices.h * * Created on: 04/01/2017 * Author: pedro */ #ifndef SRC_DEVICES_H_ #define SRC_DEVICES_H_ #include #include #include "CL/cl.h" #include "CL/cl_platform.h" #include "config.h" #include "kernels/cl_constraints.h" #include "kernels/cl_variables.h" #if RUN_IN_CUDA #include #include #endif #define MAX_DEVS 10 // maximum number of devices on a single machine typedef struct device_info { cl_platform_id platform_id; // platform ID cl_device_id device_id; // device ID cl_context context; // Context for running the kernel cl_program prog; // Compiled program for full exploration on device int dev_type_n; // number of the device of this type to use cl_device_type type; // GPU, CPU or MIC size_t def_n_wg; // default number of work-groups to use with this device size_t def_n_wi_wg; // default number of work-items per work-group to use with this device size_t n_wg; // number of work-groups to use with this device size_t n_wi_wg; // number of work-item per work-group to use with this device cl_ulong global_mem_max_alloc; // size of the global memory available to use in each OpenCL buffer cl_ulong global_mem_size; // size of the global memory cl_ulong local_mem_max_alloc; // size of the local memory available to use with OpenCL cl_ulong global_mem_used; // size of the global memory of this device, in bytes cl_ulong constant_mem_max_alloc; // size of the constant memory available to use on each constant buffer bool use_local_mem; // true if local memory should be used char* dev_name; // this device name cl_uint compute_units; // number of compute units on this device cl_uint max_freq; // maximum cores frequency in MHz float rel_speed_expect; // expected speed when comparing the hardware of all the used devices. From 0 to 1, where 1 is the fastest. unsigned int stores_explored; // Number of stores explored by this device unsigned int block_size; // Number of stores send to each device at a time unsigned int first_block_size; // Number of stores send to each device in the first block unsigned int first_store; // Number of the first store to explore unsigned int last_store; // Number of the last store to explore unsigned int times_used; // number of times the kernel was executed by this device unsigned int n_ss_mult; // number used to multiply the number of sub-search spaces sent to this device float last_1ss_solv_time; // milliseconds needed to solve one sub-search space on the last run float avg_1ss_solv_time; // average milliseconds needed to solve one sub-search space float max_1ss_solv_time; // maximum milliseconds needed to solve one sub-search space double last_explor_time; // time taken to explore the last block unsigned int n_fast_blocks; // Number of sequential times that this device took less than 1s to solve a block; float rank; // relative speed of this device to the other devices. From 0 to 1, where 1 is the fastest. bool ranked; // true if this device was already ranked bool first_time_ranked; // true if this is the first time this device will get a block with all the devices already ranked bool working; // true if this device was not released yet cl_ulong ms_finish_time; // time in milliseconds when the device finished all cl_ulong ms_solve_time; // total time that kernel was executed cl_ulong props_total; // total propagations made cl_ulong last_props; // last block propagations made float last_time_prop; // last block time per 1000 propagations float avg_time_prop; // average time per 1000 propagations float max_time_prop; // maximum time per 1000 propagations unsigned int n_ss_mult_max; // maximum ss multiplier available for the current CSP with the current n_ss cl_ulong sols_found; // total number of solutions found unsigned int n_buffers; // number of buffers used for backtracking history int n_terms; // maximum terms needed on a propagator for generic_mem unsigned int* exp_values; // Number of values expanded to achieve the required number of sub-search spaces unsigned int n_empty_blocks; // Number of times the device would get a block with 0 stores } device_info; typedef struct device_args { cl_kernel kernel; // kernel to execute cl_command_queue cq; // Queue for buffering devices operations size_t wi_local; // number of work-items per work-group to create size_t wi_total; // total number of work-items to create unsigned int n_vs_to_label; // number of variables that must be labeled unsigned int split_values_ext; // number of times the buffer for backtracking history has to be extended to allow split_values heuristic unsigned int n_vs_cs; // number of all variables in all constraints unsigned int n_cs_vs; // number of all constraints in all variables unsigned int n_const_cs; // number of all constant values in all constraints with more than one constant value cl_mem b_ds_mem; // to send domains as bitmaps to the device with the minimum size needed void** b_ds; // to send domains as bitmaps to the device with the minimum size needed size_t b_ds_size; // size of bitmaps_aux and bitmaps_aux_mem bool b_ds_const; // true if b_ds_mem fits in constant memory cl_var_bitmap* cl_vs_bitmaps; // structure with information about the CSP variables, when working with bitmap domains size_t domains_size; // size of domains_mem buffer void** bitmaps; // Buffer for variables of bitmap type on host cl_mem ints_mem; // Buffer for variables of int type on device cl_int* ints; // Buffer for variables of int type on host size_t ints_size; // size of ints buffer bool ints_const; // true if this buffer fits in constant memory cl_mem domains_mem; // Buffer for variables of bitmap or interval type on device interval* intervals; // Buffer for variables of interval type on host cl_mem cl_cs_mem; // Buffer for variables of cl_constr type on device size_t cl_cs_size; // size of cl_cs_mem buffer bool cl_cs_const; // true if this buffer fits in constant memory cl_mem atoms_mem; // Buffer for variables changed with atomic operations on device cl_uint* atoms; // Buffer for variables changed with atomic operations on host size_t atoms_size; // size of atoms buffer cl_mem cl_vs_mem; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device size_t cl_vs_size; // size of cl_vs_mem buffer bool cl_vs_const; // true if this buffer fits in constant memory cl_var_interval* cl_vs_intervals; // Buffer for variables of cl_var_interval1 type on host cl_constr* cl_cs; // Buffer for variables of cl_constr_expl type on host cl_mem vs_id_to_prop_mem; // Buffer for vs_id_to_prop on device cl_ushort* vs_id_to_prop; // Buffer for vs_id_to_prop on host size_t vs_id_to_prop_size; // size of vs_id_to_prop/vs_id_to_prop_mem buffer cl_mem cl_vs_prop_mem; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device size_t cl_vs_prop_size; // size of cl_vs_prop_mem buffer cl_var_p_interval* cl_vs_intervals_prop; // Buffer for variables of cl_var_p_interval1 type on host cl_mem stats_mem; // buffer for statistics data on device cl_ulong* stats; // buffer for statistics data on host size_t stats_size; // size of stats buffer cl_mem props_mem; // Buffer for counting propagations done on device cl_ulong* props; // Buffer for counting propagations done on host size_t props_size; // size of props buffer cl_mem shared_stores_mem; // Buffer for work-sharing on device size_t shared_stores_size; // size of props buffer cl_mem shared_stores_flag_mem; // Buffer for signaling the state of each work-sharing store on device cl_uint n_shared_stores; // number of stores for work-sharing cl_int* shared_stores_flag; // Buffer for signaling the state of each work-sharing store on host size_t shared_stores_flag_size; // size of props buffer cl_mem filt_domains_mem; // Buffer for variables of bitmap or interval type on device for the filtering result size_t filt_domains_size; // size of domains_mem buffer for the filtering result interval* filt_intervals; // Buffer for variables of interval type on host for the filtering result void** filt_bitmaps; // Buffer for variables of bitmap type on host for the filtering result cl_mem filt_cs_mem; // Buffer for constraint ignore flag on device while filtering size_t filt_cs_size; // size of filt_cs_mem buffer for the filtering cl_char* filt_cs; // Buffer for constraint ignore flag on host after filtering cl_mem backtrack_mem1; // Buffer for backtracking history on device #if USE_MORE_BUFFERS cl_mem backtrack_mem2; // Buffer for backtracking history on device cl_mem backtrack_mem3; // Buffer for backtracking history on device cl_mem backtrack_mem4; // Buffer for backtracking history on device #endif size_t backtrack_size; // Size of the buffer for backtracking history on device cl_mem generic_mem; // Buffer for backtracking history on device size_t generic_size; // Size of the buffer for backtracking history on device cl_mem cs_ignore_mem; // Buffer for storing the flags that indicate if a constraint can prune more on device size_t cs_ignore_size; // Size of the cs_ignore_mem buffer on device #if RUN_IN_CUDA CUdevice device_cu; CUcontext context_cu; CUmodule module_cu; CUfunction function_cu; void** kernel_args_cu; // buffer for cuda kernel arguments size_t shared_memory_size_cu; // shared memory needed for kernel CUdeviceptr b_ds_mem_cu; // to send domains as bitmaps to the device with the minimum size needed CUdeviceptr ints_mem_cu; // Buffer for variables of int type on device CUdeviceptr domains_mem_cu; // Buffer for variables of bitmap or interval type on device CUdeviceptr cl_cs_mem_cu; // Buffer for variables of cl_constr type on device CUdeviceptr atoms_mem_cu; // Buffer for variables changed with atomic operations on device CUdeviceptr cl_vs_mem_cu; // Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device CUdeviceptr vs_id_to_prop_mem_cu; // Buffer for vs_id_to_prop on device CUdeviceptr cl_vs_prop_mem_cu; // Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device CUdeviceptr stats_mem_cu; // buffer for statistics data on device CUdeviceptr props_mem_cu; // Buffer for counting propagations done on device CUdeviceptr shared_stores_mem_cu; // Buffer for work-sharing on device CUdeviceptr shared_stores_flag_mem_cu; // Buffer for signaling the state of each work-sharing store on device CUdeviceptr filt_domains_mem_cu; // Buffer for variables of bitmap or interval type on device for the filtering result CUdeviceptr filt_cs_mem_cu; // Buffer for constraint ignore flag on device while filtering CUdeviceptr backtrack_mem1_cu; // Buffer for backtracking history on device #if USE_MORE_BUFFERS CUdeviceptr backtrack_mem2_cu; // Buffer for backtracking history on device CUdeviceptr backtrack_mem3_cu; // Buffer for backtracking history on device CUdeviceptr backtrack_mem4_cu; // Buffer for backtracking history on device #endif CUdeviceptr generic_mem_cu; // Buffer for backtracking history on device CUdeviceptr cs_ignore_mem_cu; // Buffer for storing the flags that indicate if a constraint can prune more on device #endif } device_args; extern device_info DEVICES_INFO[MAX_DEVS]; // Information of the devices to use extern device_args DEVICES_ARGS[MAX_DEVS]; // Device arguments (buffers, etc.) void calculate_rel_expect_speed(device_info* dev_info); void set_buffs_size(device_args* dev_args, device_info* dev_info, bool filtering); #endif /* SRC_DEVICES_H_ */