// devices.h 11.6 KB
/*
 * devices.h
 *
 *  Created on: 04/01/2017
 *      Author: Pedro
 */

#ifndef SRC_DEVICES_H_
#define SRC_DEVICES_H_

#include <stdbool.h>
#include <stddef.h>

#include "CL/cl.h"
#include "CL/cl_platform.h"

#include "config.h"
#include "kernels/cl_constraints.h"
#include "kernels/cl_variables.h"

#if RUN_IN_CUDA
#include <cuda.h>
#include <builtin_types.h>
#endif

// Maximum number of devices on a single machine.
// Also the fixed length of the DEVICES_INFO and DEVICES_ARGS arrays declared in this header.
#define MAX_DEVS 10

/*
 * Per-device description and run-time bookkeeping.
 *
 * Groups the OpenCL identification of one device (platform, device, context,
 * program), its memory limits and work-group configuration, and the counters
 * and timings collected while the device explores blocks of stores.
 */
typedef struct device_info {
	cl_platform_id platform_id;	// OpenCL platform ID
	cl_device_id device_id;		// OpenCL device ID
	cl_context context;			// Context for running the kernel
	cl_program prog;		// Compiled program for full exploration on device
	int dev_type_n;			// number of the device of this type to use
	cl_device_type type;	// GPU, CPU or MIC
	size_t def_n_wg;	// default number of work-groups to use with this device
	size_t def_n_wi_wg;	// default number of work-items per work-group to use with this device
	size_t n_wg;		// number of work-groups to use with this device
	size_t n_wi_wg;		// number of work-items per work-group to use with this device
	cl_ulong global_mem_max_alloc;	// size of the global memory available to use in each OpenCL buffer
	cl_ulong global_mem_size;		// size of the global memory
	cl_ulong local_mem_max_alloc;	// size of the local memory available to use with OpenCL
	cl_ulong global_mem_used;		// global memory of this device, in bytes. NOTE(review): the name suggests the amount in use, while the original comment said "size of the global memory" - confirm
	cl_ulong constant_mem_max_alloc;	// size of the constant memory available to use on each constant buffer
	bool use_local_mem;		// true if local memory should be used
	char *dev_name;			// name of this device
	cl_uint compute_units; // number of compute units on this device
	cl_uint max_freq;		// maximum core frequency in MHz
	float rel_speed_expect;	// expected speed when comparing the hardware of all the used devices. From 0 to 1, where 1 is the fastest.
	unsigned int stores_explored; 	// Number of stores explored by this device
	unsigned int block_size;		// Number of stores sent to each device at a time
	unsigned int first_block_size;	// Number of stores sent to each device in the first block
	unsigned int first_store;	// Number of the first store to explore
	unsigned int last_store;	// Number of the last store to explore
	unsigned int times_used;	// number of times the kernel was executed by this device
	unsigned int n_ss_mult;		// number used to multiply the number of sub-search spaces sent to this device
	float last_1ss_solv_time;	// milliseconds needed to solve one sub-search space on the last run
	float avg_1ss_solv_time;	// average milliseconds needed to solve one sub-search space
	float max_1ss_solv_time;	// maximum milliseconds needed to solve one sub-search space
	double last_explor_time;	// time taken to explore the last block
	unsigned int n_fast_blocks;	// Number of sequential times that this device took less than 1s to solve a block
	float rank;		// relative speed of this device to the other devices. From 0 to 1, where 1 is the fastest.
	bool ranked;	// true if this device was already ranked
	bool first_time_ranked;	// true if this is the first time this device will get a block with all the devices already ranked
	bool working;			// true if this device was not released yet
	cl_ulong ms_finish_time;	// time in milliseconds when the device finished all. NOTE(review): original comment is truncated, presumably "all its work" - confirm
	cl_ulong ms_solve_time;	// total time that the kernel was executed. NOTE(review): presumably milliseconds, going by the ms_ prefix - confirm
	cl_ulong props_total;	// total propagations made
	cl_ulong last_props;	// propagations made on the last block
	float last_time_prop;	// last block time per 1000 propagations
	float avg_time_prop;	// average time per 1000 propagations
	float max_time_prop;	// maximum time per 1000 propagations
	unsigned int n_ss_mult_max;	// maximum ss multiplier available for the current CSP with the current n_ss
	cl_ulong sols_found;		// total number of solutions found
	unsigned int n_buffers;		// number of buffers used for backtracking history
	int n_terms;				// maximum terms needed on a propagator for generic_mem
	unsigned int *exp_values;	// Number of values expanded to achieve the required number of sub-search spaces
	unsigned int n_empty_blocks;	// Number of times the device would get a block with 0 stores

} device_info;

/*
 * Per-device kernel arguments.
 *
 * Holds the kernel and command queue for one device, plus the buffers passed
 * to the kernel. Most buffers appear as a group: a device-side handle
 * (`*_mem`), an optional host-side pointer, a size (`*_size`) and, for some,
 * a flag telling whether the buffer fits in constant memory (`*_const`).
 * When RUN_IN_CUDA is enabled, the CUDA handles and a `*_cu` counterpart of
 * each device buffer are also present.
 */
typedef struct device_args {
	cl_kernel kernel;		// kernel to execute
	cl_command_queue cq;	// Queue for buffering device operations
	size_t wi_local;	// number of work-items per work-group to create
	size_t wi_total;	// total number of work-items to create
	unsigned int n_vs_to_label;		// number of variables that must be labeled
	unsigned int split_values_ext;	// number of times the buffer for backtracking history has to be extended to allow split_values heuristic
	unsigned int n_vs_cs;	// number of all variables in all constraints
	unsigned int n_cs_vs;	// number of all constraints in all variables
	unsigned int n_const_cs;	// number of all constant values in all constraints with more than one constant value
	cl_mem b_ds_mem;	// to send domains as bitmaps to the device with the minimum size needed (device side)
	void **b_ds;		// to send domains as bitmaps to the device with the minimum size needed (host side)
	size_t b_ds_size;	// size of b_ds and b_ds_mem. NOTE(review): original comment named bitmaps_aux/bitmaps_aux_mem, which are not fields of this struct - confirm
	bool b_ds_const;	// true if b_ds_mem fits in constant memory
	cl_var_bitmap *cl_vs_bitmaps;	// structure with information about the CSP variables, when working with bitmap domains
	size_t domains_size;	// size of domains_mem buffer
	void **bitmaps;		// Buffer for variables of bitmap type on host
	cl_mem ints_mem;	// Buffer for variables of int type on device
	cl_int *ints;		// Buffer for variables of int type on host
	size_t ints_size;	// size of ints buffer
	bool ints_const;	// true if this buffer fits in constant memory
	cl_mem domains_mem;		// Buffer for variables of bitmap or interval type on device
	interval *intervals;	// Buffer for variables of interval type on host
	cl_mem cl_cs_mem;		// Buffer for variables of cl_constr type on device
	size_t cl_cs_size;	// size of cl_cs_mem buffer
	bool cl_cs_const;	// true if this buffer fits in constant memory
	cl_mem atoms_mem;	// Buffer for variables changed with atomic operations on device
	cl_uint *atoms;		// Buffer for variables changed with atomic operations on host
	size_t atoms_size;	// size of atoms buffer
	cl_mem cl_vs_mem;	// Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
	size_t cl_vs_size;	// size of cl_vs_mem buffer
	bool cl_vs_const;	// true if this buffer fits in constant memory
	cl_var_interval *cl_vs_intervals;	// Buffer for variables of cl_var_interval type on host (original comment said cl_var_interval1)
	cl_constr *cl_cs;	// Buffer for variables of cl_constr type on host (original comment said cl_constr_expl)
	cl_mem vs_id_to_prop_mem;	// Buffer for vs_id_to_prop on device
	cl_ushort *vs_id_to_prop;	// Buffer for vs_id_to_prop on host
	size_t vs_id_to_prop_size;	// size of vs_id_to_prop/vs_id_to_prop_mem buffer
	cl_mem cl_vs_prop_mem;	// Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
	size_t cl_vs_prop_size;	// size of cl_vs_prop_mem buffer
	cl_var_p_interval *cl_vs_intervals_prop;	// Buffer for variables of cl_var_p_interval type on host (original comment said cl_var_p_interval1)
	cl_mem stats_mem;	// buffer for statistics data on device
	cl_ulong *stats;	// buffer for statistics data on host
	size_t stats_size;	// size of stats buffer
	cl_mem props_mem;	// Buffer for counting propagations done on device
	cl_ulong *props;	// Buffer for counting propagations done on host
	size_t props_size;	// size of props buffer
	cl_mem shared_stores_mem;	// Buffer for work-sharing on device
	size_t shared_stores_size;	// size of shared_stores_mem buffer. NOTE(review): original comment said "props buffer", presumably a copy/paste slip - confirm
	cl_mem shared_stores_flag_mem;	// Buffer for signaling the state of each work-sharing store on device
	cl_uint n_shared_stores;		// number of stores for work-sharing
	cl_int *shared_stores_flag;		// Buffer for signaling the state of each work-sharing store on host
	size_t shared_stores_flag_size;	// size of shared_stores_flag buffer. NOTE(review): original comment said "props buffer", presumably a copy/paste slip - confirm
	cl_mem filt_domains_mem;	// Buffer for variables of bitmap or interval type on device for the filtering result
	size_t filt_domains_size;	// size of filt_domains_mem buffer for the filtering result
	interval *filt_intervals;	// Buffer for variables of interval type on host for the filtering result
	void **filt_bitmaps;		// Buffer for variables of bitmap type on host for the filtering result
	cl_mem filt_cs_mem;		// Buffer for constraint ignore flag on device while filtering
	size_t filt_cs_size;	// size of filt_cs_mem buffer for the filtering
	cl_char *filt_cs;		// Buffer for constraint ignore flag on host after filtering
	cl_mem backtrack_mem1;	// Buffer for backtracking history on device
#if USE_MORE_BUFFERS
	// Additional backtracking history buffers, only present when USE_MORE_BUFFERS is enabled
	cl_mem backtrack_mem2;	// Buffer for backtracking history on device
	cl_mem backtrack_mem3;	// Buffer for backtracking history on device
	cl_mem backtrack_mem4;	// Buffer for backtracking history on device
#endif
	size_t backtrack_size;	// Size of the buffer for backtracking history on device
	cl_mem generic_mem;		// Buffer for backtracking indexes and generic integer usages on device
	cl_int *generic;		// Buffer for backtracking indexes and generic integer usages on host
	size_t generic_size;	// Size of the generic buffer
	cl_mem cs_ignore_mem;	// Buffer for storing the flags that indicate if a constraint can prune more on device
	size_t cs_ignore_size;	// Size of the cs_ignore_mem buffer on device

#if RUN_IN_CUDA
	// CUDA handles and the `_cu` counterparts of the OpenCL device buffers declared above
	CUdevice device_cu;			// CUDA device
	CUcontext context_cu;		// CUDA context
	CUmodule module_cu;			// CUDA module
	CUfunction function_cu;		// CUDA function
	void** kernel_args_cu;		// buffer for CUDA kernel arguments
	size_t shared_memory_size_cu;	// shared memory needed for kernel
	CUdeviceptr b_ds_mem_cu;		// to send domains as bitmaps to the device with the minimum size needed
	CUdeviceptr ints_mem_cu;		// Buffer for variables of int type on device
	CUdeviceptr domains_mem_cu;		// Buffer for variables of bitmap or interval type on device
	CUdeviceptr cl_cs_mem_cu;		// Buffer for variables of cl_constr type on device
	CUdeviceptr atoms_mem_cu;		// Buffer for variables changed with atomic operations on device
	CUdeviceptr cl_vs_mem_cu;		// Buffer for variables of cl_var_bitmap or cl_var_interval1 or cl_var_interval2 type on device
	CUdeviceptr vs_id_to_prop_mem_cu;	// Buffer for vs_id_to_prop on device
	CUdeviceptr cl_vs_prop_mem_cu;	// Buffer for variables of cl_var_p_bitmap or cl_var_p_interval1 or cl_var_p_interval2 type on device
	CUdeviceptr stats_mem_cu;		// buffer for statistics data on device
	CUdeviceptr props_mem_cu;		// Buffer for counting propagations done on device
	CUdeviceptr shared_stores_mem_cu;		// Buffer for work-sharing on device
	CUdeviceptr shared_stores_flag_mem_cu;	// Buffer for signaling the state of each work-sharing store on device
	CUdeviceptr filt_domains_mem_cu;		// Buffer for variables of bitmap or interval type on device for the filtering result
	CUdeviceptr filt_cs_mem_cu;		// Buffer for constraint ignore flag on device while filtering
	CUdeviceptr backtrack_mem1_cu;	// Buffer for backtracking history on device
#if USE_MORE_BUFFERS
	CUdeviceptr backtrack_mem2_cu;	// Buffer for backtracking history on device
	CUdeviceptr backtrack_mem3_cu;	// Buffer for backtracking history on device
	CUdeviceptr backtrack_mem4_cu;	// Buffer for backtracking history on device
#endif
	CUdeviceptr generic_mem_cu;		// Buffer for backtracking indexes and generic integer usages on device. NOTE(review): original comment said "backtracking history"; aligned with generic_mem - confirm
	CUdeviceptr cs_ignore_mem_cu;	// Buffer for storing the flags that indicate if a constraint can prune more on device
#endif

} device_args;

// Global per-device tables, MAX_DEVS entries each.
// These are declarations only; the definitions must live in a single .c file.
extern device_info DEVICES_INFO[MAX_DEVS];	// Information of the devices to use
extern device_args DEVICES_ARGS[MAX_DEVS];	// Device arguments (buffers, etc.)

// NOTE(review): the implementations are not visible from this header; the
// descriptions below are inferred from the names and parameter types only.

// Presumably computes the expected relative speed (rel_speed_expect) of the
// device(s) reachable through dev_info - confirm against the definition.
void calculate_rel_expect_speed(device_info *dev_info);

// Presumably sets the `*_size` fields of dev_args for the device described by
// dev_info; `filtering` presumably selects the sizes used for the filtering
// run - confirm against the definition.
void set_buffs_size(device_args *dev_args, device_info *dev_info, bool filtering);

#endif /* SRC_DEVICES_H_ */