/* * cl_explore.cl * * Created on: 19/03/2015 * Author: Pedro */ #if CUDA_VERSION #include "../utils/cu_syntax.h" #endif #ifndef __OPENCL_VERSION__ #include #include #include #include #include "cl_explore.h" #include "cl_aux_functions.h" #include "cl_propagators.h" #include "../utils/cl_syntax.h" #else #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable //#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable // not supported by some devices //#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable // not supported by some devices #include "cl_aux_functions.c" #include "cl_propagators.c" #endif #include "../config.h" #include "../domains.h" #include "cl_constraints.h" #include "cl_variables.h" #include "cl_ttl.h" #if CL_D_TYPE == CL_BITMAP #include "cl_bitmaps.h" #elif CL_D_TYPE == CL_INTERVAL #include "cl_intervals.h" #endif /* * Explore the received sub-search spaces and return the number of solutions found or the solution found */ // atoms: (for finding all solutions) // buffer for atomics data (Most devices only have atomics for 32 bits variables) // 0 - first sub-search to explore // 1 - last sub-search to explore // 2 - n_ss // 3 - depth // 4 - WIs still working for work-sharing // 5 - 5+N_VS - n_repeat per variable // 5+N_VS...5+N_VS+N_WG*N_WI_WG - number of solutions found per work-item // // atoms: (for finding one solution) // buffer for atomics data (Most devices only have atomics for 32 bits variables) // 0 - first sub-search to explore // 1 - last sub-search to explore // 2 - n_ss // 3 - depth // 4 - WIs still working for work-sharing // 5 - 5+N_VS - n_repeat per variable // 5+N_VS - solution found flag // // atoms: (optimization) // buffer for atomics data (Most devices only have atomics for 32 bits variables) // 0 - first sub-search to explore // 1 - last sub-search to explore // 2 - n_ss // 3 - depth // 4 - WIs still working for work-sharing // 5 - 5+N_VS - n_repeat per variable // 5+N_VS - solution found flag // 6+N_VS - Value to optimize // 7+N_VS - WIs still working for saving the best solution // // ints: // 0...+cs_vs_idx - each constraint list of constrained variables ids placed per constraint order // cs_vs_idx...cs_vs_idx+vs_cs_idx - each variable list of constraints ids placed per variable order // cs_vs_idx+vs_cs_idx...cs_vs_idx+vs_cs_idx+n_const_cs - each constraint list of constants placed per constraint order // // backtrack: // 0...(n_vs_to_label+2)*CL_N_VS*CL_SPLIT_VALUES_EXT*wi_total - backtracking history // // generic: // (dev_args->n_vs_to_label + 2) * dev_args->split_values_ext) * 2 - to use in kernel (hist_labeleds_id and hist_labeleds_n_vals) // n_terms * dev_args->wi_total - to use in propagators // CL_D_MAX+1 - for ss generation // // cs_ignore // CL_N_CS*wi_total - Buffer for storing the flags that indicate if a constraint can prune more on device // // domains: (for finding one solution) // 0...N_VS - solution domains // // domains: (optimization) // 0...N_VS*(CL_D_MAX+1) - (CL_D_MAX+1) solution stores because concurrency control // // vs_id_to_prop: // 0 - first index of the vector that contains an variable ID to propagate // 1 - index of the vector where next variable ID to propagate should be added // 2...2+CL_N_VS+3 - variables ID to propagate // // stats: // 0 - (*nodes_fail) // 1 - (*nodes_expl) // 2 - (*backtracks) // 3 - (*labels) // 4 - (*pruning) // 5 - (*props_ok) // 6 - (*max_depth) // ... repeat per work-item // // flags for signaling the state of each work-sharing store // 0 - next shared SS to be picked // 1 - next shared SS to be filled // 2...number of SS already filled // 3..3+CL_N_SHARED_SS - V_ID that was labeled to generate this SS // // filt_domains: // 0...N_VS - size of domains_mem buffer for the filtering result // // filt_cs: // 0...N_CS - size of filt_cs_size buffer for the filtering __kernel void explore( __global unsigned int *atoms, // read/write to all work-items CL_INTS_MEM int *ints, // read to all work-items __global DOMAIN_ *backtrack1, // to store backtracking history #if CL_USE_N_BUFFERS > 1 __global DOMAIN_* backtrack2, // to store backtracking history #endif #if CL_USE_N_BUFFERS > 2 __global DOMAIN_* backtrack3, // to store backtracking history #endif #if CL_USE_N_BUFFERS > 3 __global DOMAIN_* backtrack4, // to store backtracking history #endif __global int *generic_mem, // multiple purposes buffer #if CL_CS_IGNORE __global char *cs_ignore_, // for cs_ignore flags #endif #if CL_WORK == CL_ONE || CL_WORK == CL_OPT __global DOMAIN_ *domains, // to store the solution #endif CL_VS_MEM VARS *vs, // variables structure common to all work-items CL_CS_MEM cl_constr *cs // constraints structure common to all work-items #if (CUDA_VERSION == 1 && CL_MEM != CL_USE_LOCAL_MEM) || CUDA_VERSION == 0 , CL_MEMORY VARS_PROP *vs_prop, // variable structures per work-item CL_MEMORY unsigned short *vs_id_to_prop // list of variables marked for propagation #endif #if CL_D_TYPE == CL_BITMAP , CL_B_DS_MEM cl_bitmap *b_ds // when using bitmaps, this array contains the original domains of all the CSP variables #endif #if CL_STATS == 1 , __global unsigned long *stats // to count statistics #endif #if CL_N_DEVS > 1 , __global unsigned long *props // 1 per work-item for counting the number of propagations made by each work-item #endif #if CL_N_SHARED_SS > 0 , __global DOMAIN_* shared_ss // stores for work-sharing , __global int* shared_ss_flags // flags for atomic operations between work-items #endif #if CL_FILTERING , __global DOMAIN_ *filt_domains // for the domains of the variables #if CL_CS_IGNORE , __global char *filt_cs // for the flag of ignoring constraints #endif #endif ) { CL_INTS_MEM int *vs_per_c_idx = ints; // each constraint list of constrained variables ids placed per constraint order CL_INTS_MEM int *cs_per_v_idx = vs_per_c_idx + CL_N_VS_CS; // each variable list of constraints ids placed per variable order #if CS_AT_LEAST == 1 || CS_AT_MOST == 1 || CS_AT_MOST_ONE == 1 || CS_EXACTLY == 1 || CS_INT_LIN_EQ == 1 || CS_INT_LIN_LE == 1 || CS_INT_LIN_NE == 1 || CS_INT_LIN_VAR == 1 || CS_ARRAY_INT_ELEMENT == 1 || CS_BOOL_LIN_EQ == 1 || CS_BOOL_LIN_LE == 1 CL_INTS_MEM int *c_consts = cs_per_v_idx + CL_N_CS_VS; // each constraint list of constants placed per constraint order #endif int wi_glob_id = get_global_id(0); // this global work item id int wi_total = get_global_size(0); // total number of work-items #if CL_N_DEVS > 1 unsigned long props_count = 0; // to calculate this device performance for work-distribution between devices #endif int hist_tree_level = 0; // current backtracking level on the tree // ID of the variable that was labeled from one level of the backtracking tree to the next one __global int *hist_labeleds_id = &generic_mem[(CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * 2 * wi_glob_id]; // Number of values on the variable that was labeled from one level of the backtracking tree to the next one __global int *hist_labeleds_n_vals = hist_labeleds_id + ((CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT); // auxiliary memory to use on propagators __global int *terms_mem = &generic_mem[(CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * 2 * wi_total + CL_N_TERMS * wi_glob_id]; // to use when generating each ss __global int *ss_aux_mem = &generic_mem[((CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * 2 * wi_total + CL_N_TERMS * wi_total) + (CL_D_MAX + 1) * wi_glob_id]; #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 // list of ordered labeling heuristics to use __global int *label_hs = &generic_mem[((CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * 2 * wi_total + CL_N_TERMS * wi_total) + (CL_D_MAX + 1) * wi_total]; int last_label_h_idx = 0; // index of labeling heuristic currently used #endif #if CL_CS_IGNORE __global char *cs_ignore = cs_ignore_ + CL_N_CS * wi_glob_id; // flags to signal if this work-item constrains can be ignored in the current ss clear_cs_ignore(cs_ignore); #endif unsigned int prop_v_id; // Next variable to propagate bool prop_ok; // flag for signaling if the last propagations was consistent bool labeled = 1; // flag for signaling if the last attempt for labeling was successful int i, j, k; #if CL_PRE_LABELING == 1 #define PROP_NORMALLY 0 // 0 - propagate normally #define PROP_PRE_LABELING 1 // 1 - propagate revising #define PRE_LABELED_OK 2 // 2 - revising propagated ok #define PRE_LABELED_NOK 3 // 3 - revising propagated not ok int revise = PROP_NORMALLY; #endif // if statistics should be collected #if CL_STATS == 1 __global unsigned long *nodes_fail = stats + wi_glob_id * 7; __global unsigned long *nodes_expl = stats + wi_glob_id * 7 + 1; __global unsigned long *backtracks = stats + wi_glob_id * 7 + 2; __global unsigned long *labels = stats + wi_glob_id * 7 + 3; __global unsigned long *pruning = stats + wi_glob_id * 7 + 4; __global unsigned long *props_ok = stats + wi_glob_id * 7 + 5; __global unsigned long *max_depth = stats + wi_glob_id * 7 + 6; #endif #if CL_MEM == CL_USE_LOCAL_MEM int wi_loc_id = get_local_id(0); // number of this work item in its work-group int wi_local_size = get_local_size(0); // number of work-items per work-group #if CUDA_VERSION == 1 extern __shared__ ushort local_mem[]; int vs_id_to_prop_size = CL_N_VS + 3; // due to shared memory alignment in CUDA if (CL_WORD == 32) { while ((vs_id_to_prop_size * 8) % 32 != 0) { vs_id_to_prop_size++; } } else { // 64 while ((vs_id_to_prop_size * 8) % 64 != 0) { vs_id_to_prop_size++; } } ushort* vs_id_to_prop = local_mem; ushort* vs_prop_begin = &local_mem[wi_local_size * vs_id_to_prop_size]; VARS_PROP* vs_prop = (VARS_PROP*)vs_prop_begin; ushort* vs_id_to_prop_ = &vs_id_to_prop[wi_loc_id * (CL_N_VS + 3)]; VARS_PROP* vs_prop_ = &vs_prop[wi_loc_id * CL_N_VS]; #else // vector with variables ID to propagate for this work-item CL_MEMORY unsigned short *vs_id_to_prop_ = vs_id_to_prop + wi_loc_id * (CL_N_VS + 3); // CSP variables for propagation in local memory for this work-item CL_MEMORY VARS_PROP *vs_prop_ = vs_prop + wi_loc_id * CL_N_VS; #endif #else // vector with variables ID to propagate for this work-item CL_MEMORY unsigned short* vs_id_to_prop_ = vs_id_to_prop + wi_glob_id * (CL_N_VS + 3); // CSP variables for propagation in local memory for this work-item CL_MEMORY VARS_PROP* vs_prop_ = vs_prop + wi_glob_id * CL_N_VS; #endif #if CL_WORK == CL_OPT __global unsigned int *wis_working_opt = atoms + 7 + CL_N_VS; // number of WIs still working for saving the best solution #endif #if CL_N_SHARED_SS > 0 __global unsigned int* wis_working = atoms + 4; // number of WIs still working for work-sharing #endif // backtracking history for this work-item __global DOMAIN_ *hist; #if CL_USE_N_BUFFERS == 1 hist = backtrack1 + wi_glob_id * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; #elif CL_USE_N_BUFFERS == 2 if (wi_glob_id < wi_total / 2) { hist = backtrack1 + wi_glob_id * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else { hist = backtrack2 + (wi_glob_id - (wi_total / 2)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } #elif CL_USE_N_BUFFERS == 3 if (wi_glob_id < wi_total / 3) { hist = backtrack1 + wi_glob_id * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else if (wi_glob_id < wi_total * 2 / 3) { hist = backtrack2 + (wi_glob_id - (wi_total / 3)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else { hist = backtrack3 + (wi_glob_id - (wi_total * 2 / 3)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } #elif CL_USE_N_BUFFERS == 4 if (wi_glob_id < wi_total / 4) { hist = backtrack1 + wi_glob_id * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else if (wi_glob_id < wi_total * 2 / 4) { hist = backtrack2 + (wi_glob_id - (wi_total / 4)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else if (wi_glob_id < wi_total * 3 / 4) { hist = backtrack3 + (wi_glob_id - (wi_total * 2 / 4)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } else { hist = backtrack4 + (wi_glob_id - (wi_total * 3 / 4)) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS; } #endif // domains (bitmap or interval only) of each variable per tree level #if CL_WORK == CL_CNT unsigned int sols_count = 0; __global unsigned int* sols_fnd = atoms + 5 + CL_N_VS + wi_glob_id; // solutions found counter #elif CL_WORK == CL_ONE __global DOMAIN_* solution = domains; // solution domains __global unsigned int* sol_fnd = atoms + 5 + CL_N_VS; // solution found flag #elif CL_WORK == CL_OPT __global DOMAIN_ *solutions = domains; // solutions domains __global unsigned int *sols_fnd = atoms + 5 + CL_N_VS; // solution found flag __global unsigned int *val_to_opt_g = atoms + 6 + CL_N_VS; // global value to optimize unsigned int val_to_opt_found; // local optimization value found on a solution unsigned int val_to_opt_new; // local optimization value found on a solution to store atomically unsigned int val_to_opt_tmp; // to compare when updating global value to optimize bool val_to_opt_upd; // to check if global value to optimize was updated unsigned int prev_tree_idx; // first index of the previous backtracking history bool changed; #endif __global DOMAIN_ *hist_prev; // previous level of the backtracking tree __global DOMAIN_ *hist_curr; // current level of the backtracking tree #if USE_TTL == 1 // when a time to live is used inside the kernel, this is the counter that will stop the kernel when it reaches a predefined value unsigned int ttl_ctr = 0; #endif #if CL_N_SHARED_SS > 0 bool get_shared; // look for a shared search-space bool finished_block = false; // if the block of search spaces is depleted #endif // print the CSP as received by the device #if CL_PRINT_CSP if (wi_glob_id == 0) { print_CSP_device(vs, cs, cs_per_v_idx, vs_per_c_idx #if CS_AT_LEAST == 1 || CS_AT_MOST == 1 || CS_AT_MOST_ONE == 1 || CS_EXACTLY == 1 || CS_INT_LIN_EQ == 1 || CS_INT_LIN_LE == 1 || CS_INT_LIN_NE == 1 || CS_INT_LIN_VAR == 1 || CS_ARRAY_INT_ELEMENT == 1 || CS_BOOL_LIN_EQ == 1 || CS_BOOL_LIN_LE == 1 , c_consts #endif #if CL_D_TYPE == CL_BITMAP , b_ds #endif ); } else { return; } #endif // get a new store from the block of search-spaces get_new_str(atoms, vs, hist, vs_prop_, vs_id_to_prop_, &hist_tree_level, hist_labeleds_id, hist_labeleds_n_vals, &prop_v_id #if CL_D_TYPE == CL_BITMAP , b_ds #endif #if (CS_MAXIMIZE == 1 || CS_MINIMIZE == 1) && CL_WORK == CL_OPT , val_to_opt_g #endif , ss_aux_mem TTL_CTR_E); #if CL_N_SHARED_SS > 0 // if this work-item doesn't find a ss to explore, check shared ss if (prop_v_id == CL_N_VS && wi_total > 1) { get_shared = false; if (!finished_block) { atomic_dec(wis_working); finished_block = true; } if (atomic_add(&shared_ss_flags[0], 0) < CL_N_SHARED_SS && atomic_add(&shared_ss_flags[2], 0) == CL_N_SHARED_SS) { get_shared = true; } else if (finished_block && atomic_add(wis_working, 0) == 0) { get_shared = true; } if (get_shared) { get_shared_store(shared_ss, shared_ss_flags, vs_id_to_prop_, vs_prop_, hist, &hist_tree_level, hist_labeleds_id, hist_labeleds_n_vals, &prop_v_id TTL_CTR_E); } } #endif if (prop_v_id != CL_N_VS) { labeled = true; #if CL_STATS == 1 if (hist_labeleds_n_vals[0] == 0) { (*labels)++; (*nodes_expl)++; } #endif // while there are variables to explore for (j = 0; j < CL_N_VS; j++) { CHECK_TTL_V(ttl_ctr, 12) #if CL_WORK == CL_ONE if (atomic_add(sol_fnd, 0) == 1) { #if CL_N_DEVS > 1 props[wi_glob_id] = props_count; #endif return; } #endif prop_ok = 1; // while there are variables to propagate and propagation succeed for (k = 0; k < CL_N_VS; k++) { CHECK_TTL_V(ttl_ctr, 13) // run the propagators propagate(vs, cs, cs_per_v_idx, vs_per_c_idx, #if CS_AT_LEAST == 1 || CS_AT_MOST == 1 || CS_AT_MOST_ONE == 1 || CS_EXACTLY == 1 || CS_INT_LIN_EQ == 1 || CS_INT_LIN_LE == 1 || CS_INT_LIN_NE == 1 || CS_INT_LIN_VAR == 1 || CS_ARRAY_INT_ELEMENT == 1 || CS_BOOL_LIN_EQ == 1 || CS_BOOL_LIN_LE == 1 c_consts, #endif #if (CS_MAXIMIZE == 1 || CS_MINIMIZE == 1) && CL_WORK == CL_OPT val_to_opt_g, #endif #if CL_STATS == 1 nodes_fail, nodes_expl, pruning, props_ok, #endif vs_prop_, prop_v_id, vs_id_to_prop_ TTL_CTR_E #if CL_N_DEVS > 1 , &props_count #endif CS_IGNORE_CALL, &prop_ok, terms_mem); k = 0; // reset the array of variables to propagate if (!prop_ok) { vs_id_to_prop_[0] = 2; vs_id_to_prop_[1] = 2; prop_v_id = CL_N_VS; k = CL_N_VS; } else { v_get_id_to_prop(vs_id_to_prop_, vs_prop_, &prop_v_id TTL_CTR_E); // reset the array of variables to propagate if (prop_v_id == CL_N_VS) { k = CL_N_VS; } } } j = 0; #if CL_WORK == CL_ONE if (atomic_add(sol_fnd, 0) == 1) { #if CL_N_DEVS > 1 props[wi_glob_id] = props_count; #endif return; } #endif // If propagation ok, label next variable if (prop_ok) { #if CL_FILTERING // no more variables to propagate without labeling, so the filtering is complete for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 15) cl_d_copy_gm(&filt_domains[i], &vs_prop_[i].prop_d TTL_CTR_E); } #if CL_CS_IGNORE // send to host the constraints that may be ignored for (i = 0; i < CL_N_CS; i++) { filt_cs[i] = cs_ignore[i]; } i = CL_N_VS; #endif // to avoid unreachable code warning if (i == CL_N_VS) { return; } #endif #if CL_PRE_LABELING == 1 if (revise == PROP_PRE_LABELING) { // update backtracking history hist_curr = &hist[GET_PREV_TREE_LEVEL_IDX(hist_tree_level)]; for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 14) cl_d_copy_gm(&hist_curr[i], &vs_prop_[i].prop_d TTL_CTR_E); } hist_labeleds_n_vals[hist_tree_level - 1] = V_N_VALS(vs_prop_[hist_labeleds_id[hist_tree_level - 1]]); #if CL_CHECK_ERRORS if (hist_tree_level - 1 < 0) { printf((__constant char *)"\n###error 51\n"); } #endif labeled = false; revise = PRE_LABELED_OK; } else { #endif // get next variable to label ID #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 v_get_id_to_label(vs_prop_, vs, &prop_v_id, &last_label_h_idx, label_hs TTL_CTR_E); #else v_get_id_to_label(vs_prop_, vs, &prop_v_id TTL_CTR_E); #endif // if a new variable to label was found do the labeling and remove the labeled value from the backtracking previous level if (prop_v_id != CL_N_VS) { #if CL_STATS == 1 (*labels)++; (*nodes_expl)++; #endif // save backtracking history state hist_curr = &hist[GET_CURR_TREE_LEVEL_IDX(hist_tree_level)]; hist_labeleds_id[hist_tree_level] = prop_v_id; for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 14) cl_d_copy_gm(&hist_curr[i], &vs_prop_[i].prop_d TTL_CTR_E); } // assign a value to the variable, according to the selected heuristic #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 v_assign(&vs_prop_[prop_v_id], &hist_curr[prop_v_id], &hist_labeleds_n_vals[hist_tree_level], &vs[prop_v_id]); #else v_assign(&vs_prop_[prop_v_id], &hist_curr[prop_v_id], &hist_labeleds_n_vals[hist_tree_level]); #endif #if CL_N_SHARED_SS > 0 // set a new shared search-space if (atomic_add(&shared_ss_flags[1], 0) < CL_N_SHARED_SS && atomic_add(wis_working, 0) > 1 && wi_total > 1) { set_shared_store(shared_ss, shared_ss_flags, vs_prop_, prop_v_id, &hist_curr[prop_v_id], &hist_labeleds_n_vals[hist_tree_level] TTL_CTR_E); } #endif // update current history level hist_tree_level++; #if CL_CHECK_ERRORS if (hist_tree_level > (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT) { printf((__constant char *)"\n###error 55\n"); } #endif #if CL_STATS == 1 if (hist_tree_level > convert_int (*max_depth)) { *max_depth = hist_tree_level; } #endif labeled = true; #if CL_WORK == CL_OPT val_to_opt_tmp = convert_int (atomic_add(val_to_opt_g, 0)); #if CL_OPT_M == CL_DECREASE // if optimal value is better then the last one if (val_to_opt_tmp < V_MAX(vs_prop_[CL_VAR_ID_TO_OPT])) #elif CL_OPT_M == CL_INCREASE if (val_to_opt_tmp > V_MIN(vs_prop_[CL_VAR_ID_TO_OPT])) #endif { #if CL_OPT_M == CL_DECREASE cl_v_del_gt_m(&changed, &vs_prop_[CL_VAR_ID_TO_OPT], val_to_opt_tmp); #elif CL_OPT_M == CL_INCREASE cl_v_del_lt_m(&changed, &vs_prop_[CL_VAR_ID_TO_OPT], val_to_opt_tmp); #endif if (changed) { if (V_N_VALS(vs_prop_[CL_VAR_ID_TO_OPT]) == 0) { labeled = false; prop_ok = 0; } else { v_add_to_prop(vs_id_to_prop_, vs_prop_, CL_VAR_ID_TO_OPT); } } } #endif // if there are no more variables to label, a solution was found } else { #if CL_WORK == CL_ONE if (atomic_cmpxchg(sol_fnd, 0, 1) == 0) { for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 15) cl_d_copy_gm(&solution[i], &vs_prop_[i].prop_d TTL_CTR_E); } } #if CL_STATS == 1 (*nodes_expl)++; #endif #if CL_N_DEVS > 1 props[wi_glob_id] = props_count; #endif #if PRINT_SOLS for (i = 0; i < CL_N_VS - 1; i++) { CHECK_TTL_V(ttl_ctr, 184) printf((__constant char *)"%u,", V_MIN(vs_prop_[i])); } printf((__constant char *)"%u\n", V_MIN(vs_prop_[i])); #endif #if CL_VERIFY_SOLS for (i = 0; i < CL_N_VS; i++) { int n_vals; cl_d_cnt_vals_m(&vs_prop_[i].prop_d, &n_vals); if (V_N_VALS(vs_prop_[i]) != 1 || V_MIN(vs_prop_[i]) != V_MAX(vs_prop_[i]) || n_vals != 1) { printf((__constant char *)"ERROR: variable %d have more than one value in a solution (N_VALS=%d, min=%u, max=%u, n_vals=%d)\n", i, V_N_VALS(vs_prop_[i]), V_MIN(vs_prop_[i]), V_MAX(vs_prop_[i]), n_vals); return; } } #endif return; #elif CL_WORK == CL_CNT sols_count++; labeled = false; #if PRINT_SOLS for (i = 0; i < CL_N_VS - 1; i++) { CHECK_TTL_V(ttl_ctr, 185) printf((__constant char *)"%u,", V_MIN(vs_prop_[i])); } printf((__constant char *)"%u\n", V_MIN(vs_prop_[i])); #endif #if CL_VERIFY_SOLS for (i = 0; i < CL_N_VS; i++) { int n_vals; cl_d_cnt_vals_m(&vs_prop_[i].prop_d, &n_vals); if (V_N_VALS(vs_prop_[i]) != 1 || V_MIN(vs_prop_[i]) != V_MAX(vs_prop_[i]) || n_vals != 1) { printf((__constant char *)"ERROR: variable %d have more than one value in a solution (N_VALS=%d, min=%u, max=%u, n_vals=%d)\n", i, V_N_VALS(vs_prop_[i]), V_MIN(vs_prop_[i]), V_MAX(vs_prop_[i]), n_vals); return; } } #endif #if CL_STATS == 1 (*nodes_expl)++; #endif #else // if optimizing #if PRINT_SOLS #if CL_OPT_M == CL_DECREASE printf((__constant char *)"%u->", V_MAX(vs_prop_[CL_VAR_ID_TO_OPT])); #elif CL_OPT_M == CL_INCREASE printf((__constant char *)"%u->", V_MIN(vs_prop_[CL_VAR_ID_TO_OPT])); #endif for (i = 0; i < CL_N_VS - 1; i++) { CHECK_TTL_V(ttl_ctr, 186) printf((__constant char *)"%u,", V_MIN(vs_prop_[i])); } printf((__constant char *)"%u\n", V_MIN(vs_prop_[i])); #endif #if CL_VERIFY_SOLS for (i = 0; i < CL_N_VS; i++) { int n_vals; cl_d_cnt_vals_m(&vs_prop_[i].prop_d, &n_vals); if (V_N_VALS(vs_prop_[i]) != 1 || V_MIN(vs_prop_[i]) != V_MAX(vs_prop_[i]) || n_vals != 1) { printf((__constant char *)"ERROR: variable %d have more than one value in a solution (N_VALS=%d, min=%u, max=%u, n_vals=%d)\n", i, V_N_VALS(vs_prop_[i]), V_MIN(vs_prop_[i]), V_MAX(vs_prop_[i]), n_vals); return; } } #endif val_to_opt_found = V_MIN(vs_prop_[CL_VAR_ID_TO_OPT]); #if CL_OPT_M == CL_DECREASE val_to_opt_new = val_to_opt_found - 1; #elif CL_OPT_M == CL_INCREASE val_to_opt_new = val_to_opt_found + 1; #endif val_to_opt_upd = 0; // while new optimal value not updated, keep trying while (!val_to_opt_upd) { CHECK_TTL_V(ttl_ctr, 16) val_to_opt_tmp = convert_int (atomic_add(val_to_opt_g, 0)); #if CL_OPT_M == CL_DECREASE // if optimal value is better then the last one if (val_to_opt_found <= val_to_opt_tmp) #elif CL_OPT_M == CL_INCREASE if (val_to_opt_found >= val_to_opt_tmp) #endif { // update new optimal value and writes the solution if (atomic_cmpxchg(val_to_opt_g, val_to_opt_tmp, val_to_opt_new) == val_to_opt_tmp) { for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 17) cl_d_copy_gm(&solutions[CL_N_VS * (val_to_opt_found) + i], &vs_prop_[i].prop_d TTL_CTR_E); } val_to_opt_upd = 1; } atomic_xchg(sols_fnd, 1); } else { val_to_opt_upd = 1; } } labeled = false; #endif } #if CL_PRE_LABELING == 1 } #endif } // If propagation not ok, or ok but was a solution, and all or the best solution must be found, do backtracking if (!prop_ok || !labeled) { #if CL_FILTERING // the filtering resulted in an inconsistent CSP cl_d_clear_g(filt_domains); // to avoid unreachable code warning bool empty_aux; cl_d_is_empty_g(&empty_aux, filt_domains); if (empty_aux) { return; } #endif #if CL_PRE_LABELING == 1 if (revise == PROP_PRE_LABELING) { prop_v_id = hist_labeleds_id[hist_tree_level - 1]; cl_d_clear_g(&hist[GET_PREV_TREE_LEVEL_IDX(hist_tree_level) + prop_v_id] TTL_CTR_E); #if CL_CHECK_ERRORS if (hist_tree_level - 1 < 0) { printf((__constant char *)"\n###error 56\n"); } #endif #if CL_CHECK_ERRORS #if CL_WORK == CL_CNT if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 57\n"); } #elif CL_WORK == CL_ONE if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > CL_N_VS + (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 58\n"); } #else if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > CL_N_VS * (CL_D_MAX + 1) + (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 59\n"); } #endif #endif hist_labeleds_n_vals[hist_tree_level - 1] = 0; revise = PRE_LABELED_NOK; } #endif #if CL_WORK == CL_OPT bool empty; // get a labeled variable from the backtracking levels with values yet to be assigned for (k = 0; k < CL_N_VS; k++) { CHECK_TTL_V(ttl_ctr, 18) if (hist_tree_level > 0) { if (hist_labeleds_n_vals[hist_tree_level - 1] == 0) { hist_tree_level--; } else { prev_tree_idx = GET_PREV_TREE_LEVEL_IDX(hist_tree_level); #if CL_CHECK_ERRORS if (prev_tree_idx + CL_VAR_ID_TO_OPT > CL_N_VS * (CL_D_MAX + 1) + (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 60\n"); } #endif // update the domain of the variable to optimize upd_opt_var_hist_g(&hist[prev_tree_idx + CL_VAR_ID_TO_OPT], val_to_opt_g TTL_CTR_E); cl_d_is_empty_g(&empty, &hist[prev_tree_idx + CL_VAR_ID_TO_OPT] TTL_CTR_E); if (empty) { hist_tree_level--; } else { k = CL_N_VS; } } } else { k = CL_N_VS; } } #else // get a labeled variable from the backtracking levels with values yet to be assigned for (k = 0; k < CL_N_VS; k++) { CHECK_TTL_V(ttl_ctr, 19) if (hist_tree_level > 0 && hist_labeleds_n_vals[hist_tree_level - 1] == 0) { hist_tree_level--; } else { k = CL_N_VS; } } #endif // If not all the sub-tree was explored if (hist_tree_level > 0) { hist_prev = &hist[GET_PREV_TREE_LEVEL_IDX(hist_tree_level)]; #if CL_CHECK_ERRORS #if CL_WORK == CL_CNT if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 61\n"); } #elif CL_WORK == CL_ONE if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > CL_N_VS + (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 62\n"); } #else if (GET_PREV_TREE_LEVEL_IDX(hist_tree_level) > CL_N_VS * (CL_D_MAX + 1) + (wi_glob_id + 1) * (CL_N_VS_TO_LABEL + 2) * CL_SPLIT_VALUES_EXT * CL_N_VS) { printf((__constant char *)"\n###error 63\n"); } #endif #endif // update current variables data for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 20) cl_d_copy_mg(&vs_prop_[i].prop_d, &hist_prev[i] TTL_CTR_E); vs_prop_[i].to_prop = 0; cl_v_calc_min_val_m(&vs_prop_[i] TTL_CTR_E); cl_v_calc_max_val_m(&vs_prop_[i] TTL_CTR_E); cl_v_cnt_vals_m(&vs_prop_[i] TTL_CTR_E); } prop_v_id = hist_labeleds_id[hist_tree_level - 1]; #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 for (i = last_label_h_idx; i >= 0; i--) { if (vs[prop_v_id].label_h == label_hs[i]) { last_label_h_idx = i; break; } } #endif #if CL_CHECK_ERRORS if (hist_tree_level - 1 < 0) { printf((__constant char *)"\n###error 64\n"); } #if CL_WORK == CL_OPT if (V_IS_EMPTY(vs_prop_[CL_VAR_ID_TO_OPT])) { printf((__constant char *)"\n###error 69\n"); } #endif #endif #if CL_WORK == CL_OPT val_to_opt_tmp = convert_int (atomic_add(val_to_opt_g, 0)); #if CL_OPT_M == CL_DECREASE // if optimal value is better then the last one if (val_to_opt_tmp < V_MAX(vs_prop_[CL_VAR_ID_TO_OPT])) #elif CL_OPT_M == CL_INCREASE if (val_to_opt_tmp > V_MIN(vs_prop_[CL_VAR_ID_TO_OPT])) #endif { v_add_to_prop(vs_id_to_prop_, vs_prop_, CL_VAR_ID_TO_OPT); } #endif #if CL_PRE_LABELING == 1 if (revise == PRE_LABELED_OK || revise == PRE_LABELED_NOK || hist_labeleds_n_vals[hist_tree_level - 1] == 1) { revise = PROP_NORMALLY; #endif // assign a value to the variable according to the selected heuristic #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 v_assign(&vs_prop_[prop_v_id], &hist_prev[prop_v_id], &hist_labeleds_n_vals[hist_tree_level - 1], &vs[prop_v_id]); #else v_assign(&vs_prop_[prop_v_id], &hist_prev[prop_v_id], &hist_labeleds_n_vals[hist_tree_level - 1]); #endif #if CL_STATS == 1 (*backtracks)++; #endif #if CL_PRE_LABELING == 1 } else { revise = PROP_PRE_LABELING; } #endif #if CL_STATS == 1 if (V_N_VALS(vs_prop_[prop_v_id]) > 0) { (*labels)++; (*nodes_expl)++; } #endif labeled = true; #if CL_CS_IGNORE clear_cs_ignore(cs_ignore); #endif #if CL_CHECK_ERRORS int l; for (l = 0; l < CL_N_VS; l++) { bool empty; #if CL_N_WORDS == 1 empty = (vs_prop_[l].prop_d == 0); #else int m; empty = 1; for (m = 0; m < CL_N_WORDS; m++) { CHECK_TTL(ttl_ctr, 115) if (vs_prop_[l].prop_d[m] != 0) { empty = 0; m = CL_N_WORDS; } } #endif if (empty || vs_prop_[l].n_vals == 0 || vs_prop_[l].n_vals > vs_prop_[l].max + 1 || vs_prop_[l].min > vs_prop_[l].max || vs_prop_[l].max > CL_D_MAX) { printf((__constant char *)"\n###error 72\n"); } } #endif } else { labeled = false; } } // try to get a new search space to explore if (!labeled) { #if CL_CS_IGNORE clear_cs_ignore(cs_ignore); #endif #if CL_FZN_SEQ && CL_FZN_SEQ_N_LABELS > 1 last_label_h_idx = 0; #endif get_new_str(atoms, vs, hist, vs_prop_, vs_id_to_prop_, &hist_tree_level, hist_labeleds_id, hist_labeleds_n_vals, &prop_v_id #if CL_D_TYPE == CL_BITMAP , b_ds #endif #if (CS_MAXIMIZE == 1 || CS_MINIMIZE == 1) && CL_WORK == CL_OPT , val_to_opt_g #endif , ss_aux_mem TTL_CTR_E); #if CL_N_SHARED_SS > 0 // if this work-item doesn't find a ss to explore, check shared ss if (atomic_add(&shared_ss_flags[0], 0) < CL_N_SHARED_SS && atomic_add(&shared_ss_flags[2], 0) == CL_N_SHARED_SS) { get_shared = false; if (!finished_block) { atomic_dec(wis_working); finished_block = true; } if (atomic_add(&shared_ss_flags[0], 0) <= atomic_add(&shared_ss_flags[2], 0)) { get_shared = true; } else if (finished_block && atomic_add(wis_working, 0) == 0) { get_shared = true; } if (get_shared) { get_shared_store(shared_ss, shared_ss_flags, vs_id_to_prop_, vs_prop_, hist, &hist_tree_level, hist_labeleds_id, hist_labeleds_n_vals, &prop_v_id TTL_CTR_E); } } #endif } if (prop_v_id == CL_N_VS) { labeled = false; j = CL_N_VS; } else { labeled = true; #if CL_CHECK_ERRORS int l; for (l = 0; l < CL_N_VS; l++) { bool empty; #if CL_N_WORDS == 1 empty = (vs_prop_[l].prop_d == 0); #else int m; empty = 1; for (m = 0; m < CL_N_WORDS; m++) { CHECK_TTL(ttl_ctr, 115) if (vs_prop_[l].prop_d[m] != 0) { empty = 0; m = CL_N_WORDS; } } #endif if (empty || vs_prop_[l].n_vals > vs_prop_[l].max + 1 || vs_prop_[l].min > vs_prop_[l].max || vs_prop_[l].max > CL_D_MAX) { printf((__constant char *)"\n###error 73\n"); } } #endif } #if CL_STATS == 1 if (hist_labeleds_n_vals[0] == 0) { (*labels)++; (*nodes_expl)++; } #endif } } #if CL_WORK == CL_CNT (*sols_fnd) = sols_count; #endif #if CL_N_DEVS > 1 // to calculate this device performance for work-dsitribution between devices props[wi_glob_id] = props_count; #endif // if optimization and this is the last work-item running, update the optimal cost to the buffer #if CL_WORK == CL_OPT if (atomic_dec(wis_working_opt) == 1 && atomic_add(sols_fnd, 0) >= 1) { #if CL_OPT_M == CL_DECREASE val_to_opt_found = atomic_add(val_to_opt_g, 0) + 1; #elif CL_OPT_M == CL_INCREASE val_to_opt_found = atomic_add(val_to_opt_g, 0) - 1; #endif for (i = 0; i < CL_N_VS; i++) { CHECK_TTL_V(ttl_ctr, 250) cl_d_copy_g(&solutions[i], &solutions[CL_N_VS * (val_to_opt_found) + i] TTL_CTR_E); } } #endif }