The N-Queens problem involves placing N chess queens on an N×N chessboard without any queen threatening another. It is typically solved using backtracking algorithms. Multi-threading is employed to enhance efficiency, especially for larger N, by parallelizing the exploration of the solution space. Multiple threads simultaneously explore different branches, leveraging the independence of potential solutions. This parallelization accelerates the algorithm, making it more scalable and capable of finding solutions faster by utilizing multiple processor cores or threads effectively.
When I was looking for simple solution to efficient and scalable N-Queens algorithm, I couldn’t really find any code, that scales well and or is efficient enough, until I stumbled on level based solution for solving N-Queens problem.
This solution divides N-Queens problem into levels. A level is a usually a vector with column number as index with solution row, where queen is placed, as a value of this index.
I present you the code, that solves N-Queens problem with speedup of up to 30 times.
#include <time.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <omp.h>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <algorithm>
int valid(unsigned int col, std::vector<unsigned int> row) {
for (unsigned int i = 0; i < col; i++) {
if (std::abs(static_cast<int>(col - i)) == std::abs(static_cast<int>(row[col] - row[i])) || row[col] == row[i])
return 0;
}
return 1;
}
void nqueens_by_level(std::vector<unsigned int> pos, unsigned int start_level,
unsigned int max_level,
void (* const success_func)(std::vector<unsigned int>&)) {
int curr_col = start_level;
int start_level_int = start_level;
int max_level_int = max_level;
while (curr_col > start_level_int || curr_col == start_level_int){
while(!valid(curr_col, pos) && pos[curr_col] < pos.size()){
// If current position not valid, try next row.
pos[curr_col] ++;
}
if(pos[curr_col] < pos.size()){
if (curr_col == max_level_int-1){
// If valid solution found call success_func.
success_func(pos);
pos[curr_col] ++;
}
else pos[++curr_col] = 0;
}
else {
// Backtracking
curr_col--;
if (curr_col == -1){}
else pos[curr_col] ++;
}
}
}
This is N-Queens problem solution algorithm, that solves problem using level based algorithm. This algorithm is able to be scalable, efficient and highly parallelized. Below is the full code of multi-threaded solution implemented with OpenMP. This program can be called by ./openmp [queens_number] [thread_number]
#include <time.h> // for clock_gettime()
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <omp.h>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <algorithm>
struct SolutionStore {
// store solutions in a static member variable
static std::vector<std::vector<unsigned int>>& solutions() {
static std::vector<std::vector<unsigned int>> sols;
return sols;
}
static void add_solution(const std::vector<unsigned int>& sol) {
// add solution to static member
solutions().push_back(sol);
}
static std::size_t get_solution_count() {
return solutions().size();
}
static void clear_solutions() {
solutions().clear();
}
};
int valid(unsigned int col, std::vector<unsigned int> row) {
for (unsigned int i = 0; i < col; i++) {
if (std::abs(static_cast<int>(col - i)) == std::abs(static_cast<int>(row[col] - row[i])) || row[col] == row[i])
return 0;
}
return 1;
}
void nqueens_by_level(std::vector<unsigned int> pos, unsigned int start_level,
unsigned int max_level,
void (* const success_func)(std::vector<unsigned int>&)) {
int curr_col = start_level;
int start_level_int = start_level;
int max_level_int = max_level;
while (curr_col > start_level_int || curr_col == start_level_int){
while( !valid(curr_col, pos) && pos[curr_col] < pos.size()){
// If current position not valid, try next row.
pos[curr_col] ++;
}
if( pos[curr_col] < pos.size()){
if ( curr_col == max_level_int-1){
// If valid solution found call success_func.
success_func(pos);
pos[curr_col] ++;
}
else pos[++curr_col] = 0;
}
else {
// Backtracking
curr_col--;
if (curr_col == -1){}
else pos[curr_col] ++;
}
}
}
void add_solution_callback(std::vector<unsigned int>& solution) {
#pragma omp critical
SolutionStore::add_solution(solution);
}
std::vector<std::vector<unsigned int>> nqueens_openmp(std::vector<unsigned int> pos, unsigned int n, unsigned int k) {
nqueens_by_level(pos, 0, k, &add_solution_callback);
std::vector<std::vector<unsigned int>> allsolutions = SolutionStore::solutions();
SolutionStore::clear_solutions();
return allsolutions;
}
// timing function that works for both Linux and MAC OS
void my_gettime(struct timespec *ts) {
#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time
clock_serv_t cclock;
mach_timespec_t mts;
host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock);
clock_get_time(cclock, &mts);
mach_port_deallocate(mach_task_self(), cclock);
ts->tv_sec = mts.tv_sec;
ts->tv_nsec = mts.tv_nsec;
#else
clock_gettime(CLOCK_MONOTONIC, ts);
#endif
}
int getStartingLevel(unsigned int n, unsigned int T)
{
int startingLevel = 1;
int tempN = n;
if (n > 12)
{
return n/2;
}
while (tempN < (T*n)) {
tempN *= tempN;
startingLevel++;
}
return startingLevel;
}
int main(int argc, char *argv[]) {
// parse mandatory parameters
int n = atoi(argv[1]);
int T = atoi(argv[2]);
std::cerr << "N: " << n << std::endl;
std::cerr << "Threads: " << T << std::endl;
std::cerr << "Notice! Maximum threads supported by system: " << omp_get_max_threads() << std::endl;
std::vector<unsigned int> pos(n, 0);
std::vector<unsigned int> results;
// Set the number of threads to be used in subsequent parallel regions
omp_set_num_threads(T);
// Starting level
int k = getStartingLevel(n, T);
std::cerr << "Master max_level: " << k << std::endl;
struct timespec t_start, t_end;
my_gettime(&t_start);
// Get first levels using master thread
std::vector<std::vector<unsigned int>> result = nqueens_openmp(pos, n, k);
std::cerr << "Vector size after master get: " << result.size() << std::endl;
// Divides work to threads
#pragma omp parallel for
for (auto it = result.begin(); it != result.end(); ++it) {
nqueens_by_level(*it, k, n, &add_solution_callback);
}
// end timer
my_gettime(&t_end);
// time in seconds
double time_secs = (t_end.tv_sec - t_start.tv_sec)
+ (double) (t_end.tv_nsec - t_start.tv_nsec) * 1e-9;
int numOfSolutions = SolutionStore::get_solution_count();
// print output
std::cerr << "Number of solutions found: " << numOfSolutions << std::endl;
fprintf(stderr, "Run-time of the program: %8.0lf milli-seconds\n", time_secs*1000.0);
return 0;
}