The N-Queens problem involves placing N chess queens on an N×N chessboard without any queen threatening another. It is typically solved using backtracking algorithms. Multi-threading is employed to enhance efficiency, especially for larger N, by parallelizing the exploration of the solution space. Multiple threads simultaneously explore different branches, leveraging the independence of potential solutions. This parallelization accelerates the algorithm, making it more scalable and capable of finding solutions faster by utilizing multiple processor cores or threads effectively.

When I was looking for simple solution to efficient and scalable N-Queens algorithm, I couldn’t really find any code, that scales well and or is efficient enough, until I stumbled on level based solution for solving N-Queens problem.

This solution divides N-Queens problem into levels. A level is a usually a vector with column number as index with solution row, where queen is placed, as a value of this index.

I present you the code, that solves N-Queens problem with speedup of up to 30 times.

#include <time.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <omp.h>

#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <algorithm>

int valid(unsigned int col, std::vector<unsigned int> row) {
  for (unsigned int i = 0; i < col; i++) {
    if (std::abs(static_cast<int>(col - i)) == std::abs(static_cast<int>(row[col] - row[i])) || row[col] == row[i])
      return 0;
  }
  return 1;
}

void nqueens_by_level(std::vector<unsigned int> pos, unsigned int start_level,
                      unsigned int max_level,
                      void (* const success_func)(std::vector<unsigned int>&)) {


  int curr_col = start_level;
  int start_level_int = start_level;
  int max_level_int = max_level;
  while (curr_col > start_level_int || curr_col == start_level_int){

    while(!valid(curr_col, pos) && pos[curr_col] < pos.size()){
      // If current position not valid, try next row.
      pos[curr_col] ++;
    }
    if(pos[curr_col] < pos.size()){
      if (curr_col == max_level_int-1){
	      // If valid solution found call success_func.
        success_func(pos);
        pos[curr_col] ++;
      }
      else pos[++curr_col] = 0;
    }
    else {
      // Backtracking
      curr_col--;
      if (curr_col == -1){}
      else pos[curr_col] ++;
    }
  }

}

This is N-Queens problem solution algorithm, that solves problem using level based algorithm. This algorithm is able to be scalable, efficient and highly parallelized. Below is the full code of multi-threaded solution implemented with OpenMP. This program can be called by ./openmp [queens_number] [thread_number]

#include <time.h> // for clock_gettime()
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <omp.h>

#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <algorithm>

struct SolutionStore {
    // store solutions in a static member variable
    static std::vector<std::vector<unsigned int>>& solutions() {
        static std::vector<std::vector<unsigned int>> sols;
        return sols;
    }

    static void add_solution(const std::vector<unsigned int>& sol) {
        // add solution to static member
        solutions().push_back(sol);
    }

    static std::size_t get_solution_count() {
        return solutions().size();
    }

    static void clear_solutions() {
        solutions().clear();
    }
};

int valid(unsigned int col, std::vector<unsigned int> row) {
  for (unsigned int i = 0; i < col; i++) {
    if (std::abs(static_cast<int>(col - i)) == std::abs(static_cast<int>(row[col] - row[i])) || row[col] == row[i])
      return 0;
  }
  return 1;
}

void nqueens_by_level(std::vector<unsigned int> pos, unsigned int start_level,
                      unsigned int max_level,
                      void (* const success_func)(std::vector<unsigned int>&)) {


  int curr_col = start_level;
  int start_level_int = start_level;
  int max_level_int = max_level;
  while (curr_col > start_level_int || curr_col == start_level_int){

    while( !valid(curr_col, pos) && pos[curr_col] < pos.size()){
      // If current position not valid, try next row.
      pos[curr_col] ++;
    }
    if( pos[curr_col] < pos.size()){
      if ( curr_col == max_level_int-1){
	      // If valid solution found call success_func.
        success_func(pos);
        pos[curr_col] ++;
      }
      else pos[++curr_col] = 0;
    }
    else {
      // Backtracking
      curr_col--;
      if (curr_col == -1){}
      else pos[curr_col] ++;
    }
  }

}

void add_solution_callback(std::vector<unsigned int>& solution) {
    #pragma omp critical
    SolutionStore::add_solution(solution);
}

std::vector<std::vector<unsigned int>> nqueens_openmp(std::vector<unsigned int> pos, unsigned int n, unsigned int k) {
    nqueens_by_level(pos, 0, k, &add_solution_callback);
    std::vector<std::vector<unsigned int>> allsolutions = SolutionStore::solutions();
    SolutionStore::clear_solutions();
    return allsolutions;
}

// timing function that works for both Linux and MAC OS
void my_gettime(struct timespec *ts) {
#ifdef __MACH__ // OS X does not have clock_gettime, use clock_get_time
  clock_serv_t cclock;
  mach_timespec_t mts;
  host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock);
  clock_get_time(cclock, &mts);
  mach_port_deallocate(mach_task_self(), cclock);
  ts->tv_sec = mts.tv_sec;
  ts->tv_nsec = mts.tv_nsec;
#else
  clock_gettime(CLOCK_MONOTONIC, ts);
#endif
}

int getStartingLevel(unsigned int n, unsigned int T)
{
  int startingLevel = 1;
  int tempN = n;

  if (n > 12)
  {
    return n/2;
  }

  while (tempN < (T*n)) {
    tempN *= tempN;
    startingLevel++;
  }

  return startingLevel;
}

int main(int argc, char *argv[]) {
        // parse mandatory parameters
        int n = atoi(argv[1]);
        int T = atoi(argv[2]);
        std::cerr << "N: " << n << std::endl;
        std::cerr << "Threads: " << T << std::endl;

        std::cerr << "Notice! Maximum threads supported by system: " << omp_get_max_threads() << std::endl;

        std::vector<unsigned int> pos(n, 0);
        std::vector<unsigned int> results;
        
        // Set the number of threads to be used in subsequent parallel regions
        omp_set_num_threads(T);
        // Starting level
        int k = getStartingLevel(n, T);
        std::cerr << "Master max_level: " << k << std::endl;

        struct timespec t_start, t_end;
        my_gettime(&t_start);

        // Get first levels using master thread
        std::vector<std::vector<unsigned int>> result = nqueens_openmp(pos, n, k);

        std::cerr << "Vector size after master get: " << result.size() << std::endl;

        // Divides work to threads
        #pragma omp parallel for  
        for (auto it = result.begin(); it != result.end(); ++it) {
            nqueens_by_level(*it, k, n, &add_solution_callback);
        }

        // end timer
        my_gettime(&t_end);

        // time in seconds
        double time_secs = (t_end.tv_sec - t_start.tv_sec)
                         + (double) (t_end.tv_nsec - t_start.tv_nsec) * 1e-9;

        int numOfSolutions = SolutionStore::get_solution_count();
        // print output
            std::cerr << "Number of solutions found: " << numOfSolutions << std::endl;
            fprintf(stderr, "Run-time of the program: %8.0lf milli-seconds\n", time_secs*1000.0);
    return 0;
}

By Owner

Leave a Reply

Your email address will not be published. Required fields are marked *