Closest Pair algorithm implementation in C++

Posted on

Problem

I had been working on the implementation of closest pair algorithm in a 2-D plane.

My approach has been that of divide and conquer O(nlogn) :

#include <algorithm>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <cmath>
using namespace std;

int getval(int size)
{
  if (size < 15)
    return size;
  else
    return 15;
}

struct point
{
  int x, y;
};

struct point_pair
{
  point a, b;
};

bool compare_x(point &a, point &b)
{
  if (a.x < b.x)
    return true;
  else
    return false;
}
bool compare_y(point &a, point &b)
{
  if (a.y < b.y)
    return true;
  else
    return false;
}

//calculates distance between a pair of points
double calc_distance(point_pair pair)
{
  int k1 = abs(pair.a.x - pair.b.x);
  int k2 = abs(pair.a.y - pair.b.y);
  size_t k3 = k1 * k1;
  size_t k4 = k2 * k2;
  size_t k5 = k3 + k4;
  return sqrt(k5);
}

point_pair minimal_distance_naive(vector<point> points)
{
  point_pair min;
  min.a = points[0];
  min.b = points[1];

  for (int i = 0; i < points.size(); i++)
  {
    point_pair temp;
    temp.a = points[i];

    for (int j = i + 1; j < points.size(); j++)
    {
      temp.b = points[j];

      if (calc_distance(temp) < calc_distance(min))
      {
        min = temp;
      }
    }
  }
  return min;
}

point_pair minimal_distance_rec(vector<point> points_x, vector<point> points_y)
{

  if (points_x.size() <= 3)
  {
    return minimal_distance_naive(points_x);
  }

  else
  {
    int mid = points_x.size() / 2;
    vector<point> left_x, left_y;
    vector<point> right_x, right_y;

    for (int i = 0; i < mid; i++)
    {
      left_x.push_back(points_x[i]);
    }
    for (int i = mid; i < points_x.size(); i++)
    {
      right_x.push_back(points_x[i]);
    }

    int middle = points_x[mid - 1].x;

    for (int i = 0; i < points_y.size(); i++)
    {
      if (points_y[i].x <= middle)
      {
        left_y.push_back(points_y[i]);
      }
      else
      {
        right_y.push_back(points_y[i]);
      }
    }

    point_pair left = minimal_distance_rec(left_x, left_y);
    point_pair right = minimal_distance_rec(right_x, right_y);

    double d_left = calc_distance(left);
    double d_right = calc_distance(right);
    point_pair min;
    double min_distance;
    if (d_left <= d_right)
    {
      min_distance = d_left;
      min = left;
    }
    else
    {
      min_distance = d_right;
      min = right;
    }
    vector<point> middle_set;
    for (int i = 0; i < points_y.size(); i++)
    {
      if (abs(points_y[i].x - middle) <= min_distance)
      {
        middle_set.push_back(points_y[i]);
      }
    }

    if (middle_set.size() < 2)
    {
      return min;
    }

    point_pair init;
    init.a = middle_set[0];
    init.b = middle_set[1];
    double k = 0, m = calc_distance(init);
    point_pair tmp, min_tmp = init;

    for (int i = 0; i < middle_set.size(); i++)
    {
      tmp.a = middle_set[i];
      for (int j = i + 1; j <= getval(middle_set.size() - 1); j++)
      {
        tmp.b = middle_set[j];
        k = calc_distance(tmp);
        if (k < m)
        {
          m = k;
          min_tmp = tmp;
        }
      }
    }
    if (min_distance < m)
      return min;
    else
      return min_tmp;
  }
}

double minimal_distance(vector<int> x, vector<int> y)
{
  vector<point> points(x.size());
  for (int i = 0; i < x.size(); i++)
  {
    points[i].x = x[i];
    points[i].y = y[i];
  }

  sort(points.begin(), points.end(), compare_x);
  vector<point> points_x = points;

  sort(points.begin(), points.end(), compare_y);
  vector<point> points_y = points;

  point_pair p = minimal_distance_rec(points_x, points_y);
  return calc_distance(p);
}

int main()
{
  size_t n;
  std::cin >> n;
  vector<int> x(n);
  vector<int> y(n);
  for (size_t i = 0; i < n; i++)
  {
    cin >> x[i] >> y[i];
  }
  cout << fixed;
  cout << setprecision(9) << minimal_distance(x, y) << "n";
}

Input: n number of points, followed by n lines of coordinates, x followed by y

Output: k minimum distance

The code works fine for inputs in small range, like: -100 <= x, y <= 100.
How can I make it work for large inputs, say in the range: -1000000000 <= x,y <= 1000000000 ?

Solution

Your code can be improved in regard to its efficiency and its readability. But first of all, don’t write that using namespace std.

Efficiency

You create a lot of useless vectors in your code. To avoid this, the first and most easy step is to use reference as arguments to your functions, instead of values

// arguments by value make a copy of what is passed to the function
point_pair minimal_distance_rec(vector<point> points_x, vector<point> points_y)

// whereas arguments by reference don't
point_pair minimal_distance_rec(vector<point>& points_x, vector<point>& points_y)

Note that you should declare those references const if you don’t modify what they refer to inside your function.

The second step is more difficult. Your algorithm doesn’t require you to create two or more vectors from the vector it’s given. It is enough to know the position that divides your vector into two equal sides to handle the left and the right sides independently. So use iterators, which are the canonical C++ way to denote positions in a vector or any sort of range. It will also ease the use of <algorithm>, and make your program more readable in the end.

NB: there’s also a small efficiency loss in your calc_distance function: you can do with the square distance most of the time, which avoid time-consuming floating-point calculations.

Readability

Your code is cluttered with unnecessary types, functions and instructions.

Let’s take for instance compare_x:

bool compare_x(point &a, point &b)
{
  if (a.x < b.x)
    return true;
  else
    return false;
}

First, notice that it is equivalent to

bool compare_x(point &a, point &b)
{
  return a.x < b.x
}

And second, that compare_x(a, b) isn’t shorter than a.x < b.x.

By using the standard library, you can also avoid creating function or write them in a more concise manner:

int getval(int size)
{
  if (size < 15)
    return size;
  else
    return 15;
}

is equivalent to std::min(size, 15), for instance.

In the same way, your point_pair doesn’t bring anything a std::pair<point, point> doesn’t bring (it actually brings way less). I’d agree that point should be kept, because .x and .y are more conventional and readable than .first and .second with regards to a point.

But you’d have to make other more significant efforts your code more readable. Using iterators, as I said, is one of them. Even in simple loops as inside your minimal_distance_naive, it already is useful as it avoids signed/unsigned comparisons (between an int i and a points.size() for instance), and comes with named functions such as std::distance, std::next, std::advance.

But it will shine when you can build on them to use standard algorithms. It brings us to the main shortcoming of your code. Your minimal_distance_rec is unreadable because it’s very long, and its recursive structure isn’t apparent.

The divide-and-conquer algorithm for finding the closest pair is yet simple:

  1. find the closest pair on the left side

  2. find the closest pair on the right side

  3. find the closest pair with one point on the left, one point on the right

Step 3 can be optimized by considering, for each point on the left side, only the points of the right side that are in the rectangle of size d, d*2 (where d is the minimal distance already found) centered vertically around the left point and beginning horizontally on its right side.

Computing that rectangle’s coordinates and finding whether a point is inside it deserve their own functions. It might be trivial by itself, but it doesn’t mean it’s easy to understand the code doing it at first glance. And it will shorten your main function a lot.

Then filtering the points on the right side, and choosing the closest to the left point to update the minimal known distance are well-known algorithms, so well-known they’re standardized and shouldn’t be rewritten: std::partition and std::min_element. You have to customize their behavior through lambda functions though.

Here’s a complete rewrite of your code according to my review:

#include <algorithm>
#include <iostream>
#include <vector>
#include <cmath>

struct point {
    int x, y;
};

using pair = std::pair<point, point>;

unsigned square_distance(const point& lhs, const point& rhs) {
    unsigned adj = std::abs(lhs.x - rhs.x);
    unsigned opp = std::abs(lhs.y - rhs.y);
    return adj*adj + opp*opp;
}

using Iterator = std::vector<point>::iterator;
unsigned minimal_distance_naive(Iterator first, Iterator last) {
    pair closest{*first, *std::next(first)};  
    for (auto out = first; out != last; ++out) {
        pair temp;
        temp.first = *out;
        for (auto in = std::next(out); in != last; ++in) {
            temp.second = *in;
            closest = std::min(closest, temp, [](const auto& lhs, const auto& rhs) {
                return square_distance(lhs.first, lhs.second) < square_distance(rhs.first, rhs.second);
            });
        }
    }
    return square_distance(closest.first, closest.second);
}

bool is_inside_rectangle(const point& input, const point& up_left, const point& bottom_right) {
    return      up_left.x <= input.x && input.x <= bottom_right.x
            &&  up_left.y >= input.y && input.y >= bottom_right.y;
}

pair candidates_rectangle(const point& p, unsigned square_dist) {
    double dist = std::sqrt(square_dist);
    pair res{ { p.x,                           static_cast<int>(p.y + dist) },
              { static_cast<int>(p.x + dist),  static_cast<int>(p.y - dist) }};
    return res;
}


unsigned minimal_distance_rec(Iterator first, Iterator last, const std::size_t threshold = 3) {
    // assume points are sorted by x
    if (std::distance(first, last) <= threshold) return minimal_distance_naive(first, last);
    auto pivot = std::next(first, std::distance(first, last) / 2);
    auto min_left = minimal_distance_rec(first, pivot);
    auto min_right = minimal_distance_rec(pivot, last);

    auto temp_min = std::min(min_left, min_right);
    // define the band inside which disctance can be less than temp_min
    auto not_too_left = std::partition(first, pivot, [&](const auto& p) {
        return p.x < (pivot->x - static_cast<int>(std::sqrt(temp_min)));
    });
    auto not_too_right = std::partition(pivot, last, [&](const auto& p) {
        return p.x <= (pivot-> x + static_cast<int>(std::sqrt(temp_min)));
    });

    // and look for the closest pair inside
    std::for_each(not_too_left, pivot, [&](const auto& lp) {
        auto [up_left, bottom_right] = candidates_rectangle(lp, std::sqrt(temp_min));
        auto outside = std::partition(pivot, not_too_right, [=](const auto& rp) {
            return !is_inside_rectangle(rp, up_left, bottom_right);
        });
        auto middle_closest = std::min_element(pivot, outside, [=](const auto& lhs, const auto& rhs) {
            return square_distance(lp, lhs) < square_distance(lp, rhs);
        });
        temp_min = std::min(temp_min, square_distance(lp, *middle_closest));
    });
    return temp_min;
}

double minimal_distance(std::vector<point>& points) {
    std::sort(points.begin(), points.end(), [](const auto& lhs, const auto& rhs) {
        return lhs.x < rhs.x;
    });
    return std::sqrt(minimal_distance_rec(points.begin(), points.end()));
}

int main() {

    std::size_t n;
    std::cin >> n;

    std::vector<point> points;
    for (std::size_t i = 0u; i < n; i++) {
        int a, b;
        std::cin >> a >> b;
        points.push_back({a, b});
    }
    std::cout << minimal_distance(points);
}

Leave a Reply

Your email address will not be published. Required fields are marked *