# Closest Pair algorithm implementation in C++

Posted on

Problem

I had been working on the implementation of closest pair algorithm in a 2-D plane.

My approach has been that of divide and conquer O(nlogn) :

``````#include <algorithm>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <cmath>
using namespace std;

int getval(int size)
{
if (size < 15)
return size;
else
return 15;
}

struct point
{
int x, y;
};

struct point_pair
{
point a, b;
};

bool compare_x(point &a, point &b)
{
if (a.x < b.x)
return true;
else
return false;
}
bool compare_y(point &a, point &b)
{
if (a.y < b.y)
return true;
else
return false;
}

//calculates distance between a pair of points
double calc_distance(point_pair pair)
{
int k1 = abs(pair.a.x - pair.b.x);
int k2 = abs(pair.a.y - pair.b.y);
size_t k3 = k1 * k1;
size_t k4 = k2 * k2;
size_t k5 = k3 + k4;
return sqrt(k5);
}

point_pair minimal_distance_naive(vector<point> points)
{
point_pair min;
min.a = points[0];
min.b = points[1];

for (int i = 0; i < points.size(); i++)
{
point_pair temp;
temp.a = points[i];

for (int j = i + 1; j < points.size(); j++)
{
temp.b = points[j];

if (calc_distance(temp) < calc_distance(min))
{
min = temp;
}
}
}
return min;
}

point_pair minimal_distance_rec(vector<point> points_x, vector<point> points_y)
{

if (points_x.size() <= 3)
{
return minimal_distance_naive(points_x);
}

else
{
int mid = points_x.size() / 2;
vector<point> left_x, left_y;
vector<point> right_x, right_y;

for (int i = 0; i < mid; i++)
{
left_x.push_back(points_x[i]);
}
for (int i = mid; i < points_x.size(); i++)
{
right_x.push_back(points_x[i]);
}

int middle = points_x[mid - 1].x;

for (int i = 0; i < points_y.size(); i++)
{
if (points_y[i].x <= middle)
{
left_y.push_back(points_y[i]);
}
else
{
right_y.push_back(points_y[i]);
}
}

point_pair left = minimal_distance_rec(left_x, left_y);
point_pair right = minimal_distance_rec(right_x, right_y);

double d_left = calc_distance(left);
double d_right = calc_distance(right);
point_pair min;
double min_distance;
if (d_left <= d_right)
{
min_distance = d_left;
min = left;
}
else
{
min_distance = d_right;
min = right;
}
vector<point> middle_set;
for (int i = 0; i < points_y.size(); i++)
{
if (abs(points_y[i].x - middle) <= min_distance)
{
middle_set.push_back(points_y[i]);
}
}

if (middle_set.size() < 2)
{
return min;
}

point_pair init;
init.a = middle_set[0];
init.b = middle_set[1];
double k = 0, m = calc_distance(init);
point_pair tmp, min_tmp = init;

for (int i = 0; i < middle_set.size(); i++)
{
tmp.a = middle_set[i];
for (int j = i + 1; j <= getval(middle_set.size() - 1); j++)
{
tmp.b = middle_set[j];
k = calc_distance(tmp);
if (k < m)
{
m = k;
min_tmp = tmp;
}
}
}
if (min_distance < m)
return min;
else
return min_tmp;
}
}

double minimal_distance(vector<int> x, vector<int> y)
{
vector<point> points(x.size());
for (int i = 0; i < x.size(); i++)
{
points[i].x = x[i];
points[i].y = y[i];
}

sort(points.begin(), points.end(), compare_x);
vector<point> points_x = points;

sort(points.begin(), points.end(), compare_y);
vector<point> points_y = points;

point_pair p = minimal_distance_rec(points_x, points_y);
return calc_distance(p);
}

int main()
{
size_t n;
std::cin >> n;
vector<int> x(n);
vector<int> y(n);
for (size_t i = 0; i < n; i++)
{
cin >> x[i] >> y[i];
}
cout << fixed;
cout << setprecision(9) << minimal_distance(x, y) << "n";
}
``````

Input: n number of points, followed by n lines of coordinates, x followed by y

Output: k minimum distance

The code works fine for inputs in small range, like: `-100 <= x, y <= 100`.
How can I make it work for large inputs, say in the range: `-1000000000 <= x,y <= 1000000000` ?

Solution

Your code can be improved in regard to its efficiency and its readability. But first of all, don’t write that `using namespace std`.

## Efficiency

You create a lot of useless vectors in your code. To avoid this, the first and most easy step is to use reference as arguments to your functions, instead of values

``````// arguments by value make a copy of what is passed to the function
point_pair minimal_distance_rec(vector<point> points_x, vector<point> points_y)

// whereas arguments by reference don't
point_pair minimal_distance_rec(vector<point>& points_x, vector<point>& points_y)
``````

Note that you should declare those references `const` if you don’t modify what they refer to inside your function.

The second step is more difficult. Your algorithm doesn’t require you to create two or more vectors from the vector it’s given. It is enough to know the position that divides your vector into two equal sides to handle the left and the right sides independently. So use `iterators`, which are the canonical C++ way to denote positions in a vector or any sort of range. It will also ease the use of `<algorithm>`, and make your program more readable in the end.

NB: there’s also a small efficiency loss in your `calc_distance` function: you can do with the square distance most of the time, which avoid time-consuming floating-point calculations.

Your code is cluttered with unnecessary types, functions and instructions.

Let’s take for instance `compare_x`:

``````bool compare_x(point &a, point &b)
{
if (a.x < b.x)
return true;
else
return false;
}
``````

First, notice that it is equivalent to

``````bool compare_x(point &a, point &b)
{
return a.x < b.x
}
``````

And second, that `compare_x(a, b)` isn’t shorter than `a.x < b.x`.

By using the standard library, you can also avoid creating function or write them in a more concise manner:

``````int getval(int size)
{
if (size < 15)
return size;
else
return 15;
}
``````

is equivalent to `std::min(size, 15)`, for instance.

In the same way, your `point_pair` doesn’t bring anything a `std::pair<point, point>` doesn’t bring (it actually brings way less). I’d agree that point should be kept, because `.x` and `.y` are more conventional and readable than `.first` and `.second` with regards to a point.

But you’d have to make other more significant efforts your code more readable. Using iterators, as I said, is one of them. Even in simple loops as inside your `minimal_distance_naive`, it already is useful as it avoids signed/unsigned comparisons (between an `int i` and a `points.size()` for instance), and comes with named functions such as `std::distance`, `std::next`, `std::advance`.

But it will shine when you can build on them to use standard algorithms. It brings us to the main shortcoming of your code. Your `minimal_distance_rec` is unreadable because it’s very long, and its recursive structure isn’t apparent.

The divide-and-conquer algorithm for finding the closest pair is yet simple:

1. find the closest pair on the left side

2. find the closest pair on the right side

3. find the closest pair with one point on the left, one point on the right

Step 3 can be optimized by considering, for each point on the left side, only the points of the right side that are in the rectangle of size `d, d*2` (where `d` is the minimal distance already found) centered vertically around the left point and beginning horizontally on its right side.

Computing that rectangle’s coordinates and finding whether a point is inside it deserve their own functions. It might be trivial by itself, but it doesn’t mean it’s easy to understand the code doing it at first glance. And it will shorten your main function a lot.

Then filtering the points on the right side, and choosing the closest to the left point to update the minimal known distance are well-known algorithms, so well-known they’re standardized and shouldn’t be rewritten: `std::partition` and `std::min_element`. You have to customize their behavior through lambda functions though.

Here’s a complete rewrite of your code according to my review:

``````#include <algorithm>
#include <iostream>
#include <vector>
#include <cmath>

struct point {
int x, y;
};

using pair = std::pair<point, point>;

unsigned square_distance(const point& lhs, const point& rhs) {
unsigned adj = std::abs(lhs.x - rhs.x);
unsigned opp = std::abs(lhs.y - rhs.y);
}

using Iterator = std::vector<point>::iterator;
unsigned minimal_distance_naive(Iterator first, Iterator last) {
pair closest{*first, *std::next(first)};
for (auto out = first; out != last; ++out) {
pair temp;
temp.first = *out;
for (auto in = std::next(out); in != last; ++in) {
temp.second = *in;
closest = std::min(closest, temp, [](const auto& lhs, const auto& rhs) {
return square_distance(lhs.first, lhs.second) < square_distance(rhs.first, rhs.second);
});
}
}
return square_distance(closest.first, closest.second);
}

bool is_inside_rectangle(const point& input, const point& up_left, const point& bottom_right) {
return      up_left.x <= input.x && input.x <= bottom_right.x
&&  up_left.y >= input.y && input.y >= bottom_right.y;
}

pair candidates_rectangle(const point& p, unsigned square_dist) {
double dist = std::sqrt(square_dist);
pair res{ { p.x,                           static_cast<int>(p.y + dist) },
{ static_cast<int>(p.x + dist),  static_cast<int>(p.y - dist) }};
return res;
}

unsigned minimal_distance_rec(Iterator first, Iterator last, const std::size_t threshold = 3) {
// assume points are sorted by x
if (std::distance(first, last) <= threshold) return minimal_distance_naive(first, last);
auto pivot = std::next(first, std::distance(first, last) / 2);
auto min_left = minimal_distance_rec(first, pivot);
auto min_right = minimal_distance_rec(pivot, last);

auto temp_min = std::min(min_left, min_right);
// define the band inside which disctance can be less than temp_min
auto not_too_left = std::partition(first, pivot, [&](const auto& p) {
return p.x < (pivot->x - static_cast<int>(std::sqrt(temp_min)));
});
auto not_too_right = std::partition(pivot, last, [&](const auto& p) {
return p.x <= (pivot-> x + static_cast<int>(std::sqrt(temp_min)));
});

// and look for the closest pair inside
std::for_each(not_too_left, pivot, [&](const auto& lp) {
auto [up_left, bottom_right] = candidates_rectangle(lp, std::sqrt(temp_min));
auto outside = std::partition(pivot, not_too_right, [=](const auto& rp) {
return !is_inside_rectangle(rp, up_left, bottom_right);
});
auto middle_closest = std::min_element(pivot, outside, [=](const auto& lhs, const auto& rhs) {
return square_distance(lp, lhs) < square_distance(lp, rhs);
});
temp_min = std::min(temp_min, square_distance(lp, *middle_closest));
});
return temp_min;
}

double minimal_distance(std::vector<point>& points) {
std::sort(points.begin(), points.end(), [](const auto& lhs, const auto& rhs) {
return lhs.x < rhs.x;
});
return std::sqrt(minimal_distance_rec(points.begin(), points.end()));
}

int main() {

std::size_t n;
std::cin >> n;

std::vector<point> points;
for (std::size_t i = 0u; i < n; i++) {
int a, b;
std::cin >> a >> b;
points.push_back({a, b});
}
std::cout << minimal_distance(points);
}
``````