-
Notifications
You must be signed in to change notification settings - Fork 20k
Randomized closest 2 points algorithm #6251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
82fb1f5
f1391bd
63ddb36
21fed7d
b4c88a7
d7949d6
a7c2163
852c2e7
f854cf7
b18bce9
d1f7ec5
f49710a
94c5ad4
b5c62ab
e89e6c4
c93383b
0e32962
cac9706
e05cfdd
c386ec9
bb66f17
92ff833
20d0d31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package com.thealgorithms.randomized; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Random; | ||
import java.util.Set; | ||
|
||
class Point { | ||
double x; | ||
double y; | ||
|
||
Point(double x, double y) { | ||
this.x = x; | ||
this.y = y; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "(" + x + ", " + y + ")"; | ||
} | ||
} | ||
|
||
public final class ClosestPair { | ||
private static final double INFINITY = Double.MAX_VALUE; | ||
|
||
private ClosestPair() { | ||
} | ||
|
||
public static double euclideanDistance(Point p1, Point p2) { | ||
return Math.sqrt(Math.pow(p1.x - p2.x, 2) + Math.pow(p1.y - p2.y, 2)); | ||
} | ||
|
||
/** | ||
* Algorithm Proof https://www.cs.toronto.edu/~anikolov/CSC473W20/kt-rabin.pdf | ||
* Additional information: https://en.wikipedia.org/wiki/Closest_pair_of_points_problem | ||
* This class uses Rabin's randomized approach to find the closest pair of points. | ||
* Rabin's approach randomly selects a sample of points to estimate an initial closest distance | ||
* (delta), then uses a grid for "probabilistic refinement". Finally, it updates the closest pair | ||
* with the closest distance. | ||
*/ | ||
|
||
public static Object[] rabinRandomizedClosestPair(List<Point> points) { | ||
// Error handling, must have at least 2 points | ||
if (points == null || points.size() < 2) { | ||
return new Object[] {null, null, INFINITY}; | ||
} | ||
|
||
Collections.shuffle(points, new Random()); // shuffle for required randomness | ||
|
||
double delta = INFINITY; // initialize distance | ||
Point closestA = null; | ||
Point closestB = null; | ||
|
||
// without exceeding number of points, work with some sample | ||
int sampleSize = Math.min(7, points.size()); | ||
|
||
Random random = new Random(); // select randomly | ||
Set<Point> sampleSet = new HashSet<>(); // ensure unique pairs | ||
while (sampleSet.size() < sampleSize) { | ||
sampleSet.add(points.get(random.nextInt(points.size()))); | ||
} | ||
List<Point> sample = new ArrayList<>(sampleSet); | ||
|
||
// initially the closest points are found via brute force | ||
for (int i = 0; i < sample.size(); i++) { | ||
for (int j = i + 1; j < sample.size(); j++) { | ||
double dist = euclideanDistance(sample.get(i), sample.get(j)); | ||
if (dist < delta) { | ||
closestA = sample.get(i); | ||
closestB = sample.get(j); | ||
delta = dist; // update distance | ||
} | ||
} | ||
} | ||
|
||
// Confirm neither closestA nor closestB are null | ||
if (closestA == null || closestB == null) { | ||
return new Object[] {points.get(0), points.get(1), euclideanDistance(points.get(0), points.get(1))}; | ||
} | ||
|
||
// Create a grid, We will use "Probabilistic Filtering" by only checking | ||
// neighboring grids to prevent bruteforce checking outside initialization | ||
Map<String, Point> grid = new HashMap<>(); | ||
|
||
// coordinates computed based on delta, estimated closest distance | ||
for (Point p : points) { | ||
int gridX = (int) (p.x / delta); | ||
int gridY = (int) (p.y / delta); | ||
String key = gridX + "," + gridY; // string for indexing | ||
|
||
// check neighboring cells | ||
for (int dX = -1; dX <= 1; dX++) { | ||
for (int dY = -1; dY <= 1; dY++) { | ||
String neighborKey = (gridX + dX) + "," + (gridY + dY); | ||
Point neighborValue = grid.get(neighborKey); | ||
|
||
// update points only if valid neighbor | ||
if (neighborValue != null && p != neighborValue) { | ||
double dist = euclideanDistance(p, neighborValue); | ||
if (dist < delta) { | ||
closestA = p; | ||
closestB = neighborValue; | ||
delta = dist; | ||
} | ||
} | ||
} | ||
} | ||
grid.put(key, p); | ||
} | ||
return new Object[] {closestA, closestB, delta}; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package com.thealgorithms.randomized; | ||
bri-harris marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertNotNull; | ||
import static org.junit.jupiter.api.Assertions.assertNull; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Random; | ||
import org.junit.jupiter.api.Test; | ||
|
||
class ClosestPairTest { | ||
|
||
@Test | ||
void testStandardCaseClosestPair() { | ||
List<Point> points = Arrays.asList(new Point(1, 4), new Point(2, 8), new Point(0, 1), new Point(4, 5), new Point(9, 4)); | ||
Object[] closestPair = ClosestPair.rabinRandomizedClosestPair(points); | ||
assertTrue((double) closestPair[2] > 0, "Distance must be positive"); | ||
} | ||
|
||
@Test | ||
void testTwoDistinctPoints() { | ||
List<Point> points = Arrays.asList(new Point(1, 2), new Point(2, 3)); | ||
Object[] closestPair = ClosestPair.rabinRandomizedClosestPair(points); | ||
|
||
assertNotNull(closestPair[0]); | ||
assertNotNull(closestPair[1]); | ||
|
||
assertTrue((closestPair[0].equals(points.get(0)) && closestPair[1].equals(points.get(1))) || (closestPair[1].equals(points.get(0)) && closestPair[0].equals(points.get(1)))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure this test is correct? Because the .equals method is not overwritten in the Point class There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you please refine and improve the test a bit? The likely cause of the NullPointer warning is that the analysis tool doesn't recognize assertNotNull() as a safe null check—assigning the value to a variable before asserting might help. Thats probably why infer is failing |
||
assertEquals(closestPair[2], ClosestPair.euclideanDistance(points.get(0), points.get(1))); | ||
} | ||
|
||
@Test | ||
void testIdenticalPointsPairWithDistanceZero() { | ||
List<Point> points = Arrays.asList(new Point(1.0, 2.0), new Point(1.0, 2.0), new Point(1.0, 1.0)); | ||
Object[] closestPair = ClosestPair.rabinRandomizedClosestPair(points); | ||
assertEquals(0, (double) closestPair[2], "Distance is zero"); | ||
} | ||
|
||
@Test | ||
void testLargeDatasetRandomPoints() { | ||
List<Point> points = new ArrayList<>(); | ||
Random random = new Random(); | ||
for (int i = 0; i < 1000; i++) { | ||
points.add(new Point(random.nextDouble() * 100, random.nextDouble() * 100)); | ||
} | ||
Object[] closestPair = ClosestPair.rabinRandomizedClosestPair(points); | ||
assertNotNull(closestPair[0]); | ||
assertNotNull(closestPair[1]); | ||
assertTrue((double) closestPair[2] > 0, "Distance must be positive"); | ||
} | ||
|
||
@Test | ||
void testSinglePointShouldReturnNoPair() { | ||
List<Point> points = Arrays.asList(new Point(5.0, 5.0)); | ||
Object[] closestPair = ClosestPair.rabinRandomizedClosestPair(points); | ||
assertNull(closestPair[0]); | ||
assertNull(closestPair[1]); | ||
} | ||
} | ||
bri-harris marked this conversation as resolved.
Show resolved
Hide resolved
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think currently, only one point is stored per grid cell, overwriting any existing points. This can cause missed closest pairs when multiple points fall into the same cell. It is recommended to store a list of points per cell to ensure correctness...