Skip to content

refactor: improve code and test coverage for MapReduce example #6348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 23 additions & 24 deletions src/main/java/com/thealgorithms/misc/MapReduce.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,34 @@
import java.util.function.Function;
import java.util.stream.Collectors;

/*
* MapReduce is a programming model for processing and generating large data sets with a parallel,
distributed algorithm on a cluster.
* It has two main steps: the Map step, where the data is divided into smaller chunks and processed in parallel,
and the Reduce step, where the results from the Map step are combined to produce the final output.
* Wikipedia link: https://en.wikipedia.org/wiki/MapReduce
*/

/**
* MapReduce is a programming model for processing and generating large data sets
* using a parallel, distributed algorithm on a cluster.
* It consists of two main phases:
* - Map: the input data is split into smaller chunks and processed in parallel.
* - Reduce: the results from the Map phase are aggregated to produce the final output.
*
* See also: https://en.wikipedia.org/wiki/MapReduce
*/
public final class MapReduce {

// Private constructor: the class only exposes static methods and is not meant to be instantiated.
private MapReduce() {
}
/*
* Counts the frequency of every word in a sentence.
* Words are obtained by splitting the sentence on single space characters.
*/
public static String mapreduce(String sentence) {
List<String> wordList = Arrays.stream(sentence.split(" ")).toList();

// Map step: group identical words and count them; LinkedHashMap keeps first-seen order
Map<String, Long> wordCounts = wordList.stream().collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting()));

// Reduce step: append every entry as "word: count," to a single buffer
StringBuilder result = new StringBuilder();
wordCounts.forEach((word, count) -> result.append(word).append(": ").append(count).append(","));
/**
* Counts the frequency of each word in a given sentence using a simple MapReduce-style approach.
*
* The sentence is trimmed and then split on runs of whitespace, so leading, trailing and
* repeated whitespace does not produce empty words. Words appear in the result in the order
* of their first occurrence.
*
* NOTE(review): an empty or blank sentence appears to yield ": 1", because splitting an empty
* string produces a single empty token; confirm whether an empty result is intended instead.
* A null sentence throws a NullPointerException at the trim() call.
*
* @param sentence the input sentence; must not be null
* @return a string representing word frequencies in the format "word: count,word: count,..."
*/
public static String countWordFrequencies(String sentence) {
// Map phase: split the trimmed sentence into words on runs of whitespace
List<String> words = Arrays.asList(sentence.trim().split("\\s+"));

// Remove the trailing ',' if present
if (!result.isEmpty()) {
result.setLength(result.length() - 1);
}
// Group and count occurrences of each word; LinkedHashMap maintains insertion order
Map<String, Long> wordCounts = words.stream().collect(Collectors.groupingBy(Function.identity(), LinkedHashMap::new, Collectors.counting()));

return result.toString();
// Reduce phase: format each entry as "word: count" and join the entries with ','
return wordCounts.entrySet().stream().map(entry -> entry.getKey() + ": " + entry.getValue()).collect(Collectors.joining(","));
}
}
22 changes: 7 additions & 15 deletions src/test/java/com/thealgorithms/misc/MapReduceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

/**
* Tests for the word-frequency counting provided by MapReduce.
*/
public class MapReduceTest {
// A sentence consisting of one word is reported with a count of 1.
@Test
public void testMapReduceWithSingleWordSentence() {
String oneWordSentence = "Hactober";
String result = MapReduce.mapreduce(oneWordSentence);

assertEquals("Hactober: 1", result);
}

// Repeated words are counted together and reported in order of first occurrence.
@Test
public void testMapReduceWithMultipleWordSentence() {
String multipleWordSentence = "I Love Love HactoberFest";
String result = MapReduce.mapreduce(multipleWordSentence);

assertEquals("I: 1,Love: 2,HactoberFest: 1", result);
// Each CSV row is an (input sentence, expected output) pair; the last row covers
// a sentence with leading and trailing whitespace.
@ParameterizedTest
@CsvSource({"'hello world', 'hello: 1,world: 1'", "'one one two', 'one: 2,two: 1'", "'a a a a', 'a: 4'", "' spaced out ', 'spaced: 1,out: 1'"})
void testCountWordFrequencies(String input, String expected) {
String result = MapReduce.countWordFrequencies(input);
assertEquals(expected, result);
}
}
Loading