Skip to content

Commit f7c5b60

Browse files
authored
Add Inverted Index (TheAlgorithms#262)
1 parent 5533161 commit f7c5b60

File tree

3 files changed

+120
-1
lines changed

3 files changed

+120
-1
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
using System.Collections.Generic;
2+
using FluentAssertions;
3+
using NUnit.Framework;
4+
5+
namespace DataStructures.Tests
6+
{
7+
public class InvertedIndexTests
8+
{
9+
[Test]
10+
public void Or_GetSourcesWithAtLeastOneFromList_ReturnAllSources()
11+
{
12+
var index = new InvertedIndex();
13+
var source1 = "one star is sparkling bright";
14+
var source2 = "two stars are sparkling even brighter";
15+
index.AddToIndex(nameof(source1), source1);
16+
index.AddToIndex(nameof(source2), source2);
17+
18+
var or = index.Or(new List<string> { "star", "sparkling" });
19+
20+
or.Should().BeEquivalentTo(nameof(source1), nameof(source2));
21+
}
22+
23+
[Test]
24+
public void And_GetSourcesWithAllInsideList_ReturnFirstSource()
25+
{
26+
var index = new InvertedIndex();
27+
var source1 = "one star is sparkling bright";
28+
var source2 = "two stars are sparkling even brighter";
29+
index.AddToIndex(nameof(source1), source1);
30+
index.AddToIndex(nameof(source2), source2);
31+
32+
var and = index.And(new List<string> { "star", "sparkling" });
33+
34+
and.Should().BeEquivalentTo(nameof(source1));
35+
}
36+
}
37+
}

DataStructures/InvertedIndex.cs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
4+
namespace DataStructures
5+
{
6+
/// <summary>
7+
/// Inverted index is the simplest form of document indexing,
8+
/// allowing performing boolean queries on text data.
9+
///
10+
/// This realization is just simplified for better understanding the process of indexing
11+
/// and working on straightforward string inputs.
12+
/// </summary>
13+
public class InvertedIndex
14+
{
15+
private readonly Dictionary<string, List<string>> invertedIndex = new();
16+
17+
/// <summary>
18+
/// Build inverted index with source name and source content.
19+
/// </summary>
20+
/// <param name="sourceName">Name of the source.</param>
21+
/// <param name="sourceContent">Content of the source.</param>
22+
public void AddToIndex(string sourceName, string sourceContent)
23+
{
24+
var context = sourceContent.Split(' ').Distinct();
25+
foreach (var word in context)
26+
{
27+
if (!invertedIndex.ContainsKey(word))
28+
{
29+
invertedIndex.Add(word, new List<string> { sourceName });
30+
}
31+
else
32+
{
33+
invertedIndex[word].Add(sourceName);
34+
}
35+
}
36+
}
37+
38+
/// <summary>
39+
/// Returns the source names contains ALL terms inside at same time.
40+
/// </summary>
41+
/// <param name="terms">List of terms.</param>
42+
/// <returns>Source names.</returns>
43+
public IEnumerable<string> And(IEnumerable<string> terms)
44+
{
45+
var entries = terms
46+
.Select(term => invertedIndex
47+
.Where(x => x.Key.Equals(term))
48+
.SelectMany(x => x.Value))
49+
.ToList();
50+
51+
var intersection = entries
52+
.Skip(1)
53+
.Aggregate(new HashSet<string>(entries.First()), (hashSet, enumerable) =>
54+
{
55+
hashSet.IntersectWith(enumerable);
56+
return hashSet;
57+
});
58+
59+
return intersection;
60+
}
61+
62+
/// <summary>
63+
/// Returns the source names contains AT LEAST ONE from terms inside.
64+
/// </summary>
65+
/// <param name="terms">List of terms.</param>
66+
/// <returns>Source names.</returns>
67+
public IEnumerable<string> Or(IEnumerable<string> terms)
68+
{
69+
var sources = new List<string>();
70+
foreach (var term in terms)
71+
{
72+
var source = invertedIndex
73+
.Where(x => x.Key.Equals(term))
74+
.SelectMany(x => x.Value);
75+
76+
sources.AddRange(source);
77+
}
78+
79+
return sources.Distinct();
80+
}
81+
}
82+
}

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ This repository contains algorithms and data structures implemented in C# for ed
150150
* [Directed Weighted Graph Via Adjacency Matrix](./DataStructures/Graph/DirectedWeightedGraph.cs)
151151
* [Disjoint Set](./DataStructures/DisjointSet)
152152
* [SortedList](./DataStructures/SortedList.cs)
153+
* [Inverted index](./DataStructures/InvertedIndex.cs)
153154
* [Unrolled linked list](./DataStructures/UnrolledList/UnrolledLinkedList.cs)
154155

155-
156156
## Contributing
157157

158158
You can contribute with pleasure to this repository.

0 commit comments

Comments
 (0)