Skip to content

Commit 521d36f

Browse files
committed
updated some limitations with regard to concurrency and rate limiting
1 parent 0010c13 commit 521d36f

File tree

3 files changed

+156
-4
lines changed

3 files changed

+156
-4
lines changed

Abot.Tests.Unit/Core/DomainRateLimiterTest.cs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,5 +155,123 @@ public void AddDomain_ParamGreaterThanDefault_UsesParam()
155155

156156
Assert.IsTrue(timer.ElapsedMilliseconds > 190);
157157
}
158+
159+
/// <summary>
/// Registers the same domain twice through AddDomain (50 ms, then 150 ms) and checks that
/// two rate limited requests take at least 50 ms but less than 150 ms, i.e. the first
/// registration is the one in effect.
/// </summary>
[Test]
public void AddDomain_AddDuplicateDomain_FirstAddWins()
{
    //Arrange
    var domainRateLimiter = new DomainRateLimiter(5);
    var domain = new Uri("http://a.com");

    domainRateLimiter.AddDomain(domain, 50);
    domainRateLimiter.AddDomain(domain, 150);//This should be ignored

    //Act
    var timer = System.Diagnostics.Stopwatch.StartNew();
    domainRateLimiter.RateLimit(domain);
    domainRateLimiter.RateLimit(domain);
    timer.Stop();

    //Assert
    //Read the elapsed time once so the value checked and the value reported are the same
    long elapsedMillisecs = timer.ElapsedMilliseconds;
    Assert.IsTrue(
        elapsedMillisecs >= 50 && elapsedMillisecs < 150,
        $"Expected it to take at least 50 but less than 150 millisecs but took {elapsedMillisecs}");
}
175+
176+
177+
/// <summary>
/// AddOrUpdateDomain is expected to throw ArgumentNullException when given a null uri.
/// </summary>
[Test]
[ExpectedException(typeof(ArgumentNullException))]
public void AddOrUpdateDomain_NullUri()
{
    var unitUnderTest = new DomainRateLimiter(1000);
    Uri missingUri = null;

    unitUnderTest.AddOrUpdateDomain(missingUri, 100);
}
183+
184+
/// <summary>
/// AddOrUpdateDomain is expected to throw ArgumentException for a crawl delay of zero.
/// </summary>
[Test]
[ExpectedException(typeof(ArgumentException))]
public void AddOrUpdateDomain_ZeroCrawlDelay()
{
    var unitUnderTest = new DomainRateLimiter(1000);
    var domain = new Uri("http://a.com");

    unitUnderTest.AddOrUpdateDomain(domain, 0);
}
190+
191+
/// <summary>
/// AddOrUpdateDomain is expected to throw ArgumentException for a negative crawl delay.
/// </summary>
[Test]
[ExpectedException(typeof(ArgumentException))]
public void AddOrUpdateDomain_NegativeCrawlDelay()
{
    var unitUnderTest = new DomainRateLimiter(1000);
    var domain = new Uri("http://a.com");

    unitUnderTest.AddOrUpdateDomain(domain, -1);
}
197+
198+
/// <summary>
/// Passes a crawl delay (5 ms) smaller than the limiter's default (100 ms) to AddOrUpdateDomain
/// and expects three rate limited requests to the same domain to take more than 190 ms.
/// </summary>
[Test]
public void AddOrUpdateDomain_ParamLessThanDefault_UsesDefault()
{
    var root = new Uri("http://a.com/");
    var firstPage = new Uri("http://a.com/a.html");
    var secondPage = new Uri("http://a.com/b.html");

    //The measured span also covers creating the limiter and registering the domain
    var stopwatch = Stopwatch.StartNew();
    var limiter = new DomainRateLimiter(100);
    limiter.AddOrUpdateDomain(root, 5);

    foreach (var requestUri in new[] { root, firstPage, secondPage })
    {
        limiter.RateLimit(requestUri);
    }

    stopwatch.Stop();

    Assert.IsTrue(stopwatch.ElapsedMilliseconds > 190);
}
217+
218+
/// <summary>
/// Passes a crawl delay (100 ms) larger than the limiter's default (5 ms) to AddOrUpdateDomain
/// and expects three rate limited requests to the same domain to take more than 190 ms.
/// </summary>
[Test]
public void AddOrUpdateDomain_ParamGreaterThanDefault_UsesParam()
{
    var root = new Uri("http://a.com/");
    var firstPage = new Uri("http://a.com/a.html");
    var secondPage = new Uri("http://a.com/b.html");

    //The measured span also covers creating the limiter and registering the domain
    var stopwatch = Stopwatch.StartNew();
    var limiter = new DomainRateLimiter(5);
    limiter.AddOrUpdateDomain(root, 100);

    foreach (var requestUri in new[] { root, firstPage, secondPage })
    {
        limiter.RateLimit(requestUri);
    }

    stopwatch.Stop();

    Assert.IsTrue(stopwatch.ElapsedMilliseconds > 190);
}
237+
238+
/// <summary>
/// Registers the same domain twice through AddOrUpdateDomain (50 ms, then 150 ms) and checks
/// that two rate limited requests take at least 150 ms, i.e. the second registration is in effect.
/// </summary>
[Test]
public void AddOrUpdateDomain_AddDuplicateDomain_LastUpdateWins()
{
    //Arrange
    var target = new Uri("http://a.com");
    var limiter = new DomainRateLimiter(5);
    limiter.AddOrUpdateDomain(target, 50);
    limiter.AddOrUpdateDomain(target, 150);//This should override the previous

    //Act
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    limiter.RateLimit(target);
    limiter.RateLimit(target);
    stopwatch.Stop();

    //Assert
    bool tookLongEnough = stopwatch.ElapsedMilliseconds >= 150;
    Assert.IsTrue(tookLongEnough, $"Expected it to take more than 150 millisecs but only took {stopwatch.ElapsedMilliseconds}");
}
254+
255+
256+
/// <summary>
/// Registers a domain with a 1000 ms crawl delay, removes it, and checks that two
/// subsequent rate limited requests to that domain complete in less than 25 ms.
/// </summary>
[Test]
public void RemoveDomain_NoLongerRateLimitsThatDomain()
{
    //Arrange
    var domainRateLimiter = new DomainRateLimiter(5);
    var domain = new Uri("http://a.com");

    domainRateLimiter.AddDomain(domain, 1000);

    //Act
    domainRateLimiter.RemoveDomain(domain);

    //Assert
    var timer = System.Diagnostics.Stopwatch.StartNew();
    domainRateLimiter.RateLimit(domain);
    domainRateLimiter.RateLimit(domain);
    timer.Stop();

    //Read the elapsed time once so the value checked and the value reported are the same
    long elapsedMillisecs = timer.ElapsedMilliseconds;
    Assert.IsTrue(elapsedMillisecs < 25, $"Expected it to take less than 25 millisecs but took {elapsedMillisecs}");
}
158276
}
159277
}

Abot/Core/DomainRateLimiter.cs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ public interface IDomainRateLimiter
2020
/// Add a domain entry so that domain may be rate limited according to the param minimum crawl delay
2121
/// </summary>
2222
void AddDomain(Uri uri, long minCrawlDelayInMillisecs);
23+
24+
/// <summary>
25+
/// Add/Update a domain entry so that domain may be rate limited according to the param minimum crawl delay
26+
/// </summary>
27+
void AddOrUpdateDomain(Uri uri, long minCrawlDelayInMillisecs);
28+
29+
/// <summary>
30+
/// Remove a domain entry so that it will no longer be rate limited
31+
/// </summary>
32+
void RemoveDomain(Uri uri);
2333
}
2434

2535
[Serializable]
@@ -62,10 +72,34 @@ public void AddDomain(Uri uri, long minCrawlDelayInMillisecs)
6272
if (minCrawlDelayInMillisecs < 1)
6373
throw new ArgumentException("minCrawlDelayInMillisecs");
6474

65-
long millThatIsGreater = minCrawlDelayInMillisecs > _defaultMinCrawlDelayInMillisecs ? minCrawlDelayInMillisecs : _defaultMinCrawlDelayInMillisecs;
66-
GetRateLimter(uri, millThatIsGreater);//just calling this method adds the new domain
75+
GetRateLimter(uri, Math.Max(minCrawlDelayInMillisecs, _defaultMinCrawlDelayInMillisecs));//just calling this method adds the new domain
6776
}
6877

78+
/// <summary>
/// Add/Update a domain entry so that domain may be rate limited according to the param minimum crawl delay.
/// The delay used is the greater of the param value and the default minimum crawl delay. An entry that
/// already exists for the uri's authority is replaced by a new rate limiter.
/// </summary>
/// <param name="uri">Uri whose Authority is used as the key of the domain entry</param>
/// <param name="minCrawlDelayInMillisecs">Minimum crawl delay in milliseconds, must be at least 1</param>
/// <exception cref="ArgumentNullException">uri is null</exception>
/// <exception cref="ArgumentException">minCrawlDelayInMillisecs is less than 1</exception>
public void AddOrUpdateDomain(Uri uri, long minCrawlDelayInMillisecs)
{
    if (uri == null)
        throw new ArgumentNullException("uri");

    if (minCrawlDelayInMillisecs < 1)
        throw new ArgumentException("minCrawlDelayInMillisecs");

    //minCrawlDelayInMillisecs is at least 1 at this point, so the greater value is always positive
    var delayToUse = Math.Max(minCrawlDelayInMillisecs, _defaultMinCrawlDelayInMillisecs);
    var rateLimiter = new RateLimiter(1, TimeSpan.FromMilliseconds(delayToUse));

    //The same new limiter is used whether the key is being added or an existing entry is being replaced
    _rateLimiterLookup.AddOrUpdate(uri.Authority, rateLimiter, (key, oldValue) => rateLimiter);
    _logger.DebugFormat("Added/updated domain [{0}] with minCrawlDelayInMillisecs of [{1}] milliseconds", uri.Authority, delayToUse);
}
95+
96+
/// <summary>
/// Remove a domain entry so that it will no longer be rate limited.
/// Does nothing if there is no entry for the uri's authority.
/// </summary>
/// <param name="uri">Uri whose Authority is used as the key of the domain entry to remove</param>
/// <exception cref="ArgumentNullException">uri is null</exception>
public void RemoveDomain(Uri uri)
{
    //Guard like AddOrUpdateDomain does, instead of failing with a NullReferenceException on uri.Authority
    if (uri == null)
        throw new ArgumentNullException("uri");

    //The removed limiter is not needed; TryRemove just requires the out param
    IRateLimiter rateLimiter;
    _rateLimiterLookup.TryRemove(uri.Authority, out rateLimiter);
}
101+
102+
69103
private IRateLimiter GetRateLimter(Uri uri, long minCrawlDelayInMillisecs)
70104
{
71105
IRateLimiter rateLimiter;

Abot/Util/ThreadManager.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public interface IThreadManager : IDisposable
1212
/// <summary>
1313
/// Max number of threads to use.
1414
/// </summary>
15-
int MaxThreads { get; }
15+
int MaxThreads { get; set; }
1616

1717
/// <summary>
1818
/// Will perform the action asynchronously on a separate thread
@@ -55,7 +55,7 @@ public ThreadManager(int maxThreads)
5555
public int MaxThreads
5656
{
5757
get;
58-
private set;
58+
set;
5959
}
6060

6161
/// <summary>

0 commit comments

Comments
 (0)