Skip to content

Commit 369d6fb

Browse files
committed
* Changed how we get the redirect URI following PR comment
1 parent c4d0241 commit 369d6fb

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

Abot/Crawler/WebCrawler.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ protected virtual void ProcessRedirect(CrawledPage crawledPage)
724724

725725
try
726726
{
727-
var uri = GetRedirectUri(crawledPage);
727+
var uri = ExtractRedirectUri(crawledPage);
728728

729729
PageToCrawl page = new PageToCrawl(uri);
730730
page.ParentUri = crawledPage.ParentUri;
@@ -1042,7 +1042,7 @@ protected virtual void ValidateRootUriForRedirection(CrawledPage crawledRootPage
10421042
}
10431043

10441044
if (IsRedirect(crawledRootPage)) {
1045-
_crawlContext.RootUri = GetRedirectUri(crawledRootPage);
1045+
_crawlContext.RootUri = ExtractRedirectUri(crawledRootPage);
10461046
_logger.InfoFormat("The root URI [{0}] was redirected to [{1}]. Pages from domains [{2}] and [{3}] will be considered internal.",
10471047
_crawlContext.OriginalRootUri,
10481048
_crawlContext.RootUri,
@@ -1058,7 +1058,7 @@ protected virtual void ValidateRootUriForRedirection(CrawledPage crawledRootPage
10581058
/// If HTTP auto-redirection is disabled, this value is stored in the 'Location' header of the response.
10591059
/// If HTTP auto-redirection is enabled, this value is stored in the response's ResponseUri property.
10601060
/// </remarks>
1061-
protected virtual Uri GetRedirectUri(CrawledPage crawledPage)
1061+
protected virtual Uri ExtractRedirectUri(CrawledPage crawledPage)
10621062
{
10631063
Uri locationUri;
10641064
if (_crawlContext.CrawlConfiguration.IsHttpRequestAutoRedirectsEnabled) {
@@ -1067,12 +1067,12 @@ protected virtual Uri GetRedirectUri(CrawledPage crawledPage)
10671067
} else {
10681068
// For manual redirects, we need to look for the location header.
10691069
var location = crawledPage.HttpWebResponse.Headers["Location"];
1070-
1070+
1071+
// Check if the location is absolute. If not, create an absolute uri.
10711072
if (!Uri.TryCreate(location, UriKind.Absolute, out locationUri))
10721073
{
1073-
var site = crawledPage.Uri.Scheme + "://" + crawledPage.Uri.Host;
1074-
location = site + location;
1075-
locationUri = new Uri(location);
1074+
Uri baseUri = new Uri(crawledPage.Uri.GetLeftPart(UriPartial.Authority));
1075+
locationUri = new Uri(baseUri, location);
10761076
}
10771077
}
10781078
return locationUri;

0 commit comments

Comments
 (0)