@@ -724,7 +724,7 @@ protected virtual void ProcessRedirect(CrawledPage crawledPage)
724
724
725
725
try
726
726
{
727
- var uri = GetRedirectUri ( crawledPage ) ;
727
+ var uri = ExtractRedirectUri ( crawledPage ) ;
728
728
729
729
PageToCrawl page = new PageToCrawl ( uri ) ;
730
730
page . ParentUri = crawledPage . ParentUri ;
@@ -1042,7 +1042,7 @@ protected virtual void ValidateRootUriForRedirection(CrawledPage crawledRootPage
1042
1042
}
1043
1043
1044
1044
if ( IsRedirect ( crawledRootPage ) ) {
1045
- _crawlContext . RootUri = GetRedirectUri ( crawledRootPage ) ;
1045
+ _crawlContext . RootUri = ExtractRedirectUri ( crawledRootPage ) ;
1046
1046
_logger . InfoFormat ( "The root URI [{0}] was redirected to [{1}]. Pages from domains [{2}] and [{3}] will be considered internal." ,
1047
1047
_crawlContext . OriginalRootUri ,
1048
1048
_crawlContext . RootUri ,
@@ -1058,7 +1058,7 @@ protected virtual void ValidateRootUriForRedirection(CrawledPage crawledRootPage
1058
1058
/// If HTTP auto-redirection is disabled, this value is stored in the 'Location' header of the response.
1059
1059
/// If auto-redirection is enabled, this value is stored in the response's ResponseUri property.
1060
1060
/// </remarks>
1061
- protected virtual Uri GetRedirectUri ( CrawledPage crawledPage )
1061
+ protected virtual Uri ExtractRedirectUri ( CrawledPage crawledPage )
1062
1062
{
1063
1063
Uri locationUri ;
1064
1064
if ( _crawlContext . CrawlConfiguration . IsHttpRequestAutoRedirectsEnabled ) {
@@ -1067,12 +1067,12 @@ protected virtual Uri GetRedirectUri(CrawledPage crawledPage)
1067
1067
} else {
1068
1068
// For manual redirects, we need to look for the location header.
1069
1069
var location = crawledPage . HttpWebResponse . Headers [ "Location" ] ;
1070
-
1070
+
1071
+ // Check if the location is absolute. If not, create an absolute uri.
1071
1072
if ( ! Uri . TryCreate ( location , UriKind . Absolute , out locationUri ) )
1072
1073
{
1073
- var site = crawledPage . Uri . Scheme + "://" + crawledPage . Uri . Host ;
1074
- location = site + location ;
1075
- locationUri = new Uri ( location ) ;
1074
+ Uri baseUri = new Uri ( crawledPage . Uri . GetLeftPart ( UriPartial . Authority ) ) ;
1075
+ locationUri = new Uri ( baseUri , location ) ;
1076
1076
}
1077
1077
}
1078
1078
return locationUri ;
0 commit comments