(* :Name: RandomWalkWeb (2012-09-28_A) *)

(* :Context: RandomWalkWeb` *)

(* :Title: Random Walk on the Web *)

(* :Author: Todd Silvestri *)

(* :Copyright: (C) 2012 Todd Silvestri *)

(* :Summary:
    This package contains functions to perform random walks on the World Wide 
    Web and visualize the resulting data.
*)

(* :Keywords: random walk, World Wide Web, WWW, visualization, data *)

(* :Mathematica Version: 8.0 *)

(* :Package Version: 1.4 *)

(* :History:
    Version 1.0, July 2012 -- Initial version
    Version 1.3, August 2012 -- Major revisions and additions
    Version 1.4, September 2012 -- Minor revisions and additions
*)

(* :Requirements:
    1. This package uses .NET/Link to communicate with the .NET runtime.  The 
       .NET Framework 2.0 or higher (or compatible) is required.
*)

(* :Warnings:
    1. The Unicode (UTF-8) character encoding is used for input/output 
       functions.
*)

(* :Limitations: None *)

(* :Discussion: *)

(* :References:
    [1] Mozilla Foundation. "Public Suffix List." Public Suffix List. 
        http://publicsuffix.org/ (accessed August 2, 2012).
    [2] Piperoglou, Stephanos. "Relative URLs." WebReference. 
        http://www.webreference.com/html/tutorial2/3.html (accessed 
        August 7, 2012).
*)


(* Open the public package context.  NETLink` is listed as a needed context, 
   so its symbols (InstallNET, LoadNETType, NETBlock, ...) are available to 
   the rest of this file. *)
BeginPackage["RandomWalkWeb`", {"NETLink`"}]; (* RandomWalkWeb` *)


(* RandomWalkWeb`Information` *)
(* Package version metadata.  The leading backquote makes each name relative 
   to the current context, so these are the symbols 
   RandomWalkWeb`Information`$VersionNumber and 
   RandomWalkWeb`Information`$ReleaseNumber.  The version number is read 
   again further down when the $UserAgent string is assembled. *)
`Information`$VersionNumber = 1.4;
`Information`$ReleaseNumber = 4;


(* Specify the default raw character encoding to use for input/output 
   functions: *)
(* NOTE: $CharacterEncoding is not a package symbol, so this assignment is 
   not scoped to the package; it stays in effect after the package has 
   finished loading. *)
$CharacterEncoding = "UTF8";

(* Launch the .NET runtime: *)
InstallNET[];


(* Clear symbol attributes: *)
(* The symbols are grouped by the set of attributes that is removed from 
   them.  ClearAttributes holds its first argument, so the symbols in each 
   list are not evaluated even when they already carry values from an 
   earlier load of the package. *)

(* Functions that thread over lists: *)
ClearAttributes[
  {AbsoluteLinkQ, BaseURL, DomainName, EffectiveTLDNameQ, GetFavicon, 
   GetHyperlinks, GetSource, Hostname, PerformRandomWalks, RandomWalkWeb, 
   SiteRoot, ToAbsoluteLink}, 
  {Listable, Protected, ReadProtected}]

(* Remaining functions and the read-only information variable: *)
ClearAttributes[
  {GetStepData, LogMessage, RandomWalkGraph, ToGraphData, $ETLDNInfo}, 
  {Protected, ReadProtected}]

(* Option names: *)
ClearAttributes[
  {AbsoluteLinks, HyperlinkType, RefreshIconCache, VertexIcon, VertexType}, 
  {Protected}]

(* User-settable package variables: *)
ClearAttributes[
  {$BaseDataDirectory, $DataFilePrefix, $DebugLogging, $LogFileName, 
   $Timeout, $UserAgent}, 
  {ReadProtected}]


(* Usage messages 

   Every public symbol of the package is given a ::usage string in this 
   section.  The usage strings of the functions contain embedded box 
   expressions (RowBox, StyleBox, SubscriptBox): argument names carry the 
   "TI" style, and string arguments are wrapped in a StyleBox with 
   ShowStringCharacters->True.  A backslash at the very end of a line inside 
   a string continues the string on the next line.  The usage strings of the 
   options and of the package variables are plain text.
*)

AbsoluteLinkQ::usage = 
  "\!\(\*RowBox[{\"AbsoluteLinkQ\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gives True if \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\) is a fully qualified link, and \
gives False otherwise.";

AbsoluteLinks::usage = 
  "AbsoluteLinks is an option to GetHyperlinks that specifies whether \
links should be converted to fully qualified form.";

BaseURL::usage = 
  "\!\(\*RowBox[{\"BaseURL\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gives the base of \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).";

DomainName::usage = 
  "\!\(\*RowBox[{\"DomainName\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gives the domain name of \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).";

EffectiveTLDNameQ::usage = 
  "\!\(\*RowBox[{\"EffectiveTLDNameQ\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"tld\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gives True if \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"tld\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\) is a known effective top-level \
domain, and gives False otherwise.";

GetFavicon::usage = 
  "\!\(\*RowBox[{\"GetFavicon\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gets the favorite icon \
associated with the requested \!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"url\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True]\).\n\
\!\(\*RowBox[{\"GetFavicon\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"address\\\",\\\"TI\\\"]\\)\\\"\",\
ShowStringCharacters->True], \",\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"source\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gets the favorite icon \
associated with the specified \!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"address\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True]\) \
and \!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"source\\\",\\\"TI\\\"]\
\\)\\\"\", ShowStringCharacters->True]\).";

GetHyperlinks::usage = 
  "\!\(\*RowBox[{\"GetHyperlinks\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"address\\\",\\\"TI\\\"]\\)\\\"\",\
ShowStringCharacters->True], \",\", StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"source\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True], \"]\
\"}]\) gives a list of hyperlinks extracted from \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"source\\\",\\\"TI\\\"]\\)\\\
\"\", ShowStringCharacters->True]\) located at the specified \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"address\\\",\\\"TI\\\"]\\)\
\\\"\", ShowStringCharacters->True]\).";

GetSource::usage = 
  "\!\(\*RowBox[{\"GetSource\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \"]\"}]\) gets the source of the page \
associated with the requested \!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"url\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True]\).";

GetStepData::usage = 
  "\!\(\*RowBox[{\"GetStepData\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \",\", RowBox[{RowBox[{RowBox[{\"{\", \
RowBox[{SubscriptBox[StyleBox[\"fn\", \"TI\"], StyleBox[\"1\", \
\"TR\"]], \",\", SubscriptBox[StyleBox[\"fn\", \"TI\"], \
StyleBox[\"2\", \"TR\"]], \",\", StyleBox[\"\[Ellipsis]\", \"TR\"]}], \
\"}\"}], \",\"}]}], StyleBox[\"n\", \"TI\"], \"]\"}]\) gives a list \
of the first \!\(\*StyleBox[\"n\", \"TI\"]\) steps extracted from \
each of the random walk data files \
\!\(\*SubscriptBox[StyleBox[\"fn\", \"TI\"], StyleBox[\"i\", \"TI\"]]\) \
originating from \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).\n\!\(\*RowBox[{\"GetStepData\", \
\"[\", StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True], \",\", StyleBox[\"fn\", \"TI\"], \"]\
\"}]\) gives a list of steps extracted from the random walk data file \
\!\(\*StyleBox[\"fn\", \"TI\"]\) originating from \!\(\*StyleBox[\"\\\
\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True]\).";

Hostname::usage = 
  "\!\(\*RowBox[{\"Hostname\", \"[\", StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"url\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True], \
\"]\"}]\) gives the hostname of \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).";

HyperlinkType::usage = 
  "HyperlinkType is an option for GetHyperlinks that determines the \
type of hyperlinks returned by the function.";

LogMessage::usage = 
  "\!\(\*RowBox[{\"LogMessage\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"loi\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \",\", StyleBox[\"msg\", \"TI\"], \
\"]\"}]\) logs message \!\(\*StyleBox[\"msg\", \"TI\"]\) with level \
of importance \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"loi\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\) to a file.";

PerformRandomWalks::usage = 
  "\!\(\*RowBox[{\"PerformRandomWalks\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \",\", SubscriptBox[StyleBox[\"n\", \"TI\
\"], StyleBox[\"w\", \"TI\"]], \",\", SubscriptBox[StyleBox[\"n\", \
\"TI\"], StyleBox[\"s\", \"TI\"]], \"]\"}]\) performs \
\!\(\*SubscriptBox[StyleBox[\"n\", \"TI\"], StyleBox[\"w\", \"TI\"]]\) \
random walks on the Web originating from \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\) and each having at most \
\!\(\*SubscriptBox[StyleBox[\"n\", \"TI\"], StyleBox[\"s\", \"TI\"]]\) \
steps.";

RandomWalkGraph::usage = 
  "\!\(\*RowBox[{\"RandomWalkGraph\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \",\", RowBox[{RowBox[{RowBox[{\"{\", \
RowBox[{SubscriptBox[StyleBox[\"fn\", \"TI\"], StyleBox[\"1\", \
\"TR\"]], \",\", SubscriptBox[StyleBox[\"fn\", \"TI\"], \
StyleBox[\"2\", \"TR\"]], \",\", StyleBox[\"\[Ellipsis]\", \"TR\"]}], \
\"}\"}], \",\"}]}], StyleBox[\"n\", \"TI\"], \"]\"}]\) constructs a \
graph of the first \!\(\*StyleBox[\"n\", \"TI\"]\) steps extracted \
from each of the random walk data files \
\!\(\*SubscriptBox[StyleBox[\"fn\", \"TI\"], StyleBox[\"i\", \"TI\"]]\) \
originating from \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).\n\
\!\(\*RowBox[{\"RandomWalkGraph\", \"[\", StyleBox[\"stepData\", \"TI\
\"], \"]\"}]\) constructs a graph using the specified \!\(\*StyleBox[\
\"stepData\", \"TI\"]\).";

RandomWalkWeb::usage = 
  "\!\(\*RowBox[{\"RandomWalkWeb\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\", \
ShowStringCharacters->True], \",\", SubscriptBox[StyleBox[\"n\", \"TI\
\"], StyleBox[\"s\", \"TI\"]], \"]\"}]\) performs a random walk on \
the Web originating from \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\) and having at most \
\!\(\*SubscriptBox[StyleBox[\"n\", \"TI\"], StyleBox[\"s\", \"TI\"]]\) \
steps.";

RefreshIconCache::usage = 
  "RefreshIconCache is an option to RandomWalkGraph that specifies \
whether the icon cache should be refreshed.";

SiteRoot::usage = 
  "\!\(\*RowBox[{\"SiteRoot\", \"[\", StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"url\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True], \
\"]\"}]\) gives the site root of \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\
\", ShowStringCharacters->True]\).";

ToAbsoluteLink::usage = 
  "\!\(\*RowBox[{\"ToAbsoluteLink\", \"[\", \
StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"]\\)\\\"\",\
ShowStringCharacters->True], \",\", StyleBox[\"\\\"\\!\\(\\*StyleBox[\
\\\"hyperlink\\\",\\\"TI\\\"]\\)\\\"\", ShowStringCharacters->True], \
\"]\"}]\) converts \
\!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"hyperlink\\\",\\\"TI\\\"]\\\
)\\\"\", ShowStringCharacters->True]\) to a fully qualified link \
based on \!\(\*StyleBox[\"\\\"\\!\\(\\*StyleBox[\\\"url\\\",\\\"TI\\\"\
]\\)\\\"\", ShowStringCharacters->True]\).";

ToGraphData::usage = 
  "\!\(\*RowBox[{\"ToGraphData\", \"[\", StyleBox[\"stepData\", \
\"TI\"], \"]\"}]\) converts the elements in \!\(\*StyleBox[\"stepData\
\", \"TI\"]\) to directed graph data.";

VertexIcon::usage = 
  "VertexIcon is an option to RandomWalkGraph that specifies whether \
vertices should be rendered using favorite icons.";

VertexType::usage = 
  "VertexType is an option for GetStepData and related functions that \
determines the type of vertices returned by the function.";

(* Package variables.  Each entry consists of the usage message followed by 
   the default value. *)

(* Base directory for data files; re-evaluated on every access so that it 
   follows the current working directory. *)
$BaseDataDirectory::usage = 
  "$BaseDataDirectory gives the base data directory in which random \
walk data and favorite icons are placed.";
$BaseDataDirectory := Directory[];

(* Prefix of the random walk data file names. *)
$DataFilePrefix::usage = 
  "$DataFilePrefix specifies the label to be prepended to each random \
walk data file name.";
$DataFilePrefix = "RW";

(* Debug-level logging is switched off by default. *)
$DebugLogging::usage = 
  "$DebugLogging specifies whether functions should log debug-level \
messages.";
$DebugLogging = False;

(* Placeholder; the actual value is assigned once the effective TLD name 
   data file has been read. *)
$ETLDNInfo::usage = 
  "$ETLDNInfo gives the timestamp of the data file and number of \
effective top-level domain names in the lookup table.";
$ETLDNInfo = {};

(* Log file location; re-evaluated on every access. *)
$LogFileName::usage = 
  "$LogFileName is the fully qualified name of the log file.";
$LogFileName := FileNameJoin[{$TemporaryDirectory, "messages.log"}];

(* Time-out (in milliseconds) assigned to the web requests. *)
$Timeout::usage = 
  "$Timeout is the number of milliseconds to wait before a synchronous \
request times out.";
$Timeout = 100000;

(* User-agent string, assembled from the Mathematica version and release 
   numbers and the package version number. *)
$UserAgent::usage = "$UserAgent is the value of the User-agent HTTP header.";
$UserAgent = 
  StringJoin[
    "Mathematica/", 
    ToString[NumberForm[$VersionNumber, {2, 1}]], 
    ".", 
    ToString[NumberForm[$ReleaseNumber, {2, 1}]], 
    " RWW/", 
    ToString[NumberForm[RandomWalkWeb`Information`$VersionNumber, {2, 1}]]
  ];


(* Option values *)

(* GetHyperlinks options:
     AbsoluteLinks  (default True)
       True  -- relative and root-relative links are converted to absolute 
                links.
       False -- links are left as extracted.
       This option has no effect on the output when HyperlinkType->"Raw".
     HyperlinkType  (default "Selected")
       "Selected" -- only the links that pass the $SelectionCriteria filter 
                     are returned.
       "Raw"      -- all extracted links are returned (including JavaScript 
                     and SMTP).
*)
Options[GetHyperlinks] = {
  AbsoluteLinks -> True, 
  HyperlinkType -> "Selected"
};

(* GetStepData options:
     VertexType  (default "DomainName")
       "DomainName" -- the vertices of the returned steps are labeled by 
                       domain name.
       "Hostname"   -- the vertices of the returned steps are labeled by 
                       hostname.
*)
Options[GetStepData] = {VertexType -> "DomainName"};

(* RandomWalkGraph options:
   All options of GetStepData and Graph are accepted, plus the following.
     RefreshIconCache  (default False)
       False -- no attempt is made to refresh the icon cache.
       True  -- the icons are downloaded from their corresponding URLs 
                regardless of whether they already exist in the local icon 
                cache.
       This option has no effect when VertexIcon->False.
     VertexIcon  (default False)
       False -- the graph is plotted using the standard vertex shape.
       True  -- each vertex is drawn using its associated favorite icon 
                (i.e., favicon).  A local, predefined folder is checked for 
                the icon first; if the icon does not exist there, it is 
                downloaded from the corresponding URL and cached locally for 
                future use.
*)
Options[RandomWalkGraph] = 
  Union[
    Join[
      Options[GetStepData], 
      Options[Graph], 
      {RefreshIconCache -> False, VertexIcon -> False}
    ]
  ];


(* Everything from here on is defined in the private context, so helper 
   symbols introduced below are not exported. *)
Begin["`Private`"]; (* RandomWalkWeb`Private` *)


(* Load the specified types into the .NET runtime: *)
(* The functions below reach the static members of these types through a 
   context named after the class (Image`FromStream, ImageFormat`Png, 
   Environment`Version, HttpStatusCode`OK, WebRequest`Create).  The values 
   returned for HttpWebRequest and HttpWebResponse are kept because they are 
   later passed to CastNETObject; the value returned for StreamReader is kept 
   in the same way. *)
LoadNETType["System.Drawing.Image"];
LoadNETType["System.Drawing.Imaging.ImageFormat"];
LoadNETType["System.Environment"];
StreamReader = LoadNETType["System.IO.StreamReader"];
LoadNETType["System.Net.HttpStatusCode"];
HttpWebRequest = LoadNETType["System.Net.HttpWebRequest"];
HttpWebResponse = LoadNETType["System.Net.HttpWebResponse"];
LoadNETType["System.Net.WebRequest"];


(* AbsoluteLinkQ[url]:
   Yields True if url is an absolute (i.e., fully qualified) link, and False 
   otherwise.  A link is considered absolute when it consists of one of the 
   schemes http, https, ftp, or file (in any letter case), followed by "://" 
   and at least one more character.
*)
AbsoluteLinkQ[url_String] := 
  StringMatchQ[
    url, 
    (* StringMatchQ tests the entire string, so no explicit start anchor is 
       needed. *)
    ("http" | "https" | "ftp" | "file") ~~ "://" ~~ __, 
    IgnoreCase -> True
  ]


(* BaseURL[url]:
   Returns the base of the specified URL, i.e., everything up to, and 
   including, the last "/" of its path.  BaseURL is only valid for fully 
   qualified input; $Failed is returned for any other input.

   Examples:
     "http://example.com/a/b.html"    ->  "http://example.com/a/"
     "http://example.com"             ->  "http://example.com/"
     "http://example.com/a/b?next=/c" ->  "http://example.com/a/"

   Any query ("?...") or fragment ("#...") is discarded before the last "/" 
   is located, so a "/" that appears inside the query or the fragment is not 
   mistaken for a path separator.  A URL without a path (no "/" after the 
   "://" of the scheme) gets a "/" appended instead of being cut back to the 
   bare scheme.
*)
BaseURL[url_String] := 
  Module[{resource, authorityStart, slashEnds, baseURL = $Failed},
    (* Check that the specified URL is absolute.  If it is, then get its 
       base.  Otherwise, return $Failed. *)
    If[AbsoluteLinkQ[url],
      (* Keep only the part in front of the query/fragment.  An absolute URL 
         starts with its scheme, so the first piece is never empty. *)
      resource = First[StringSplit[url, "?" | "#"]];
      
      (* Position of the last character of the "://" that ends the scheme. *)
      authorityStart = StringPosition[resource, "://", 1][[1, 2]];
      
      (* Positions of all "/" located after the scheme separator. *)
      slashEnds = Select[StringPosition[resource, "/"][[All, 2]], 
        # > authorityStart &];
      
      (* Construct the base URL. *)
      baseURL = If[slashEnds === {},
        (* No path at all: the base is the site root. *)
        resource <> "/",
        (* else: cut after the last "/" of the path. *)
        StringTake[resource, Last[slashEnds]]
      ];
    ];
    
    (* Return the base URL. *)
    baseURL
  ]


(* DomainName[url]:
   Returns the domain name (e.g., example.com) for the specified URL.  
   DomainName is only valid for fully qualified input; $Failed is returned 
   for any other input.

   The domain name is assembled from the dot-separated components of the 
   hostname, working from right to left: components are prepended for as 
   long as the (lowercase) string assembled so far is an effective TLD name.  
   When even the rightmost component is not an effective TLD name, an error 
   is logged and the hostname itself is returned.
*)
DomainName[url_String] := 
  Module[{$FunctionName = "DomainName", host, labels, labelCount, 
          depth = 1, result = $Failed},
    If[AbsoluteLinkQ[url],
      (* Split the hostname into its components. *)
      host = Hostname[url];
      labels = StringSplit[host, "."];
      
      If[!EmptyQ[labels],
        labelCount = Length[labels];
        
        (* Begin with the rightmost component and extend to the left while 
           the current candidate is still an effective TLD name. *)
        result = Last[labels];
        While[EffectiveTLDNameQ[ToLowerCase[result]],
          depth++;
          
          (* Stop as soon as all components have been used. *)
          If[depth > labelCount, Break[]];
          
          result = labels[[-depth]] <> "." <> result;
        ];
      ];
      
      (* A depth of 1 means that the rightmost component was not recognized 
         (i.e., the TLD was not on the list), so fall back to the 
         hostname. *)
      If[depth == 1,
        LogMessage["ERROR", "Could not determine effective TLD."];
        LogMessage["INFO", "Hostname: " <> host];
        
        result = host;
      ];
    ];
    
    (* The domain name, or $Failed for input that is not absolute. *)
    result
  ]


(* etldnqHelper[]:
   A helper function for EffectiveTLDNameQ.  It reads the effective TLD name 
   data file ($DatFileName) and returns 

     {{timestamp, numberOfNames}, listOfNames}

   where timestamp is the text that follows the leading "//" of the first 
   line of the file ("" if the first line is not a comment), and listOfNames 
   is the sorted, duplicate-free list of names.  If the file cannot be read, 
   or contains no lines, then {{}, {}} is returned.

   Format of the data file:
     - A line that contains "//" is a comment and is ignored, as are empty 
       and whitespace-only lines.
     - Every other line holds one name.  A leading wildcard ("*.") or 
       exception ("!") marker is removed, so "*.ck" is stored as "ck" and 
       "!www.ck" is stored as "www.ck".
   NOTE(review): the exception ("!") entries are stored like ordinary names.  
   This reproduces what the original pattern was written to do, but it is 
   not what the Public Suffix List algorithm [1] prescribes for exception 
   rules -- confirm that this is intended for the modified data file.
*)

(* Get the current package directory and assemble the absolute file name of 
   the data file. *)
$RWWPackageDirectory = DirectoryName[$InputFileName];
$DatFileName = FileNameJoin[{$RWWPackageDirectory, "Data", "etldn.dat"}];

etldnqHelper[] := 
  Module[{datETLDN, timestampMatches, infoTimestamp, ruleLines, 
          effectiveTLDNames},
    (* Import the data from the specified file. *)
    datETLDN = Import[$DatFileName, "Lines"];
    
    If[!MatchQ[datETLDN, {__String}],
      (* The file is missing, unreadable, or empty.  Import has already 
         issued its own message, so hand back empty information instead of 
         applying string functions to a failure value. *)
      {{}, {}},
      
      (* else *)
      (* Get the content's timestamp from the first line. *)
      timestampMatches = StringCases[First[datETLDN], 
        StartOfString ~~ "//" ~~ t__ :> t];
      infoTimestamp = If[timestampMatches === {},
        "",
        StringTrim[First[timestampMatches]]
      ];
      
      (* Trim every line first, so that whitespace-only lines are removed 
         together with the empty lines and the comment lines. *)
      ruleLines = Select[StringTrim[datETLDN], 
        (# =!= "" && StringFreeQ[#, "//"])&];
      
      (* Remove a leading "*." or "!".  (With an empty string as the first 
         alternative of the prefix pattern, the empty alternative matches 
         first and the markers are never removed.) *)
      effectiveTLDNames = StringReplace[ruleLines, 
        StartOfString ~~ ("*." | "!") -> ""];
      
      (* Delete duplicates and sort the list. *)
      effectiveTLDNames = Union[effectiveTLDNames];
      
      (* Return the content's timestamp, the number of TLDs, and the list of 
         effective TLD names. *)
      {{infoTimestamp, Length[effectiveTLDNames]}, effectiveTLDNames}
    ]
  ]

(* Get the data file information (i.e., timestamp and number of TLDs) and the 
   list of effective TLD names. *)
{$ETLDNInfo, effectiveTLDNames} = etldnqHelper[];


(* EffectiveTLDNameQ[tld]:
   Yields True if tld is an element of the list of effective TLD names, and 
   yields False otherwise.  The original list was obtained from [1] and 
   modified to include additional TLDs.
*)
EffectiveTLDNameQ[tld_String] := 
  (* Look at the elements of the list only and stop at the first hit. *)
  Cases[effectiveTLDNames, tld, {1}, 1] =!= {}


(* EmptyQ[list]:
   Yields True for the empty list, and yields False for any other list.
*)
EmptyQ[list_List] := 
  list === {}


(* EmptyQ[str]:
   Yields True for the empty string, and yields False for any other string.
*)
EmptyQ[str_String] := 
  str === ""


(* gclrvExceptionHandler:
   A custom .NET exception handler for GetCLRVersion.  The three arguments 
   (message symbol, tag, and text) are accepted but not used: the handler 
   fetches the most recent .NET exception itself and logs its root cause at 
   the ERROR level.
*)
gclrvExceptionHandler = Function[{msgSym, msgTag, msgStr},
  With[{owner = "GetCLRVersion"},
    Module[{thrown, rootCause},
      (* The exception object thrown in the most recent call ... *)
      thrown = GetNETException[];
      
      (* ... and the exception at the bottom of its chain. *)
      rootCause = thrown@GetBaseException[];
      
      (* Log the exception. *)
      LogMessage["ERROR", 
        StringJoin["Unhandled exception in ", owner, ". ---> "], 
        rootCause@ToString[], "\n"];
    ]
  ]
];


(* GetCLRVersion[]:
   Returns, as a string, the version (i.e., major.minor[.build[.revision]]) 
   of the common language runtime (CLR).

   $FunctionName and $NETExceptionHandler are localized with Block, so that 
   gclrvExceptionHandler is the active .NET exception handler for the calls 
   made while the body is evaluated.
*)
GetCLRVersion[] := 
  NETBlock[
    Block[{$FunctionName = "GetCLRVersion", 
           $NETExceptionHandler = gclrvExceptionHandler, version},
      LogMessage["DEBUG", StringJoin["Entering ", $FunctionName, "."]];
      
      (* Version object of the CLR (static property of 
         System.Environment). *)
      version = Environment`Version;
      
      (* The exit is logged first; the conversion to a string takes place 
         as part of the return. *)
      LogMessage["DEBUG", StringJoin["Exiting ", $FunctionName, "."]];
      Return[version@ToString[]];
    ]
  ]


(* gfExceptionHandler:
   A custom .NET exception handler for GetFavicon[<<as>>].  The three 
   arguments (message symbol, tag, and text) are accepted but not used: the 
   handler fetches the most recent .NET exception itself and logs its root 
   cause at the ERROR level.
*)
gfExceptionHandler = Function[{msgSym, msgTag, msgStr},
  With[{owner = "GetFavicon[<<as>>]"},
    Module[{thrown, rootCause},
      (* The exception object thrown in the most recent call ... *)
      thrown = GetNETException[];
      
      (* ... and the exception at the bottom of its chain. *)
      rootCause = thrown@GetBaseException[];
      
      (* Log the exception. *)
      LogMessage["ERROR", 
        StringJoin["Unhandled exception in ", owner, ". ---> "], 
        rootCause@ToString[], "\n"];
    ]
  ]
];


(* GetFavicon[address, source]:
   Returns the favorite icon (i.e., favicon) for the specified address/source.

   Arguments:
     address -- the URL of the page; it is the first argument passed to 
                ToAbsoluteLink when the extracted icon link is converted to 
                an absolute link.
     source  -- the source of the page, which is searched for <link> tags 
                that declare an icon.

   Returns:
     The result of importing the downloaded icon as a PNG, or $Failed.

   Outline:
     1. Extract the icon link from source (default: "/favicon.ico") and 
        convert it to an absolute link.
     2. Create a web request for the link, set the time-out ($Timeout) and 
        the User-agent header ($UserAgent), and get the response.
     3. Reject responses whose content type does not contain one of the 
        accepted substrings.
     4. For an HTTP 200 response, create a .NET Image from the response 
        stream, save it as a PNG in $TemporaryDirectory, import that file, 
        and delete it again.

   Notes:
     - The variable favicon is initialized to $Failed and is only reassigned 
       by the final Import, so every early Return[favicon] yields $Failed.
     - $NETExceptionHandler is rebound to gfExceptionHandler with Block for 
       the duration of the call.
*)
GetFavicon[address_String, source_String] := 
  NETBlock[
    Block[{$FunctionName = "GetFavicon[<<as>>]", 
           $NETExceptionHandler = gfExceptionHandler, rawIconLinks, 
           extractedIconLink, iconLink, webRequest, favicon = $Failed, 
           httpWebRequest, webResponse, httpWebResponse, contentType = "", 
           stream, image, tempFileName, imageSaved = False},
      LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
      LogMessage["DEBUG", "Extracting raw icon links."];
      
      (* Check for an external 'icon' resource (W3C preferred).  If 
         unavailable, then check for an external 'shortcut icon' resource.  
         If neither are available, then assume the resource is at a 
         predefined URI (i.e., favicon.ico at root level). *)
      (* Both patterns require the tag to begin with <link rel= followed by 
         the double-quoted value, and the href value to be double-quoted as 
         well; tags written in any other way are not found. *)
      rawIconLinks = StringCases[source, 
        Shortest["<link rel=" ~~ "\"icon\"" ~~ ___ ~~ "href=\"" ~~ 
        ref__ ~~ "\"" ~~ ___ ~~ ">"]:>ref, IgnoreCase->True];
      If[EmptyQ[rawIconLinks],
        rawIconLinks = StringCases[source, 
          Shortest["<link rel=" ~~ "\"shortcut icon\"" ~~ ___ ~~ "href=\"" ~~ 
          ref__ ~~ "\"" ~~ ___ ~~ ">"]:>ref, IgnoreCase->True];
      ];
      If[EmptyQ[rawIconLinks],
        rawIconLinks = {"/favicon.ico"};
      ];
      (* Only the first extracted link is used. *)
      extractedIconLink = First[rawIconLinks];
      
      (* Convert the extracted icon link to an absolute link. *)
      iconLink = ToAbsoluteLink[address, extractedIconLink];
      
      LogMessage["INFO", "Requested URL: " <> iconLink];
      
      (* Initialize a new WebRequest instance for the specified URI scheme. *)
      webRequest = WebRequest`Create[iconLink];
      
      (* If WebRequest failed to initialize, then return $Failed. *)
      If[webRequest === $Failed,
        LogMessage["ERROR", "WebRequest failed to initialize."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[favicon];
      ];
      
      (* Check the class instance returned by the Create method.  If the 
         class is not 'System.Net.HttpWebRequest', then return $Failed. *)
      If[StringMatchQ[webRequest@ToString[], "System.Net.HttpWebRequest", 
           IgnoreCase->True],
        (* Initialize a new instance of the HttpWebRequest class. *)
        httpWebRequest = CastNETObject[webRequest, HttpWebRequest];,
        (* else *)
        LogMessage["ERROR", 
          "Create method returned an unexpected class instance."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[favicon];
      ];
      
      (* Set the time-out value (in milliseconds) for the GetResponse 
         method. *)
      httpWebRequest@Timeout = $Timeout;
      
      (* Set the value of the User-agent HTTP header. *)
      httpWebRequest@UserAgent = $UserAgent;
      
      (* Assign the response from the Internet resource to a WebResponse 
         object. *)
      webResponse = httpWebRequest@GetResponse[];
      
      (* If the GetResponse method failed, then return $Failed. *)
      If[webResponse === $Failed,
        LogMessage["ERROR", "GetResponse method failed."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[favicon];
      ];
      
      (* Assign the WebResponse object to an HttpWebResponse variable. *)
      httpWebResponse = CastNETObject[webResponse, HttpWebResponse];
      
      (* Get the content type of the response. *)
      contentType = httpWebResponse@ContentType;
      LogMessage["INFO", "Content-Type: " <> contentType];
      
      (* The MIME type of the response must be one of the following: 
         'image/vnd.microsoft.icon', 'image/x-icon', 'image/ico', 
         'image/png', 'image/gif', 'image/jpeg', 'text/plain', or 
         'application/octet-stream'.  If not, then return $Failed. *)
      (* The test is a case-insensitive substring test, so any content type 
         that contains one of the substrings below is accepted. *)
      If[StringFreeQ[contentType, "ico" | "png" | "gif" | "jpeg" | "plain" | 
           "octet-stream", IgnoreCase->True],
        LogMessage["ERROR", "Unexpected MIME type."];
        
        (* Close the response stream and release any system resources 
           associated with the response. *)
        httpWebResponse@Close[];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[favicon];
      ];
      
      (* Process the response based on the returned status: *)
      (* There is no else branch: for any other status the code falls 
         through to the common clean-up below with imageSaved still False, 
         so $Failed is returned and the plain exit message is logged. *)
      If[httpWebResponse@StatusCode == HttpStatusCode`OK,
        (* HTTP 200 - The request succeeded and the requested information 
           is in the response. *)
        LogMessage["INFO", "HTTP Status-Code: 200 (OK)"];
        
        (* Get the URI of the Internet resource that actually responded 
           to the request. *)
        iconLink = httpWebRequest@Address@ToString[];
        
        (* Get the byte stream associated with the response. *)
        stream = httpWebResponse@GetResponseStream[];
        
        LogMessage["DEBUG", "Creating image from response stream."];
        
        (* Create an Image from the specified data stream.  The stream 
           must be kept open for the lifetime of the Image. *)
        image = Image`FromStream[stream];
        
        (* If the FromStream method failed, then return $Failed. *)
        If[image === $Failed,
          LogMessage["ERROR", "FromStream method failed."];
          
          (* Close the byte stream. *)
          stream@Close[];
          
          (* Close the response stream and release any system resources 
             associated with the response. *)
          httpWebResponse@Close[];
          
          LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
            " with status ($Failed)."];
          Return[favicon];
        ];
        
        (* Save the Image to a temporary file in the specified (PNG) format 
           and update the status variable.  Note that the Image class does 
           not support alpha transparency in bitmaps. *)
        (* The file name is derived from the CRC32 hash of the responding 
           icon URL, written in base 16. *)
        tempFileName = FileNameJoin[{$TemporaryDirectory, "temp$" <> 
          IntegerString[Hash[iconLink, "CRC32"], 16] <> ".png"}];
        image@Save[tempFileName, ImageFormat`Png];
        (* NOTE(review): the result of Save is not checked before the status 
           variable is set. *)
        imageSaved = True;
        
        (* Close the byte stream. *)
        stream@Close[];
      ];
      
      (* Close the response stream and release any system resources 
         associated with the response. *)
      httpWebResponse@Close[];
      
      (* If the image was saved, then import it as a PNG and delete the 
         temporary file. *)
      If[imageSaved,
        favicon = Import[tempFileName, "PNG"];
        DeleteFile[tempFileName];
      ];
      
      (* Return the favorite icon. *)
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
      Return[favicon];
    ]
  ]


(* GetFavicon[url]:
   Returns the list {address, favicon} for the specified URL, where address 
   is the responding address reported by GetSource and favicon is the 
   favorite icon (i.e., favicon) obtained for that address and its source.  
   favicon is $Failed when GetSource does not return any source.
*)
GetFavicon[url_String] := 
  Module[{$FunctionName = "GetFavicon[<<u>>]", respondingAddress = url, 
          pageSource, icon = $Failed},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    
    (* Fetch the page; this yields the responding address and the source. *)
    {respondingAddress, pageSource} = GetSource[url];
    
    If[pageSource === $Failed,
      (* No source: report the failure and leave the icon at $Failed. *)
      LogMessage["ERROR", "GetSource failed to return source."];
      
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
        " with status ($Failed)."];,
      
      (* else: hand over to the address/source form of the function. *)
      icon = GetFavicon[respondingAddress, pageSource];
      
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
    ];
    
    (* The responding address and the favorite icon. *)
    {respondingAddress, icon}
  ]


(* GetHyperlinks[address, source]:
   Returns the list of hyperlinks extracted from the specified address/source.

   Options:
     HyperlinkType -- "Selected" returns only the links accepted by 
                      $SelectionCriteria; any other value (including "Raw") 
                      returns all cleaned raw links.
     AbsoluteLinks -- for "Selected" links only: True converts the links to 
                      absolute links relative to address, False leaves them 
                      unchanged.
*)

(* Selection criteria: absolute, relative, and root-relative links (i.e., 
   every link that uses neither the JavaScript nor the SMTP scheme). *)
$SelectionCriteria = 
  Function[link, !(JSSchemeQ[link] || SMTPSchemeQ[link])];

GetHyperlinks[address_String, source_String, OptionsPattern[]] := 
  Module[{$FunctionName = "GetHyperlinks", candidates, filtered, hyperlinks, 
          invalidCharacters = 
            "^" | "`" | "{" | "}" | "\\" | "<" | ">" | "\"" | "'"},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    
    (* Raw (i.e., unprocessed) hyperlinks found in the source. *)
    candidates = GetRawHyperlinks[source];
    
    LogMessage["DEBUG", "Processing raw hyperlinks."];
    
    (* Drop the links that contain a character that is invalid in a URI 
       (per RFC 3986); such characters may be present due to improperly 
       coded/formatted source. *)
    candidates = Select[candidates, StringFreeQ[#, invalidCharacters]&];
    
    (* Delete line feeds ('\n') and carriage returns ('\r'), then trim the 
       whitespace at both ends of every link. *)
    candidates = StringTrim[
      StringReplace[candidates, ("\n" | "\r")..->""]];
    
    (* Process the links according to the specified option values. *)
    If[OptionValue[HyperlinkType] === "Selected",
      (* Selected hyperlinks *)
      filtered = Select[candidates, $SelectionCriteria];
      
      If[OptionValue[AbsoluteLinks],
        hyperlinks = ToAbsoluteLink[address, filtered];,
        (* else *)
        hyperlinks = filtered;
      ];,
      
      (* "Raw" and every other value: raw hyperlinks *)
      hyperlinks = candidates;
    ];
    
    (* Report the number of hyperlinks processed. *)
    LogMessage["INFO", 
      "Hyperlinks Processed: " <> ToString[Length[hyperlinks]]];
    
    LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
    
    (* The list of hyperlinks. *)
    hyperlinks
  ]


(* GetRawHyperlinks[source]:
   Returns the list of raw (i.e., unprocessed) hyperlinks extracted from the 
   specified source.  A hyperlink is the non-empty, double-quoted value of 
   the href attribute of an <a> tag; matching is case-insensitive.
   
   The pattern is confined to a single tag:
     - "<a" must be followed by whitespace, so other tags whose names merely 
       begin with "a" (e.g., <abbr>, <area>, <article>, <address>) are not 
       treated as anchors;
     - the text between "<a" and "href" and between the closing quote and 
       ">" may not contain ">", so an anchor without an href can no longer 
       pick up the href of a later tag (e.g., <link> or <img>);
     - the captured value may not contain a double quote.
*)
GetRawHyperlinks[source_String] := 
  Module[{$FunctionName = "GetRawHyperlinks", rawHyperlinks, 
          numberRawHyperlinks},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    LogMessage["DEBUG", "Extracting raw hyperlinks."];
    
    (* Extract the URL from the href attribute of all <a> tags in the 
       specified source. *)
    rawHyperlinks = StringCases[source, 
      Shortest["<a" ~~ WhitespaceCharacter ~~ Except[">"]... ~~ "href=\"" ~~ 
        ref:(Except["\""]..) ~~ "\"" ~~ Except[">"]... ~~ ">"] :> ref, 
      IgnoreCase->True];
    
    (* Get the number of raw hyperlinks extracted. *)
    numberRawHyperlinks = Length[rawHyperlinks];
    LogMessage["INFO", "Raw Hyperlinks Extracted: " <> 
      ToString[numberRawHyperlinks]];
    
    (* Return the list of raw hyperlinks. *)
    LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
    Return[rawHyperlinks];
  ]


(* gsExceptionHandler:
   A custom .NET exception handler for GetSource.  It takes the three 
   arguments msgSym, msgTag, and msgStr (none of which are used here), 
   fetches the most recent .NET exception, and logs the string form of its 
   base exception as an ERROR message.
*)
gsExceptionHandler = Function[{msgSym, msgTag, msgStr},
  Module[{$FunctionName = "GetSource", thrown, rootCause},
    (* The .NET exception object thrown in the most recent call. *)
    thrown = GetNETException[];
    
    (* The root cause of all other exceptions in the chain. *)
    rootCause = thrown@GetBaseException[];
    
    (* Log the exception. *)
    LogMessage["ERROR", 
      StringJoin["Unhandled exception in ", $FunctionName, ". ---> "], 
      rootCause@ToString[], "\n"];
  ]
];


(* GetSource[url]:
   Requests the specified URL through .NET/Link and returns the pair 
   {address, source}.  On success, address is the URI of the Internet 
   resource that actually responded to the request (it may differ from the 
   original URL due to one or more redirects) and source is the text of the 
   response (i.e., HTML).  On any failure, source is $Failed and address is 
   the original URL.
   
   Source is only returned when all of the following hold:
     - WebRequest`Create returns an instance whose string form is 
       'System.Net.HttpWebRequest';
     - the GetResponse method does not return $Failed;
     - the Content-Type of the response contains 'text/html';
     - the status code of the response is HttpStatusCode`OK.
   
   $Timeout and $UserAgent are read here but defined elsewhere in the 
   package.
*)
GetSource[url_String] := 
  NETBlock[
    (* Block (rather than Module) is used so that $NETExceptionHandler is 
       set to gsExceptionHandler for the duration of this call. *)
    Block[{$FunctionName = "GetSource", 
           $NETExceptionHandler = gsExceptionHandler, webRequest, 
           address = url, source = $Failed, httpWebRequest, webResponse, 
           httpWebResponse, contentType = "", stream, streamReader},
      LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
      LogMessage["INFO", "Requested URL: " <> address];
      
      (* Initialize a new WebRequest instance for the specified URI scheme. *)
      webRequest = WebRequest`Create[address];
      
      (* If WebRequest failed to initialize, then return $Failed as the 
         source. *)
      If[webRequest === $Failed,
        LogMessage["ERROR", "WebRequest failed to initialize."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[{address, source}];
      ];
      
      (* Check the class instance returned by the Create method.  If the 
         class is not 'System.Net.HttpWebRequest' (compared 
         case-insensitively against the object's string form), then return 
         $Failed as the source. *)
      If[StringMatchQ[webRequest@ToString[], "System.Net.HttpWebRequest", 
           IgnoreCase->True],
        (* Cast the request to the HttpWebRequest class. *)
        httpWebRequest = CastNETObject[webRequest, HttpWebRequest];,
        (* else *)
        LogMessage["ERROR", 
          "Create method returned an unexpected class instance."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[{address, source}];
      ];
      
      (* Set the time-out value (in milliseconds) for the GetResponse method. *)
      httpWebRequest@Timeout = $Timeout;
      
      (* Set the value of the User-agent HTTP header. *)
      httpWebRequest@UserAgent = $UserAgent;
      
      (* Assign the response from the Internet resource to a WebResponse 
         object. *)
      webResponse = httpWebRequest@GetResponse[];
      
      (* If the GetResponse method failed, then return $Failed as the 
         source. *)
      If[webResponse === $Failed,
        LogMessage["ERROR", "GetResponse method failed."];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[{address, source}];
      ];
      
      (* Cast the WebResponse object to the HttpWebResponse class. *)
      httpWebResponse = CastNETObject[webResponse, HttpWebResponse];
      
      (* Get the content type of the response. *)
      contentType = httpWebResponse@ContentType;
      LogMessage["INFO", "Content-Type: " <> contentType];
      
      (* If the content type does not contain 'text/html' (case-insensitive 
         substring test, so additional parameters in the header value are 
         tolerated), then return $Failed as the source. *)
      If[StringFreeQ[contentType, "text/html", IgnoreCase->True],
        LogMessage["ERROR", "Unexpected MIME type."];
        
        (* Close the response stream and release any system resources 
           associated with the response. *)
        httpWebResponse@Close[];
        
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " with status ($Failed)."];
        Return[{address, source}];
      ];
      
      (* Process the response based on the returned status.
         NOTE(review): there is no else branch, so for any status other than 
         OK the source stays $Failed while the exit below is still logged as 
         a normal exit -- confirm that this is intended. *)
      If[httpWebResponse@StatusCode == HttpStatusCode`OK,
        (* HTTP 200 - The request succeeded and the requested information 
           is in the response. *)
        LogMessage["INFO", "HTTP Status-Code: 200 (OK)"];
        
        (* Get the URI of the Internet resource that actually responded 
           to the request. *)
        address = httpWebRequest@Address@ToString[];
        
        (* Get the byte stream associated with the response and initialize 
           a StreamReader class to read the characters. *)
        stream = httpWebResponse@GetResponseStream[];
        streamReader = NETNew[StreamReader, stream];
        
        LogMessage["DEBUG", "Reading response stream."];
        
        (* Read to the end of the stream. *)
        source = streamReader@ReadToEnd[];
        
        (* Close the reader and the underlying byte stream. *)
        streamReader@Close[];
        stream@Close[];
      ];
      
      (* Close the response stream and release any system resources 
         associated with the response. *)
      httpWebResponse@Close[];
      
      (* Return the URI and corresponding page source. *)
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
      Return[{address, source}];
    ]
  ]


(* GetStepData[originURL, fileNumber]:
   Returns a list of steps (i.e., graph edges) extracted from random walk data 
   originating from the specified URL (see RandomWalkWeb).  The data is read 
   from the file $DataFilePrefix <> fileNumber <> ".txt" inside the folder 
   whose name is derived from an MD5 hash of the origin URL.  The returned 
   list does not contain steps that connect a vertex to itself, nor does it 
   contain duplicate steps.
   
   The VertexType option selects how each visited URL is mapped to a vertex: 
   "Hostname" uses Hostname; "DomainName" and any other value use DomainName.
   
   $Failed is returned when the folder or the file does not exist, or when 
   the file cannot be imported as a list of lines.  The last case matters 
   because the multi-file form of GetStepData discards exactly $Failed; 
   without the check, a failed import would produce a malformed expression 
   that slips through that filter.
*)
GetStepData[originURL_String, fileNumber_Integer, OptionsPattern[]] := 
  Module[{$FunctionName = "GetStepData[<<of>>]", dataDirectory, dataFileName, 
          rwData, vertexFunction, stepData = $Failed, x},
    (* Define the folder where the data is stored.  The folder name is 
       derived from an MD5 hash of the origin URL. *)
    dataDirectory = FileNameJoin[{$BaseDataDirectory, 
      IntegerString[Hash[originURL, "MD5"], 16]}];
    
    (* Assemble the fully qualified name of the data file. *)
    dataFileName = FileNameJoin[{dataDirectory, 
      $DataFilePrefix <> ToString[fileNumber] <> ".txt"}];
    
    (* Process the data only if both the folder and the file exist.  
       Otherwise, stepData keeps its initial value of $Failed. *)
    If[FileExistsQ[dataDirectory] && FileExistsQ[dataFileName],
      (* Import the random walk data.  The data consists of a list of 
         URLs that were successfully visited during the walk. *)
      rwData = Import[dataFileName, "Lines"];
      
      (* Continue only if the import produced a (possibly empty) list of 
         strings; a failed import leaves stepData as $Failed. *)
      If[VectorQ[rwData, StringQ],
        (* Choose the function that maps a URL to a vertex per the 
           VertexType option.  DomainName is the default. *)
        vertexFunction = If[OptionValue[VertexType] === "Hostname", 
          Hostname, DomainName];
        
        (* Partition the vertices into sets of length 2 with an offset 
           of 1.  Each subset represents a step (i.e., graph edge) from the 
           first element (i.e., vertex) to the second. *)
        stepData = Partition[vertexFunction /@ rwData, 2, 1];
        
        (* Remove steps that connect a vertex to itself. *)
        stepData = DeleteCases[stepData, {x_, x_}];
        
        (* Remove duplicate steps. *)
        stepData = DeleteDuplicates[stepData];
      ];
    ];
    
    (* Return the step data. *)
    Return[stepData];
  ]


(* GetStepData[originURL, fileNumbers, numSteps]:
   Returns the union of the steps (i.e., graph edges) extracted from each of 
   the specified random walk data files for the origin URL (see 
   RandomWalkWeb).  At most the first numSteps steps of each file are used, 
   and files whose data could not be obtained ($Failed) are ignored.  Because 
   the result is built with Union, it is sorted and free of duplicate steps; 
   it is the empty list when no file yields data.
*)
GetStepData[
  originURL_String, 
  fileNumbers:{__Integer}, 
  numSteps_Integer /; numSteps >= 1, 
  opts:OptionsPattern[]
  ] := 
  Module[{$FunctionName = "GetStepData[<<ofn>>]", perFile, truncated},
    (* Step data of every requested file, without the failed imports. *)
    perFile = DeleteCases[
      Map[GetStepData[originURL, #, opts]&, fileNumbers], $Failed];
    
    (* Keep only the requested number of steps of each file; files with 
       fewer steps are kept whole. *)
    truncated = 
      If[numSteps <= Length[#], Take[#, numSteps], #]& /@ perFile;
    
    (* Accumulate the union over all files, starting from the empty list. *)
    Fold[Union, {}, truncated]
  ]


(* Hostname[url]:
   Returns the hostname (e.g., www.example.com) for the specified URL.  
   Hostname is only valid for fully qualified input; $Failed is returned when 
   the URL is not an absolute link.
*)
Hostname[url_String] := 
  Module[{$FunctionName = "Hostname", separators = "?" | ":", 
          host = $Failed},
    If[AbsoluteLinkQ[url],
      (* The hostname is the last component of the split site root. *)
      host = Last[URLSplit[SiteRoot[url]]];
      
      (* Drop anything that follows a '?' (i.e., a parameter) or a ':' 
         (i.e., a port) so that only the pure hostname remains. *)
      If[!StringFreeQ[host, separators],
        host = First[StringSplit[host, separators]];
      ];
    ];
    
    host
  ]


(* JSSchemeQ[url]:
   Gives True if the specified URL, after surrounding whitespace has been 
   trimmed, begins with the JavaScript scheme ("javascript:"), and False 
   otherwise.  The comparison ignores case.
*)
JSSchemeQ[url_String] := 
  (* StringMatchQ tests the entire string, so the pattern is anchored at 
     the start without an explicit StartOfString. *)
  StringMatchQ[StringTrim[url], "javascript:" ~~ ___, IgnoreCase->True]


(* LogMessage[loi, msg]:
   Appends a custom message with the given level of importance ("ERROR", 
   "INFO", or "DEBUG") to the file $LogFileName.  Nothing is written when 
   $LogFileName is empty, and "DEBUG" messages are only written when 
   $DebugLogging is True.  Each line consists of a time stamp, the level 
   padded to five characters, " : ", and the message parts.  The folder of 
   the log file is created if it does not exist.
*)
LogMessage[
  loi_String /; StringMatchQ[loi, {"ERROR", "INFO", "DEBUG"}], 
  msg__
  ] := 
  (* Block is used so that the time stamp format applies to the DateString 
     call below. *)
  Block[{$FunctionName = "LogMessage", 
         $DateStringFormat = {"[", "Year", "-", "Month", "-", "Day", " ", 
           "Hour24", ":", "Minute", ":", "Second", ".", "Millisecond", "]"}, 
         logDirectory = DirectoryName[$LogFileName], outputStream},
    (* Log only if a name was specified for the log file and the message is 
       either not a debug message or debug logging is enabled. *)
    If[!EmptyQ[$LogFileName] && 
         ($DebugLogging || !StringMatchQ[loi, "DEBUG"]),
      (* Make sure the folder of the log file exists. *)
      If[!FileExistsQ[logDirectory], CreateDirectory[logDirectory]];
      
      (* Append one line to the log file.  A new file is created if it does 
         not exist. *)
      outputStream = OpenAppend[$LogFileName];
      WriteLine[outputStream, DateString[], " ", StringPad[loi, 5, " "], 
        " : ", msg];
      Close[outputStream];
    ];
  ]


(* PerformRandomWalks[originURL, numWalks, numSteps]:
   Performs the specified number of random walks from the origin URL, each 
   for the given number of steps (see RandomWalkWeb).  The list of 
   successfully visited URLs of walk i is exported to the file 
   $DataFilePrefix <> i <> ".txt" in a folder whose name is derived from an 
   MD5 hash of the origin URL.  Walks that return $Failed are not exported.  
   PerformRandomWalks returns the number of successfully exported data files.
*)
PerformRandomWalks[
  originURL_String?AbsoluteLinkQ, 
  numWalks_Integer /; numWalks >= 1, 
  numSteps_Integer /; numSteps >= 1
  ] := 
  Module[{$FunctionName = "PerformRandomWalks", walkFolder, walk, statusCell, 
          walkedURLs, targetFile, exportCount = 0},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    
    (* Folder that receives the data; it is created on first use.  The 
       folder name is derived from an MD5 hash of the origin URL. *)
    walkFolder = FileNameJoin[{$BaseDataDirectory, 
      IntegerString[Hash[originURL, "MD5"], 16]}];
    If[!FileExistsQ[walkFolder], CreateDirectory[walkFolder]];
    
    (* Log the version of the common language runtime (CLR). *)
    LogMessage["INFO", "CLR Version: " <> GetCLRVersion[]];
    
    (* Perform the walks one after another and export the data of each. *)
    Do[
      (* With a notebook-based front end, show progress in a temporary 
         cell. *)
      If[$Notebooks,
        statusCell = PrintTemporary["Walk " <> ToString[walk] <> " of " <> 
          ToString[numWalks] <> ": " <> originURL];
      ];
      LogMessage["INFO", "Walk Number: " <> ToString[walk] <> " (of " <> 
        ToString[numWalks] <> ")"];
      
      (* Perform a random walk. *)
      walkedURLs = RandomWalkWeb[originURL, numSteps];
      
      (* Export the list of successfully visited URLs to a text file named 
         after the current walk, and count the export if it succeeded. *)
      If[walkedURLs =!= $Failed,
        targetFile = FileNameJoin[{walkFolder, 
          $DataFilePrefix <> ToString[walk] <> ".txt"}];
        If[Export[targetFile, walkedURLs] =!= $Failed, exportCount++];
      ];
      
      (* With a notebook-based front end, explicitly delete the temporary 
         cell. *)
      If[$Notebooks, NotebookDelete[statusCell]];,
      {walk, 1, numWalks}
    ];
    
    (* Number of successfully exported data files. *)
    LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
    exportCount
  ]


(* RandomWalkGraph[originURL, fileNumbers, numSteps]:
   Collects the requested number of steps (i.e., graph edges) from each of 
   the specified random walk data files (see GetStepData) and returns 
   {graph, enumeratedVertexLabels} as produced by RandomWalkGraph[stepData].  
   Options that belong to GetStepData are passed to it; all options are 
   passed on to RandomWalkGraph[stepData].  When no valid step data is 
   obtained, {$Failed, {}} is returned.
*)
RandomWalkGraph[
  originURL_String, 
  fileNumbers:{__Integer}, 
  numSteps_Integer /; numSteps >= 1, 
  opts:OptionsPattern[]
  ] := 
  Module[{$FunctionName = "RandomWalkGraph[<<ofn>>]", edges, plot = $Failed, 
          legend = {}},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    
    (* Gather the steps, handing GetStepData only the options it knows. *)
    edges = GetStepData[originURL, fileNumbers, numSteps, 
      FilterRules[{opts}, Options[GetStepData]]];
    
    If[StepDataQ[edges],
      (* Build the plot of the graph and the list of enumerated vertex 
         labels from the step data. *)
      {plot, legend} = RandomWalkGraph[edges, opts];
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];,
      (* else *)
      
      (* No usable step data: the initial values ($Failed and {}) are 
         returned. *)
      LogMessage["ERROR", "GetStepData failed to return step data."];
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
        " with status ($Failed)."];
    ];
    
    {plot, legend}
  ]


(* RandomWalkGraph[stepData]:
   Returns {graph, enumeratedVertexLabels} for the given step data (see 
   GetStepData).  The vertex labels found in the step data are sorted and 
   numbered from 1; the graph is built from ToGraphData[stepData] with every 
   label replaced by its number, and enumeratedVertexLabels is the key 
   {{number, label},...}.
   
   Options:
     - Options of Graph are passed on to Graph.
     - VertexIcon: if True, each vertex is drawn with the favorite icon 
       (i.e., favicon) of its site and the key becomes 
       {{number, icon, label},...}.  Icons are cached as PNG files below 
       $BaseDataDirectory/favicons and downloaded with GetFavicon when they 
       are missing.  Otherwise, the standard vertex shape is used.
     - VertexType: "Hostname" treats the labels as hostnames; "DomainName" 
       and any other value treat them as domain names (the URL is then 
       formed with a "www." prefix).
     - RefreshIconCache: if True, cached icons are ignored and downloaded 
       again.
   
   The vertex numbers are assigned in a single pass over the sorted labels 
   (instead of searching the list once per label).
*)
RandomWalkGraph[stepData_List?StepDataQ, opts:OptionsPattern[]] := 
  Module[{$FunctionName = "RandomWalkGraph[<<s>>]", gOptions, vertexLabels, 
          enumeratedVertexLabels = {}, replacementRules, graphData, 
          useHostnames, faviconDirectory, j, vertexLabel, faviconFileName, 
          favicon, favicons = {}, url, pt, resizedFavicon, vertexShape, 
          graph = $Failed},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    
    (* Filter options to pass to subsequent function calls. *)
    gOptions = FilterRules[{opts}, Options[Graph]];
    
    (* Get an alphabetically sorted list of unique vertex labels. *)
    vertexLabels = Sort[DeleteDuplicates[Flatten[stepData]]];
    
    (* Enumerate the vertex labels.  This yields a list that can be used as 
       a key for the graph. *)
    enumeratedVertexLabels = MapIndexed[{First[#2], #1}&, vertexLabels];
    
    (* Generate a list of replacement rules that convert the original vertex 
       labels to numerals. *)
    replacementRules = Thread[vertexLabels->Range[Length[vertexLabels]]];
    
    (* Convert the step data to graph data and apply the replacement rules. *)
    graphData = ToGraphData[stepData] /. replacementRules;
    
    (* If the VertexIcon option is set to True, then use the vertex's 
       associated favorite icon (i.e., favicon) as its shape.  Otherwise, use 
       the standard vertex shape. *)
    If[OptionValue[VertexIcon],
      (* Hostname handling is used only for the "Hostname" vertex type; 
         every other value is treated as a domain name. *)
      useHostnames = OptionValue[VertexType] === "Hostname";
      
      (* Define the folder that caches the icons and create it if it does 
         not exist. *)
      faviconDirectory = FileNameJoin[{$BaseDataDirectory, "favicons", 
        If[useHostnames, "hostname", "domain_name"]}];
      If[!FileExistsQ[faviconDirectory],
        CreateDirectory[faviconDirectory];
      ];
      
      (* Get each vertex's associated favorite icon.  Every pass yields the 
         rule (vertex number -> resized icon). *)
      favicons = Table[
        vertexLabel = vertexLabels[[j]];
        
        (* Define the icon's file name.  It is derived from an MD5 hash of 
           the vertex label. *)
        faviconFileName = FileNameJoin[{faviconDirectory, 
          IntegerString[Hash[vertexLabel, "MD5"], 16] <> ".png"}];
        
        (* Use the cached icon if it exists and no refresh is forced.  
           Otherwise, attempt to download the icon from the corresponding 
           URL and cache it locally for future use. *)
        If[FileExistsQ[faviconFileName] && !OptionValue[RefreshIconCache],
          favicon = Import[faviconFileName, "PNG"];,
          (* else *)
          
          (* Form the URL depending upon the VertexType option. *)
          url = URLJoin[{"http:", 
            If[useHostnames, vertexLabel, "www." <> vertexLabel], ""}];
          
          (* If a notebook-based front end is being used, then print 
             information to a temporary cell. *)
          If[$Notebooks,
            pt = PrintTemporary["Downloading icon from URL: " <> url];
          ];
          
          (* Get the favorite icon (the last element of GetFavicon's 
             result). *)
          favicon = Last[GetFavicon[url]];
          
          (* If a notebook-based front end is being used, then explicitly 
             delete the temporary cell. *)
          If[$Notebooks,
            NotebookDelete[pt];
          ];
          
          (* If the icon is invalid, then assign the vertex a generated 
             shape. *)
          If[favicon === $Failed,
            favicon = Graphics[{GrayLevel[0.75], 
              Rectangle[RoundingRadius->0.1]}, ImageSize->{32}];
          ];
          
          (* Export the icon as a PNG. *)
          Export[faviconFileName, favicon, "PNG"];
        ];
        
        (* Resize the icon to have the given maximum pixel width or height. *)
        resizedFavicon = ImageResize[favicon, {16}];
        
        (* Insert the resized favorite icon into the key entry of this 
           vertex.  This yields an enhanced list that can be used as a 
           legend for the graph. *)
        enumeratedVertexLabels[[j]] = 
          Insert[enumeratedVertexLabels[[j]], resizedFavicon, 2];
        
        j->resizedFavicon, 
        {j, 1, Length[vertexLabels]}
      ];
      
      (* Assign the custom vertex shapes. *)
      vertexShape = favicons;,
      (* else *)
      vertexShape = Automatic;
    ];
    
    (* Store a plot of the graph. *)
    graph = Graph[graphData, Append[gOptions, VertexShape->vertexShape]];
    
    (* Return the graph and the list of enumerated vertex labels. *)
    LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];
    Return[{graph, enumeratedVertexLabels}];
  ]


(* RandomWalkWeb[originURL, numSteps]:
   Performs a random walk on the Web.  Starting at the origin URL, each 
   visited page is requested with GetSource and the next URL is chosen at 
   random from the page's hyperlinks (see GetHyperlinks).  A page counts as 
   successfully visited only if it returned source and at least one 
   hyperlink other than itself.  When a page fails, the walk backtracks by 
   choosing another hyperlink of the last successfully visited page; the walk 
   ends early when none is left.
   
   RandomWalkWeb returns the list of successfully visited URLs, or $Failed 
   if no URL was successfully visited.  The step counter starts at 0 for the 
   origin URL, so a complete walk contains numSteps + 1 URLs.
*)

(* Gather criteria: first part of domain name *)
$GatherCriteria = 
  Function[link, First[StringSplit[DomainName[link], "."]]];

RandomWalkWeb[originURL_String, numSteps_Integer] := 
  Module[{$FunctionName = "RandomWalkWeb", notebook, step = 0, statusText, 
          url = originURL, address, source, requested = {}, links, 
          walked = {}, fallbackLinks = {}, pickNext, backtrack = False},
    LogMessage["DEBUG", "Entering " <> $FunctionName <> "."];
    LogMessage["INFO", "Steps Requested: numSteps = " <> ToString[numSteps]];
    
    (* Chooses the next URL: gather the candidates by the defined gather 
       criteria, randomly choose one group, and then randomly choose a URL 
       from that group.  This attempts to reduce the probability that the 
       next URL to visit points to the same domain. *)
    pickNext = Function[candidates, 
      RandomChoice[RandomChoice[GatherBy[candidates, $GatherCriteria]]]];
    
    (* With a notebook-based front end, remember the notebook being 
       evaluated so that its status area can be updated. *)
    If[$Notebooks, notebook = EvaluationNotebook[]];
    
    While[step <= numSteps,
      (* With a notebook-based front end, display progress in the window's 
         status area. *)
      If[$Notebooks,
        statusText = "Step " <> ToString[step] <> " of " <> 
          ToString[numSteps] <> ": " <> url;
        SetOptions[notebook, WindowStatusArea->statusText];
      ];
      
      (* Request the current URL. *)
      {address, source} = GetSource[url];
      
      (* Internal record of every responding address (it is not read 
         anywhere in this function). *)
      AppendTo[requested, address];
      
      If[source =!= $Failed,
        (* Unique hyperlinks of the page without the requested and the 
           responding URL.  This ensures that the URL is not visited more 
           than once consecutively. *)
        links = DeleteCases[
          DeleteDuplicates[GetHyperlinks[address, source]], url | address];
        
        If[!EmptyQ[links],
          (* The page counts as successfully visited. *)
          AppendTo[walked, address];
          
          (* Remember its hyperlinks for backtracking.  This eliminates the 
             need to request the source from this page again. *)
          fallbackLinks = links;
          
          (* Choose the next URL and advance the step counter. *)
          url = pickNext[links];
          step++;,
          (* else *)
          LogMessage["ERROR", "No hyperlinks available."];
          backtrack = True;
        ];,
        (* else *)
        LogMessage["ERROR", "No source available."];
        backtrack = True;
      ];
      
      (* The visited URL either returned no source or no hyperlinks.  Attempt 
         to backtrack. *)
      If[backtrack,
        LogMessage["DEBUG", 
          "Attempting to backtrack using previous hyperlinks."];
        backtrack = False;
        
        (* Make sure that the URL that triggered the backtrack is not 
           visited again. *)
        fallbackLinks = DeleteCases[fallbackLinks, url];
        
        If[EmptyQ[fallbackLinks],
          (* Nothing is left to backtrack to: exit the loop. *)
          LogMessage["ERROR", "No hyperlinks available for backtracking."];
          Break[];,
          (* else *)
          url = pickNext[fallbackLinks];
        ];
      ];
    ];
    
    (* With a notebook-based front end, clear the status area. *)
    If[$Notebooks, SetOptions[notebook, WindowStatusArea->""]];
    
    (* Log the outcome.  Without any successfully visited URL, the result is 
       $Failed. *)
    If[!EmptyQ[walked],
      LogMessage["INFO", "Steps Completed: step = " <> ToString[step - 1]];
      
      (* If the specified number of steps was completed, then exit is 
         normal.  Otherwise, exit is premature. *)
      If[numSteps == step - 1,
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> "."];,
        (* else *)
        LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
          " prematurely (step < numSteps)."];
      ];,
      (* else *)
      walked = $Failed;
      
      LogMessage["INFO", "Steps Completed: step = " <> ToString[step]];
      LogMessage["DEBUG", "Exiting " <> $FunctionName <> 
        " with status ($Failed)."];
    ];
    
    walked
  ]


(* SiteRoot[url]:
   Returns the site root for the specified URL, formed by rejoining the 
   first two components of the split URL (i.e., transfer protocol and domain 
   name, without the trailing "/").  SiteRoot is only valid for fully 
   qualified input; $Failed is returned for any other URL.
*)
SiteRoot[url_String] := 
  If[!AbsoluteLinkQ[url],
    (* Not an absolute link. *)
    $Failed,
    (* else *)
    URLJoin[Take[URLSplit[url], 2]]
  ]


(* SMTPSchemeQ[url]:
   Gives True if the specified URL, after surrounding whitespace has been 
   trimmed, begins with the e-mail scheme ("mailto:"), and False otherwise.  
   The comparison ignores case.
*)
SMTPSchemeQ[url_String] := 
  (* StringMatchQ tests the entire string, so the pattern is anchored at 
     the start without an explicit StartOfString. *)
  StringMatchQ[StringTrim[url], "mailto:" ~~ ___, IgnoreCase->True]


(* StepDataQ[sdq]:
   Gives True if the argument is a non-empty list whose elements are all 
   two-element lists (i.e., {{v_i, v_j},...}), which is the form of step 
   data, and False otherwise.
*)
StepDataQ[sdq_] := 
  MatchQ[sdq, {Repeated[{_, _}]}]


(* StringPad[str, n, x]:
   Splits str into characters, pads the character list to length n with the 
   element x using PadRight, and joins the result into a string.  For 
   positive n the padding is added on the right, and a string longer than n 
   is truncated to its first n characters.  Negative n is handed to PadRight 
   unchanged (documented by the original author as padding on the left).
*)
StringPad[str_String, n_Integer, x_String] := 
  Characters[str] // PadRight[#, n, x]& // StringJoin


(* ToAbsoluteLink[url, hyperlink]:
   Converts the relative [2] hyperlink to an absolute link based on the 
   specified (absolute) URL.

   Arguments:
     url        absolute URL of the page on which the hyperlink was found; 
                the definition only applies when AbsoluteLinkQ accepts it.
     hyperlink  the link to resolve.

   Result (a string):
     absolute hyperlink   returned unchanged
     "#"                  url
     "#anchor"            url, minus any existing fragment, plus the anchor
     "./rest"             BaseURL[url] <> rest
     "../../rest"         base URL with one trailing component removed per 
                          leading "../" (never above the site root), then 
                          "/" <> rest
     "//host/path"        scheme of url <> hyperlink
     "/path"              SiteRoot[url] <> hyperlink
     anything else        BaseURL[url] <> hyperlink  (this includes names 
                          that merely start with a dot, e.g. ".hidden.html")

   NOTE(review): the string concatenations assume BaseURL returns a value 
   that ends in "/" -- confirm against its definition.
*)
ToAbsoluteLink[url_String?AbsoluteLinkQ, hyperlink_String] := 
  Module[{$FunctionName = "ToAbsoluteLink", absoluteLink, hashlessBase, 
          baseURL, component, pattern = "../", patternCount, 
          replacementPattern, hyperlinkComponent, baseComponents, siteRoot, 
          uriScheme},
    (* If the hyperlink is absolute, then return it without conversion.  
       Otherwise, convert the relative link. *)
    If[AbsoluteLinkQ[hyperlink],
      absoluteLink = hyperlink;,
      (* else *)
      
      (* Relative link *)
      If[StringMatchQ[hyperlink, StartOfString ~~ ("#" | "." | "/") ~~ ___],
        (* If the hyperlink begins with a hash, then process it accordingly. *)
        If[StringMatchQ[hyperlink, StartOfString ~~ "#" ~~ ___],
          (* If the hyperlink consists of a single hash, then return the URL. *)
          If[StringMatchQ[hyperlink, StartOfString ~~ "#" ~~ EndOfString],
            absoluteLink = url;,
            (* else *)
            
            (* e.g., #anchor *)
            
            (* If the URL does not contain a hash, then append the hashed 
               hyperlink to the end of it to form the absolute link.  
               Otherwise, everything from the FIRST hash onward (the 
               existing fragment) is removed from the URL before the hashed 
               hyperlink is appended. *)
            If[StringFreeQ[url, "#"],
              absoluteLink = url <> hyperlink;,
              (* else *)
              hashlessBase = StringTake[url, 
                StringPosition[url, "#", 1][[1, 1]] - 1];
              absoluteLink = hashlessBase <> hyperlink;
            ];
          ];
        ];
        
        (* If the hyperlink begins with a dot, then process it accordingly. *)
        If[StringMatchQ[hyperlink, StartOfString ~~ "." ~~ ___],
          (* If the hyperlink begins with a single dot and slash, then drop 
             them and append the remainder to the base URL. *)
          If[StringMatchQ[hyperlink, StartOfString ~~ "./" ~~ ___],
            baseURL = BaseURL[url];
            
            component = StringCases[hyperlink, 
              Longest["./" ~~ comp__]:>comp, IgnoreCase->True];
            If[!EmptyQ[component],
              absoluteLink = baseURL <> First[component];,
              (* else *)
              absoluteLink = baseURL;
            ];,
            (* else *)
            
            (* e.g., ../dir2/ *)
            
            (* Isolate the run of "../" segments at the very start of the 
               hyperlink.  Only these leading segments climb the directory 
               tree; a "../" that appears later in the hyperlink is left in 
               place as part of the remaining component. *)
            replacementPattern = StringCases[hyperlink, 
              StartOfString ~~ Repeated[pattern]];
            patternCount = If[replacementPattern === {},
              0,
              (* else *)
              Quotient[StringLength[First[replacementPattern]], 
                StringLength[pattern]]
            ];
            
            baseURL = BaseURL[url];
            
            (* If the hyperlink starts with at least one "../", then remove 
               the leading run and construct the absolute link accordingly.  
               Otherwise, the hyperlink merely begins with a dot (e.g., 
               .hidden.html) and is resolved like any other relative name. *)
            If[patternCount >= 1,
              (* Remove the leading "../" run from the hyperlink. *)
              hyperlinkComponent = StringDrop[hyperlink, 
                patternCount*StringLength[pattern]];
              
              (* Determine the components of the base URL. *)
              baseComponents = URLSplit[baseURL];
              
              (* If the number of base components is greater than the pattern 
                 count (plus two, for the scheme and host), then construct 
                 the absolute link by removing the required number of 
                 components from the right side of the base URL and 
                 appending the remaining hyperlink component.  Otherwise, 
                 append the remaining hyperlink component to the site root. *)
              If[Length[baseComponents] > patternCount + 2,
                absoluteLink = URLJoin[Drop[baseComponents, -patternCount]] <> 
                  "/" <> hyperlinkComponent;,
                (* else *)
                siteRoot = SiteRoot[url];
                absoluteLink = siteRoot <> "/" <> hyperlinkComponent;
              ];,
              (* else *)
              absoluteLink = baseURL <> hyperlink;
            ];
          ];
        ];
        
        (* If the hyperlink begins with a slash, then process it accordingly. *)
        If[StringMatchQ[hyperlink, StartOfString ~~ "/" ~~ ___],
          (* If the hyperlink begins with two slashes, then prepend the URI 
             scheme (e.g., "http:") to it. *)
          If[StringMatchQ[hyperlink, StartOfString ~~ "//" ~~ ___],
            uriScheme = First[URLSplit[url]];
            absoluteLink = uriScheme <> hyperlink;,
            (* else *)
            
            (* e.g., /dir3/ *)
            siteRoot = SiteRoot[url];
            absoluteLink = siteRoot <> hyperlink;
          ];
        ];,
        (* else *)
        
        (* e.g., page3.html *)
        baseURL = BaseURL[url];
        absoluteLink = baseURL <> hyperlink;
      ];
    ];
    
    (* Return the absolute link. *)
    Return[absoluteLink];
  ]


(* ToGraphData[stepData]:
   Converts step data to (directed) graph data (i.e., each {v_i, v_j} 
   becomes DirectedEdge[v_i, v_j]).  Returns the list of directed edges, or 
   $Failed when StepDataQ rejects the input.

   Only the top-level pairs are converted; the vertices themselves are left 
   untouched, even when a vertex happens to be a two-element list.
*)
ToGraphData[stepData_List] := 
  Module[{$FunctionName = "ToGraphData", graphData = $Failed},
    (* Check that the step data has the appropriate form. *)
    If[StepDataQ[stepData],
      (* Replace the head of every element at level 1 (and only level 1) 
         with DirectedEdge. *)
      graphData = Apply[DirectedEdge, stepData, {1}];
    ];
    
    (* Return the graph data. *)
    Return[graphData];
  ]


(* URLJoin[url]:
   Assembles the list of parts url into a URL path string; nothing is 
   looked up or fetched.  The shape of the result depends on the first part:
     - empty list                 ->  ""
     - first part ends in ":"     ->  first <> "//" <> remaining parts 
                                      joined by "/"  (absolute)
     - first part is ".."         ->  all parts joined by "/"  (relative)
     - anything else              ->  "/" <> all parts joined by "/"  
                                      (root-relative)
*)
URLJoin[url_List] := 
  Which[
    EmptyQ[url],
      "",
    StringMatchQ[First[url], ___ ~~ ":" ~~ EndOfString, IgnoreCase->True],
      StringJoin[First[url], "//", Riffle[Rest[url], "/"]],
    StringMatchQ[First[url], "..", IgnoreCase->True],
      StringJoin[Riffle[url, "/"]],
    True,
      StringJoin["/", Riffle[url, "/"]]
  ]


(* URLSplit[url]:
   Breaks a uniform resource locator (URL) path string into its 
   "/"-separated parts, discarding empty parts (such as the one between the 
   two slashes that follow the scheme).  This is a pure string operation; 
   the URL is never looked up.
*)
URLSplit[url_String] := 
  Select[StringSplit[url, "/"], # =!= "" &]


(* WriteLine[channel, expr]:
   Passes every expr_i, in order, to WriteString on the specified output 
   channel and finishes the output with a newline character.
*)
WriteLine[stream_, items__] := 
  WriteString[stream, items, "\n"]


End[]; (* RandomWalkWeb`Private` *)


(* Set symbol attributes: *)
(* Attributes are applied only here, after all definitions have been made.  
   Listable makes a function thread automatically over list arguments, 
   Protected blocks further definitions for the symbol, and ReadProtected 
   hides its definitions from inspection. *)
SetAttributes[AbsoluteLinkQ, {Listable, Protected, ReadProtected}]
SetAttributes[AbsoluteLinks, {Protected}]
SetAttributes[BaseURL, {Listable, Protected, ReadProtected}]
SetAttributes[DomainName, {Listable, Protected, ReadProtected}]
SetAttributes[EffectiveTLDNameQ, {Listable, Protected, ReadProtected}]
SetAttributes[GetFavicon, {Listable, Protected, ReadProtected}]
SetAttributes[GetHyperlinks, {Listable, Protected, ReadProtected}]
SetAttributes[GetSource, {Listable, Protected, ReadProtected}]
SetAttributes[GetStepData, {Protected, ReadProtected}]
SetAttributes[Hostname, {Listable, Protected, ReadProtected}]
SetAttributes[HyperlinkType, {Protected}]
SetAttributes[LogMessage, {Protected, ReadProtected}]
SetAttributes[PerformRandomWalks, {Listable, Protected, ReadProtected}]
SetAttributes[RandomWalkGraph, {Protected, ReadProtected}]
SetAttributes[RandomWalkWeb, {Listable, Protected, ReadProtected}]
SetAttributes[RefreshIconCache, {Protected}]
SetAttributes[SiteRoot, {Listable, Protected, ReadProtected}]
SetAttributes[ToAbsoluteLink, {Listable, Protected, ReadProtected}]
SetAttributes[ToGraphData, {Protected, ReadProtected}]
SetAttributes[VertexIcon, {Protected}]
SetAttributes[VertexType, {Protected}]
(* The $-prefixed global symbols below receive only ReadProtected, so they 
   remain assignable; $ETLDNInfo is the one exception and is also 
   Protected. *)
SetAttributes[$BaseDataDirectory, {ReadProtected}]
SetAttributes[$DataFilePrefix, {ReadProtected}]
SetAttributes[$DebugLogging, {ReadProtected}]
SetAttributes[$ETLDNInfo, {Protected, ReadProtected}]
SetAttributes[$LogFileName, {ReadProtected}]
SetAttributes[$Timeout, {ReadProtected}]
SetAttributes[$UserAgent, {ReadProtected}]


EndPackage[] (* RandomWalkWeb` *)
