Having stumbled across Google's Webmaster Tools a while ago, it's been in the back of my mind to create a custom HttpHandler to dynamically create a (virtual) SiteMap.xml file that Google (and other search engines) can use as a reference when spidering my sites.
I thought I'd share the basic implementation as a starting point. Ideally you'd add a property to each EPiServer PageType page for "change frequency" and "priority" which the sitemap generator would then use, but for this basic version I've simply set the homepage to 1.0 (the maximum priority) with a daily change frequency, and all other pages to 0.6 and weekly.
First declare the class and the required members:
public class SearchEngineSiteMap : IHttpHandler
{
/// <summary>
/// Tells ASP.NET whether this handler instance may be reused for further requests.
/// Always reports <c>true</c>.
/// </summary>
bool IHttpHandler.IsReusable
{
    get
    {
        return true;
    }
}
/// <summary>
/// Entry point invoked for each request mapped to this handler;
/// delegates to <see cref="GenerateSiteMap"/> to write the sitemap to the response.
/// </summary>
/// <param name="context">The context of the current HTTP request.</param>
void IHttpHandler.ProcessRequest(HttpContext context)
{
    this.GenerateSiteMap(context);
}
Next we need to configure the output stream, create an XmlTextWriter and the outer Xml block:
/// <summary>
/// Writes the complete sitemap document to the response stream: configures the
/// response headers, emits the <c>urlset</c> root element and then walks the page
/// tree from EPiServer's start page, adding one <c>url</c> entry per page.
/// </summary>
/// <param name="context">The current HTTP context whose response receives the XML.</param>
private void GenerateSiteMap(HttpContext context)
{
    // Set the response information: expire immediately and serve UTF-8 XML.
    context.Response.Expires = -1;
    context.Response.ContentType = "application/xml";
    Encoding encoding = new UTF8Encoding();
    context.Response.ContentEncoding = encoding;

    // Get EPiServer's StartPage (not the RootPage!). Done before any XML is
    // written so a failed lookup does not leave a half-written document behind.
    PageData startPage = EPiServer.DataFactory.Instance.GetPage(PageReference.StartPage);

    // SiteMaps can only contain unique urls, so maintain a list of added urls.
    List<string> alreadyAddedUrls = new List<string>();

    // The using block guarantees the writer is flushed and closed even when
    // rendering a page throws part-way through the tree walk.
    using (XmlTextWriter xmlTextWriter = new XmlTextWriter(context.Response.OutputStream, encoding))
    {
        xmlTextWriter.Formatting = Formatting.Indented;
        xmlTextWriter.WriteStartDocument();

        // Root element, declaring the sitemap protocol namespace.
        xmlTextWriter.WriteStartElement("urlset");
        xmlTextWriter.WriteAttributeString("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9");

        // Recursively add the start page and everything beneath it.
        RenderNodesToSiteMap(context, xmlTextWriter, alreadyAddedUrls, startPage);

        // Close the root element and finish the document.
        xmlTextWriter.WriteEndElement();
        xmlTextWriter.WriteEndDocument();
    }
}
Finally we need to add a method that will be recursively called for each published page in the site:
/// <summary>
/// Recursively converts the given page, and then each of its children, into
/// <c>url</c> entries in the sitemap. Pages that are not published are skipped,
/// together with everything beneath them.
/// </summary>
/// <param name="context">Current context (not used here; passed through to recursive calls).</param>
/// <param name="xmlTextWriter">XmlTextWriter to write the given page (p) to.</param>
/// <param name="alreadyAddedUrls">List of urls already added to the sitemap; updated as pages are written.</param>
/// <param name="p">The page to add to the sitemap.</param>
private void RenderNodesToSiteMap(
    HttpContext context,
    XmlTextWriter xmlTextWriter,
    List<string> alreadyAddedUrls,
    PageData p)
{
    // Make sure the page is published. This must test the page being rendered
    // (p); the original referenced "child", which is not in scope at this point.
    if (!PageDataUtilities.IsPagePublished(p))
    {
        return;
    }

    // Get the page's 'friendly' URL.
    string url = PageDataUtilities.GetFriendlyUrl(p, true);

    // A sitemap may list each URL only once, so skip anything already written.
    if (!alreadyAddedUrls.Contains(url))
    {
        alreadyAddedUrls.Add(url);

        xmlTextWriter.WriteStartElement("url");

        // WriteElementString already escapes XML special characters, so the URL
        // is passed as-is; HTML-encoding it first would double-escape it
        // (e.g. "&" would end up as "&amp;amp;").
        xmlTextWriter.WriteElementString("loc", url);

        // Last-modified timestamp in the form yyyy-MM-ddTHH:mm:ssZ.
        // NOTE(review): the value is formatted with a literal "Z" suffix without
        // any time-zone conversion - confirm that PageData.Changed is in UTC.
        xmlTextWriter.WriteElementString(
            "lastmod",
            p.Changed.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'", CultureInfo.InvariantCulture));

        // The start page changes daily and gets top priority;
        // every other page is weekly with a lower priority.
        bool isStartPage = p.PageLink == PageReference.StartPage;
        xmlTextWriter.WriteElementString("changefreq", isStartPage ? "daily" : "weekly");
        xmlTextWriter.WriteElementString("priority", isStartPage ? "1.0" : "0.6");

        // Close the url node.
        xmlTextWriter.WriteEndElement();
    }

    // Now loop through all the child pages and add each of them in turn.
    foreach (PageData child in EPiServer.DataFactory.Instance.GetChildren(p.PageLink))
    {
        RenderNodesToSiteMap(context, xmlTextWriter, alreadyAddedUrls, child);
    }
}
With all that done, the last thing is to register the handler in the Web.config (inside the system.web element) as follows:
<!-- Routes requests for "sitemap.xml" (any HTTP verb) to the SearchEngineSiteMap handler.
     "MyLibrary" is presumably a placeholder: substitute your own namespace and assembly name. -->
<httpHandlers>
...
<add
path="sitemap.xml"
verb="*"
type="MyLibrary.SearchEngineSiteMap, MyLibrary" />
...
</httpHandlers>