Docs: sitemap generator, basic robots.txt. Fixes #916.

This commit is contained in:
Mark Harrah 2013-10-17 15:00:48 -04:00
parent 715a5655ef
commit d6ca66d406
3 changed files with 144 additions and 7 deletions

View File

@ -8,14 +8,17 @@
import sbtsite.SphinxSupport
import SiteKeys.{makeSite,siteMappings}
import Sxr.sxr
import SiteMap.Entry
object Docs
{
val cnameFile = SettingKey[File]("cname-file", "Location of the CNAME file for the website.")
val rootFiles = SettingKey[Seq[File]]("root-files", "Location of file that will be copied to the website root.")
val latestRelease = SettingKey[Boolean]("latest-release")
val siteExcludes = Set(".buildinfo", "objects.inv")
// A file is included in the site unless its name is one of `siteExcludes`.
def siteInclude(f: File) = !siteExcludes(f.getName)
// URL prefix on GitHub under which the raw Sphinx sources for `siteSourceVersion` are found.
def siteSourceBase(siteSourceVersion: String) =
  "https://github.com/sbt/sbt/raw/" + siteSourceVersion + "/src/sphinx/"
val sbtSiteBase = uri("http://www.scala-sbt.org/")
val SnapshotPath = "snapshot"
val ReleasePath = "release"
@ -41,12 +44,16 @@ object Docs
git.remoteRepo := "git@github.com:sbt/sbt.github.com.git",
localRepoDirectory,
ghkeys.synchLocal <<= synchLocalImpl,
cnameFile <<= (sourceDirectory in SphinxSupport.Sphinx) / "CNAME",
rootFiles := {
val base = (sourceDirectory in SphinxSupport.Sphinx).value
Seq("CNAME", "robots.txt").map(base / _)
},
latestRelease in ThisBuild := false,
commands += setLatestRelease,
GitKeys.gitBranch in ghkeys.updatedRepository := Some("master")
)
def localRepoDirectory = ghkeys.repository := {
// distinguish between building to update the site or not so that CI jobs
// that don't commit+publish don't leave uncommitted changes in the working directory
@ -67,12 +74,11 @@ object Docs
}
val siteVersion = sbtV.takeWhile(_ != '-')
val siteSourceVersion = if(snap) release(siteVersion) else siteVersion
val siteSourceBase = s"https://github.com/sbt/sbt/raw/$siteSourceVersion/src/sphinx/"
Map[String,String](
"sbt.full.version" -> sbtV,
"sbt.partial.version" -> release(sbtV),
"sbt.site.version" -> siteVersion,
"sbt.site.source.base" -> siteSourceBase,
"sbt.site.source.base" -> siteSourceBase(siteSourceVersion),
"sbt.binary.version" -> CrossVersion.binarySbtVersion(sbtV),
"scala.full.version" -> scalaV,
"scala.partial.version" -> release(scalaV),
@ -80,13 +86,14 @@ object Docs
)
}
def synchLocalImpl = (ghkeys.privateMappings, ghkeys.updatedRepository, version, isSnapshot, latestRelease, streams, cnameFile) map {
(mappings, repo, v, snap, latest, s, cname) =>
def synchLocalImpl = (ghkeys.privateMappings, ghkeys.updatedRepository, version, isSnapshot, latestRelease, streams, rootFiles) map {
(mappings, repo, v, snap, latest, s, roots) =>
val versioned = repo / v
IO.delete(versioned)
val toCopy = for( (file, target) <- mappings if siteInclude(file) ) yield (file, versioned / target)
IO.copy(toCopy)
IO.copyFile(cname, repo / cname.getName)
for(f <- roots)
IO.copyFile(f, repo / f.getName)
IO.touch(repo / ".nojekyll")
IO.write(repo / "versions.js", versionsJs(sortVersions(collectVersions(repo))))
if(!snap && latest)
@ -94,8 +101,43 @@ object Docs
if(snap || latest)
linkSite(repo, v, if(snap) SnapshotPath else ReleasePath, s.log)
s.log.info("Copied site to " + versioned)
if(latest) {
val (index, siteMaps) = SiteMap.generate(repo, sbtSiteBase, gzip=true, siteEntry(v), s.log)
s.log.info(s"Generated site map index: $index")
s.log.debug(s"Generated site maps: ${siteMaps.mkString("\n\t", "\n\t", "")}")
}
repo
}
/** Chooses the sitemap change frequency and priority for the page at `relPath`.
  *
  * `relPath` is matched against patterns that use '/' as the path separator.
  * `file` is accepted so the method fits the `(File, String) => Option[Entry]` shape expected
  * by `SiteMap.generate`, but it is not consulted.
  *
  * `CurrentVersion` is deliberately capitalized so that it can be used as a stable identifier
  * pattern below (a lowercase name would bind a new variable instead of comparing).
  *
  * Every path currently receives an entry; the result is never `None`. */
def siteEntry(CurrentVersion: String)(file: File, relPath: String): Option[Entry] =
{
  val apiOrSxr = """([^/]+)/(api|sxr)/.*""".r
  val docs = """([^/]+)/docs/.*""".r
  val old077 = """0\.7\.7/.*""".r
  val snapshot = """(.+-SNAPSHOT|snapshot)/.+/.*""".r
  // highest priority is the home page
  // X/docs/ are higher priority than X/(api|sxr)/
  // release/ is slightly higher priority than <releaseVersion>/
  // non-current releases are low priority
  // 0.7.7 documentation is very low priority
  // snapshots docs are very low priority
  // everything else, including the manual redirects from the old version of the site,
  //   has no priority at all
  // The order of the cases matters: the first matching pattern wins.
  relPath match {
    case "index.html" => Some( Entry("weekly", 1.0) )
    case docs(ReleasePath) => Some( Entry("weekly", 0.9) )
    case docs(CurrentVersion) => Some( Entry("weekly", 0.8) )
    case apiOrSxr(ReleasePath, _) => Some( Entry("weekly", 0.6) )
    case apiOrSxr(CurrentVersion, _) => Some( Entry("weekly", 0.5) )
    case snapshot(_) => Some( Entry("weekly", 0.02) )
    case old077() => Some( Entry("never", 0.01) )
    case docs(_) => Some( Entry("never", 0.2) )
    case apiOrSxr(_, _) => Some( Entry("never", 0.1) )
    case _ => Some( Entry("never", 0.0) )
  }
}
// Renders the JavaScript statement that lists the available documentation versions, for example
//   var availableDocumentationVersions = ['0.13.0', '0.12.4']
// Each version is quoted individually so that an empty `vs` produces `[]` and not `['']`.
def versionsJs(vs: Seq[String]): String =
  "var availableDocumentationVersions = " + vs.map(v => s"'$v'").mkString("[", ", ", "]")
// Names of the direct children of `base` that `versionFilter` selects,
// i.e. the directories that are explicit versions.
def collectVersions(base: File): Seq[String] =
{
  val versionDirs = (base * versionFilter).get
  versionDirs map { _.getName }
}

91
project/SiteMap.scala Normal file
View File

@ -0,0 +1,91 @@
import sbt._
/** Generates site maps and a site map index (sitemaps.org 0.9 schema) for a local copy of a site. */
object SiteMap
{
  /** Represents the configurable aspects of a sitemap entry.
    * `changeFreq` is written verbatim to `<changefreq>` and `priority` to `<priority>`;
    * `priority` must lie in the inclusive range 0.0 to 1.0. */
  final case class Entry(changeFreq: String, priority: Double) {
    assert(priority >= 0.0 && priority <= 1.0, s"Priority must be between 0.0 and 1.0:, was $priority")
  }

  /** Writes one site map per top-level directory of `repoBase` plus a site map index in `repoBase`.
    *
    * Only `*.html` files are considered.  HTML files directly in `repoBase` and those below
    * symlinked top-level directories go into the site map written to `repoBase` itself; every
    * other top-level directory gets its own `sitemap.xml` covering the HTML files below it.
    * Directories that yield no entries get no site map.
    *
    * @param repoBase the local directory holding the site
    * @param remoteBase the URI against which paths relative to `repoBase` are resolved for `<loc>`
    * @param gzip if true, each generated XML file is also gzipped and the `.gz` file is the one returned
    * @param entry selects the configuration for a file given the file and its '/'-separated path
    *              relative to `repoBase`; files for which it returns `None` are left out
    * @param log receives debug output about detected symlinks
    * @return the site map index file and the site map files it references */
  def generate(repoBase: File, remoteBase: URI, gzip: Boolean, entry: (File, String) => Option[Entry], log: Logger): (File, Seq[File]) =
  {
    // Pairs each file with its path relative to `repoBase`.
    // The platform separator is normalized to '/' because the path is used both to build URIs
    // and as the input to `entry`; on platforms where the separator already is '/' this is a no-op.
    def relativize(files: PathFinder): Seq[(File, String)] =
      (files pair relativeTo(repoBase)) map { case (f, path) =>
        (f, path.replace(java.io.File.separatorChar, '/'))
      }

    // The <url> elements for all files that `entry` accepts.
    def entries(files: PathFinder) =
      relativize(files) flatMap { case (f, path) =>
        entry(f, path).toList map { e =>
          entryXML(e, f, path)
        }
      }

    // Wraps <url> elements in the <urlset> root element.
    def entriesXML(entries: Seq[xml.Node]): xml.Elem =
    {
      assert(entries.size <= 50000, "A site map cannot contain more than 50,000 entries.")
      <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
        {entries}
      </urlset>
    }

    // A single <url> element describing `f`, which is located at `relPath` relative to the site root.
    def entryXML(e: Entry, f: File, relPath: String) =
      <url>
        <loc>{remoteBase.resolve(relPath).toString}</loc>
        <lastmod>{lastModifiedString(f)}</lastmod>
        <changefreq>{e.changeFreq}</changefreq>
        <priority>{e.priority.toString}</priority>
      </url>

    // Writes the site map for `files` into `dir`, or returns None without writing when there are no entries.
    def singleSiteMap(dir: File, files: PathFinder): Option[File] = {
      val es = entries(files)
      if(es.isEmpty) None else Some( writeXMLgz(dir / "sitemap.xml", dir / "sitemap.xml.gz", gzip, entriesXML(es)) )
    }

    // A single <sitemap> element of the index, referring to the site map file `sub`.
    def indexEntryXML(sub: File, relPath: String): xml.Elem =
      <sitemap>
        <loc>{remoteBase.resolve(relPath).toString}</loc>
        <lastmod>{lastModifiedString(sub)}</lastmod>
      </sitemap>

    // Wraps <sitemap> elements in the <sitemapindex> root element.
    def indexEntriesXML(entries: Seq[xml.Node]): xml.Elem =
      <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
        {entries}
      </sitemapindex>

    def indexEntries(subs: Seq[File]) =
      relativize(subs) map { case (f, path) => indexEntryXML(f, path) }

    // Writes the index referring to the site maps `subs` into `dir`.
    def siteMapIndex(dir: File, subs: Seq[File]): File =
    {
      val xml = indexEntriesXML(indexEntries(subs))
      writeXMLgz(dir / "sitemap_index.xml", dir / "sitemap_index.xml.gz", gzip, xml)
    }

    // A file is treated as a symlink when its canonical form differs from its absolute form.
    def isSymlink(f: File) = f.getCanonicalFile != f.getAbsoluteFile

    val (symlinks, normal) = (repoBase * DirectoryFilter).get.partition(dir => isSymlink(dir))
    log.debug("Detected symlinks: " + symlinks.mkString("\n\t", "\n\t", ""))

    // Symlinked directories do not get their own site map: their pages are listed in the root one.
    val subMaps =
      singleSiteMap(repoBase, (repoBase * "*.html") +++ (symlinks ** "*.html") ).toList ++
      normal.flatMap( dir =>
        singleSiteMap(dir, dir ** "*.html").toList
      )
    val index = siteMapIndex(repoBase, subMaps)
    (index, subMaps)
  }

  // generates a string suitable for a sitemap file representing the last modified time of the given File
  // (date only, formatted as yyyy-MM-dd)
  private[this] def lastModifiedString(f: File): String =
  {
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call
    val formatter = new java.text.SimpleDateFormat("yyyy-MM-dd")
    formatter.format(new java.util.Date(f.lastModified))
  }

  // writes the provided XML node to `output` and then gzips it to `gzipped` if `gzip` is true
  // returns the gzipped file when `gzip` is true and `output` otherwise
  private[this] def writeXMLgz(output: File, gzipped: File, gzip: Boolean, node: xml.Node): File =
  {
    writeXML(output, node)
    if(gzip) {
      IO.gzip(output, gzipped)
      gzipped
    } else
      output
  }

  // pretty prints `node` (line width 1000, indentation step 4) and writes it to `output` after an XML declaration
  private[this] def writeXML(output: File, node: xml.Node): Unit =
    write(output, new xml.PrettyPrinter(1000, 4).format(node))

  // writes the XML declaration followed by `xmlString` to `output`, replacing any existing content
  private[this] def write(output: File, xmlString: String): Unit =
  {
    // use \n as newline because the content comes from PrettyPrinter, which hard codes line endings to be \n
    val declaration = s"<?xml version='1.0' encoding='${IO.utf8.name}'?>\n"
    // the charset is passed explicitly so that the bytes written always agree with the declared encoding
    IO.write(output, declaration + xmlString, IO.utf8)
  }
}

4
src/sphinx/robots.txt Normal file
View File

@ -0,0 +1,4 @@
User-agent: *
Disallow: /snapshot/
Sitemap: http://www.scala-sbt.org/sitemap_index.xml.gz
Host: www.scala-sbt.org