diff --git a/project/Docs.scala b/project/Docs.scala index f01912ad1..f417038eb 100644 --- a/project/Docs.scala +++ b/project/Docs.scala @@ -8,14 +8,17 @@ import sbtsite.SphinxSupport import SiteKeys.{makeSite,siteMappings} import Sxr.sxr + import SiteMap.Entry object Docs { - val cnameFile = SettingKey[File]("cname-file", "Location of the CNAME file for the website.") + val rootFiles = SettingKey[Seq[File]]("root-files", "Location of file that will be copied to the website root.") val latestRelease = SettingKey[Boolean]("latest-release") val siteExcludes = Set(".buildinfo", "objects.inv") def siteInclude(f: File) = !siteExcludes.contains(f.getName) + def siteSourceBase(siteSourceVersion: String) = s"https://github.com/sbt/sbt/raw/$siteSourceVersion/src/sphinx/" + val sbtSiteBase = uri("http://www.scala-sbt.org/") val SnapshotPath = "snapshot" val ReleasePath = "release" @@ -41,12 +44,16 @@ object Docs git.remoteRepo := "git@github.com:sbt/sbt.github.com.git", localRepoDirectory, ghkeys.synchLocal <<= synchLocalImpl, - cnameFile <<= (sourceDirectory in SphinxSupport.Sphinx) / "CNAME", + rootFiles := { + val base = (sourceDirectory in SphinxSupport.Sphinx).value + Seq("CNAME", "robots.txt").map(base / _) + }, latestRelease in ThisBuild := false, commands += setLatestRelease, GitKeys.gitBranch in ghkeys.updatedRepository := Some("master") ) + def localRepoDirectory = ghkeys.repository := { // distinguish between building to update the site or not so that CI jobs // that don't commit+publish don't leave uncommitted changes in the working directory @@ -67,12 +74,11 @@ object Docs } val siteVersion = sbtV.takeWhile(_ != '-') val siteSourceVersion = if(snap) release(siteVersion) else siteVersion - val siteSourceBase = s"https://github.com/sbt/sbt/raw/$siteSourceVersion/src/sphinx/" Map[String,String]( "sbt.full.version" -> sbtV, "sbt.partial.version" -> release(sbtV), "sbt.site.version" -> siteVersion, - "sbt.site.source.base" -> siteSourceBase, + "sbt.site.source.base" -> siteSourceBase(siteSourceVersion), "sbt.binary.version" -> CrossVersion.binarySbtVersion(sbtV), "scala.full.version" -> scalaV, "scala.partial.version" -> release(scalaV), @@ -80,13 +86,14 @@ object Docs ) } - def synchLocalImpl = (ghkeys.privateMappings, ghkeys.updatedRepository, version, isSnapshot, latestRelease, streams, cnameFile) map { - (mappings, repo, v, snap, latest, s, cname) => + def synchLocalImpl = (ghkeys.privateMappings, ghkeys.updatedRepository, version, isSnapshot, latestRelease, streams, rootFiles) map { + (mappings, repo, v, snap, latest, s, roots) => val versioned = repo / v IO.delete(versioned) val toCopy = for( (file, target) <- mappings if siteInclude(file) ) yield (file, versioned / target) IO.copy(toCopy) - IO.copyFile(cname, repo / cname.getName) + for(f <- roots) + IO.copyFile(f, repo / f.getName) IO.touch(repo / ".nojekyll") IO.write(repo / "versions.js", versionsJs(sortVersions(collectVersions(repo)))) if(!snap && latest) @@ -94,8 +101,43 @@ object Docs if(snap || latest) linkSite(repo, v, if(snap) SnapshotPath else ReleasePath, s.log) s.log.info("Copied site to " + versioned) + + if(latest) { + val (index, siteMaps) = SiteMap.generate(repo, sbtSiteBase, gzip=true, siteEntry(v), s.log) + s.log.info(s"Generated site map index: $index") + s.log.debug(s"Generated site maps: ${siteMaps.mkString("\n\t", "\n\t", "")}") + } + repo } + def siteEntry(CurrentVersion: String)(file: File, relPath: String): Option[Entry] = + { + val apiOrSxr = """([^/]+)/(api|sxr)/.*""".r + val docs = """([^/]+)/docs/.*""".r + val old077 = """0\.7\.7/.*""".r + val manualRedirects = """[^/]+\.html""".r + val snapshot = """(.+-SNAPSHOT|snapshot)/.+/.*""".r + // highest priority is the home page + // X/docs/ are higher priority than X/(api|sxr)/ + // release/ is slighty higher priority than / + // non-current releases are low priority + // 0.7.7 documentation is very low priority + // snapshots docs are very low priority + // the manual redirects from the old version of the site have no priority at all + relPath match { + case "index.html" => Some(Entry("weekly", 1.0)) + case docs(ReleasePath) => Some( Entry("weekly", 0.9) ) + case docs(CurrentVersion) => Some( Entry("weekly", 0.8) ) + case apiOrSxr(ReleasePath, _) => Some( Entry("weekly", 0.6) ) + case apiOrSxr(CurrentVersion, _) => Some( Entry("weekly", 0.5) ) + case snapshot(_) => Some( Entry("weekly", 0.02) ) + case old077() => Some( Entry("never", 0.01) ) + case docs(_) => Some( Entry("never", 0.2) ) + case apiOrSxr(_, _) => Some( Entry("never", 0.1) ) + case x => Some( Entry("never", 0.0) ) + } + } + def versionsJs(vs: Seq[String]): String = "var availableDocumentationVersions = " + vs.mkString("['", "', '", "']") // names of all directories that are explicit versions def collectVersions(base: File): Seq[String] = (base * versionFilter).get.map(_.getName) diff --git a/project/SiteMap.scala b/project/SiteMap.scala new file mode 100644 index 000000000..6910bd32f --- /dev/null +++ b/project/SiteMap.scala @@ -0,0 +1,91 @@ +import sbt._ + +object SiteMap +{ + // represents the configurable aspects of a sitemap entry + final case class Entry(changeFreq: String, priority: Double) { + assert(priority >= 0.0 && priority <= 1.0, s"Priority must be between 0.0 and 1.0:, was $priority") + } + def generate(repoBase: File, remoteBase: URI, gzip: Boolean, entry: (File, String) => Option[Entry], log: Logger): (File, Seq[File]) = + { + def relativize(files: PathFinder): Seq[(File, String)] = files pair relativeTo(repoBase) + def entries(files: PathFinder) = + relativize(files) flatMap { case (f, path) => + entry(f, path).toList map { e => + entryXML(e, f, path) + } + } + def entriesXML(entries: Seq[xml.Node]): xml.Elem = + { + assert(entries.size <= 50000, "A site map cannot contain more than 50,000 entries.") + + {entries} + + } + + def entryXML(e: Entry, f: File, relPath: String) = + + {remoteBase.resolve(relPath).toString} + {lastModifiedString(f)} + {e.changeFreq} + {e.priority.toString} + + + def singleSiteMap(dir: File, files: PathFinder): Option[File] = { + val es = entries(files) + if(es.isEmpty) None else Some( writeXMLgz(dir / "sitemap.xml", dir / "sitemap.xml.gz", gzip, entriesXML(es)) ) + } + def indexEntryXML(sub: File, relPath: String): xml.Elem = + + {remoteBase.resolve(relPath).toString} + {lastModifiedString(sub)} + + def indexEntriesXML(entries: Seq[xml.Node]): xml.Elem = + + {entries} + + def indexEntries(subs: Seq[File]) = + relativize(subs) map { case (f, path) => indexEntryXML(f, path) } + def siteMapIndex(dir: File, subs: Seq[File]): File = + { + val xml = indexEntriesXML(indexEntries(subs)) + writeXMLgz(dir / "sitemap_index.xml", dir / "sitemap_index.xml.gz", gzip, xml) + } + def isSymlink(f: File) = f.getCanonicalFile != f.getAbsoluteFile + + val (symlinks, normal) = (repoBase * DirectoryFilter).get.partition(dir => isSymlink(dir)) + log.debug("Detected symlinks: " + symlinks.mkString("\n\t", "\n\t", "")) + val subMaps = + singleSiteMap(repoBase, (repoBase * "*.html") +++ (symlinks ** "*.html") ).toList ++ + normal.flatMap( dir => + singleSiteMap(dir, dir ** "*.html").toList + ) + val index = siteMapIndex(repoBase, subMaps) + (index, subMaps) + } + // generates a string suitable for a sitemap file representing the last modified time of the given File + private[this] def lastModifiedString(f: File): String = + { + val formatter = new java.text.SimpleDateFormat("yyyy-MM-dd") + formatter.format(new java.util.Date(f.lastModified)) + } + // writes the provided XML node to `output` and then gzips it to `gzipped` if `gzip` is true + private[this] def writeXMLgz(output: File, gzipped: File, gzip: Boolean, node: xml.Node): File = + { + writeXML(output, node) + if(gzip) { + IO.gzip(output, gzipped) + gzipped + } else + output + } + private[this] def writeXML(output: File, node: xml.Node): Unit = + write(output, new xml.PrettyPrinter(1000, 4).format(node)) + + private[this] def write(output: File, xmlString: String) + { + // use \n as newline because toString uses PrettyPrinter, which hard codes line endings to be \n + IO.write(output, s"\n") + IO.append(output, xmlString) + } +} \ No newline at end of file diff --git a/src/sphinx/robots.txt b/src/sphinx/robots.txt new file mode 100644 index 000000000..9636d425d --- /dev/null +++ b/src/sphinx/robots.txt @@ -0,0 +1,4 @@ +User-agent: * +Disallow: /snapshot/ +Sitemap: http://www.scala-sbt.org/sitemap_index.xml.gz +Host: www.scala-sbt.org