repo2html/main.scm
pho4cexa 917788e29f remove some env vars; make output land in html/
CLONE-URL, TITLE, and H1 are no longer processed. instead, the user
should modify the templates/default.html to their liking.

the output used to be structured like this:

target-html-directory/
+-- index.html
+-- files.html
+-- README.md.html
+-- (all the other files)

the output is now structured like this:

target-html-directory/
+-- index.html
+-- html/
    +-- files.html
    +-- README.md.html
    +-- (all the other files...)

this makes it so people who are hosting bare repos for cloning on static
webservers will have only two new directory entries (index.html and
html) in the bare repo directory, reducing the risk of clobbering
something.

finally, i tried something hacky with gitattributes to get an automatic
version idenifier to show up. but it turns out this is not the git hash
of the commit, but instead the hash of the blob for main.scm, which
could remain the same across releases. doesn't hurt, so i'll look for a
better approach in the future.
2022-12-18 10:19:36 -08:00

339 lines
12 KiB
Scheme
Executable file

#!/usr/bin/csi -s
(import
(chicken file)
(chicken format)
(chicken io)
(chicken pathname)
(chicken port)
(chicken process)
(chicken process-context)
(chicken string)
(clojurian syntax)
ersatz
lowdown
scss
srfi-1 ;; list utils
srfi-13 ;; string utils
srfi-14 ;; charsets
sxml-transforms
symbol-utils ;; (unspecified-value)
utf8
)
;; small utilities ---------------------------------
;; (bail [message [exit-status]])
;; end the program immediately.
;; if a message is provided, print it to the screen.
;; exit-status defaults to 1.
(define (bail #!optional msg (status 1))
(when msg (print msg))
(exit status))
;; decompose a path s into its constituent parts. returns values:
;;
;; root: "/" if it's an absolute path, "" if relative directory-elements: a list
;; of each directory from root, () if none basename: the filename with extension
;; removed like "readme" or ".bashrc" extension: the file extension with the
;; dot, like ".txt" or "" if none relative-root: the relative path from the
;; given path to the root
;; e.g foo/bar/baz.html -> ../../
;;
;; this is intended to provide default values that make for easier reassembly
;; into filenames.
;;
;; typical use:
;; (->> source-file
;; (pathparts)
;; (define-values (root elements basename extension relative-root)))
;;
(define (pathparts s)
(define-values (dirname basename extension)
(decompose-pathname s))
(define-values (origin root directory-elements)
(decompose-directory (or dirname "")))
;; discarding origin because idgaf about windows
(values (or root "")
(or directory-elements '())
basename
(if extension (string-append "." extension) "")
(->>
(or directory-elements '())
(map (constantly "../"))
(apply string-append))))
;; like (substring) but doesn't break if start and end are too big/small
(define (substring* s start end)
(substring s (max start 0) (min end (string-length s))))
;; merge alists a and b. values in b "win"
(define (alist-merge a b)
(lset-union (lambda (x y) (eq? (car x) (car y))) a b))
;; auto-apply ids to headings ---------------------------------
(define (slugify _ inner)
(->
inner
(pre-post-order*
`((*text* .
,(lambda (_ str)
(if (string? str)
(->
str
(string-downcase)
(string-translate "/,:;\"[]{}()=+")
(string-translate " _." "---"))
str)))
,@alist-conv-rules*))
(flatten)
((flip map) ->string)
(string-intersperse "")
(substring* 0 40)))
(define (enumerate-tag tag inner)
`(,tag (@ (id ,tag "-" ,(slugify tag inner))) ,inner))
(define sxml-html-rules
`(;; assign all headings an id so you can link to them
(h1 . ,enumerate-tag)
(h2 . ,enumerate-tag)
(h3 . ,enumerate-tag)
(h4 . ,enumerate-tag)
(h5 . ,enumerate-tag)
;; this copied from lowdown html-serialization-rules* because it
;; is for some reason not exported??
(*COMMENT* . ,(lambda (_ str) (list #\< "!--" str "--" #\>)))
;; ignore #<unspecified> in tree
(*text* . ,(lambda (_ str) (if (unspecified? str) "" str)))
,@alist-conv-rules*))
;; reading in data from git commands
(define (in-git-directory?)
(not (eof-object? (call-with-input-pipe "git rev-parse --git-dir" read-line))))
(define (git-file-is-text? source-file)
(not (equal?
"-\t-\t"
(call-with-input-pipe
(string-append "git diff 4b825dc642cb6eb9a060e54bf8d69288fbee4904 --numstat HEAD -- " source-file)
(lambda (port) (read-line port 4))))))
(define (git-repository->paths-list)
(call-with-input-pipe "git ls-tree -r --name-only HEAD" read-lines))
(define (git-file->string source-file)
(->
(format "git show HEAD:~a" source-file)
(call-with-input-pipe read-lines)
(string-intersperse "\n")))
;; sxml generators for constructed pages
(define (source->sxml source-file) ;; src/main.scm
(define-values (_ _ basename extension _)
(pathparts source-file))
(define (image-link)
`(p (img (@ src ,basename ,extension))))
(define (plaintext)
`(pre ,(git-file->string source-file)))
(define (binary)
'(p "(Binary file)"))
`((p (@ id "file-path") ,source-file)
,(case (string->symbol extension)
((.md .markdown)
(handle-exceptions exn
(begin
(format (current-error-port) "Error parsing ~a\n" source-file)
`((p (b "There was an error parsing this file as Markdown."))
,(plaintext)))
(markdown->sxml (git-file->string source-file))))
((.jpg .jpeg .png .gif .webp .webm .apng .avif .svgz .ico)
(image-link))
((.svg)
(list (image-link) (plaintext)))
((.gz .pack .idx)
(binary))
(else
(if (git-file-is-text? source-file)
(plaintext)
(binary))))))
(define (filelist->sxml source-files-list relative-root)
`((h1 "Files")
((ul
,(map
(lambda (source-file)
`(li (a (@ href ,(make-pathname relative-root source-file ".html")) ,source-file)))
source-files-list)))))
(define (commits->sxml)
`((h1 "Commits")
(table
(tr (th "Date") (th "Ref") (th "Log") (th "Author"))
,(map
(lambda (line)
(let-values (((date ref title author) (apply values (string-split line "\t"))))
`(tr (td ,date) (td ,ref) (td ,title) (td ,author))))
(call-with-input-pipe "git log --format=format:%as%x09%h%x09%s%x09%aN HEAD" read-lines)))))
(define (contributors->sxml)
`((h1 "Contributors")
(table
(tr (th "Author") (th "Commits"))
,(map
(lambda (line)
(let-values (((commits author) (apply values (string-split line "\t"))))
`(tr (td ,author) (td ,commits))))
(call-with-input-pipe "git shortlog -ns HEAD" read-lines)))))
(define (issueslist->sxml source-files-list)
`((h1 "Issues")
((ul
,(filter-map
(lambda (source-file)
(and
(string-prefix? "ISSUES/" source-file)
`(li (a (@ href ,source-file ".html")
,(->
source-file
((flip format) "git show HEAD:~a")
(call-with-input-pipe read-line)
(string-trim (string->char-set "# ")))))))
source-files-list)))))
;; used by ersatz writer
(define (alist->tvals vars)
(map (lambda (pair)
`(,(car pair) . ,(sexpr->tvalue (cdr pair)))) vars))
;; this version uses a jinja-style template via ersatz
(define (make-template-writer-ersatz templates-directory #!optional vars)
(define template (statements-from-file (template-std-env search-path: (list templates-directory)) "default.html"))
(lambda (output-filename body-sxml #!optional newvars)
;; create destination directory if needed
(if-let (destination-directory (pathname-directory output-filename))
(create-directory destination-directory #t)
'())
(let* (;; render the sxml to a html string that we can hand to the template
(body-html
(with-output-to-string
(lambda ()
(SXML->HTML (pre-post-order* body-sxml sxml-html-rules)))))
;; vars = global vars + file-specific vars + body k/v pair
(vars
(alist-cons
'content body-html
(alist-merge vars (or newvars '())))))
(with-output-to-file output-filename
(lambda ()
(display (eval-statements template models: (alist->tvals vars))))))))
;; main program ------------------------------------------------------------------------------
(define (generate-html-files html-repo-path templates-directory)
(let* ((version-ident "$Id$")
(source-files-list (git-repository->paths-list))
(template-alist
`(;; variables provided to template at all times. beware: ersatz
;; templates break if you attempt to use a variable with a hyphen.
;; the list of all files in the git repo
(source_files_list . ,source-files-list)
;; the description of the repo, taken from env, falling back to
;; description file
(repository_description
. ,(or (get-environment-variable "REPO2HTML_DESCRIPTION")
(if-let (f (file-exists? "description"))
(with-input-from-file f read-lines)
#f)
""))
;; the repository name, which we detect from the output directory
;; name. TODO: more heuristics if this doesn't work well
(repository_name
. ,(pathname-strip-directory (string-chomp html-repo-path "/")))
;; the first README file found, if any.
(readme_file
. ,(find (lambda (x) (member x source-files-list))
'("README" "README.md" "README.txt")))
;; the first LICENSE file found, if any.
(license_file
. ,(find (lambda (x) (member x source-files-list))
'("LICENSE" "LICENSE.md" "LICENSE.txt")))
;; the string "ISSUES" if any files exist in ISSUES/
(issues_file
. ,(and (find (lambda (x) (string-prefix? "ISSUES/" x)) source-files-list) "ISSUES"))
(repo2html_version
. ,(if (equal? version-ident (list->string '(#\$ #\I #\d #\$)))
""
(substring* version-ident 5 12)))
))
(write-with-template
(make-template-writer-ersatz templates-directory template-alist)))
(define html-path (make-pathname html-repo-path "html"))
(create-directory html-repo-path #t)
;; special files
(write-with-template (make-pathname html-path "files" "html") (filelist->sxml source-files-list ""))
(write-with-template (make-pathname html-path "contributors" "html") (contributors->sxml))
(write-with-template (make-pathname html-path "commits" "html") (commits->sxml))
;; htmlified repo contents
(for-each
(lambda (source-file)
(->> source-file
(pathparts)
(define-values (root elements basename extension relative-root)))
(write-with-template
(make-pathname html-path source-file "html")
(source->sxml source-file)
`(;; additional per-page variables provided to template
(source_file . ,source-file)
(root . ,root)
(elements . ,elements)
(basename . ,basename)
(extension . ,extension)
(relative_root . ,relative-root)
))
;; if it's an image, also write it verbatim to output directory
(case (string->symbol (or (pathname-extension source-file) ""))
((jpg jpeg png gif webp webm svg apng avif svgz ico)
(system (format "git show HEAD:~a > ~a"
source-file
(make-pathname html-path source-file))))))
source-files-list)
;; if README.md, README, or README.txt exists, regenerate it as index.html.
;; otherwise regenerate files.html as index.html.
(write-with-template
(make-pathname html-repo-path "index" "html")
(if-let (readme-file
(alist-ref 'readme_file template-alist))
(source->sxml readme-file)
(filelist->sxml source-files-list "html"))
;; TODO: do we need the full set of template variables defined here?
;; if so maybe this and the set above should be lifted out somewhere
`((relative_root . "html/")))
;; if the ISSUES directory got created, write out an index file for the
;; stuff in there.
(when (file-exists? (make-pathname html-path "ISSUES"))
(write-with-template (make-pathname html-path "ISSUES" "html") (issueslist->sxml source-files-list)))))
(define (main #!optional html-repo-path templates-directory)
(unless html-repo-path
(bail "please specify a destination directory for html files"))
(unless (in-git-directory?)
(bail "woops this isn't a git directory"))
(unless templates-directory
(bail "please specify the directory containing the templates.\nnote: built-in sxml templates have been removed."))
(generate-html-files html-repo-path templates-directory))
(apply main (command-line-arguments))