repo2html/main.scm
2023-02-03 23:22:19 -05:00

459 lines
18 KiB
Scheme
Executable file

#!/usr/bin/csi -s
(import
(chicken file)
(chicken format)
(chicken io)
(chicken pathname)
(chicken port)
(chicken process)
(chicken process-context)
(chicken string)
(clojurian syntax)
ersatz
lowdown
scss
srfi-1 ;; list utils
srfi-13 ;; string utils
srfi-14 ;; charsets
sxml-transforms
symbol-utils ;; (unspecified-value)
utf8
)
;; small utilities ---------------------------------
;; (bail [message [exit-status]])
;; end the program immediately.
;; if a message is provided, print it to the screen.
;; exit-status defaults to 1.
(define (bail #!optional msg (status 1))
(when msg (print msg))
(exit status))
;; clojureish "debugging by print statement" tool since i still haven't reached
;; lisp enlightenment
(define ((inspect #!optional label #!rest args) s)
(display (list label ":" args " => " s) (current-error-port))
(newline (current-error-port))
s)
;; decompose a path s into its constituent parts. returns values:
;;
;; root: "/" if it's an absolute path, "" if relative directory-elements: a list
;; of each directory from root, () if none basename: the filename with extension
;; removed like "readme" or ".bashrc" extension: the file extension with the
;; dot, like ".txt" or "" if none relative-root: the relative path from the
;; given path to the root
;; e.g foo/bar/baz.html -> ../../
;;
;; this is intended to provide default values that make for easier reassembly
;; into filenames.
;;
;; typical use:
;; (->> source-file
;; (pathparts)
;; (define-values (root elements basename extension relative-root)))
;;
(define (pathparts s)
(define-values (dirname basename extension)
(decompose-pathname s))
(define-values (origin root directory-elements)
(decompose-directory (or dirname "")))
;; discarding origin because idgaf about windows
(values (or root "")
(or directory-elements '())
basename
(if extension (string-append "." extension) "")
(->>
(or directory-elements '())
(map (constantly "../"))
(apply string-append))))
;; like (substring) but doesn't break if start and end are too big/small
(define (substring* s start end)
(substring s (max start 0) (min end (string-length s))))
;; merge alists a and b. values in b "win"
(define (alist-merge a b)
(lset-union (lambda (x y) (eq? (car x) (car y))) a b))
;; like alist-ref but works on nested alists by specifying a path (list of keys)
(define (alist-ref-in keys alist #!optional (test eqv?))
(if (null? (cdr keys))
(alist-ref (car keys) alist test)
(alist-ref-in (cdr keys) (alist-ref (car keys) alist test) test)))
;; like alist-update, but works on nested alists by specifying a path (list of
;; keys)
(define (alist-update-in keys value alist #!optional (test eqv?))
(cond
((not alist) #f)
((null? (cdr keys))
(alist-update (car keys) value alist test))
(else
(alist-update (car keys)
(alist-update-in (cdr keys) value (alist-ref (car keys) alist test) test)
alist test))))
(define (unless-equals s1 s2)
;; if s1 == s2, then s1, otherwise #f
(and (equal? s1 s2) s1))
;; auto-apply ids to headings ---------------------------------
(define (slugify inner)
(->
inner
(pre-post-order*
`((*text* .
,(lambda (_ str)
(if (string? str)
(->
str
(string-downcase)
(string-translate "/,:;\"[]{}()=+")
(string-translate " _." "---"))
str)))
,@alist-conv-rules*))
(flatten)
((flip map) ->string)
(string-intersperse "")
(substring* 0 40)))
(define (enumerate-tag inner)
(let ((slug (slugify inner)))
`((@ (id ,slug))
,inner
(a (@ ((title "Permalink to this section")
(href "#" ,slug)))))))
;; a relative link to a file within our own repo should get .html added to the
;; target, since we make that filename change when rendering files for the web.
;;
;; thought it might also be good to apply that same treatment to any absolute
;; links into our repo (or other repos on the same forge?) but that gets a bit
;; messy, would need to drag variables holding current site, path, repo name all
;; the way into here
;;
;; if adjust-relative is not false, it is a prefix to be added to relative
;; links, to make the top-level readme link correctly into the site.
(define (adjust-relative-link adjust-relative inner)
(let* ((linkurl (alist-ref-in '(@ href) inner equal?))
(linkurl-startswith (cute string-prefix? <> (car linkurl))))
(if
(any linkurl-startswith '("#" "mailto:" "gemini:" "http://" "https://"))
inner
(alist-update-in '(@ href) (cons adjust-relative (append linkurl '(".html"))) inner equal?))))
;; TODO FIXME for some reason, lowdown renders links differently than images:
;; (markdown->sxml "[x](x)") => ((p (a (@ (href "x")) "x")))
;; (markdown->sxml "![x](x)") => ((p (img (@ (src ("x")) (alt "x")))))
(define (adjust-relative-src adjust-relative inner)
(let* ((srcurl
(->
;; ugh why
(alist-ref-in '(@ src) inner equal?)
(car)
((lambda (x) (if (list? x) (car x) x)))))
(srcurl-startswith (cute string-prefix? <> srcurl)))
(if
(or (not adjust-relative)
(not srcurl)
(any srcurl-startswith '("/" "http://" "https://")))
inner
(alist-update-in '(@ src) `((,(string-append adjust-relative srcurl))) inner equal?))))
(define (sxml-html-rules adjust-relative)
`(;; assign all headings an id so you can link to them
(h1 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h2 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h3 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h4 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h5 . ,(lambda (t i) (cons t (enumerate-tag i))))
;; if adjust-relative is true, all relative links should get prefixed with
;; the relative-root
(a . ,(lambda (t i) (cons t (adjust-relative-link adjust-relative i))))
(img . ,(lambda (t i) (cons t (adjust-relative-src adjust-relative i))))
;; this copied from lowdown's html-serialization-rules* because it is for
;; some reason not exported, so i can't just import it??
(*COMMENT* . ,(lambda (_t i) (list #\< "!--" i "--" #\>)))
;; ignore any #<unspecified> values in the tree
(*text* . ,(lambda (_t i) (if (unspecified? i) "" i)))
,@alist-conv-rules*))
;; reading in data from git commands ---------------------------------
(define (in-git-directory?)
(not (eof-object? (call-with-input-pipe "git rev-parse --git-dir" read-line))))
;; a weird hack to detect whether git considers a blob to be a binary or a text
;; file. works by requesting the numstat diff between it and the empty tree.
;; binary files give not a line count but '-' placeholders.
(define (git-file-is-text? source-file)
(not (equal?
"-\t-\t"
(call-with-input-pipe
(string-append "git diff 4b825dc642cb6eb9a060e54bf8d69288fbee4904 --numstat HEAD -- " (qs source-file))
(cute read-line <> 4)))))
(define (git-repository->paths-list)
(call-with-input-pipe "git ls-tree -r --name-only HEAD" read-lines))
(define (lines->numbered-lines lines)
`(table
(@ (id "file-contents"))
,@(map (lambda (number line)
`(tr (@ ((class "line")
(id ,number)))
(td (@ (class "line-number"))
(a (@ (href "#" ,number)) ,number))
(td (@ (class "line-contents"))
(code ,line))))
(map number->string (iota (length lines) 1))
lines)))
(define (git-file->string source-file mode)
(let ((handle-lines (lambda (lines)
(if (equal? mode 'plaintext)
(string-intersperse lines "\n")
(lines->numbered-lines lines)))))
(->
source-file
(qs)
((flip format) "git show HEAD:~a")
(call-with-input-pipe read-lines)
handle-lines)))
;; the result of asking git for some configuration; #f if no result.
(define (git-config->string key)
(let [(result (call-with-input-pipe (string-append "git config " key) read-line))]
(if (eof-object? result) #f result)))
;; environment always takes precedent over git-config
(define (config key)
(or
(get-environment-variable (string-append "REPO2HTML_" (string-upcase key)))
(git-config->string (string-append "repo2html." (string-downcase key)))))
;; sxml generators for constructed pages ---------------------------------
(define (source->sxml source-file) ;; src/main.scm
(define-values (_ _ basename extension _)
(pathparts source-file))
(define (image-link)
`(p (img (@ (src (,(string-append basename extension)))))))
(define (plaintext)
`(pre ,(git-file->string source-file 'plaintext)))
(define (numbered-lines)
(git-file->string source-file 'table))
(define (binary)
'(p "(Binary file)"))
(case (string->symbol extension)
((.md .markdown)
(handle-exceptions exn
(begin
(format (current-error-port) "Error parsing ~a\n" source-file)
`((p (b "There was an error parsing this file as Markdown."))
,(plaintext)))
(markdown->sxml (git-file->string source-file 'plaintext))))
((.jpg .jpeg .png .gif .webp .webm .apng .avif .svgz .ico)
(image-link))
((.svg)
(list (image-link) (plaintext)))
((.gz .pack .idx)
(binary))
(else
(if (git-file-is-text? source-file)
(numbered-lines)
(binary)))))
(define (filelist->sxml source-files-list relative-root)
`((h1 "Files")
((ul
,(map
(lambda (source-file)
`(li (a (@ (href ,(make-pathname relative-root source-file))) ,source-file)))
source-files-list)))))
(define (commits->sxml)
`((h1 "Commits")
(table
(tr
,@(map (lambda x `(th ,x)) '("Date" "Ref" "Log" "Author")))
,(map
(lambda (line)
(let-values (((date ref title author) (apply values (string-split line "\t"))))
`(tr (td ,date) (td ,ref) (td ,title) (td ,author))))
(call-with-input-pipe "git log --format=format:%as%x09%h%x09%s%x09%aN HEAD" read-lines)))))
(define (contributors->sxml)
`((h1 "Contributors")
(table
(tr (th "Author") (th "Commits"))
,(map
(lambda (line)
(let-values (((commits author) (apply values (string-split line "\t"))))
`(tr (td ,author) (td ,commits))))
(call-with-input-pipe "git shortlog -ns HEAD" read-lines)))))
(define (issueslist->sxml source-files-list)
`((h1 "Issues")
((ul
,(filter-map
(lambda (source-file)
(and
(string-prefix? "ISSUES/" source-file)
`(li (a (@ (href ,source-file))
,(->
(string-append "HEAD:" source-file)
(qs)
((flip format) "git show ~a")
(call-with-input-pipe read-line)
((lambda (x) (if (eof-object? x) (pathname-strip-directory source-file) x)))
(string-trim (string->char-set "# ")))))))
source-files-list)))))
;; used by ersatz writer
(define (alist->tvals vars)
(map (lambda (pair)
`(,(car pair) . ,(sexpr->tvalue (cdr pair)))) vars))
;; this version uses a jinja-style template via ersatz
(define (make-template-writer-ersatz templates-directory #!optional vars)
(define template (statements-from-file (template-std-env search-path: (list templates-directory)) "default.html"))
(lambda (output-filename body-sxml #!optional newvars)
;; create destination directory if needed
(if-let (destination-directory (pathname-directory output-filename))
(create-directory destination-directory #t)
'())
(let* (;; vars = global vars + file-specific vars
(vars (alist-merge vars (or newvars '())))
(adjust-relative (unless-equals (alist-ref 'relative_root vars) "html/"))
;; render the sxml to a html string that we can hand to the template
(body-html
(with-output-to-string
(lambda ()
(SXML->HTML (pre-post-order* body-sxml (sxml-html-rules adjust-relative))))))
;; vars = vars + body k/v pair
(vars (alist-cons 'content body-html vars)))
(with-output-to-file output-filename
(lambda ()
(display (eval-statements template models: (alist->tvals vars))))))))
;; main program ------------------------------------------------------------------------------
(define (generate-html-files html-repo-path templates-directory)
;; git automatically updates this hash when you checkout/pull/etc.
(let* ((version-ident "$Id$")
(source-files-list (git-repository->paths-list))
(forge-root (string-append (string-chomp (or (config "forgeroot") "") "/") "/"))
(repository-path (or (config "path")
(and (not (equal? forge-root "/"))
(string-prefix? forge-root html-repo-path)
(string-drop html-repo-path (string-length forge-root)))
(pathname-strip-directory html-repo-path)))
(template-alist
`(;; variables provided to template at all times. beware: ersatz
;; templates break if you attempt to use a variable with a hyphen.
;; the list of all files in the git repo
(source_files_list . ,source-files-list)
;; the description of the repo, taken from: env, config, cgit-like
;; description file
(repository_description . ,(or (config "description")
(if-let (f (file-exists? "description"))
(with-input-from-file f read-lines) #f)
""))
;; the name of the repo, which is usually but not necessarily the
;; same as its directory name (and last path element of the url)
(repository_name . ,(or (config "name")
(-> html-repo-path
(string-chomp ".git")
(pathname-strip-directory))))
;; the path from the forge root to the repository
(repository_path . ,repository-path)
;; the repository_path with the last path element removed
(repository_path_parent . ,(or (pathname-directory repository-path) ""))
;; the repository_path_parent as a list of path components
(repository_ancestors . ,(or (string-split (or (pathname-directory repository-path) "") "/") '()))
;; the first README file found among these, if any.
(readme_file . ,(find (cut member <> source-files-list)
'("README.md" "README" "README.txt")))
;; the first LICENSE file found among these, if any.
(license_file . ,(find (cut member <> source-files-list)
'("LICENSE.md" "LICENSE" "LICENSE.txt")))
;; the string "ISSUES" if any files exist in ISSUES/
(issues_file . ,(and (find (cut string-prefix? "ISSUES/" <>) source-files-list) "ISSUES"))
(repo2html_version . ,(if (equal? version-ident (list->string '(#\$ #\I #\d #\$)))
""
(substring* version-ident 5 12)))
))
(write-with-template
(make-template-writer-ersatz templates-directory template-alist)))
(define html-path (make-pathname html-repo-path "html"))
(create-directory html-repo-path #t)
;; special files
(write-with-template (make-pathname html-path "files" "html") (filelist->sxml source-files-list ""))
(write-with-template (make-pathname html-path "contributors" "html") (contributors->sxml))
(write-with-template (make-pathname html-path "commits" "html") (commits->sxml))
;; htmlified repo contents
(for-each
(lambda (source-file)
(->> source-file
(pathparts)
(define-values (root elements basename extension relative-root)))
(write-with-template
(make-pathname html-path source-file "html")
(source->sxml source-file)
`(;; additional per-page variables provided to template
(source_file . ,source-file)
(root . ,root)
(elements . ,elements)
(basename . ,basename)
(extension . ,extension)
(relative_root . ,relative-root)
))
;; if it's an image, also write it verbatim to output directory
(case (string->symbol (or (pathname-extension source-file) ""))
((jpg jpeg png gif webp webm svg apng avif svgz ico)
(system (format "git show HEAD:~a > ~a"
(qs source-file)
(make-pathname html-path (qs source-file)))))))
source-files-list)
;; if README.md, README, or README.txt exists, regenerate it as index.html.
;; otherwise regenerate files.html as index.html.
(write-with-template
(make-pathname html-repo-path "index" "html")
(if-let (readme-file
(alist-ref 'readme_file template-alist))
(source->sxml readme-file)
(filelist->sxml source-files-list ""))
;; TODO: do we need the full set of template variables defined here?
;; if so maybe this and the set above should be lifted out somewhere
`((relative_root . "html/")))
;; if the ISSUES directory got created, write out an index file for the
;; stuff in there.
(when (file-exists? (make-pathname html-path "ISSUES"))
(write-with-template (make-pathname html-path "ISSUES" "html") (issueslist->sxml source-files-list)))))
(define (main #!optional html-repo-path templates-directory)
(unless html-repo-path
(bail "please specify a destination directory for html files"))
(unless (in-git-directory?)
(bail "woops this isn't a git directory"))
(unless templates-directory
(bail "please specify the directory containing the templates.\nnote: built-in sxml templates have been removed."))
(generate-html-files (string-chomp html-repo-path "/") templates-directory))
(apply main (command-line-arguments))