repo2html/repo2html.scm

348 lines
14 KiB
Scheme
Raw Normal View History

#!/usr/bin/csi -s
(import
(chicken file)
(chicken format) ;; format
(chicken io) ;; read-line
(chicken pathname)
(chicken port)
(chicken process) ;; call-with-input-pipe
(chicken process-context)
(chicken string) ;; string-intersperse
(clojurian syntax)
ersatz
lowdown
scss
srfi-1 ;; list utils
srfi-13 ;; string utils
srfi-14 ;; charsets
sxml-transforms
symbol-utils ;; (unspecified-value)
utf8
utils
utils-git
)
2022-12-02 16:41:55 -05:00
;; auto-apply ids to headings ---------------------------------
(define (slugify inner)
(->
inner
(pre-post-order*
`((*text* .
,(lambda (_ str)
(if (string? str)
(->
str
(string-downcase)
(string-translate "/,:;\"[]{}()=+")
(string-translate " _." "---"))
str)))
,@alist-conv-rules*))
(flatten)
((flip map) ->string)
(string-intersperse "")
(substring* 0 40)))
(define (enumerate-tag inner)
(let ((slug (slugify inner)))
`((@ (id ,slug))
,inner
(a (@ ((title "Permalink to this section")
(href "#" ,slug)))))))
;; a relative link to a file within our own repo should get .html added to the
;; target, since we make that filename change when rendering files for the web.
;;
;; thought it might also be good to apply that same treatment to any absolute
;; links into our repo (or other repos on the same forge?) but that gets a bit
;; messy, would need to drag variables holding current site, path, repo name all
;; the way into here
;;
;; if adjust-relative is not false, it is a prefix to be added to relative
;; links, to make the top-level readme link correctly into the site.
(define (adjust-relative-link adjust-relative inner)
(let* ((linkurl (alist-ref-in '(@ href) inner equal?))
(linkurl-startswith (cute string-prefix? <> (car linkurl))))
(if
(any linkurl-startswith '("#" "mailto:" "gemini:" "http://" "https://"))
inner
(alist-update-in '(@ href) (cons adjust-relative (append linkurl '(".html"))) inner equal?))))
;; TODO FIXME for some reason, lowdown renders links differently than images:
;; (markdown->sxml "[x](x)") => ((p (a (@ (href "x")) "x")))
;; (markdown->sxml "![x](x)") => ((p (img (@ (src ("x")) (alt "x")))))
2023-01-15 14:30:14 -05:00
(define (adjust-relative-src adjust-relative inner)
(let* ((srcurl
(->
;; ugh why
(alist-ref-in '(@ src) inner equal?)
(car)
((lambda (x) (if (list? x) (car x) x)))))
(srcurl-startswith (cute string-prefix? <> srcurl)))
2023-01-15 14:30:14 -05:00
(if
(or (not adjust-relative)
(not srcurl)
(any srcurl-startswith '("/" "http://" "https://")))
2023-01-15 14:30:14 -05:00
inner
(alist-update-in '(@ src) `((,(string-append adjust-relative srcurl))) inner equal?))))
2023-01-15 14:30:14 -05:00
(define (sxml-html-rules adjust-relative)
`(;; assign all headings an id so you can link to them
(h1 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h2 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h3 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h4 . ,(lambda (t i) (cons t (enumerate-tag i))))
(h5 . ,(lambda (t i) (cons t (enumerate-tag i))))
;; if adjust-relative is true, all relative links should get prefixed with
;; the relative-root
(a . ,(lambda (t i) (cons t (adjust-relative-link adjust-relative i))))
2023-01-15 14:30:14 -05:00
(img . ,(lambda (t i) (cons t (adjust-relative-src adjust-relative i))))
;; this copied from lowdown's html-serialization-rules* because it is for
;; some reason not exported, so i can't just import it??
(*COMMENT* . ,(lambda (_t i) (list #\< "!--" i "--" #\>)))
;; ignore any #<unspecified> values in the tree
(*text* . ,(lambda (_t i) (if (unspecified? i) "" i)))
,@alist-conv-rules*))
;; environment always takes precedence over git-config
(define (config key)
(or
(get-environment-variable (string-append "REPO2HTML_" (string-upcase key)))
(git-config->string (string-append "repo2html." (string-downcase key)))))
;; sxml generators for constructed pages ---------------------------------
(define (lines->string xs) (string-intersperse xs "\n"))
(define (lines->numbered-sxml lines)
`(table
(@ (id "file-contents"))
,@(map (lambda (number line)
`(tr (@ ((class "line")
(id ,number)))
(td (@ (class "line-number"))
(a (@ (href "#" ,number)) ,number))
(td (@ (class "line-contents"))
(code ,line))))
(map number->string (iota (length lines) 1))
lines)))
(define (source->sxml source-file) ;; src/main.scm
(define-values (_ _ basename extension _)
(pathparts source-file))
(define (image-link)
`(p (img (@ (src (,(string-append basename extension)))))))
(define (plaintext)
`(pre ,(git-file->lines source-file)))
(define (numbered-sxml)
(-> source-file
git-file->lines
lines->numbered-sxml))
(define (binary)
'(p "(Binary file)"))
(case (string->symbol extension)
((.md .markdown)
(handle-exceptions exn
(begin
(format (current-error-port) "Error parsing ~a\n" source-file)
`((p (b "There was an error parsing this file as Markdown."))
,(plaintext)))
(-> source-file
git-file->lines
lines->string
markdown->sxml))
((.jpg .jpeg .png .gif .webp .webm .apng .avif .svgz .ico)
(image-link))
((.svg)
(list (image-link) (plaintext)))
((.gz .pack .idx)
(binary))
(else
(if (git-file-is-text? source-file)
(numbered-sxml)
(binary))))))
(define (filelist->sxml source-files-list relative-root)
`((h1 "Files")
((ul
,(map
(lambda (source-file)
`(li (a (@ (href ,(make-pathname relative-root source-file))) ,source-file)))
source-files-list)))))
(define (commits->sxml)
`((h1 "Commits")
(table
(tr ,@(map (lambda x `(th ,x)) '("Date" "Ref" "Log" "Author")))
,(map
(lambda (commit) `(tr ,@(map (lambda x `(td ,x)) commit)))
(git-commits)))))
(define (contributors->sxml)
`((h1 "Contributors")
(table
(tr (th "Author") (th "Commits"))
,(map
(lambda (line)
(let-values (((commits author) (apply values (string-split line "\t"))))
`(tr (td ,author) (td ,commits))))
(git-contributors)))))
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(define (issueslist->sxml source-files-list)
`((h1 "Issues")
((ul
,(filter-map
(lambda (source-file)
(and
(string-prefix? "ISSUES/" source-file)
`(li (a (@ (href ,source-file))
,(->
source-file
git-file->lines
((lambda (x) (if (eof-object? x) (list (pathname-strip-directory source-file)) x)))
car
(string-trim (string->char-set "# ")))))))
source-files-list)))))
;; used by ersatz writer
(define (alist->tvals vars)
(map (lambda (pair)
`(,(car pair) . ,(sexpr->tvalue (cdr pair)))) vars))
;; this version uses a jinja-style template via ersatz
(define (make-template-writer-ersatz templates-directory #!optional vars)
(define template (statements-from-file (template-std-env search-path: (list templates-directory)) "default.html"))
(lambda (output-filename body-sxml #!optional newvars)
;; create destination directory if needed
(if-let (destination-directory (pathname-directory output-filename))
(create-directory destination-directory #t)
'())
(let* (;; vars = global vars + file-specific vars
(vars (alist-merge vars (or newvars '())))
(adjust-relative (unless-equals (alist-ref 'relative_root vars) "html/"))
;; render the sxml to a html string that we can hand to the template
(body-html
(with-output-to-string
(lambda ()
(SXML->HTML (pre-post-order* body-sxml (sxml-html-rules adjust-relative))))))
;; vars = vars + body k/v pair
(vars (alist-cons 'content body-html vars)))
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(with-output-to-file output-filename
(lambda ()
(display (eval-statements template models: (alist->tvals vars))))))))
2022-12-18 12:56:47 -05:00
;; main program ------------------------------------------------------------------------------
(define (generate-html-files html-repo-path templates-directory)
;; git automatically updates this hash when you checkout/pull/etc.
(let* ((version-ident "$Id$")
(source-files-list (git-repository->paths-list))
(forge-root (string-append (string-chomp (or (config "forgeroot") "") "/") "/"))
(repository-path (or (config "path")
(and (not (equal? forge-root "/"))
(string-prefix? forge-root html-repo-path)
(string-drop html-repo-path (string-length forge-root)))
(pathname-strip-directory html-repo-path)))
(template-alist
`(;; variables provided to template at all times. beware: ersatz
;; templates break if you attempt to use a variable with a hyphen.
;; the list of all files in the git repo
(source_files_list . ,source-files-list)
;; the description of the repo, taken from: env, config, cgit-like
;; description file
(repository_description . ,(or (config "description")
(if-let (f (file-exists? "description"))
(with-input-from-file f read-lines) #f)
""))
;; the name of the repo, which is usually but not necessarily the
;; same as its directory name (and last path element of the url)
(repository_name . ,(or (config "name")
(-> html-repo-path
(string-chomp ".git")
(pathname-strip-directory))))
;; the path from the forge root to the repository
(repository_path . ,repository-path)
;; the repository_path with the last path element removed
(repository_path_parent . ,(or (pathname-directory repository-path) ""))
;; the repository_path_parent as a list of path components
(repository_ancestors . ,(or (string-split (or (pathname-directory repository-path) "") "/") '()))
;; the first README file found among these, if any.
(readme_file . ,(find (cut member <> source-files-list)
'("README.md" "README" "README.txt")))
;; the first LICENSE file found among these, if any.
(license_file . ,(find (cut member <> source-files-list)
'("LICENSE.md" "LICENSE" "LICENSE.txt")))
;; the string "ISSUES" if any files exist in ISSUES/
(issues_file . ,(and (find (cut string-prefix? "ISSUES/" <>) source-files-list) "ISSUES"))
(repo2html_version . ,(if (equal? version-ident (list->string '(#\$ #\I #\d #\$)))
""
(substring* version-ident 5 12)))
))
(write-with-template
2022-12-18 12:56:47 -05:00
(make-template-writer-ersatz templates-directory template-alist)))
(define html-path (make-pathname html-repo-path "html"))
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(create-directory html-repo-path #t)
;; special files
(write-with-template (make-pathname html-path "files" "html") (filelist->sxml source-files-list ""))
(write-with-template (make-pathname html-path "contributors" "html") (contributors->sxml))
(write-with-template (make-pathname html-path "commits" "html") (commits->sxml))
;; htmlified repo contents
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(for-each
(lambda (source-file)
(->> source-file
(pathparts)
(define-values (root elements basename extension relative-root)))
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(write-with-template
(make-pathname html-path source-file "html")
(source->sxml source-file)
`(;; additional per-page variables provided to template
(source_file . ,source-file)
(root . ,root)
(elements . ,elements)
(basename . ,basename)
(extension . ,extension)
(relative_root . ,relative-root)
))
;; if it's an image, also write it verbatim to output directory
massive changes incl. support for images, markdown i started this off by trying to learn more about how scheme does file i/o. it seems like many of its functions just expect you'll want them to write to (current-output-port) instead of returning a string. so i thought, i wonder what it would look like if i tweak these content generator functions to just (display) their stuff, and call (with-output-to-file) and apply the html template each time i call them? and whew it kindof got away from me i totally understand if you feel like this is an unpleasant overhaul of your whole project and don't want to merge this change! anyway let's see if i can summarize the changes: - image support!! - svg image support!! it shows both the svg and its source code! - markdown support; we now render all .md files instead of showing source - using string->goodHTML instead of clean-html. turns out, it's a little annoying, in that it only returns a string if it makes no changes, but if it does, it returns a list of strings. it expects to be passed to one of the other functions in the sxml library, so that's what i do. - moved "wrap the template" outside of various "generate content" functions - simple command line use: from any git work-tree OR bare repo, run repo2html /path/to/www/output/repo-foo-bar and it will create that directory and use "repo-foo-bar" as the repo name. - made our example post-receive a bit more robust - changed env var names to have REPO2HTML_ prefix - better repo name detection and automatic caching of it in git config - moved the post-receive-specific logic to avoid generating if we're not updating HEAD into the post-receive hook itself, along with some status messages - checked directory permissions in the hook so it doesn't even attempt to run repo2html and avoids a crash
2022-12-07 04:00:52 -05:00
(case (string->symbol (or (pathname-extension source-file) ""))
((jpg jpeg png gif webp webm svg apng avif svgz ico)
(git-copy source-file (make-pathname html-path source-file)))))
source-files-list)
;; if README.md, README, or README.txt exists, regenerate it as index.html.
;; otherwise regenerate files.html as index.html.
(write-with-template
(make-pathname html-repo-path "index" "html")
(if-let (readme-file
(alist-ref 'readme_file template-alist))
(source->sxml readme-file)
(filelist->sxml source-files-list ""))
;; TODO: do we need the full set of template variables defined here?
;; if so maybe this and the set above should be lifted out somewhere
`((relative_root . "html/")))
;; if the ISSUES directory got created, write out an index file for the
;; stuff in there.
(when (file-exists? (make-pathname html-path "ISSUES"))
(write-with-template (make-pathname html-path "ISSUES" "html") (issueslist->sxml source-files-list)))))
2022-12-02 16:41:55 -05:00
2022-12-18 12:56:47 -05:00
(define (main #!optional html-repo-path templates-directory)
2022-12-02 16:41:55 -05:00
(unless html-repo-path
(bail "please specify a destination directory for html files"))
(unless (in-git-directory?)
(bail "woops this isn't a git directory"))
2022-12-18 12:56:47 -05:00
(unless templates-directory
(bail "please specify the directory containing the templates.\nnote: built-in sxml templates have been removed."))
(generate-html-files (string-chomp html-repo-path "/") templates-directory))
(apply main (command-line-arguments))