// overleaf/services/web/frontend/js/features/source-editor/lezer-latex/tokens.mjs

/* Hand-written tokenizer for LaTeX. */
import { ExternalTokenizer, ContextTracker } from '@lezer/lr'
import {
LiteralArgContent,
SpaceDelimitedLiteralArgContent,
VerbContent,
VerbatimContent,
LstInlineContent,
Begin,
End,
KnownEnvironment,
MathDelimiter,
Csname,
TrailingWhitespaceOnly,
TrailingContent,
RefCtrlSeq,
RefStarrableCtrlSeq,
CiteCtrlSeq,
CiteStarrableCtrlSeq,
LabelCtrlSeq,
MathTextCtrlSeq,
HboxCtrlSeq,
TitleCtrlSeq,
AuthorCtrlSeq,
DocumentClassCtrlSeq,
UsePackageCtrlSeq,
HrefCtrlSeq,
VerbCtrlSeq,
LstInlineCtrlSeq,
IncludeGraphicsCtrlSeq,
CaptionCtrlSeq,
DefCtrlSeq,
LeftCtrlSeq,
RightCtrlSeq,
NewCommandCtrlSeq,
RenewCommandCtrlSeq,
NewEnvironmentCtrlSeq,
RenewEnvironmentCtrlSeq,
DocumentEnvName,
TabularEnvName,
EquationEnvName,
EquationArrayEnvName,
VerbatimEnvName,
TikzPictureEnvName,
FigureEnvName,
OpenParenCtrlSym,
CloseParenCtrlSym,
OpenBracketCtrlSym,
CloseBracketCtrlSym,
// Sectioning commands
BookCtrlSeq,
PartCtrlSeq,
ChapterCtrlSeq,
SectionCtrlSeq,
SubSectionCtrlSeq,
SubSubSectionCtrlSeq,
ParagraphCtrlSeq,
SubParagraphCtrlSeq,
InputCtrlSeq,
IncludeCtrlSeq,
ItemCtrlSeq,
BibliographyCtrlSeq,
BibliographyStyleCtrlSeq,
CenteringCtrlSeq,
ListEnvName,
} from './latex.terms.mjs'
// Is `ch` (a charCode) valid inside an environment name?
// We accept A-Z, a-z, 0-9, and the characters * + @.
function nameChar(ch) {
  if (ch >= 65 && ch <= 90) return true // A-Z
  if (ch >= 97 && ch <= 122) return true // a-z
  if (ch >= 48 && ch <= 57) return true // 0-9
  return ch === 42 || ch === 43 || ch === 64 // * + @
}
// Is `ch` (a charCode) an ASCII letter, i.e. does it match [a-zA-Z]?
function alphaChar(ch) {
  const isUpper = ch >= 65 && ch <= 90
  const isLower = ch >= 97 && ch <= 122
  return isUpper || isLower
}
// One-entry cache for envNameAfter, keyed on the input object identity and
// the absolute position the lookup was made at.
let cachedName = null
let cachedInput = null
let cachedPos = 0

// Read the brace-delimited environment name found `offset` characters ahead
// of the current input position, e.g. "{itemize}" -> "itemize".
// Returns undefined when the character at `offset` is not '{', and null when
// the braces start with no valid name characters.
function envNameAfter(input, offset) {
  const pos = input.pos + offset
  if (cachedInput === input && cachedPos === pos) {
    return cachedName
  }
  if (input.peek(offset) !== 0x7b /* '{' */) return
  let name = ''
  for (let i = offset + 1; ; i++) {
    const code = input.peek(i)
    // accept A-Z a-z 0-9 * + @ in environment names (same set as nameChar)
    const isNameChar =
      (code >= 65 && code <= 90) ||
      (code >= 97 && code <= 122) ||
      (code >= 48 && code <= 57) ||
      code === 42 ||
      code === 43 ||
      code === 64
    if (!isNameChar) break
    name += String.fromCharCode(code)
  }
  cachedInput = input
  cachedPos = pos
  cachedName = name === '' ? null : name
  return cachedName
}
// Parse-context node tracking the current environment name and a chained
// hash over the stack of open environment names.
function ElementContext(name, parent) {
  this.name = name
  this.parent = parent
  // fold each UTF-16 unit of the name into the parent's hash
  let hash = parent ? parent.hash : 0
  for (let i = 0; i < name.length; i++) {
    const code = name.charCodeAt(i)
    hash += (hash << 4) + code + (code << 8)
  }
  this.hash = hash
}
// Context tracker that maintains the stack of currently-open environments,
// pushing on \begin and popping when a KnownEnvironment reduces.
export const elementContext = new ContextTracker({
  start: null,
  shift(context, term, stack, input) {
    if (term !== Begin) return context
    // '\begin' has just been shifted; the env name follows it in the input
    return new ElementContext(envNameAfter(input, '\\begin'.length) || '', context)
  },
  reduce(context, term) {
    if (term === KnownEnvironment && context) return context.parent
    return context
  },
  reuse(context, node, _stack, input) {
    if (node.type.id !== Begin) return context
    // reused Begin node sits at the current position, so no offset
    return new ElementContext(envNameAfter(input, 0) || '', context)
  },
  hash: context => (context ? context.hash : 0),
  strict: false,
})
// tokenizer for \verb|...| commands: the first non-space, non-'*' character
// after \verb (or \verb*) is the delimiter; content runs to its next
// occurrence on the same line
export const verbTokenizer = new ExternalTokenizer(
  (input, stack) => {
    // skip the optional star of \verb*
    if (input.next === '*'.charCodeAt(0)) input.advance()
    const delimiter = input.next
    if (delimiter === -1) return // hit end of file
    if (/\s|\*/.test(String.fromCharCode(delimiter))) return // invalid delimiter
    input.advance()
    let ch
    do {
      ch = input.next
      // unterminated \verb: give up at EOF or end of line
      if (ch === -1 || ch === CHAR_NEWLINE) return
      input.advance()
    } while (ch !== delimiter)
    return input.acceptToken(VerbContent)
  },
  { contextual: false }
)
// tokenizer for \lstinline|...| commands: the first character is the
// delimiter (with '{' paired to '}'); content runs to its next occurrence on
// the same line.
// Fix: the original accumulated the scanned characters into an unused
// `content` string, doing O(n) string building per token for nothing.
export const lstinlineTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let delimiter = input.next
    if (delimiter === -1) return // hit end of file
    if (/\s/.test(String.fromCharCode(delimiter))) {
      return // invalid delimiter
    }
    // a brace-delimited argument closes with the matching brace
    if (delimiter === CHAR_OPEN_BRACE) {
      delimiter = CHAR_CLOSE_BRACE
    }
    input.advance()
    for (;;) {
      const next = input.next
      // unterminated \lstinline: give up at EOF or end of line
      if (next === -1 || next === CHAR_NEWLINE) return
      input.advance()
      if (next === delimiter) break
    }
    return input.acceptToken(LstInlineContent)
  },
  { contextual: false }
)
// Does the input, starting `offset` characters ahead, spell out the
// `expected` string? (EOF peeks return -1 and can never match.)
const matchForward = (input, expected, offset = 0) => {
  let i = 0
  while (i < expected.length) {
    if (String.fromCharCode(input.peek(offset + i)) !== expected[i]) {
      return false
    }
    i++
  }
  return true
}
// tokenizer for \begin{verbatim}...\end{verbatim} environments: everything up
// to (but not including) the matching \end{<name>} — or to EOF — is content.
export const verbatimTokenizer = new ExternalTokenizer(
  (input, stack) => {
    // the environment name comes from the context pushed at \begin
    const endMarker = '\\end{' + stack.context.name + '}'
    let offset = 0
    while (input.peek(offset) !== -1 && !matchForward(input, endMarker, offset)) {
      offset++
    }
    // `offset` is now the length of the verbatim content
    return input.acceptToken(VerbatimContent, offset)
  },
  { contextual: false }
)
// tokenizer for \href{...} and similar commands: everything up to (but not
// including) the closing '}' — or to EOF — is literal argument content.
// Fix: the original accumulated every scanned character into a `content`
// string and compared its tail with slice() on each step (quadratic-flavored
// busywork); a direct charCode comparison against '}' is equivalent.
export const literalArgTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let offset = 0
    for (;;) {
      const next = input.peek(offset)
      if (next === -1 || next === CHAR_CLOSE_BRACE) break
      offset++
    }
    // `offset` is the content length, excluding the '}' delimiter
    return input.acceptToken(LiteralArgContent, offset)
  },
  { contextual: false }
)
// tokenizer for literal content delimited by whitespace, such as in `\input foo.tex`:
// everything up to (but not including) the first space or newline — or to
// EOF — is content.
// Fix: the original accumulated every scanned character into a `content`
// string only to slice() its last character; comparing the charCode directly
// is equivalent and avoids the per-character string building.
export const spaceDelimitedLiteralArgTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let offset = 0
    for (;;) {
      const next = input.peek(offset)
      if (next === -1 || next === CHAR_SPACE || next === CHAR_NEWLINE) break
      offset++
    }
    // `offset` is the content length, excluding the terminating whitespace
    return input.acceptToken(SpaceDelimitedLiteralArgContent, offset)
  },
  { contextual: false }
)
// helper: look up the UTF-16 charCode of a one-character string
function _char(str) {
  return str.charCodeAt(0)
}
// Allowed delimiters, from the LaTeX manual, table 3.10
// ( ) [ ] / | \{ \} \| and additional names below
// The empty delimiter . is also allowed
const CHAR_SLASH = _char('/')
const CHAR_PIPE = _char('|')
const CHAR_OPEN_PAREN = _char('(')
const CHAR_CLOSE_PAREN = _char(')')
const CHAR_OPEN_BRACKET = _char('[')
const CHAR_CLOSE_BRACKET = _char(']')
const CHAR_FULL_STOP = _char('.')
const CHAR_BACKSLASH = _char('\\')
const CHAR_OPEN_BRACE = _char('{')
const CHAR_CLOSE_BRACE = _char('}')
// Control-sequence delimiter names (written without the leading backslash)
// accepted by mathDelimiterTokenizer after \left / \right
const ALLOWED_DELIMITER_NAMES = [
  'lfloor',
  'rfloor',
  'lceil',
  'rceil',
  'langle',
  'rangle',
  'backslash',
  'uparrow',
  'downarrow',
  'Uparrow',
  'Downarrow',
  'updownarrow',
  'Updownarrow',
  'lvert',
  'rvert',
  'lVert',
  'rVert',
]
// Given a list of allowed command names, return those with leading characters
// that are the same as the matchString.
// Simplification: `entry.length >= size && entry.substring(0, size) ===
// matchString` is exactly String.prototype.startsWith.
function findPartialMatches(list, matchString) {
  return list.filter(entry => entry.startsWith(matchString))
}
// tokenizer for \leftX ... \rightX delimiter tokens
// Accepts: a single character from /|()[]. — a control symbol \{ \} \| — or
// a control sequence whose name is in ALLOWED_DELIMITER_NAMES.
export const mathDelimiterTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let content = ''
    let offset = 0
    let end = -1
    // look at the first character, we only accept the following /|()[].
    let next = input.peek(offset)
    if (next === -1) {
      return
    }
    if (
      next === CHAR_SLASH ||
      next === CHAR_PIPE ||
      next === CHAR_OPEN_PAREN ||
      next === CHAR_CLOSE_PAREN ||
      next === CHAR_OPEN_BRACKET ||
      next === CHAR_CLOSE_BRACKET ||
      next === CHAR_FULL_STOP
    ) {
      // single-character delimiter: accept a one-character token
      return input.acceptToken(MathDelimiter, 1)
    }
    // reject anything else not starting with a backslash,
    // we only accept control symbols or control sequences
    if (next !== CHAR_BACKSLASH) {
      return
    }
    // look at the second character, we only accept \{ and \} and \| as control symbols
    offset++
    next = input.peek(offset)
    if (next === -1) {
      return
    }
    if (
      next === CHAR_OPEN_BRACE ||
      next === CHAR_CLOSE_BRACE ||
      next === CHAR_PIPE
    ) {
      // two-character control-symbol delimiter (backslash + symbol)
      return input.acceptToken(MathDelimiter, 2)
    }
    // We haven't matched any symbols, so now try matching command names.
    // Is this character a potential match to the remaining allowed delimiter names?
    content = String.fromCharCode(next)
    let candidates = findPartialMatches(ALLOWED_DELIMITER_NAMES, content)
    if (!candidates.length) return
    // we have some candidates, look at subsequent characters
    offset++
    for (;;) {
      const next = input.peek(offset)
      // stop when we reach the end of file or a non-alphabetic character
      // NOTE(review): nameChar also accepts digits, '*', '+' and '@' — such
      // characters are still filtered out by the prefix match below, since no
      // ALLOWED_DELIMITER_NAMES entry contains them; confirm this is intended
      if (next === -1 || !nameChar(next)) {
        end = offset - 1
        break
      }
      content += String.fromCharCode(next)
      // find how many candidates remain with the new input
      candidates = findPartialMatches(candidates, content)
      if (!candidates.length) return // no matches remaining
      end = offset
      offset++
    }
    // a partial match (e.g. "lflo") is not enough: require an exact name
    if (!candidates.includes(content)) return // not a valid delimiter
    // accept the content as a valid delimiter
    return input.acceptToken(MathDelimiter, end + 1)
  },
  { contextual: false }
)
const CHAR_AT_SYMBOL = _char('@')
// tokenizer for control sequence names of the form \[a-zA-Z@]+ — i.e. names
// that may contain '@', which the grammar's ordinary rules do not accept
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
  let offset = 0
  let end = -1
  // look at the first character, we are looking for acceptable control sequence names
  // including @ signs, \\[a-zA-Z@]+
  const next = input.peek(offset)
  if (next === -1) {
    return
  }
  // reject anything not starting with a backslash,
  // we only accept control sequences
  if (next !== CHAR_BACKSLASH) {
    return
  }
  offset++
  for (;;) {
    const next = input.peek(offset)
    // stop when we reach the end of file or a non-csname character
    if (next === -1 || !(alphaChar(next) || next === CHAR_AT_SYMBOL)) {
      end = offset - 1
      break
    }
    end = offset
    offset++
  }
  // NOTE(review): the loop always sets `end` (>= 0) before breaking, so this
  // guard looks unreachable — a backslash followed by no name characters
  // yields end === 0 and is accepted as a one-character Csname token (just
  // the backslash); confirm that is intentional
  if (end === -1) return
  // accept the content as a valid control sequence
  return input.acceptToken(Csname, end + 1)
})
const CHAR_SPACE = _char(' ')
const CHAR_NEWLINE = _char('\n')
// '\end{document}' reversed, for matching backwards from the current position
const END_DOCUMENT_MARK = '\\end{document}'.split('').reverse()
// tokenizer for content after \end{document}: whitespace-only trailing text
// becomes TrailingWhitespaceOnly, anything else becomes TrailingContent
export const trailingContentTokenizer = new ExternalTokenizer(
  (input, stack) => {
    if (input.next === -1) return // no trailing content
    // Look back for end-document mark, bail out if any characters do not match
    for (let i = 1; i < END_DOCUMENT_MARK.length + 1; i++) {
      if (String.fromCharCode(input.peek(-i)) !== END_DOCUMENT_MARK[i - 1]) {
        return
      }
    }
    // consume the run of spaces/newlines; if it reaches EOF, the trailing
    // content was whitespace only
    while (input.next === CHAR_SPACE || input.next === CHAR_NEWLINE) {
      const next = input.advance()
      if (next === -1) return input.acceptToken(TrailingWhitespaceOnly) // trailing whitespace only
    }
    // accept the all content up to the end of the document
    while (input.advance() !== -1) {
      //
    }
    return input.acceptToken(TrailingContent)
  }
)
// \ref-like commands (cross-referencing); these map to RefCtrlSeq and do not
// take a starred variant
const refCommands = new Set([
  '\\fullref',
  '\\Vref',
  '\\autopageref',
  '\\autoref',
  '\\eqref',
  '\\labelcpageref',
  '\\labelcref',
  '\\lcnamecref',
  '\\lcnamecrefs',
  '\\namecref',
  '\\nameCref',
  '\\namecrefs',
  '\\nameCrefs',
  '\\thnameref',
  '\\thref',
  '\\titleref',
  '\\vrefrange',
  '\\Crefrange',
  // NOTE(review): '\\Crefrang' looks like a typo for '\\Crefrange' (already
  // listed above) — confirm against the cleveref package before removing
  '\\Crefrang',
])
// \ref-like commands that also have a starred variant (\ref*, \cref*, ...);
// these map to RefStarrableCtrlSeq
const refStarrableCommands = new Set([
  '\\vpageref',
  '\\vref',
  '\\zcpageref',
  '\\zcref',
  '\\zfullref',
  '\\zref',
  '\\zvpageref',
  '\\zvref',
  '\\cref',
  '\\Cref',
  '\\pageref',
  '\\ref',
  '\\Ref',
  '\\zpageref',
  '\\ztitleref',
  '\\vpagerefrange',
  '\\zvpagerefrange',
  '\\zvrefrange',
  '\\crefrange',
])
// citation commands without a starred variant; these map to CiteCtrlSeq
const citeCommands = new Set([
  '\\autocites',
  '\\Autocites',
  '\\Cite',
  '\\citeA',
  '\\citealp',
  '\\Citealp',
  '\\citealt',
  '\\Citealt',
  '\\citeauthorNP',
  '\\citeauthorp',
  '\\Citeauthorp',
  '\\citeauthort',
  '\\Citeauthort',
  '\\citeNP',
  '\\citenum',
  '\\cites',
  '\\Cites',
  '\\citeurl',
  '\\citeyearpar',
  '\\defcitealias',
  '\\fnotecite',
  '\\footcite',
  '\\footcitetext',
  '\\footfullcite',
  '\\footnotecites',
  '\\Footnotecites',
  '\\fullcite',
  '\\fullciteA',
  '\\fullciteauthor',
  '\\fullciteauthorNP',
  '\\maskcite',
  '\\maskciteA',
  '\\maskcitealp',
  '\\maskCitealp',
  '\\maskcitealt',
  '\\maskCitealt',
  '\\maskciteauthor',
  '\\maskciteauthorNP',
  '\\maskciteauthorp',
  '\\maskCiteauthorp',
  '\\maskciteauthort',
  '\\maskCiteauthort',
  '\\maskciteNP',
  '\\maskcitenum',
  '\\maskcitep',
  '\\maskCitep',
  '\\maskcitepalias',
  '\\maskcitet',
  '\\maskCitet',
  '\\maskcitetalias',
  '\\maskciteyear',
  '\\maskciteyearNP',
  '\\maskciteyearpar',
  '\\maskfullcite',
  '\\maskfullciteA',
  '\\maskfullciteauthor',
  '\\maskfullciteauthorNP',
  '\\masknocite',
  '\\maskshortcite',
  '\\maskshortciteA',
  '\\maskshortciteauthor',
  '\\maskshortciteauthorNP',
  '\\maskshortciteNP',
  '\\mautocite',
  '\\Mautocite',
  '\\mcite',
  '\\Mcite',
  '\\mfootcite',
  '\\mfootcitetext',
  '\\mparencite',
  '\\Mparencite',
  '\\msupercite',
  '\\mtextcite',
  '\\Mtextcite',
  '\\nocite',
  '\\nocitemeta',
  '\\notecite',
  '\\Parencite',
  '\\parencites',
  '\\Parencites',
  '\\pnotecite',
  '\\shortcite',
  '\\shortciteA',
  '\\shortciteauthor',
  '\\shortciteauthorNP',
  '\\shortciteNP',
  '\\smartcite',
  '\\Smartcite',
  '\\smartcites',
  '\\Smartcites',
  '\\supercite',
  '\\supercites',
  '\\textcite',
  '\\Textcite',
  '\\textcites',
  '\\Textcites',
])
// citation commands that also have a starred variant (\cite*, \citep*, ...);
// these map to CiteStarrableCtrlSeq
const citeStarredCommands = new Set([
  '\\cite',
  '\\citeauthor',
  '\\Citeauthor',
  '\\citedate',
  '\\citep',
  '\\Citep',
  '\\citetitle',
  '\\citeyear',
  '\\parencite',
  '\\citet',
  '\\autocite',
  '\\Autocite',
])
// label-defining commands; these map to LabelCtrlSeq
const labelCommands = new Set(['\\label', '\\thlabel', '\\zlabel'])
// commands whose argument is text inside math mode; these map to MathTextCtrlSeq
const mathTextCommands = new Set(['\\text', '\\tag', '\\textrm', '\\intertext'])
// Remaining known control sequences, mapped directly to their grammar terms.
// Several aliases map to the same term (e.g. \addchap -> ChapterCtrlSeq).
const otherKnowncommands = {
  '\\hbox': HboxCtrlSeq,
  '\\title': TitleCtrlSeq,
  '\\author': AuthorCtrlSeq,
  '\\documentclass': DocumentClassCtrlSeq,
  '\\usepackage': UsePackageCtrlSeq,
  '\\href': HrefCtrlSeq,
  '\\verb': VerbCtrlSeq,
  '\\lstinline': LstInlineCtrlSeq,
  '\\includegraphics': IncludeGraphicsCtrlSeq,
  '\\caption': CaptionCtrlSeq,
  '\\def': DefCtrlSeq,
  '\\left': LeftCtrlSeq,
  '\\right': RightCtrlSeq,
  '\\newcommand': NewCommandCtrlSeq,
  '\\renewcommand': RenewCommandCtrlSeq,
  '\\newenvironment': NewEnvironmentCtrlSeq,
  '\\renewenvironment': RenewEnvironmentCtrlSeq,
  '\\book': BookCtrlSeq,
  '\\part': PartCtrlSeq,
  '\\addpart': PartCtrlSeq,
  '\\chapter': ChapterCtrlSeq,
  '\\addchap': ChapterCtrlSeq,
  '\\section': SectionCtrlSeq,
  // NOTE(review): '\\addseq' looks like a typo for KOMA-Script's '\\addsec'
  // (cf. '\\addpart'/'\\addchap' above) — confirm before changing the key
  '\\addseq': SectionCtrlSeq,
  '\\subsection': SubSectionCtrlSeq,
  '\\subsubsection': SubSubSectionCtrlSeq,
  '\\paragraph': ParagraphCtrlSeq,
  '\\subparagraph': SubParagraphCtrlSeq,
  '\\input': InputCtrlSeq,
  '\\include': IncludeCtrlSeq,
  '\\item': ItemCtrlSeq,
  '\\centering': CenteringCtrlSeq,
  '\\bibliography': BibliographyCtrlSeq,
  '\\bibliographystyle': BibliographyStyleCtrlSeq,
}
// specializer for control sequences
// return new tokens for specific control sequences, or -1 for no match
export const specializeCtrlSeq = (name, terms) => {
  if (name === '\\begin') return Begin
  if (name === '\\end') return End
  // check each command group in priority order
  const groups = [
    [refCommands, RefCtrlSeq],
    [refStarrableCommands, RefStarrableCtrlSeq],
    [citeCommands, CiteCtrlSeq],
    [citeStarredCommands, CiteStarrableCtrlSeq],
    [labelCommands, LabelCtrlSeq],
    [mathTextCommands, MathTextCtrlSeq],
  ]
  for (const [commands, term] of groups) {
    if (commands.has(name)) return term
  }
  return otherKnowncommands[name] || -1
}
// table-like environments; these map to TabularEnvName
const tabularEnvNames = new Set([
  'tabular',
  'xltabular',
  'tabularx',
  'longtable',
])
// single-equation math environments; these map to EquationEnvName
const equationEnvNames = new Set([
  'equation',
  'equation*',
  'displaymath',
  'displaymath*',
  'math',
  'math*',
  'multline',
  'multline*',
  'matrix',
  'tikzcd',
])
// aligned/multi-row math environments; these map to EquationArrayEnvName
const equationArrayEnvNames = new Set([
  'array',
  'eqnarray',
  'eqnarray*',
  'align',
  'align*',
  'alignat',
  'alignat*',
  'flalign',
  'flalign*',
  'gather',
  'gather*',
  'pmatrix',
  'pmatrix*',
  'bmatrix',
  'bmatrix*',
  'Bmatrix',
  'Bmatrix*',
  'vmatrix',
  'vmatrix*',
  'Vmatrix',
  'Vmatrix*',
  'smallmatrix',
  'smallmatrix*',
  'split',
  'split*',
  'gathered',
  'gathered*',
  'aligned',
  'aligned*',
  'alignedat',
  'alignedat*',
  'cases',
  'cases*',
  'dcases',
  'dcases*',
  'IEEEeqnarray',
  'IEEEeqnarray*',
])
// environments whose body is treated verbatim (no LaTeX parsing inside);
// these map to VerbatimEnvName.
// Fix: removed a duplicate 'lstlisting' entry — Sets deduplicate, so the
// second occurrence was dead weight.
const verbatimEnvNames = new Set([
  'verbatim',
  'boxedverbatim',
  'lstlisting',
  'minted',
  'Verbatim',
  'codeexample',
  'comment',
])
// remaining known environments, mapped directly to their grammar terms;
// several names share a term (subfigure -> FigureEnvName, itemize/enumerate
// -> ListEnvName)
const otherKnownEnvNames = {
  document: DocumentEnvName,
  tikzpicture: TikzPictureEnvName,
  figure: FigureEnvName,
  subfigure: FigureEnvName,
  enumerate: ListEnvName,
  itemize: ListEnvName,
}
// specializer for environment names: return the grammar term for a known
// environment, or -1 for no match
export const specializeEnvName = (name, terms) => {
  // check each environment group in priority order
  const groups = [
    [tabularEnvNames, TabularEnvName],
    [equationEnvNames, EquationEnvName],
    [equationArrayEnvNames, EquationArrayEnvName],
    [verbatimEnvNames, VerbatimEnvName],
  ]
  for (const [names, term] of groups) {
    if (names.has(name)) return term
  }
  return otherKnownEnvNames[name] || -1
}
// control symbols for inline/display math delimiters, mapped to their terms
const otherKnownCtrlSyms = {
  '\\(': OpenParenCtrlSym,
  '\\)': CloseParenCtrlSym,
  '\\[': OpenBracketCtrlSym,
  '\\]': CloseBracketCtrlSym,
}
// specializer for control symbols: return the grammar term for a known
// symbol, or -1 for no match
export const specializeCtrlSym = (name, terms) => {
  const term = otherKnownCtrlSyms[name]
  return term || -1
}