Merge pull request #19050 from overleaf/mj-lezer-parse-comments-in-unknown-commands

[lezer] Introduce hasMoreArguments token for argument parsing

GitOrigin-RevId: a5898a2be01c19a39de15c784f184fe61140799a
This commit is contained in:
Mathias Jakobsen 2024-06-25 09:09:05 +01:00 committed by Copybot
parent 46a160f127
commit ddfdafd54c
2 changed files with 19 additions and 9 deletions

View file

@@ -694,12 +694,13 @@ MathCommand {
| KnownCtrlSym
}
@external tokens endOfArgumentListTokenizer from "./tokens.mjs" {
@external tokens argumentListTokenizer from "./tokens.mjs" {
hasMoreArguments,
endOfArguments
}
MathUnknownCommand {
CtrlSeq (optionalWhitespace? MathArgument)* optionalWhitespace? endOfArguments
CtrlSeq (hasMoreArguments optionalWhitespace? MathArgument)* endOfArguments
| CtrlSym
}

View file

@@ -81,8 +81,11 @@ import {
MultiColumnCtrlSeq,
// Marker for end of argument lists
endOfArguments,
hasMoreArguments,
} from './latex.terms.mjs'
const MAX_ARGUMENT_LOOKAHEAD = 100
function nameChar(ch) {
// we accept A-Z a-z 0-9 * + @ in environment names
return (
@@ -256,14 +259,20 @@ const CHAR_TAB = _char('\t')
const CHAR_SPACE = _char(' ')
const CHAR_NEWLINE = _char('\n')
export const endOfArgumentListTokenizer = new ExternalTokenizer(
export const argumentListTokenizer = new ExternalTokenizer(
input => {
const { next } = input
if (next === CHAR_SPACE || next === CHAR_TAB) {
return
}
if (next !== CHAR_OPEN_BRACE) {
input.acceptToken(endOfArguments)
for (let i = 0; i < MAX_ARGUMENT_LOOKAHEAD; ++i) {
const next = input.peek(i)
if (next === CHAR_SPACE || next === CHAR_TAB) {
continue
}
if (next === CHAR_OPEN_BRACE) {
input.acceptToken(hasMoreArguments)
return
} else {
input.acceptToken(endOfArguments)
return
}
}
},
{ contextual: false, fallback: true }