Merge pull request #19119 from overleaf/mj-lezer-definition-unknown-commands

[lezer] Parse unknown commands in definitions

GitOrigin-RevId: 586bf9e0de8e1d9e7903dd2fad8638cb2624ed9e
This commit is contained in:
Mathias Jakobsen 2024-06-26 09:59:40 +01:00 committed by Copybot
parent 26d7524c93
commit 3b5af94404
2 changed files with 61 additions and 27 deletions

View file

@ -205,7 +205,7 @@ BareFilePathArgument {
}
DefinitionArgument {
!argument NewLine? Whitespace* OpenBrace DefinitionFragment CloseBrace
!argument NewLine? Whitespace* OpenBrace DefinitionFragment? CloseBrace
}
MacroParameter {
@ -481,12 +481,10 @@ TikzPictureContent { /// same as Text but with added allowed characters
}
DefinitionFragment {
( KnownCommand<TextArgument>
| CtrlSeq optionalWhitespace?
| CtrlSym
( DefinitionFragmentCommand
| Begin
| End
| NonEmptyGroup<DefinitionFragment>
| Group<DefinitionFragment>
| Dollar
| OpenParenCtrlSym
| CloseParenCtrlSym
@ -494,7 +492,6 @@ DefinitionFragment {
| CloseBracketCtrlSym
| LeftCtrlSeq
| RightCtrlSeq
| KnownCtrlSym
| BlankLine
| NewLine
| Normal
@ -515,8 +512,17 @@ DefinitionFragment {
ParagraphCtrlSeq |
SubParagraphCtrlSeq
>
)*
)+
}
// Brace-delimited argument used for unknown commands in
// DefinitionFragmentCommand. The enclosed DefinitionFragment is optional, so
// an OpenBrace immediately followed by a CloseBrace also matches.
DefinitionFragmentArgument {
OpenBrace DefinitionFragment? CloseBrace
}
// Command alternatives referenced from DefinitionFragment: a known command
// taking TextArgument arguments, an UnknownCommand whose arguments are any mix
// of DefinitionFragmentArgument and OptionalArgument, or a KnownCtrlSym.
DefinitionFragmentCommand {
KnownCommand<TextArgument>
| UnknownCommand { genericUnknownCommandWithOptionalArguments<DefinitionFragmentArgument, OptionalArgument> }
| KnownCtrlSym
}
KnownEnvironment {
@ -690,7 +696,7 @@ Math {
MathCommand {
KnownCommand<MathArgument>
| MathUnknownCommand
| MathUnknownCommand { genericUnknownCommand<MathArgument> }
| KnownCtrlSym
}
@ -699,8 +705,18 @@ MathCommand {
endOfArguments
}
MathUnknownCommand {
CtrlSeq (hasMoreArguments optionalWhitespace? MathArgument)* endOfArguments
@external tokens argumentListWithOptionalTokenizer from "./tokens.mjs" {
hasMoreArgumentsOrOptionals,
endOfArgumentsAndOptionals
}
// Template for an unknown command, parameterised by the argument rule.
// Matches either a CtrlSeq followed by zero or more ArgumentType arguments
// (each introduced by the hasMoreArguments token, with optional whitespace
// before the argument) and terminated by endOfArguments, or a lone CtrlSym.
genericUnknownCommand<ArgumentType> {
CtrlSeq (hasMoreArguments optionalWhitespace? ArgumentType)* endOfArguments
| CtrlSym
}
// Template for an unknown command whose argument list may mix two kinds of
// argument. Matches either a CtrlSeq followed by zero or more arguments, each
// being ArgumentType or OptionalArgumentType and introduced by the
// hasMoreArgumentsOrOptionals token (with optional whitespace before the
// argument), terminated by endOfArgumentsAndOptionals, or a lone CtrlSym.
genericUnknownCommandWithOptionalArguments<ArgumentType, OptionalArgumentType> {
CtrlSeq (hasMoreArgumentsOrOptionals optionalWhitespace? (ArgumentType | OptionalArgumentType))* endOfArgumentsAndOptionals
| CtrlSym
}

View file

@ -82,6 +82,8 @@ import {
// Marker for end of argument lists
endOfArguments,
hasMoreArguments,
hasMoreArgumentsOrOptionals,
endOfArgumentsAndOptionals,
} from './latex.terms.mjs'
const MAX_ARGUMENT_LOOKAHEAD = 100
@ -254,29 +256,45 @@ function _char(s) {
const CHAR_BACKSLASH = _char('\\')
const CHAR_OPEN_BRACE = _char('{')
const CHAR_OPEN_BRACKET = _char('[')
const CHAR_CLOSE_BRACE = _char('}')
const CHAR_TAB = _char('\t')
const CHAR_SPACE = _char(' ')
const CHAR_NEWLINE = _char('\n')
export const argumentListTokenizer = new ExternalTokenizer(
input => {
for (let i = 0; i < MAX_ARGUMENT_LOOKAHEAD; ++i) {
const next = input.peek(i)
if (next === CHAR_SPACE || next === CHAR_TAB) {
continue
const lookaheadTokenizer = getToken =>
new ExternalTokenizer(
input => {
for (let i = 0; i < MAX_ARGUMENT_LOOKAHEAD; ++i) {
const next = input.peek(i)
if (next === CHAR_SPACE || next === CHAR_TAB) {
continue
}
const token = getToken(next)
if (token) {
input.acceptToken(token)
return
}
}
if (next === CHAR_OPEN_BRACE) {
input.acceptToken(hasMoreArguments)
return
} else {
input.acceptToken(endOfArguments)
return
}
}
},
{ contextual: false, fallback: true }
)
},
{ contextual: false, fallback: true }
)
// Plain argument lists: an opening brace yields hasMoreArguments, any other
// character handed to the callback yields endOfArguments.
export const argumentListTokenizer = lookaheadTokenizer(peeked =>
  peeked === CHAR_OPEN_BRACE ? hasMoreArguments : endOfArguments
)
// Argument lists that may also hold optional arguments: an opening brace or
// an opening bracket yields hasMoreArgumentsOrOptionals, any other character
// handed to the callback yields endOfArgumentsAndOptionals.
export const argumentListWithOptionalTokenizer = lookaheadTokenizer(peeked => {
  const opensArgument =
    peeked === CHAR_OPEN_BRACE || peeked === CHAR_OPEN_BRACKET
  return opensArgument
    ? hasMoreArgumentsOrOptionals
    : endOfArgumentsAndOptionals
})
const CHAR_AT_SYMBOL = _char('@')