overleaf/services/web/frontend/js/features/source-editor/lezer-latex/latex.grammar
Mathias Jakobsen ddfdafd54c Merge pull request #19050 from overleaf/mj-lezer-parse-comments-in-unknown-commands
[lezer] Introduce hasMoreArguments token for argument parsing

GitOrigin-RevId: a5898a2be01c19a39de15c784f184fe61140799a
2024-06-25 11:12:18 +00:00

750 lines
18 KiB
Text

// Track environments
// The context tracker `elementContext` is imported from ./tokens.mjs; its
// implementation is not part of this grammar file.
@context elementContext from "./tokens.mjs"
// External tokens must be defined before normal @tokens to take precedence
// over them.
// VerbContent is the token that follows VerbCtrlSeq in VerbCommand. It is
// produced by the external tokenizer `verbTokenizer` from ./tokens.mjs.
@external tokens verbTokenizer from "./tokens.mjs" {
VerbContent
}
// LstInlineContent is the last element of LstInlineCommand. It is produced by
// the external tokenizer `lstinlineTokenizer` from ./tokens.mjs.
@external tokens lstinlineTokenizer from "./tokens.mjs" {
LstInlineContent
}
// LiteralArgContent is the content between OpenBrace and CloseBrace in
// UrlArgument, FilePathArgument and the braced name argument of
// NewCommand / RenewCommand / NewEnvironment / RenewEnvironment.
@external tokens literalArgTokenizer from "./tokens.mjs" {
LiteralArgContent
}
// SpaceDelimitedLiteralArgContent follows a Whitespace token in
// BareFilePathArgument (the unbraced argument form accepted by Input).
@external tokens spaceDelimitedLiteralArgTokenizer from "./tokens.mjs" {
SpaceDelimitedLiteralArgContent
}
// VerbatimContent is the body of VerbatimEnvironment, i.e. what sits between
// BeginEnv<VerbatimEnvName> and EndEnv<VerbatimEnvName>.
@external tokens verbatimTokenizer from "./tokens.mjs" {
VerbatimContent
}
// External tokenizer to read control sequence names including @ signs
// (which are often used in TeX definitions).
// Csname is accepted as the name being defined in Def, NewCommand,
// RenewCommand and RenewEnvironment.
@external tokens csnameTokenizer from "./tokens.mjs" {
Csname
}
// Either of these tokens may optionally follow EndEnv<DocumentEnvName> in
// DocumentEnvironment. How the tokenizer chooses between the two is decided
// in ./tokens.mjs and is not visible from this file.
@external tokens trailingContentTokenizer from "./tokens.mjs" {
TrailingWhitespaceOnly,
TrailingContent
}
// It doesn't seem to be possible to access specialized tokens in the context tracker.
// They have IDs which are not exported in the latex.terms.js file.
// This is a workaround: use an external specializer to explicitly choose the terms
// to use for the specialized tokens.
//
// `specializeCtrlSeq` (from ./tokens.mjs) is applied to CtrlSeq tokens and may
// replace them with one of the terms listed below. The mapping from command
// name to term lives in ./tokens.mjs and is not visible here.
@external specialize {CtrlSeq} specializeCtrlSeq from "./tokens.mjs" {
Begin,
End,
RefCtrlSeq,
RefStarrableCtrlSeq,
CiteCtrlSeq,
CiteStarrableCtrlSeq,
LabelCtrlSeq,
MathTextCtrlSeq,
HboxCtrlSeq,
TitleCtrlSeq,
DocumentClassCtrlSeq,
UsePackageCtrlSeq,
HrefCtrlSeq,
UrlCtrlSeq,
VerbCtrlSeq,
LstInlineCtrlSeq,
IncludeGraphicsCtrlSeq,
CaptionCtrlSeq,
DefCtrlSeq,
LeftCtrlSeq,
RightCtrlSeq,
NewCommandCtrlSeq,
RenewCommandCtrlSeq,
NewEnvironmentCtrlSeq,
RenewEnvironmentCtrlSeq,
// The eight sectioning terms below drive the Book ... SubParagraph rules.
// Related consumer (path as recorded by the original authors):
// services/web/frontend/js/features/outline/outline-parser.js
BookCtrlSeq,
PartCtrlSeq,
ChapterCtrlSeq,
SectionCtrlSeq,
SubSectionCtrlSeq,
SubSubSectionCtrlSeq,
ParagraphCtrlSeq,
SubParagraphCtrlSeq,
InputCtrlSeq,
IncludeCtrlSeq,
ItemCtrlSeq,
NewTheoremCtrlSeq,
TheoremStyleCtrlSeq,
CenteringCtrlSeq,
BibliographyCtrlSeq,
BibliographyStyleCtrlSeq,
AuthorCtrlSeq,
AffilCtrlSeq,
AffiliationCtrlSeq,
MaketitleCtrlSeq,
TextColorCtrlSeq,
ColorBoxCtrlSeq,
HLineCtrlSeq,
TopRuleCtrlSeq,
MidRuleCtrlSeq,
BottomRuleCtrlSeq,
MultiColumnCtrlSeq
}
// `specializeEnvName` (from ./tokens.mjs) is applied to EnvName tokens and may
// replace them with one of the terms below. Each term is the `name` parameter
// of BeginEnv/EndEnv in the matching *Environment rule (DocumentEnvironment,
// TabularEnvironment, ...). Which environment names map to which term is
// decided in ./tokens.mjs.
@external specialize {EnvName} specializeEnvName from "./tokens.mjs" {
DocumentEnvName,
TabularEnvName,
EquationEnvName,
EquationArrayEnvName,
VerbatimEnvName,
TikzPictureEnvName,
FigureEnvName,
ListEnvName,
TableEnvName
}
// `specializeCtrlSym` (from ./tokens.mjs) is applied to CtrlSym tokens and may
// replace them with one of the terms below. The paren/bracket terms delimit
// ParenMath and BracketMath; LineBreakCtrlSym starts the LineBreak rule.
@external specialize {CtrlSym} specializeCtrlSym from "./tokens.mjs" {
OpenParenCtrlSym,
CloseParenCtrlSym,
OpenBracketCtrlSym,
CloseBracketCtrlSym,
LineBreakCtrlSym
}
// Built-in (non-external) token definitions.
@tokens {
// A backslash followed by one or more ASCII letters, e.g. \section
CtrlSeq { "\\" $[a-zA-Z]+ }
// A backslash followed by exactly one non-letter character, e.g. \% or \\
CtrlSym { "\\" ![a-zA-Z] }
// tokens for paragraphs
Whitespace { $[ \t]+ }
// A single line feed; two or more consecutive line feeds form a BlankLine
NewLine { "\n" }
BlankLine { "\n" "\n"+ }
// The first character may not be a space, but later characters may be:
// the second character class omits the space that the first one lists.
Normal { ![\\{}\[\]$&~#^_% \t\n] ![\\{}\[\]$&~#^_%\t\n]* } // everything is normal text, except these characters
// Overlap resolution for the text tokens: earlier entries win.
@precedence { CtrlSeq, CtrlSym, BlankLine, NewLine, Whitespace, Normal }
OpenBrace[closedBy=CloseBrace] { "{" }
CloseBrace[openedBy=OpenBrace] { "}" }
OpenBracket[closedBy=CloseBracket] { "[" }
CloseBracket[openedBy=OpenBracket] { "]" }
// From "%" to the end of the line; the terminating line feed, if present,
// is part of the comment token.
Comment { "%" ![\n]* "\n"? }
Dollar { "$" }
// Digits, optionally followed by "." and zero or more digits
Number { $[0-9]+ ("." $[0-9]*)? }
// The characters listed here are the same ones MathChar excludes (besides
// digits and the structural characters).
MathSpecialChar { $[^_=<>()\-+/*]+ } // FIXME not all of these are special
MathChar { ![0-9^_=<>()\-+/*\\{}\[\]$%&~ \t\n]+ }
// Overlap resolution for the math tokens: earlier entries win.
@precedence { Number, MathSpecialChar, MathChar }
Ampersand { "&" }
Tilde { "~" }
// ASCII letters with an optional trailing "*" (starred environments)
EnvName { $[a-zA-Z]+ $[*]? }
}
// Top-level rule: a document is a single Text node.
@top LaTeX {
Text
}
// Comment tokens are skipped wherever they occur, so no rule has to mention
// them explicitly. Whitespace and NewLine are NOT skipped; rules reference
// them explicitly.
@skip { Comment }
// TEXT MODE
// A single Whitespace token carrying the `argument` precedence (declared in
// the @precedence block at the end of the file). Callers write
// `optionalWhitespace?` to make it optional.
optionalWhitespace {
!argument Whitespace
}
// A bracketed argument: "[" ShortOptionalArg "]".
OptionalArgument {
!argument OpenBracket ShortOptionalArg CloseBracket
}
// A braced argument whose content is a LongArg (which may contain
// environments and blank lines).
TextArgument {
!argument OpenBrace LongArg CloseBrace
}
// Same shape as TextArgument, but emitted under its own node name. It is the
// final element of SectioningCommand.
SectioningArgument {
!argument OpenBrace LongArg CloseBrace
}
// Argument of the Label command; wraps a ShortTextArgument in its own node.
LabelArgument {
!argument ShortTextArgument
}
// Argument of the Ref command; wraps a ShortTextArgument in its own node.
RefArgument {
!argument ShortTextArgument
}
// Argument of the Cite command; wraps a ShortTextArgument in its own node.
BibKeyArgument {
!argument ShortTextArgument
}
// Argument of the UsePackage command; wraps a ShortTextArgument in its own node.
PackageArgument {
!argument ShortTextArgument
}
// Braced argument whose content is TabularContent; the last argument of
// MultiColumn.
TabularArgument {
!argument OpenBrace TabularContent CloseBrace
}
// Braced argument whose content is the externally tokenized
// LiteralArgContent. Used by HrefCommand and UrlCommand.
UrlArgument {
OpenBrace LiteralArgContent CloseBrace
}
// Braced argument whose content is the externally tokenized
// LiteralArgContent. Used by IncludeGraphics, Input and Include.
FilePathArgument {
OpenBrace LiteralArgContent CloseBrace
}
// Unbraced file path: mandatory Whitespace followed by the externally
// tokenized SpaceDelimitedLiteralArgContent. Only Input accepts this form.
BareFilePathArgument {
Whitespace SpaceDelimitedLiteralArgContent
}
// Braced body of a macro/environment definition. An optional NewLine and any
// number of Whitespace tokens may precede the opening brace, so the body can
// start on the line after the command.
DefinitionArgument {
!argument NewLine? Whitespace* OpenBrace DefinitionFragment CloseBrace
}
// A macro parameter reference: "#" followed by a single digit 1-9.
MacroParameter {
"#" ("1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
}
// A MacroParameter wrapped in square brackets, e.g. [#1]. Accepted in the
// parameter list of Def.
OptionalMacroParameter {
OpenBracket MacroParameter CloseBracket
}
// The autocompletion code in services/web/frontend/js/features/source-editor/utils/tree-operations/commands.ts
// depends on following the `KnownCommand { Command { CommandCtrlSeq [args] } }`
// structure
//
// Commands with a dedicated node type and argument structure. Each alternative
// starts with a term produced by specializeCtrlSeq.
// `ArgumentType` is the rule used for the final argument of TextColorCommand
// and ColorBoxCommand: this file instantiates it with TextArgument (in Command
// and DefinitionFragment) and with MathArgument (in MathCommand).
KnownCommand<ArgumentType> {
Title {
TitleCtrlSeq optionalWhitespace? OptionalArgument? TextArgument
} |
Author {
AuthorCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
Affil {
AffilCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
Affiliation {
AffiliationCtrlSeq optionalWhitespace? OptionalArgument? optionalWhitespace? TextArgument
} |
DocumentClass {
DocumentClassCtrlSeq optionalWhitespace? OptionalArgument?
DocumentClassArgument { ShortTextArgument }
} |
BibliographyCommand {
BibliographyCtrlSeq optionalWhitespace?
BibliographyArgument { ShortTextArgument }
} |
BibliographyStyleCommand {
BibliographyStyleCtrlSeq optionalWhitespace?
BibliographyStyleArgument { ShortTextArgument }
} |
UsePackage {
UsePackageCtrlSeq optionalWhitespace? OptionalArgument?
PackageArgument
} |
// First argument is a ShortTextArgument; the second uses the ArgumentType
// parameter so that it matches the surrounding mode.
TextColorCommand {
TextColorCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
} |
ColorBoxCommand {
ColorBoxCtrlSeq optionalWhitespace? ShortTextArgument optionalWhitespace? ArgumentType
} |
HrefCommand {
HrefCtrlSeq optionalWhitespace? UrlArgument ShortTextArgument
} |
// After the first argument, either [optional]{text} or {text}[optional]
// is accepted.
NewTheoremCommand {
NewTheoremCtrlSeq "*"? optionalWhitespace? ShortTextArgument ((OptionalArgument? TextArgument) | (TextArgument OptionalArgument))
} |
TheoremStyleCommand {
TheoremStyleCtrlSeq optionalWhitespace? ShortTextArgument
} |
UrlCommand {
UrlCtrlSeq optionalWhitespace? UrlArgument
} |
// No whitespace is allowed for here: VerbContent must directly follow.
VerbCommand {
VerbCtrlSeq VerbContent
} |
LstInlineCommand {
LstInlineCtrlSeq optionalWhitespace? OptionalArgument? LstInlineContent
} |
IncludeGraphics {
IncludeGraphicsCtrlSeq optionalWhitespace? OptionalArgument?
IncludeGraphicsArgument { FilePathArgument }
} |
Caption {
CaptionCtrlSeq "*"? optionalWhitespace? OptionalArgument? TextArgument
} |
Label {
LabelCtrlSeq optionalWhitespace? LabelArgument
} |
// Only the "starrable" variant may be followed by "*". Up to two optional
// arguments may precede the mandatory argument.
Ref {
(RefCtrlSeq | RefStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? RefArgument
} |
Cite {
(CiteCtrlSeq | CiteStarrableCtrlSeq "*"?) optionalWhitespace? OptionalArgument? optionalWhitespace? OptionalArgument? optionalWhitespace? BibKeyArgument
} |
Def {
// allow more general Csname argument to \def commands, since other symbols such as '@' are often used in definitions
DefCtrlSeq optionalWhitespace? (Csname | CtrlSym) optionalWhitespace? (MacroParameter | OptionalMacroParameter)* optionalWhitespace? DefinitionArgument
} |
Hbox {
HboxCtrlSeq optionalWhitespace? TextArgument
} |
// The name is either a bare Csname or a braced literal; any number of
// optional arguments may precede the definition body.
NewCommand {
NewCommandCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
} |
RenewCommand {
RenewCommandCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
} |
// The name must be braced; two definition bodies follow (begin and end code).
NewEnvironment {
NewEnvironmentCtrlSeq optionalWhitespace?
(OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
DefinitionArgument
} |
// NOTE(review): unlike NewEnvironment, the name may also be a bare Csname
// here - confirm that this difference is intended.
RenewEnvironment {
RenewEnvironmentCtrlSeq optionalWhitespace?
(Csname | OpenBrace LiteralArgContent CloseBrace)
(OptionalArgument)*
DefinitionArgument
DefinitionArgument
} |
// Input accepts a braced path or a whitespace-delimited bare path;
// Include accepts the braced form only.
Input {
InputCtrlSeq InputArgument { ( FilePathArgument | BareFilePathArgument ) }
} |
Include {
IncludeCtrlSeq IncludeArgument { FilePathArgument }
} |
Centering {
CenteringCtrlSeq
} |
Item {
ItemCtrlSeq OptionalArgument? optionalWhitespace?
} |
Maketitle {
MaketitleCtrlSeq optionalWhitespace?
} |
// One node type for all four rule commands.
HorizontalLine {
(HLineCtrlSeq | TopRuleCtrlSeq | MidRuleCtrlSeq | BottomRuleCtrlSeq) optionalWhitespace?
} |
MultiColumn {
MultiColumnCtrlSeq
optionalWhitespace? SpanArgument { ShortTextArgument }
optionalWhitespace? ColumnArgument { ShortTextArgument }
optionalWhitespace? TabularArgument
} |
// The argument is always a TextArgument, regardless of ArgumentType.
MathTextCommand {
MathTextCtrlSeq optionalWhitespace? "*"? TextArgument
}
}
// Text-mode fallback for control sequences that have no dedicated rule.
// Alternatives, in order:
//   1. CtrlSeq, one Whitespace, then one or more optional/text arguments
//   2. CtrlSeq directly followed by one or more optional/text arguments
//   3. CtrlSeq with no arguments, optionally followed by Whitespace
//   4. a control symbol (CtrlSym)
UnknownCommand {
(CtrlSeq !argument Whitespace (OptionalArgument | TextArgument)+)
| (CtrlSeq (OptionalArgument | TextArgument)+)
| CtrlSeq Whitespace?
| CtrlSym
}
// Any command in text mode. KnownCommand is instantiated with TextArgument,
// so TextColorCommand/ColorBoxCommand take a text argument here.
Command {
KnownCommand<TextArgument>
| UnknownCommand
| KnownCtrlSym
// Not technically allowed in normal mode, but not worth failing the parse over
| LeftCtrlSeq
| RightCtrlSeq
}
// Control symbols with a dedicated node type. Currently only LineBreak: the
// LineBreakCtrlSym term with an optional bracketed argument.
KnownCtrlSym {
LineBreak {
LineBreakCtrlSym OptionalArgument?
}
}
// The basic text-mode elements: commands, the three inline/display math
// forms and plain tokens. Bare brackets, groups, environments and blank
// lines are excluded; the textWith* rules add them.
textBase {
( Command
| DollarMath
| BracketMath
| ParenMath
| NewLine
| Normal
| Whitespace
| Ampersand
| Tilde
)
}
// textBase plus literal "[" and "]" tokens.
textWithBrackets {
( textBase
| OpenBracket
| CloseBracket
)
}
// textWithBrackets plus blank lines and both kinds of environment.
textWithEnvironmentsAndBlankLines {
( BlankLine
| KnownEnvironment
| Environment
| textWithBrackets
)
}
// textWithEnvironmentsAndBlankLines plus a brace group containing Text.
// Group makes its content optional, so an empty "{}" is accepted here.
textWithGroupsEnvironmentsAndBlankLines {
textWithEnvironmentsAndBlankLines
| Group<Text>
}
// Wraps `Element` in a Content node. Used for the body of sections and
// environments.
Content<Element> {
Element
}
// Heading of a section: the sectioning control sequence passed as `Command`,
// an optional "*", an optional bracketed argument and the mandatory
// SectioningArgument, with optional whitespace between each part.
// The parameter name `Command` shadows the top-level Command rule inside
// this rule.
SectioningCommand<Command> {
Command optionalWhitespace? "*"? optionalWhitespace? OptionalArgument? optionalWhitespace? SectioningArgument
}
// A section heading followed by its Content: any mix of sectionText and the
// section levels passed as `Next`. Nested sections carry the `section`
// precedence declared in the @precedence block at the end of the file.
documentSection<Command, Next> {
SectioningCommand<Command> Content<(sectionText | !section Next)*>
}
// Section hierarchy, from the highest level (Book) to the lowest
// (SubParagraph). Each level lists every lower level as allowed nested
// content, so levels may be skipped (e.g. a SubSection directly inside a
// Chapter). SubParagraph has no lower level and contains sectionText only.
// Every rule is tagged with @isGroup="$SectioningCommand".
Book[@isGroup="$SectioningCommand"] { documentSection<BookCtrlSeq, Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Part[@isGroup="$SectioningCommand"] { documentSection<PartCtrlSeq, Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Chapter[@isGroup="$SectioningCommand"] { documentSection<ChapterCtrlSeq, Section | SubSection | SubSubSection | Paragraph | SubParagraph> }
Section[@isGroup="$SectioningCommand"] { documentSection<SectionCtrlSeq, SubSection | SubSubSection | Paragraph | SubParagraph> }
SubSection[@isGroup="$SectioningCommand"] { documentSection<SubSectionCtrlSeq, SubSubSection | Paragraph | SubParagraph> }
SubSubSection[@isGroup="$SectioningCommand"] { documentSection<SubSubSectionCtrlSeq, Paragraph | SubParagraph> }
Paragraph[@isGroup="$SectioningCommand"] { documentSection<ParagraphCtrlSeq, SubParagraph> }
SubParagraph[@isGroup="$SectioningCommand"] { SectioningCommand<SubParagraphCtrlSeq> Content<sectionText*> }
// Any section level. Used by Text so that a document may start at any level.
sectioningCommand {
Book | Part | Chapter | Section | SubSection | SubSubSection | Paragraph | SubParagraph
}
// One or more non-sectioning text elements, carrying the `section`
// precedence declared in the @precedence block at the end of the file.
sectionText {
!section (
textWithGroupsEnvironmentsAndBlankLines
)+
}
// General text-mode content: one or more runs of section text or sections.
// Because of the "+", Text cannot be empty.
Text {
( sectionText
| sectioningCommand)+
}
// Content of TextArgument and SectioningArgument. May be empty. Unlike
// ShortArg it also admits environments and blank lines.
LongArg {
( textWithBrackets
| NonEmptyGroup<LongArg>
| KnownEnvironment
| Environment
| BlankLine
| "#" // macro character
| "_" | "^" // other math chars
)*
}
// A braced argument whose content is a ShortArg. Note that, unlike
// TextArgument, this rule carries no `!argument` precedence marker of its own.
ShortTextArgument {
OpenBrace ShortArg CloseBrace
}
// Content of ShortTextArgument. May be empty. Like LongArg, but without
// environments and blank lines.
ShortArg {
( textWithBrackets
| NonEmptyGroup<ShortArg>
| "#" // macro character
| "_" | "^" // other math chars
)*
}
// Content of OptionalArgument. May be empty. Built on textBase rather than
// textWithBrackets, so bare "[" and "]" are not part of the content; "^" is
// not accepted either.
ShortOptionalArg {
( textBase
| NonEmptyGroup<ShortOptionalArg>
| "#" // macro character
| "_" // underscore is used in some parameter names
)*
}
// Body of TikzPictureEnvironment. Requires at least one element ("+").
// Sectioning commands are not part of this rule.
TikzPictureContent { /// same as Text but with added allowed characters
( textWithEnvironmentsAndBlankLines
| NonEmptyGroup<TikzPictureContent>
| "#" // macro character
| "_" | "^" // other math chars
)+
}
// Body of a DefinitionArgument. May be empty. This is a deliberately loose
// rule: Begin/End, Dollar and the math delimiters are accepted as individual
// tokens without requiring a matching partner, and sectioning commands are
// accepted as a heading only (SectioningCommand, without section Content).
DefinitionFragment {
( KnownCommand<TextArgument>
| CtrlSeq optionalWhitespace?
| CtrlSym
| Begin
| End
| NonEmptyGroup<DefinitionFragment>
| Dollar
| OpenParenCtrlSym
| CloseParenCtrlSym
| OpenBracketCtrlSym
| CloseBracketCtrlSym
| LeftCtrlSeq
| RightCtrlSeq
| KnownCtrlSym
| BlankLine
| NewLine
| Normal
| Whitespace
| OpenBracket
| CloseBracket
| "#" // macro character
| Ampersand // for tables
| Tilde // unbreakable space
| "_" | "^" // other math chars
| SectioningCommand<
BookCtrlSeq |
PartCtrlSeq |
ChapterCtrlSeq |
SectionCtrlSeq |
SubSectionCtrlSeq |
SubSubSectionCtrlSeq |
ParagraphCtrlSeq |
SubParagraphCtrlSeq
>
)*
}
// Environments with a dedicated rule, one per term produced by
// specializeEnvName. Any other environment is parsed by Environment.
KnownEnvironment {
( DocumentEnvironment
| TabularEnvironment
| EquationEnvironment
| EquationArrayEnvironment
| VerbatimEnvironment
| TikzPictureEnvironment
| FigureEnvironment
| ListEnvironment
| TableEnvironment
)
}
// Opening of an environment: Begin, the braced environment name, an optional
// bracketed argument and any number of braced text arguments.
BeginEnv<name> {
Begin
EnvNameGroup<name>
OptionalArgument?
(!argument TextArgument)*
}
// Closing of an environment: End followed by the braced environment name.
EndEnv<name> {
End
EnvNameGroup<name>
}
// The document environment. Its content is Text, and it is the only
// environment that may be followed by one of the trailing-content tokens
// from trailingContentTokenizer.
DocumentEnvironment[@isGroup="$Environment"] {
BeginEnv<DocumentEnvName>
Content<Text>
EndEnv<DocumentEnvName>
(TrailingWhitespaceOnly | TrailingContent)?
}
// Body of TabularEnvironment and TabularArgument. May be empty. Sectioning
// commands are not part of this rule.
TabularContent {
(textWithGroupsEnvironmentsAndBlankLines)*
}
// Environment whose name was specialized to TabularEnvName; the body is
// TabularContent.
TabularEnvironment[@isGroup="$Environment"] {
BeginEnv<TabularEnvName>
Content<TabularContent>
EndEnv<TabularEnvName>
}
// Environment whose name was specialized to TableEnvName; the body is Text.
TableEnvironment[@isGroup="$Environment"] {
BeginEnv<TableEnvName>
Content<Text>
EndEnv<TableEnvName>
}
// Environment whose name was specialized to EquationEnvName; the body is
// math-mode content and may be empty.
EquationEnvironment[@isGroup="$Environment"] {
BeginEnv<EquationEnvName>
Content<Math?>
EndEnv<EquationEnvName>
}
// Environment whose name was specialized to EquationArrayEnvName; the body
// is math-mode content and may be empty.
EquationArrayEnvironment[@isGroup="$Environment"] {
BeginEnv<EquationArrayEnvName>
Content<Math?>
EndEnv<EquationArrayEnvName>
}
// Environment whose name was specialized to VerbatimEnvName; the body is the
// single externally tokenized VerbatimContent token.
VerbatimEnvironment[@isGroup="$Environment"] {
BeginEnv<VerbatimEnvName>
Content<VerbatimContent>
EndEnv<VerbatimEnvName>
}
// Environment whose name was specialized to TikzPictureEnvName; the body is
// TikzPictureContent, which requires at least one element.
TikzPictureEnvironment[@isGroup="$Environment"] {
BeginEnv<TikzPictureEnvName>
Content<TikzPictureContent>
EndEnv<TikzPictureEnvName>
}
// Environment whose name was specialized to FigureEnvName; the body is Text.
FigureEnvironment[@isGroup="$Environment"] {
BeginEnv<FigureEnvName>
Content<Text>
EndEnv<FigureEnvName>
}
// Environment whose name was specialized to ListEnvName; the body is Text.
ListEnvironment[@isGroup="$Environment"] {
BeginEnv<ListEnvName>
Content<Text>
EndEnv<ListEnvName>
}
// The braced environment name that follows Begin or End.
EnvNameGroup<name> {
OpenBrace name CloseBrace
}
// Generic environment for names without a dedicated rule. The name is
// optional on both sides ("EnvName?"), so empty braces are accepted. The
// grammar itself does not require the begin and end names to match.
// NOTE(review): presumably name matching is left to the elementContext
// tracker - confirm in ./tokens.mjs.
Environment[@isGroup="$Environment"] {
BeginEnv<EnvName?>
Content<Text>
EndEnv<EnvName?>
}
// A brace group whose content is optional, so "{}" is accepted.
Group<GroupContent> {
OpenBrace GroupContent? CloseBrace
}
// A brace group whose content is mandatory. Callers pass content rules that
// can themselves match nothing (they end in "*"), with the exception of
// TikzPictureContent, which ends in "+".
NonEmptyGroup<GroupContent> {
OpenBrace GroupContent CloseBrace
}
/// MATH MODE
// Dollar-delimited math. The outer pair of Dollar tokens wraps either
// InlineMath ($ ... $) or DisplayMath, which supplies the inner pair of
// dollars ($$ ... $$).
DollarMath {
Dollar (InlineMath | DisplayMath) Dollar
}
// Body of $ ... $. Math is mandatory here, so inline math cannot be empty.
InlineMath {
Math
}
// The inner "$ ... $" of $$ ... $$; the outer dollars belong to DollarMath.
// Math is optional here, so display math may be empty.
DisplayMath {
Dollar Math? Dollar
}
// Opening delimiter of ParenMath; declares CloseParenMath as its closing
// partner via the closedBy prop.
OpenParenMath[closedBy=CloseParenMath] {
OpenParenCtrlSym
}
// Closing delimiter of ParenMath; declares OpenParenMath as its opening
// partner via the openedBy prop.
CloseParenMath[openedBy=OpenParenMath] {
CloseParenCtrlSym
}
// alternative syntax \( math \) for inline math, it is the same as $ math $
// (except that the body is optional here, whereas InlineMath requires Math)
ParenMath {
OpenParenMath
Math?
CloseParenMath
}
// Opening delimiter of BracketMath; declares CloseBracketMath as its closing
// partner via the closedBy prop.
OpenBracketMath[closedBy=CloseBracketMath] {
OpenBracketCtrlSym
}
// Closing delimiter of BracketMath; declares OpenBracketMath as its opening
// partner via the openedBy prop.
CloseBracketMath[openedBy=OpenBracketMath] {
CloseBracketCtrlSym
}
// alternative syntax \[ math \] for display math, it is the same as $$ math $$
// The body is optional, so "\[\]" is accepted.
BracketMath {
OpenBracketMath
Math?
CloseBracketMath
}
// FIXME: we should have separate math modes for inline and display math,
// because display math can contain blank lines while inline math cannot.
//
// One or more math-mode elements. BlankLine is not among the alternatives,
// so a blank line is not valid inside Math. Environments are allowed, and
// groups recurse into Math.
Math {
( MathCommand
| Group<Math>
| MathDelimitedGroup
| MathSpecialChar
| Number
| NewLine
| Whitespace
| KnownEnvironment
| Environment
| MathChar
| OpenBracket
| CloseBracket
| Ampersand
| Tilde
)+
}
// Any command in math mode. KnownCommand is instantiated with MathArgument,
// so TextColorCommand/ColorBoxCommand take a math argument here. Unlike the
// text-mode Command rule, LeftCtrlSeq/RightCtrlSeq are not alternatives;
// they are handled by MathDelimitedGroup.
MathCommand {
KnownCommand<MathArgument>
| MathUnknownCommand
| KnownCtrlSym
}
// Tokens used by MathUnknownCommand to decide whether another argument
// follows. Both names are lowercase, so they do not appear as named nodes in
// the syntax tree. How the tokenizer makes the decision is implemented in
// ./tokens.mjs and is not visible here.
// NOTE(review): this declaration comes after the @tokens block, whereas the
// note at the top of the file says external tokens must be declared before
// @tokens to take precedence - presumably that is not needed for these two
// tokens; confirm.
@external tokens argumentListTokenizer from "./tokens.mjs" {
hasMoreArguments,
endOfArguments
}
// Math-mode fallback for control sequences without a dedicated rule.
// Each argument must be announced by a hasMoreArguments token, and the
// argument list is always terminated by an endOfArguments token, even when
// there are no arguments. Only braced MathArguments are accepted.
MathUnknownCommand {
CtrlSeq (hasMoreArguments optionalWhitespace? MathArgument)* endOfArguments
| CtrlSym
}
// A braced argument in math mode; the content is optional. This rule carries
// no `!argument` precedence marker.
MathArgument {
OpenBrace Math? CloseBrace
}
// A \left ... \right pair with optional math content in between.
MathDelimitedGroup {
MathOpening Math? MathClosing
}
// FIXME: we have the same problem with specialize on \left,\right as the delimiters
// Opening half of a MathDelimitedGroup: LeftCtrlSeq, optional whitespace and
// a delimiter.
MathOpening {
LeftCtrlSeq optionalWhitespace? MathDelimiter
}
// Closing half of a MathDelimitedGroup: RightCtrlSeq, optional whitespace and
// a delimiter.
MathClosing {
RightCtrlSeq optionalWhitespace? MathDelimiter
}
// The delimiter that follows LeftCtrlSeq/RightCtrlSeq. The delimiters are
// written as literal strings in the rule, not as named tokens from @tokens.
MathDelimiter {
// Allowed delimiters, from the LaTeX manual, table 3.10
"/" | "|" | "(" | ")" | "[" | "]" |
"\\{" | "\\}" | "\\|" |
"\\lfloor" | "\\rfloor" |
"\\lceil" | "\\rceil" |
"\\langle" | "\\rangle" |
"\\backslash" | "\\uparrow" |
"\\Uparrow" | "\\Downarrow" |
"\\updownarrow" | "\\Updownarrow" |
"\\downarrow" | "\\lvert" |
"\\lVert" | "\\rVert" |
"\\rvert" | "\\vert" | "\\Vert" |
"\\lbrace" | "\\rbrace" |
"\\lbrack" | "\\rbrack" |
// Also allow the empty match
"."
}
// NOTE: precedence works differently for rules and tokens: in a rule
// you have to give a specifier !foo which is defined in the @precedence
// block here.
// `section` is referenced as !section in documentSection and sectionText;
// `argument` is referenced as !argument in the argument rules,
// optionalWhitespace, UnknownCommand and BeginEnv.
@precedence {
section @left,
argument @left // make CtrlSeq arguments left associative
}