mirror of
https://github.com/overleaf/overleaf.git
synced 2024-11-07 20:31:06 -05:00
Merge pull request #19032 from overleaf/mj-lezer-mathdelim
[lezer] Remove custom tokeniser for MathDelimiter GitOrigin-RevId: acbbbe439d51a8a9d5b30b91c55c8f8ef8c3b9fb
This commit is contained in:
parent
884eebd82d
commit
283c972842
2 changed files with 19 additions and 112 deletions
|
@ -25,10 +25,6 @@
|
||||||
VerbatimContent
|
VerbatimContent
|
||||||
}
|
}
|
||||||
|
|
||||||
@external tokens mathDelimiterTokenizer from "./tokens.mjs" {
|
|
||||||
MathDelimiter
|
|
||||||
}
|
|
||||||
|
|
||||||
// external tokenizer to read control sequence names including @ signs
|
// external tokenizer to read control sequence names including @ signs
|
||||||
// (which are often used in TeX definitions).
|
// (which are often used in TeX definitions).
|
||||||
@external tokens csnameTokenizer from "./tokens.mjs" {
|
@external tokens csnameTokenizer from "./tokens.mjs" {
|
||||||
|
@ -733,6 +729,25 @@ MathClosing {
|
||||||
RightCtrlSeq optionalWhitespace? MathDelimiter
|
RightCtrlSeq optionalWhitespace? MathDelimiter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MathDelimiter {
|
||||||
|
// Allowed delimiters, from the LaTeX manual, table 3.10
|
||||||
|
"/" | "|" | "(" | ")" | "[" | "]" |
|
||||||
|
"\\{" | "\\}" | "\\|" |
|
||||||
|
"\\lfloor" | "\\rfloor" |
|
||||||
|
"\\lceil" | "\\rceil" |
|
||||||
|
"\\langle" | "\\rangle" |
|
||||||
|
"\\backslash" | "\\uparrow" |
|
||||||
|
"\\Uparrow" | "\\Downarrow" |
|
||||||
|
"\\updownarrow" | "\\Updownarrow" |
|
||||||
|
"\\downarrow" | "\\lvert" |
|
||||||
|
"\\lVert" | "\\rVert" |
|
||||||
|
"\\rvert" | "\\vert" | "\\Vert" |
|
||||||
|
"\\lbrace" | "\\rbrace" |
|
||||||
|
"\\lbrack" | "\\rbrack" |
|
||||||
|
// Also allow the empty match
|
||||||
|
"."
|
||||||
|
}
|
||||||
|
|
||||||
// NOTE: precedence works differently for rules and token, in the rule
|
// NOTE: precedence works differently for rules and token, in the rule
|
||||||
// you have to give a specifier !foo which is defined in the @precedence
|
// you have to give a specifier !foo which is defined in the @precedence
|
||||||
// block here.
|
// block here.
|
||||||
|
|
|
@ -11,7 +11,6 @@ import {
|
||||||
Begin,
|
Begin,
|
||||||
End,
|
End,
|
||||||
KnownEnvironment,
|
KnownEnvironment,
|
||||||
MathDelimiter,
|
|
||||||
Csname,
|
Csname,
|
||||||
TrailingWhitespaceOnly,
|
TrailingWhitespaceOnly,
|
||||||
TrailingContent,
|
TrailingContent,
|
||||||
|
@ -250,17 +249,6 @@ function _char(s) {
|
||||||
return s.charCodeAt(0)
|
return s.charCodeAt(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allowed delimiters, from the LaTeX manual, table 3.10
|
|
||||||
// ( ) [ ] / | \{ \} \| and additional names below
|
|
||||||
// The empty delimiter . is also allowed
|
|
||||||
|
|
||||||
const CHAR_SLASH = _char('/')
|
|
||||||
const CHAR_PIPE = _char('|')
|
|
||||||
const CHAR_OPEN_PAREN = _char('(')
|
|
||||||
const CHAR_CLOSE_PAREN = _char(')')
|
|
||||||
const CHAR_OPEN_BRACKET = _char('[')
|
|
||||||
const CHAR_CLOSE_BRACKET = _char(']')
|
|
||||||
const CHAR_FULL_STOP = _char('.')
|
|
||||||
const CHAR_BACKSLASH = _char('\\')
|
const CHAR_BACKSLASH = _char('\\')
|
||||||
const CHAR_OPEN_BRACE = _char('{')
|
const CHAR_OPEN_BRACE = _char('{')
|
||||||
const CHAR_CLOSE_BRACE = _char('}')
|
const CHAR_CLOSE_BRACE = _char('}')
|
||||||
|
@ -281,102 +269,6 @@ export const endOfArgumentListTokenizer = new ExternalTokenizer(
|
||||||
{ contextual: false, fallback: true }
|
{ contextual: false, fallback: true }
|
||||||
)
|
)
|
||||||
|
|
||||||
const ALLOWED_DELIMITER_NAMES = [
|
|
||||||
'lfloor',
|
|
||||||
'rfloor',
|
|
||||||
'lceil',
|
|
||||||
'rceil',
|
|
||||||
'langle',
|
|
||||||
'rangle',
|
|
||||||
'backslash',
|
|
||||||
'uparrow',
|
|
||||||
'downarrow',
|
|
||||||
'Uparrow',
|
|
||||||
'Downarrow',
|
|
||||||
'updownarrow',
|
|
||||||
'Updownarrow',
|
|
||||||
'lvert',
|
|
||||||
'rvert',
|
|
||||||
'lVert',
|
|
||||||
'rVert',
|
|
||||||
]
|
|
||||||
|
|
||||||
// Given a list of allowed command names, return those with leading characters that are the same as the matchString
|
|
||||||
function findPartialMatches(list, matchString) {
|
|
||||||
const size = matchString.length
|
|
||||||
return list.filter(
|
|
||||||
entry => entry.length >= size && entry.substring(0, size) === matchString
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// tokenizer for \leftX ... \rightX delimiter tokens
|
|
||||||
export const mathDelimiterTokenizer = new ExternalTokenizer(
|
|
||||||
(input, stack) => {
|
|
||||||
let content = ''
|
|
||||||
let offset = 0
|
|
||||||
let end = -1
|
|
||||||
// look at the first character, we only accept the following /|()[].
|
|
||||||
let next = input.peek(offset)
|
|
||||||
if (next === -1) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
next === CHAR_SLASH ||
|
|
||||||
next === CHAR_PIPE ||
|
|
||||||
next === CHAR_OPEN_PAREN ||
|
|
||||||
next === CHAR_CLOSE_PAREN ||
|
|
||||||
next === CHAR_OPEN_BRACKET ||
|
|
||||||
next === CHAR_CLOSE_BRACKET ||
|
|
||||||
next === CHAR_FULL_STOP
|
|
||||||
) {
|
|
||||||
return input.acceptToken(MathDelimiter, 1)
|
|
||||||
}
|
|
||||||
// reject anything else not starting with a backslash,
|
|
||||||
// we only accept control symbols or control sequences
|
|
||||||
if (next !== CHAR_BACKSLASH) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// look at the second character, we only accept \{ and \} and \| as control symbols
|
|
||||||
offset++
|
|
||||||
next = input.peek(offset)
|
|
||||||
if (next === -1) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if (
|
|
||||||
next === CHAR_OPEN_BRACE ||
|
|
||||||
next === CHAR_CLOSE_BRACE ||
|
|
||||||
next === CHAR_PIPE
|
|
||||||
) {
|
|
||||||
return input.acceptToken(MathDelimiter, 2)
|
|
||||||
}
|
|
||||||
// We haven't matched any symbols, so now try matching command names.
|
|
||||||
// Is this character a potential match to the remaining allowed delimiter names?
|
|
||||||
content = String.fromCharCode(next)
|
|
||||||
let candidates = findPartialMatches(ALLOWED_DELIMITER_NAMES, content)
|
|
||||||
if (!candidates.length) return
|
|
||||||
// we have some candidates, look at subsequent characters
|
|
||||||
offset++
|
|
||||||
for (;;) {
|
|
||||||
const next = input.peek(offset)
|
|
||||||
// stop when we reach the end of file or a non-alphabetic character
|
|
||||||
if (next === -1 || !nameChar(next)) {
|
|
||||||
end = offset - 1
|
|
||||||
break
|
|
||||||
}
|
|
||||||
content += String.fromCharCode(next)
|
|
||||||
// find how many candidates remain with the new input
|
|
||||||
candidates = findPartialMatches(candidates, content)
|
|
||||||
if (!candidates.length) return // no matches remaining
|
|
||||||
end = offset
|
|
||||||
offset++
|
|
||||||
}
|
|
||||||
if (!candidates.includes(content)) return // not a valid delimiter
|
|
||||||
// accept the content as a valid delimiter
|
|
||||||
return input.acceptToken(MathDelimiter, end + 1)
|
|
||||||
},
|
|
||||||
{ contextual: false }
|
|
||||||
)
|
|
||||||
|
|
||||||
const CHAR_AT_SYMBOL = _char('@')
|
const CHAR_AT_SYMBOL = _char('@')
|
||||||
|
|
||||||
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
|
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
|
||||||
|
|
Loading…
Reference in a new issue