mirror of
https://github.com/overleaf/overleaf.git
synced 2024-09-16 02:52:31 -04:00
Merge pull request #19032 from overleaf/mj-lezer-mathdelim
[lezer] Remove custom tokeniser for MathDelimiter GitOrigin-RevId: acbbbe439d51a8a9d5b30b91c55c8f8ef8c3b9fb
This commit is contained in:
parent
884eebd82d
commit
283c972842
2 changed files with 19 additions and 112 deletions
|
@ -25,10 +25,6 @@
|
|||
VerbatimContent
|
||||
}
|
||||
|
||||
@external tokens mathDelimiterTokenizer from "./tokens.mjs" {
|
||||
MathDelimiter
|
||||
}
|
||||
|
||||
// external tokenizer to read control sequence names including @ signs
|
||||
// (which are often used in TeX definitions).
|
||||
@external tokens csnameTokenizer from "./tokens.mjs" {
|
||||
|
@ -733,6 +729,25 @@ MathClosing {
|
|||
RightCtrlSeq optionalWhitespace? MathDelimiter
|
||||
}
|
||||
|
||||
// Delimiter accepted after \left / \right (used by MathClosing above).
// Declared as a plain grammar rule; this commit removes the former
// external mathDelimiterTokenizer in favour of these literal tokens.
MathDelimiter {
  // Allowed delimiters, from the LaTeX manual, table 3.10
  "/" | "|" | "(" | ")" | "[" | "]" |
  // control symbols
  "\\{" | "\\}" | "\\|" |
  // control-sequence names
  "\\lfloor" | "\\rfloor" |
  "\\lceil" | "\\rceil" |
  "\\langle" | "\\rangle" |
  "\\backslash" | "\\uparrow" |
  "\\Uparrow" | "\\Downarrow" |
  "\\updownarrow" | "\\Updownarrow" |
  "\\downarrow" | "\\lvert" |
  "\\lVert" | "\\rVert" |
  "\\rvert" | "\\vert" | "\\Vert" |
  "\\lbrace" | "\\rbrace" |
  "\\lbrack" | "\\rbrack" |
  // Also allow the empty match
  "."
}
|
||||
|
||||
// NOTE: precedence works differently for rules and token, in the rule
|
||||
// you have to give a specifier !foo which is defined in the @precedence
|
||||
// block here.
|
||||
|
|
|
@ -11,7 +11,6 @@ import {
|
|||
Begin,
|
||||
End,
|
||||
KnownEnvironment,
|
||||
MathDelimiter,
|
||||
Csname,
|
||||
TrailingWhitespaceOnly,
|
||||
TrailingContent,
|
||||
|
// Return the character code (UTF-16 code unit) of the first character
// of `s`. Used below to give readable names to the codes compared
// against input.peek().
function _char(s) {
  return s.charCodeAt(0)
}
|
||||
|
||||
// Allowed delimiters, from the LaTeX manual, table 3.10
// ( ) [ ] / | \{ \} \| and additional names below
// The empty delimiter . is also allowed

// Character codes for the single-character delimiters and control
// symbols, compared against input.peek() in the tokenizers below.
const CHAR_SLASH = '/'.charCodeAt(0)
const CHAR_PIPE = '|'.charCodeAt(0)
const CHAR_OPEN_PAREN = '('.charCodeAt(0)
const CHAR_CLOSE_PAREN = ')'.charCodeAt(0)
const CHAR_OPEN_BRACKET = '['.charCodeAt(0)
const CHAR_CLOSE_BRACKET = ']'.charCodeAt(0)
const CHAR_FULL_STOP = '.'.charCodeAt(0)
const CHAR_BACKSLASH = '\\'.charCodeAt(0)
const CHAR_OPEN_BRACE = '{'.charCodeAt(0)
const CHAR_CLOSE_BRACE = '}'.charCodeAt(0)
|
||||
|
@ -281,102 +269,6 @@ export const endOfArgumentListTokenizer = new ExternalTokenizer(
|
|||
{ contextual: false, fallback: true }
|
||||
)
|
||||
|
||||
// Control-sequence names (without the leading backslash) that are valid
// \left/\right delimiters, from the LaTeX manual, table 3.10.
const ALLOWED_DELIMITER_NAMES = (
  'lfloor rfloor lceil rceil langle rangle backslash ' +
  'uparrow downarrow Uparrow Downarrow updownarrow Updownarrow ' +
  'lvert rvert lVert rVert'
).split(' ')
|
||||
|
||||
// Given a list of allowed command names, return those whose leading
// characters are the same as matchString (i.e. matchString is a prefix).
function findPartialMatches(list, matchString) {
  return list.filter(entry => entry.startsWith(matchString))
}
|
||||
|
||||
// tokenizer for \leftX ... \rightX delimiter tokens
//
// Emits a single MathDelimiter token for:
//   - the single characters / | ( ) [ ] and "." (the empty delimiter),
//   - the two-character control symbols \{ \} \|,
//   - a backslash followed by one of ALLOWED_DELIMITER_NAMES.
// For any other input it emits nothing, so tokenization falls through.
export const mathDelimiterTokenizer = new ExternalTokenizer(
  (input, stack) => {
    let content = '' // command name accumulated so far (without backslash)
    let offset = 0 // lookahead distance from the token start
    let end = -1 // offset of the last character of a confirmed match
    // look at the first character, we only accept the following /|()[].
    let next = input.peek(offset)
    if (next === -1) {
      // end of input: no token
      return
    }
    if (
      next === CHAR_SLASH ||
      next === CHAR_PIPE ||
      next === CHAR_OPEN_PAREN ||
      next === CHAR_CLOSE_PAREN ||
      next === CHAR_OPEN_BRACKET ||
      next === CHAR_CLOSE_BRACKET ||
      next === CHAR_FULL_STOP // "." is the empty delimiter
    ) {
      return input.acceptToken(MathDelimiter, 1)
    }
    // reject anything else not starting with a backslash,
    // we only accept control symbols or control sequences
    if (next !== CHAR_BACKSLASH) {
      return
    }
    // look at the second character, we only accept \{ and \} and \| as control symbols
    offset++
    next = input.peek(offset)
    if (next === -1) {
      return
    }
    if (
      next === CHAR_OPEN_BRACE ||
      next === CHAR_CLOSE_BRACE ||
      next === CHAR_PIPE
    ) {
      // two characters: the backslash plus the symbol
      return input.acceptToken(MathDelimiter, 2)
    }
    // We haven't matched any symbols, so now try matching command names.
    // Is this character a potential match to the remaining allowed delimiter names?
    content = String.fromCharCode(next)
    let candidates = findPartialMatches(ALLOWED_DELIMITER_NAMES, content)
    if (!candidates.length) return
    // we have some candidates, look at subsequent characters
    offset++
    for (;;) {
      const next = input.peek(offset)
      // stop when we reach the end of file or a non-alphabetic character
      // NOTE(review): nameChar is defined elsewhere in this file;
      // presumably it tests for characters allowed in a command name — confirm
      if (next === -1 || !nameChar(next)) {
        end = offset - 1
        break
      }
      content += String.fromCharCode(next)
      // find how many candidates remain with the new input
      candidates = findPartialMatches(candidates, content)
      if (!candidates.length) return // no matches remaining
      end = offset
      offset++
    }
    // the accumulated name must be a complete entry, not just a proper
    // prefix of one (e.g. reject "\lflo")
    if (!candidates.includes(content)) return // not a valid delimiter
    // accept the content as a valid delimiter
    // (end is the offset of the last character, so token length is end + 1)
    return input.acceptToken(MathDelimiter, end + 1)
  },
  { contextual: false }
)
|
||||
|
||||
// Character code for '@', used by the csname tokenizer below.
const CHAR_AT_SYMBOL = '@'.charCodeAt(0)
|
||||
|
||||
export const csnameTokenizer = new ExternalTokenizer((input, stack) => {
|
||||
|
|
Loading…
Reference in a new issue