[cm6] Use less string casting, refactor tokenizers (#13898)

* Use more char definitions in tokenizers
* Refactor tokenizers

GitOrigin-RevId: 89f49da15f2bd64419fd2f19b0f255625e1d75cc
This commit is contained in:
Alf Eaton 2023-08-04 08:37:00 +01:00 committed by Copybot
parent 1f173010c7
commit ea9a9fbe1c

View file

@ -197,73 +197,38 @@ const matchForward = (input, expected, offset = 0) => {
// External tokenizer that emits a VerbatimContent token, scanning forward
// until either input.peek() returns -1 or the text `\end{<name>}` matches at
// the current offset, where <name> is read from stack.context.name.
//
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// lost. Two loop variants are interleaved (a `for (;;)` loop that records
// `end` and breaks, and a `for (let offset = 0; ; offset++)` loop that returns
// early), and the braces do not balance as written. Confirm which lines are
// live before changing any code here.
export const verbatimTokenizer = new ExternalTokenizer(
(input, stack) => {
// closing marker to stop at, built from the current context's name
const delimiter = '\\end{' + stack.context.name + '}'
let offset = 0
let end = -1
for (;;) {
for (let offset = 0; ; offset++) {
const next = input.peek(offset)
// variant A: a peek result of -1 stops the scan (presumably end of input — confirm)
if (next === -1) {
end = offset - 1
break
// variant B: stop on -1 or on the delimiter, and accept everything before it
if (next === -1 || matchForward(input, delimiter, offset)) {
return input.acceptToken(VerbatimContent, offset)
}
if (matchForward(input, delimiter, offset)) {
// Found the end marker
end = offset - 1
break
}
offset++
}
// variant A: `end + 1` equals the offset at which the scan stopped
return input.acceptToken(VerbatimContent, end + 1)
},
{ contextual: false }
)
// tokenizer for \href{...} and similar commands
// Emits a LiteralArgContent token, scanning forward until input.peek()
// returns -1 or a closing brace is seen.
//
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// lost. Two variants are interleaved: one accumulates characters into a
// string and compares its tail with '}', the other compares the peeked char
// code with CHAR_CLOSE_BRACE directly. Two arrow-function headers are present
// and the braces do not balance as written. Confirm which lines are live
// before changing any code here.
export const literalArgTokenizer = new ExternalTokenizer(
(input, stack) => {
const delimiter = '}'
let content = ''
let offset = 0
let end = -1
for (;;) {
input => {
for (let offset = 0; ; offset++) {
const next = input.peek(offset)
// variant A: a peek result of -1 stops the scan (presumably end of input — confirm)
if (next === -1) {
end = offset - 1
break
// variant B: stop on -1 or on the closing-brace char code; no string is built
if (next === -1 || next === CHAR_CLOSE_BRACE) {
return input.acceptToken(LiteralArgContent, offset)
}
// variant A: append the char, then test whether the accumulated text ends with the delimiter
content += String.fromCharCode(next)
if (content.slice(-delimiter.length) === delimiter) {
// found the '}'
end = offset - delimiter.length
break
}
offset++
}
// variant A: with a one-character delimiter, `end + 1` equals the offset of the '}'
return input.acceptToken(LiteralArgContent, end + 1)
},
{ contextual: false }
)
// tokenizer for literal content delimited by whitespace, such as in `\input foo.tex`
// Emits a SpaceDelimitedLiteralArgContent token, scanning forward until
// input.peek() returns -1 or a space or newline is seen.
//
// NOTE(review): this span reads like a rendered diff whose +/- markers were
// lost. Two variants are interleaved: one accumulates characters into a
// string and inspects its last character, the other compares the peeked char
// code with CHAR_SPACE / CHAR_NEWLINE directly. Two arrow-function headers are
// present and the braces do not balance as written. Confirm which lines are
// live before changing any code here.
export const spaceDelimitedLiteralArgTokenizer = new ExternalTokenizer(
(input, stack) => {
let content = ''
let offset = 0
let end = -1
for (;;) {
input => {
for (let offset = 0; ; offset++) {
const next = input.peek(offset)
// variant A: a peek result of -1 stops the scan (presumably end of input — confirm)
if (next === -1) {
end = offset - 1
break
// variant B: stop on -1, a space or a newline; no string is built
if (next === -1 || next === CHAR_SPACE || next === CHAR_NEWLINE) {
return input.acceptToken(SpaceDelimitedLiteralArgContent, offset)
}
// variant A: append the char, then check whether the last accumulated char is ' ' or '\n'
content += String.fromCharCode(next)
if (content.slice(-1) === ' ' || content.slice(-1) === '\n') {
// found the whitespace
end = offset - 1
break
}
offset++
}
// variant A: `end + 1` equals the offset at which the scan stopped
return input.acceptToken(SpaceDelimitedLiteralArgContent, end + 1)
},
{ contextual: false }
)