2018-11-15 04:40:33 -05:00
|
|
|
module.exports = DocumentHelper =
  # Extract a plain-text title from TeX source.
  #
  # content          - either the document as a single string, or an array of
  #                    lines. null/undefined is treated as an empty document.
  # maxContentToScan - when `content` is a string, only this many leading
  #                    characters are scanned (default 30000). It is NOT
  #                    applied when `content` is already an array of lines.
  #
  # Returns the de-TeXed argument of the first `\title{...}`, `\Title{...}`,
  # `\title*{...}` or `\title[...]` found, or null when no line has one.
  # The capture stops at the first closing brace/bracket, so a title that
  # contains nested groups is cut at the first `}`.
  getTitleFromTexContent: (content, maxContentToScan = 30000) ->
    TITLE_WITH_CURLY_BRACES = /\\[tT]itle\*?\s*{([^}]+)}/
    TITLE_WITH_SQUARE_BRACES = /\\[tT]itle\s*\[([^\]]+)\]/
    for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
      # The curly-brace form takes precedence over the square-bracket form.
      match = line.match(TITLE_WITH_CURLY_BRACES) or line.match(TITLE_WITH_SQUARE_BRACES)
      return DocumentHelper.detex(match[1]) if match
    return null

  # Report whether any scanned line starts (after optional whitespace) with
  # `\documentclass`.
  #
  # content / maxContentToScan - same meaning as in getTitleFromTexContent.
  #
  # Returns true or false.
  contentHasDocumentclass: (content, maxContentToScan = 30000) ->
    for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
      # We've had problems with this regex locking up CPU.
      # Previously /.*\\documentclass/ would totally lock up on lines of
      # 500kb (data text files). Anchoring at the start of the line means the
      # match gives up quickly regardless of line length.
      return true if line.match(/^\s*\\documentclass/)
    return false

  # Strip TeX markup from a string, leaving readable text.
  #
  # The replacements run in order:
  #   1. well-known logo macros become their plain names;
  #   2. line-spacing breaks such as `\\[1.5cm]` become a space;
  #   3. any other control sequence (`\word`, `\` + one character, or a
  #      trailing lone `\`) is removed;
  #   4. empty groups `{}` and ties `~` become a space;
  #   5. remaining `$`, `{` and `}` are removed;
  #   6. runs of spaces collapse to one and the result is trimmed.
  #
  # string - the TeX fragment to clean.
  #
  # Returns the cleaned string.
  detex: (string) ->
    return string.replace(/\\LaTeX/g, 'LaTeX')
      .replace(/\\TeX/g, 'TeX')
      .replace(/\\TikZ/g, 'TikZ')
      .replace(/\\BibTeX/g, 'BibTeX')
      # Consume BOTH backslashes of `\\[len]`. Matching only the second one
      # left a `\` in front of the inserted space, and the generic command
      # rule below then deleted that space, gluing the neighbouring words
      # together.
      .replace(/\\{1,2}\[[A-Za-z0-9. ]*\]/g, ' ') # line spacing
      .replace(/\\(?:[a-zA-Z]+|.|)/g, '')
      .replace(/{}|~/g, ' ')
      .replace(/[${}]/g, '')
      .replace(/ +/g, ' ')
      .trim()

  # Normalise `content` into something the callers can iterate line by line.
  #
  # A string is truncated to `maxContentToScan` characters and split on "\n".
  # null/undefined yields an empty list so the callers' loops do not throw.
  # Any other value (normally an array of lines) is returned unchanged and is
  # not truncated.
  _getLinesFromContent: (content, maxContentToScan) ->
    if typeof content is 'string'
      return content.substring(0, maxContentToScan).split("\n")
    return content ? []
|