overleaf/services/web/app/coffee/Features/Documents/DocumentHelper.coffee

module.exports = DocumentHelper =
	# Finds the document title in LaTeX source and returns it as plain text.
	#
	# content          - the source, either one string or an array of lines;
	#                    null/undefined is treated as an empty document.
	# maxContentToScan - for string content, only this many leading characters
	#                    are examined (default 30000).
	#
	# Returns the detex'ed title taken from the first line containing a
	# \title{...} (also \Title or \title*) or \title[...] command, or null when
	# no line contains one.
	getTitleFromTexContent: (content, maxContentToScan = 30000) ->
		TITLE_WITH_CURLY_BRACES = /\\[tT]itle\*?\s*{([^}]+)}/
		TITLE_WITH_SQUARE_BRACES = /\\[tT]itle\s*\[([^\]]+)\]/
		for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
			if match = line.match(TITLE_WITH_CURLY_BRACES)
				# The regex capture ends at the first "}", which truncates titles
				# with nested groups such as \title{\textbf{Big} Ideas}. Re-read the
				# argument with brace matching; keep the regex capture when the
				# group is not closed on this line.
				# The match ends with "{" + capture + "}", which locates the "{".
				openIndex = match.index + match[0].length - match[1].length - 2
				balanced = DocumentHelper._readBalancedGroup(line, openIndex)
				title = if balanced? then balanced else match[1]
				return DocumentHelper.detex(title)
			if match = line.match(TITLE_WITH_SQUARE_BRACES)
				return DocumentHelper.detex(match[1])

		return null

	# Reports whether any scanned line starts (after optional whitespace) with
	# \documentclass. Takes the same content/maxContentToScan arguments as
	# getTitleFromTexContent. Returns true or false.
	contentHasDocumentclass: (content, maxContentToScan = 30000) ->
		for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
			# We've had problems with this regex locking up CPU.
			# Previously /.*\\documentclass/ would totally lock up on lines of 500kb (data text files :()
			# This regex will only look from the start of the line, including whitespace so will return quickly
			# regardless of line length.
			return true if line.match /^\s*\\documentclass/

		return false

	# Reduces a LaTeX fragment to plain text.
	#
	# The order of the replacements matters: the logo macros are rewritten to
	# their names first, because the generic command pattern further down would
	# otherwise delete them entirely.
	detex: (string) ->
		return string.replace(/\\LaTeX/g, 'LaTeX')
			.replace(/\\TeX/g, 'TeX')
			.replace(/\\TikZ/g, 'TikZ')
			.replace(/\\BibTeX/g, 'BibTeX')
			.replace(/\\\[[A-Za-z0-9. ]*\]/g, ' ') # line spacing
			.replace(/\\(?:[a-zA-Z]+|.|)/g, '') # any other command or escaped character
			.replace(/{}|~/g, ' ') # empty groups and ties become spaces
			.replace(/[${}]/g, '') # leftover math and group delimiters
			.replace(/ +/g, ' ')
			.trim()

	# Splits content into lines.
	#
	# A string is cut to its first maxContentToScan characters and split on "\n".
	# Any other non-null value is returned unchanged (callers pass arrays of
	# lines). null/undefined yields [] so the callers' loops do not throw.
	_getLinesFromContent: (content, maxContentToScan) ->
		return [] unless content?
		return if typeof content is 'string' then content.substring(0, maxContentToScan).split("\n") else content

	# Returns the text between the "{" at openIndex and its matching "}" in
	# line, or null when the group is not closed within the line.
	# A backslash escapes the character after it, so \{ and \} are not counted.
	_readBalancedGroup: (line, openIndex) ->
		depth = 0
		cursor = openIndex
		while cursor < line.length
			switch line[cursor]
				when '\\'
					cursor++ # step over the escaped character as well
				when '{'
					depth++
				when '}'
					depth--
					return line.substring(openIndex + 1, cursor) if depth is 0
			cursor++
		return null
Merge pull request #1072 from sharelatex/spd-open-with-overleaf Implement v1 open-with-overleaf API in v2 (part 1) GitOrigin-RevId: 488f4eeefc29086a72295ccbc7c63d2f927add12 2018-11-15 04:40:33 -05:00			`module.exports = DocumentHelper =`
			`getTitleFromTexContent: (content, maxContentToScan = 30000) ->`
			`TITLE_WITH_CURLY_BRACES = /\\[tT]itle\*?\s*{([^}]+)}/`
			`TITLE_WITH_SQUARE_BRACES = /\\[tT]itle\s*\[([^\]]+)\]/`
Merge pull request #1184 from sharelatex/spd-zip-project-name-from-tex-content zip upload: Read project name from title in zip contents GitOrigin-RevId: 27122674a0374f86a10c04485d787f4caaf21f5b 2018-12-03 06:06:05 -05:00			`for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)`
Merge pull request #1227 from sharelatex/spd-better-project-name-parsing Handle embedded commands when parsing project name from title GitOrigin-RevId: 09c1720897056a40d38b4f2f80f9d48caad4258b 2018-12-10 04:39:58 -05:00			`if match = line.match(TITLE_WITH_CURLY_BRACES) || line.match(TITLE_WITH_SQUARE_BRACES)`
			`return DocumentHelper.detex(match[1])`
Merge pull request #1184 from sharelatex/spd-zip-project-name-from-tex-content zip upload: Read project name from title in zip contents GitOrigin-RevId: 27122674a0374f86a10c04485d787f4caaf21f5b 2018-12-03 06:06:05 -05:00
			`return null`

			`contentHasDocumentclass: (content, maxContentToScan = 30000) ->`
			`for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)`
			`# We've had problems with this regex locking up CPU.`
			`# Previously /.*\\documentclass/ would totally lock up on lines of 500kb (data text files :()`
			`# This regex will only look from the start of the line, including whitespace so will return quickly`
			`# regardless of line length.`
			`return true if line.match /^\s*\\documentclass/`

			`return false`

Merge pull request #1227 from sharelatex/spd-better-project-name-parsing Handle embedded commands when parsing project name from title GitOrigin-RevId: 09c1720897056a40d38b4f2f80f9d48caad4258b 2018-12-10 04:39:58 -05:00			`detex: (string) ->`
			`return string.replace(/\\LaTeX/g, 'LaTeX')`
			`.replace(/\\TeX/g, 'TeX')`
			`.replace(/\\TikZ/g, 'TikZ')`
			`.replace(/\\BibTeX/g, 'BibTeX')`
			`.replace(/\\\[[A-Za-z0-9. ]*\]/g, ' ') # line spacing`
			`.replace(/\\(?:[a-zA-Z]+|.|)/g, '')`
			`.replace(/{}|~/g, ' ')`
			`.replace(/[${}]/g, '')`
			`.replace(/ +/g, ' ')`
			`.trim()`

Merge pull request #1184 from sharelatex/spd-zip-project-name-from-tex-content zip upload: Read project name from title in zip contents GitOrigin-RevId: 27122674a0374f86a10c04485d787f4caaf21f5b 2018-12-03 06:06:05 -05:00			`_getLinesFromContent: (content, maxContentToScan) ->`
			`return if typeof content is 'string' then content.substring(0, maxContentToScan).split("\n") else content`