# Helpers for extracting simple metadata (title, presence of \documentclass)
# from LaTeX source and for stripping LaTeX markup from short strings.
module.exports = DocumentHelper =
	# Find the first \title / \Title declaration and return its text with
	# LaTeX markup stripped (see `detex`).
	#
	# content          - the document, either as one string or as an array of lines.
	# maxContentToScan - for string content, only this many leading characters
	#                    are examined (default 30000).
	#
	# Returns the cleaned title string, or null when no title is found
	# (including when content is null/undefined).
	#
	# Note: the curly-brace form captures up to the first `}`, so a title
	# containing a nested group is cut at that group's closing brace.
	getTitleFromTexContent: (content, maxContentToScan = 30000) ->
		TITLE_WITH_CURLY_BRACES = /\\[tT]itle\*?\s*{([^}]+)}/
		TITLE_WITH_SQUARE_BRACES = /\\[tT]itle\s*\[([^\]]+)\]/
		for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
			# The curly-brace form takes precedence over the square-bracket form.
			match = line.match(TITLE_WITH_CURLY_BRACES) || line.match(TITLE_WITH_SQUARE_BRACES)
			return DocumentHelper.detex(match[1]) if match?

		return null

	# Report whether any scanned line starts (after optional whitespace) with
	# \documentclass.
	#
	# content          - the document, either as one string or as an array of lines.
	# maxContentToScan - for string content, only this many leading characters
	#                    are examined (default 30000).
	#
	# Returns true or false; false when content is null/undefined.
	contentHasDocumentclass: (content, maxContentToScan = 30000) ->
		for line in DocumentHelper._getLinesFromContent(content, maxContentToScan)
			# This regex has locked up the CPU before: the earlier /.*\\documentclass/
			# stalled on 500kb lines (data text files). Anchoring at the start of the
			# line (allowing leading whitespace) makes it return quickly regardless
			# of line length.
			return true if line.match /^\s*\\documentclass/

		return false

	# Strip LaTeX markup from a string, leaving plain text.
	#
	# string - the LaTeX fragment to clean; must be a string.
	#
	# Returns the cleaned string with runs of spaces collapsed and the ends trimmed.
	detex: (string) ->
		return string
			# Keep the names of well-known logo macros as plain words.
			.replace(/\\LaTeX/g, 'LaTeX')
			.replace(/\\TeX/g, 'TeX')
			.replace(/\\TikZ/g, 'TikZ')
			.replace(/\\BibTeX/g, 'BibTeX')
			.replace(/\\\[[A-Za-z0-9. ]*\]/g, ' ') # line spacing
			# Drop remaining commands: a backslash followed by a run of letters,
			# by any single character, or by nothing.
			.replace(/\\(?:[a-zA-Z]+|.|)/g, '')
			# Empty groups and ties act as spaces.
			.replace(/{}|~/g, ' ')
			# Remove leftover math and grouping delimiters.
			.replace(/[${}]/g, '')
			.replace(/ +/g, ' ')
			.trim()

	# Normalise document content into a list of lines to scan.
	#
	# content          - a string, an array of lines, or null/undefined.
	# maxContentToScan - for string content, the number of leading characters kept
	#                    before splitting on "\n". Non-string content is returned
	#                    as given and is not truncated.
	#
	# Returns an array of lines; an empty array for null/undefined content so
	# callers can iterate without throwing.
	_getLinesFromContent: (content, maxContentToScan) ->
		return [] unless content?
		return content.substring(0, maxContentToScan).split("\n") if typeof content is 'string'
		return content