overleaf/services/web/app/coffee/Features/Uploads/ArchiveManager.coffee

143 lines
4.8 KiB
CoffeeScript
Raw Normal View History

2014-02-12 05:23:40 -05:00
logger = require "logger-sharelatex"
metrics = require "metrics-sharelatex"
fs = require "fs"
Path = require "path"
2017-10-13 11:49:22 -04:00
fse = require "fs-extra"
yauzl = require "yauzl"
Settings = require "settings-sharelatex"
2016-03-12 10:43:16 -05:00
_ = require("underscore")
2014-02-12 05:23:40 -05:00
2016-03-12 07:38:21 -05:00
# Number of bytes in one mebibyte; the zip size limit checked in this module is a multiple of it.
ONE_MEG = 1024 * 1024
2014-02-12 05:23:40 -05:00
module.exports = ArchiveManager =
2016-03-12 10:43:16 -05:00
# Decide whether a zip archive is too large to be extracted.
#
# Opens `source` with yauzl and walks every entry, summing each entry's
# `uncompressedSize`. The archive is reported as too large when it holds more
# entries than `Settings.maxEntitiesPerProject` (only if that setting is
# defined), or when the summed uncompressed size exceeds 300 * ONE_MEG bytes.
#
# source   - zip archive handed to `yauzl.open`
# callback - invoked at most once as callback(err, isTooLarge). `err` is set
#            when the archive cannot be opened or read, or when no numeric
#            total was accumulated (which includes an archive with no entries).
_isZipTooLarge: (source, callback = (err, isTooLarge)->)->
  callback = _.once callback

  totalSizeInBytes = null
  yauzl.open source, {lazyEntries: true}, (err, zipfile) ->
    return callback(err) if err?

    if Settings.maxEntitiesPerProject? and zipfile.entryCount > Settings.maxEntitiesPerProject
      # No entries are read on this path, so nothing else would close the
      # archive; close it explicitly to release the underlying file handle.
      zipfile.close()
      return callback(null, true) # too many files in zip file

    zipfile.on "error", callback

    # read all the entries
    zipfile.readEntry()
    zipfile.on "entry", (entry) ->
      totalSizeInBytes += entry.uncompressedSize
      zipfile.readEntry() # get the next entry

    # no more entries to read
    zipfile.on "end", () ->
      if !totalSizeInBytes? or isNaN(totalSizeInBytes)
        logger.err source:source, totalSizeInBytes:totalSizeInBytes, "error getting bytes of zip"
        return callback(new Error("error getting bytes of zip"))
      isTooLarge = totalSizeInBytes > (ONE_MEG * 300)
      callback(null, isTooLarge)
# Validate a zip entry's file name and work out where it should be written.
#
# entry       - zip entry; only `entry.fileName` is read
# destination - directory prefix the entry name is appended to
# callback    - callback(err, destFile):
#               * no arguments when the entry name ends in "/" (a directory)
#               * Error("relative path") when any path segment is ".."
#               * Error("unnormalized path") when the joined path differs
#                 from its `Path.normalize`d form
#               * (null, destFile) otherwise
_checkFilePath: (entry, destination, callback = (err, destFile) ->) ->
  # badly-behaved archives may use backslashes as separators
  normalisedName = entry.fileName.replace(/\\/g, '/')

  # directories get no destination file
  return callback() if /\/$/.test(normalisedName)

  # refuse anything that tries to climb out via a parent-directory segment
  segments = normalisedName.split('/')
  return callback(new Error("relative path")) if '..' in segments

  # the final path must already be in normalized form
  candidate = "#{destination}/#{normalisedName}"
  return callback(new Error("unnormalized path")) unless candidate == Path.normalize(candidate)

  callback(null, candidate)
# Stream a single zip entry to `destFile`, creating parent directories first.
#
# zipfile  - open yauzl zipfile the entry belongs to
# entry    - the entry to extract
# destFile - path of the file to create
# callback - invoked at most once as callback(err); called without an error
#            only after the write stream has finished, so the file contents
#            are complete when the caller continues.
_writeFileEntry: (zipfile, entry, destFile, callback = (err)->) ->
  callback = _.once callback

  zipfile.openReadStream entry, (err, readStream) ->
    return callback(err) if err?
    readStream.on "error", callback

    errorHandler = (err) -> # clean up before calling callback
      readStream.unpipe()
      readStream.destroy()
      callback(err)

    fse.ensureDir Path.dirname(destFile), (err) ->
      return errorHandler(err) if err?
      writeStream = fs.createWriteStream destFile
      writeStream.on 'error', errorHandler
      # Signal success on the writer's "finish" rather than the reader's
      # "end": the reader can end while written data is still pending.
      writeStream.on 'finish', () -> callback()
      readStream.pipe(writeStream)
2016-03-12 10:43:16 -05:00
2017-10-13 11:49:22 -04:00
# Extract every file entry of the zip at `source` underneath `destination`.
#
# Entries are processed one at a time. Entries whose path is rejected by
# `_checkFilePath` are logged and skipped, directory entries are skipped, and
# the first write failure closes the archive and is reported to the caller.
#
# callback - invoked at most once: with an error on open/read/write failure,
#            or from the archive's "end" event when all entries were handled.
_extractZipFiles: (source, destination, callback = (err) ->) ->
  callback = _.once callback

  yauzl.open source, {lazyEntries: true}, (err, zipfile) ->
    return callback(err) if err?

    # ask the archive for its next entry
    advance = -> zipfile.readEntry()

    onEntry = (entry) ->
      logger.log {source:source, fileName: entry.fileName}, "processing zip file entry"
      ArchiveManager._checkFilePath entry, destination, (err, destFile) ->
        if err?
          logger.warn err:err, source:source, destination:destination, "skipping bad file path"
          advance() # bad path, just skip to the next file
          return
        # no destFile means a directory: nothing to write, keep going
        return advance() unless destFile?
        ArchiveManager._writeFileEntry zipfile, entry, destFile, (err) ->
          # on success continue to the next file
          return advance() unless err?
          logger.error err:err, source:source, destFile:destFile, "error unzipping file entry"
          zipfile.close() # bail out, stop reading file entries
          callback(err)

    zipfile.on "error", callback
    # start reading entries
    zipfile.readEntry()
    zipfile.on "entry", onEntry
    # no more entries to read
    zipfile.on "end", callback
2016-03-12 10:43:16 -05:00
# Public entry point: check the archive size, then unzip `source` into
# `destination`.
#
# _callback - invoked as _callback(err). `err` is the size-check error, an
#             Error("zip_too_large") when the archive is rejected, or the
#             extraction error; it is omitted on success.
extractZipArchive: (source, destination, _callback = (err) ->) ->
  # forward to the caller's callback, then swap it for a no-op so that any
  # later invocation is ignored
  done = (args...) ->
    _callback(args...)
    _callback = () ->

  ArchiveManager._isZipTooLarge source, (err, isTooLarge) ->
    if err?
      logger.err err:err, "error checking size of zip file"
      return done(err)

    return done(new Error("zip_too_large")) if isTooLarge

    timer = new metrics.Timer("unzipDirectory")
    logger.log source: source, destination: destination, "unzipping file"

    ArchiveManager._extractZipFiles source, destination, (err) ->
      timer.done()
      return done() unless err?
      logger.error {err, source, destination}, "unzip failed"
      done(err)
# Find the directory that holds the real content of an extracted archive.
#
# When `directory` contains exactly one child and that child is itself a
# directory, the child's path is returned; in every other case `directory`
# is returned unchanged.
#
# callback - callback(error, topLevelDir); `error` comes from fs.readdir or
#            fs.stat.
findTopLevelDirectory: (directory, callback = (error, topLevelDir) ->) ->
  fs.readdir directory, (error, files) ->
    return callback(error) if error?
    # zero or several children: the directory itself is the top level
    return callback(null, directory) unless files.length == 1

    onlyChild = Path.join(directory, files[0])
    fs.stat onlyChild, (error, stat) ->
      return callback(error) if error?
      callback(null, if stat.isDirectory() then onlyChild else directory)
2014-02-12 05:23:40 -05:00