Merge pull request #2031 from overleaf/bg-avoid-binary-text-mismatch-on-dropbox-updates

avoid writing binary data into existing docs via dropbox

GitOrigin-RevId: c6bc0ee3854c737ad80ea10a91bc27c88db2f838
This commit is contained in:
Eric Mc Sween 2019-08-06 08:21:05 -04:00 committed by sharelatex
parent e0c3a971bb
commit 2269e4c4f1
3 changed files with 215 additions and 50 deletions

View file

@ -13,6 +13,7 @@
*/
let UpdateMerger
const _ = require('underscore')
const async = require('async')
const fs = require('fs')
const logger = require('logger-sharelatex')
const EditorController = require('../Editor/EditorController')
@ -50,34 +51,66 @@ module.exports = UpdateMerger = {
})
},
_findExistingFileType(project_id, path, callback) {
ProjectEntityHandler.getAllEntities(project_id, function(err, docs, files) {
if (err != null) {
return callback(err)
}
var existingFileType = null
if (_.some(files, f => f.path === path)) {
existingFileType = 'file'
}
if (_.some(docs, d => d.path === path)) {
existingFileType = 'doc'
}
callback(null, existingFileType)
})
},
_determineFileType(project_id, path, fsPath, callback) {
if (callback == null) {
callback = function(err, fileType) {}
}
return ProjectEntityHandler.getAllEntities(project_id, function(
// check if there is an existing file with the same path (we either need
// to overwrite it or delete it)
UpdateMerger._findExistingFileType(project_id, path, function(
err,
docs,
files
existingFileType
) {
if (err != null) {
if (err) {
return callback(err)
}
if (_.some(files, f => f.path === path)) {
return callback(null, 'existing-file')
}
if (_.some(docs, d => d.path === path)) {
return callback(null, 'existing-doc')
}
// existing file not found in project, so check the file type to determine if doc
return FileTypeManager.getType(path, fsPath, function(err, isBinary) {
// determine whether the update should create a doc or binary file
FileTypeManager.getStrictType(path, fsPath, function(err, isBinary) {
if (err != null) {
return callback(err)
}
if (isBinary) {
return callback(null, 'new-file') // extension was not text
} else {
return callback(null, 'new-doc')
// Existing | Update | Action
// ---------|-----------|-------
// file | isBinary | existing-file
// file | !isBinary | existing-file
// doc | isBinary | new-file, delete-existing-doc
// doc | !isBinary | existing-doc
// null | isBinary | new-file
// null | !isBinary | new-doc
// if a binary file already exists, always keep it as a binary file
// even if the update looks like a text file
if (existingFileType === 'file') {
return callback(null, 'existing-file')
}
// if there is an existing doc, keep it as a doc except when the
// incoming update is binary. In that case delete the doc and replace
// it with a new file.
if (existingFileType === 'doc') {
if (isBinary) {
return callback(null, 'new-file', 'delete-existing-doc')
} else {
return callback(null, 'existing-doc')
}
}
// if there no existing file, create a file or doc as needed
return callback(null, isBinary ? 'new-file' : 'new-doc')
})
})
},
@ -88,32 +121,48 @@ module.exports = UpdateMerger = {
}
return UpdateMerger._determineFileType(project_id, path, fsPath, function(
err,
fileType
fileType,
deleteOriginalEntity
) {
if (err != null) {
return callback(err)
}
if (['existing-file', 'new-file'].includes(fileType)) {
return UpdateMerger.p.processFile(
project_id,
fsPath,
path,
source,
user_id,
callback
)
} else if (['existing-doc', 'new-doc'].includes(fileType)) {
return UpdateMerger.p.processDoc(
project_id,
user_id,
fsPath,
path,
source,
callback
)
} else {
return callback(new Error('unrecognized file'))
}
async.series(
[
function(cb) {
if (deleteOriginalEntity) {
// currently we only delete docs
UpdateMerger.deleteUpdate(user_id, project_id, path, source, cb)
} else {
cb()
}
},
function(cb) {
if (['existing-file', 'new-file'].includes(fileType)) {
return UpdateMerger.p.processFile(
project_id,
fsPath,
path,
source,
user_id,
cb
)
} else if (['existing-doc', 'new-doc'].includes(fileType)) {
return UpdateMerger.p.processDoc(
project_id,
user_id,
fsPath,
path,
source,
cb
)
} else {
return cb(new Error('unrecognized file'))
}
}
],
callback
)
})
},

View file

@ -77,7 +77,7 @@ module.exports = FileTypeManager = {
// returns charset as understood by fs.readFile,
getType(name, fsPath, callback) {
if (callback == null) {
callback = function(error, isBinary, charset) {}
callback = function(error, isBinary, charset, bytes) {}
}
const parts = name.split('.')
const extension = parts.slice(-1)[0].toLowerCase()
@ -104,7 +104,7 @@ module.exports = FileTypeManager = {
}
if (isUtf8(bytes)) {
return callback(null, false, 'utf-8')
return callback(null, false, 'utf-8', bytes)
}
// check for little-endian unicode bom (nodejs does not support big-endian)
if (bytes[0] === 0xff && bytes[1] === 0xfe) {
@ -116,6 +116,33 @@ module.exports = FileTypeManager = {
})
},
// For compatibility with the history service, only accept valid utf8 with no
// nulls or non-BMP characters as text, eveything else is binary.
getStrictType(name, fsPath, callback) {
FileTypeManager.getType(name, fsPath, function(
err,
isBinary,
charset,
bytes
) {
if (err) {
return callback(err)
}
if (isBinary || charset !== 'utf-8' || !bytes) {
return callback(null, true)
}
let data = bytes.toString()
if (data.indexOf('\x00') !== -1) {
return callback(null, true)
}
if (/[\uD800-\uDFFF]/.test(data)) {
// non-BMP characters (high and low surrogate characters)
return callback(null, true)
}
return callback(null, false)
})
},
getExtension(fileName) {
const nameSplit = fileName.split('.')
if (nameSplit.length < 2) {

View file

@ -66,7 +66,7 @@ describe('UpdateMerger :', function() {
describe('mergeUpdate', function() {
describe('doc updates for a new doc', function() {
beforeEach(function() {
this.FileTypeManager.getType = sinon.stub().yields(null, false)
this.FileTypeManager.getStrictType = sinon.stub().yields(null, false)
this.updateMerger.p.processDoc = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
@ -79,7 +79,7 @@ describe('UpdateMerger :', function() {
})
it('should look at the file contents', function() {
return this.FileTypeManager.getType.called.should.equal(true)
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should process update as doc', function() {
@ -101,7 +101,7 @@ describe('UpdateMerger :', function() {
describe('file updates for a new file ', function() {
beforeEach(function() {
this.FileTypeManager.getType = sinon.stub().yields(null, true)
this.FileTypeManager.getStrictType = sinon.stub().yields(null, true)
this.updateMerger.p.processFile = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
@ -114,7 +114,7 @@ describe('UpdateMerger :', function() {
})
it('should look at the file contents', function() {
return this.FileTypeManager.getType.called.should.equal(true)
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should process update as file', function() {
@ -136,7 +136,7 @@ describe('UpdateMerger :', function() {
describe('doc updates for an existing doc', function() {
beforeEach(function() {
this.FileTypeManager.getType = sinon.stub()
this.FileTypeManager.getStrictType = sinon.stub().yields(null, false)
this.updateMerger.p.processDoc = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
@ -148,8 +148,8 @@ describe('UpdateMerger :', function() {
)
})
it('should not look at the file contents', function() {
return this.FileTypeManager.getType.called.should.equal(false)
it('should look at the file contents', function() {
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should process update as doc', function() {
@ -171,7 +171,7 @@ describe('UpdateMerger :', function() {
describe('file updates for an existing file', function() {
beforeEach(function() {
this.FileTypeManager.getType = sinon.stub()
this.FileTypeManager.getStrictType = sinon.stub().yields(null, true)
this.updateMerger.p.processFile = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
@ -183,8 +183,8 @@ describe('UpdateMerger :', function() {
)
})
it('should not look at the file contents', function() {
return this.FileTypeManager.getType.called.should.equal(false)
it('should look at the file contents', function() {
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should process update as file', function() {
@ -205,6 +205,95 @@ describe('UpdateMerger :', function() {
})
})
describe('file updates for an existing doc', function() {
beforeEach(function() {
this.FileTypeManager.getStrictType = sinon
.stub()
.yields(null, true, 'delete-existing-doc')
this.updateMerger.deleteUpdate = sinon.stub().yields()
this.updateMerger.p.processFile = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
this.project_id,
this.existingDocPath,
this.updateRequest,
this.source,
this.callback
)
})
it('should look at the file contents', function() {
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should delete the existing doc', function() {
this.updateMerger.deleteUpdate
.calledWith(
this.user_id,
this.project_id,
this.existingDocPath,
this.source
)
.should.equal(true)
})
it('should process update as file', function() {
return this.updateMerger.p.processFile
.calledWith(
this.project_id,
this.fsPath,
this.existingDocPath,
this.source,
this.user_id
)
.should.equal(true)
})
it('removes the temp file from disk', function() {
return this.fs.unlink.calledWith(this.fsPath).should.equal(true)
})
})
describe('doc updates for an existing file', function() {
beforeEach(function() {
this.FileTypeManager.getStrictType = sinon.stub().yields(null, true)
this.updateMerger.deleteUpdate = sinon.stub().yields()
this.updateMerger.p.processFile = sinon.stub().yields()
return this.updateMerger.mergeUpdate(
this.user_id,
this.project_id,
this.existingFilePath,
this.updateRequest,
this.source,
this.callback
)
})
it('should look at the file contents', function() {
return this.FileTypeManager.getStrictType.called.should.equal(true)
})
it('should not delete the existing file', function() {
this.updateMerger.deleteUpdate.called.should.equal(false)
})
it('should process update as file', function() {
return this.updateMerger.p.processFile
.calledWith(
this.project_id,
this.fsPath,
this.existingFilePath,
this.source,
this.user_id
)
.should.equal(true)
})
it('removes the temp file from disk', function() {
return this.fs.unlink.calledWith(this.fsPath).should.equal(true)
})
})
describe('deleteUpdate', function() {
beforeEach(function() {
this.EditorController.deleteEntityWithPath = sinon.stub().yields()