Fix up duplicate filenames during resync (#5742)

* Fix up duplicate filenames during resync

* Unit test for project history resync with duplicate paths

* Decaf cleanup: unnecessary returns

* Decaf cleanup: remove initClass()

* Decaf cleanup: unused variables

* Decaf cleanup: camel case variables

* Support duplicate folder names when traversing project folders

We have some broken projects that contain duplicate folder names. In
order to fix them, we need to be able to find them in the project. The
use of an object to store folders by path meant that only one of the
duplicate folders would be found.

* Keep docupdater up to date when renaming during a resync

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
GitOrigin-RevId: bf3dcdc4c1d4f5418ff9f5e68b255438620349ca
This commit is contained in:
Brian Gough 2021-11-11 15:10:46 +00:00 committed by Copybot
parent 302a6fddc8
commit aeb2ce7435
4 changed files with 252 additions and 111 deletions

View file

@ -20,15 +20,11 @@ const ProjectEntityHandler = {
} }
ProjectEntityHandler._getAllFolders(projectId, (error, folders) => { ProjectEntityHandler._getAllFolders(projectId, (error, folders) => {
if (folders == null) {
folders = {}
}
if (error != null) { if (error != null) {
return callback(error) return callback(error)
} }
const docs = {} const docs = {}
for (const folderPath in folders) { for (const { path: folderPath, folder } of folders) {
const folder = folders[folderPath]
for (const doc of folder.docs || []) { for (const doc of folder.docs || []) {
const content = docContents[doc._id.toString()] const content = docContents[doc._id.toString()]
if (content != null) { if (content != null) {
@ -49,15 +45,11 @@ const ProjectEntityHandler = {
getAllFiles(projectId, callback) { getAllFiles(projectId, callback) {
ProjectEntityHandler._getAllFolders(projectId, (err, folders) => { ProjectEntityHandler._getAllFolders(projectId, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) { if (err != null) {
return callback(err) return callback(err)
} }
const files = {} const files = {}
for (const folderPath in folders) { for (const { path: folderPath, folder } of folders) {
const folder = folders[folderPath]
for (const file of folder.fileRefs || []) { for (const file of folder.fileRefs || []) {
if (file != null) { if (file != null) {
files[path.join(folderPath, file.name)] = file files[path.join(folderPath, file.name)] = file
@ -83,16 +75,12 @@ const ProjectEntityHandler = {
getAllEntitiesFromProject(project, callback) { getAllEntitiesFromProject(project, callback) {
ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => { ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) { if (err != null) {
return callback(err) return callback(err)
} }
const docs = [] const docs = []
const files = [] const files = []
for (const folderPath in folders) { for (const { path: folderPath, folder } of folders) {
const folder = folders[folderPath]
for (const doc of folder.docs || []) { for (const doc of folder.docs || []) {
if (doc != null) { if (doc != null) {
docs.push({ path: path.join(folderPath, doc.name), doc }) docs.push({ path: path.join(folderPath, doc.name), doc })
@ -122,15 +110,11 @@ const ProjectEntityHandler = {
getAllDocPathsFromProject(project, callback) { getAllDocPathsFromProject(project, callback) {
ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => { ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) { if (err != null) {
return callback(err) return callback(err)
} }
const docPath = {} const docPath = {}
for (const folderPath in folders) { for (const { path: folderPath, folder } of folders) {
const folder = folders[folderPath]
for (const doc of folder.docs || []) { for (const doc of folder.docs || []) {
docPath[doc._id] = path.join(folderPath, doc.name) docPath[doc._id] = path.join(folderPath, doc.name)
} }
@ -226,9 +210,9 @@ const ProjectEntityHandler = {
}, },
_getAllFoldersFromProject(project, callback) { _getAllFoldersFromProject(project, callback) {
const folders = {} const folders = []
function processFolder(basePath, folder) { function processFolder(basePath, folder) {
folders[basePath] = folder folders.push({ path: basePath, folder })
for (const childFolder of folder.folders || []) { for (const childFolder of folder.folders || []) {
if (childFolder.name != null) { if (childFolder.name != null) {
processFolder(path.join(basePath, childFolder.name), childFolder) processFolder(path.join(basePath, childFolder.name), childFolder)

View file

@ -1355,25 +1355,34 @@ const ProjectEntityUpdateHandler = {
if (error != null) { if (error != null) {
return callback(error) return callback(error)
} }
ProjectEntityUpdateHandler._checkFiletree(
docs = _.map(docs, doc => ({
doc: doc.doc._id,
path: doc.path,
}))
files = _.map(files, file => ({
file: file.file._id,
path: file.path,
url: FileStoreHandler._buildUrl(projectId, file.file._id),
_hash: file.file.hash,
}))
DocumentUpdaterHandler.resyncProjectHistory(
projectId, projectId,
projectHistoryId, projectHistoryId,
docs, [...docs, ...files],
files, error => {
callback if (error) {
return callback(error)
}
docs = _.map(docs, doc => ({
doc: doc.doc._id,
path: doc.path,
}))
files = _.map(files, file => ({
file: file.file._id,
path: file.path,
url: FileStoreHandler._buildUrl(projectId, file.file._id),
_hash: file.file.hash,
}))
DocumentUpdaterHandler.resyncProjectHistory(
projectId,
projectHistoryId,
docs,
files,
callback
)
}
) )
} }
) )
@ -1381,6 +1390,91 @@ const ProjectEntityUpdateHandler = {
) )
), ),
_checkFiletree(projectId, projectHistoryId, entities, callback) {
const renames = []
const paths = new Map()
for (const entity of entities) {
// if the path is invalid (i.e. not going to be accepted in
// project-history due to filename rules) we could also flag it for a
// rename here.
if (paths.has(entity.path)) {
renames.push(entity)
} else {
paths.set(entity.path, entity)
}
}
if (renames.length === 0) {
return callback()
}
logger.warn({ projectId, renames }, 'found conflict in filetree')
// find new names for duplicate files
for (const entity of renames) {
const {
newPath,
newName,
} = ProjectEntityUpdateHandler.findNextAvailableName(paths, entity)
logger.debug({ projectId, newName, newPath }, 'found new name')
entity.newPath = newPath
entity.newName = newName
}
// rename the duplicate files
const doRename = (entity, cb) => {
const entityId = entity.doc ? entity.doc._id : entity.file._id
const entityType = entity.doc ? 'doc' : 'file'
ProjectEntityMongoUpdateHandler.renameEntity(
projectId,
entityId,
entityType,
entity.newName,
(err, project, startPath, endPath, rev, changes) => {
if (err) {
return cb(err)
}
// update the renamed entity for the resync
entity.path = entity.newPath
if (entityType === 'doc') {
entity.doc.name = entity.newName
} else {
entity.file.name = entity.newName
}
delete entity.newPath
delete entity.newname
DocumentUpdaterHandler.updateProjectStructure(
projectId,
projectHistoryId,
null,
changes,
cb
)
}
)
}
async.eachSeries(renames, doRename, callback)
},
findNextAvailableName(allPaths, entity) {
const incrementReplacer = (match, p1) => {
return ' (' + (parseInt(p1, 10) + 1) + ')'
}
let candidatePath = entity.path
// if the filename was invalid we should normalise it here too. Currently
// this only handles renames in the same folder, so we will be out of luck
// if it is the folder name which in invalid. We could handle folder
// renames by returning the folders list from getAllEntitiesFromProject
do {
// does the filename look like "foo (1)" if so, increment the number in parentheses
if (/ \(\d+\)$/.test(candidatePath)) {
candidatePath = candidatePath.replace(/ \((\d+)\)$/, incrementReplacer)
} else {
// otherwise, add a ' (1)' suffix to the name
candidatePath = candidatePath + ' (1)'
}
} while (allPaths.has(candidatePath)) // keep going until the name is unique
// add the new name to the set
allPaths.set(candidatePath, entity)
return { newPath: candidatePath, newName: candidatePath.split('/').pop() }
},
isPathValidForRootDoc(docPath) { isPathValidForRootDoc(docPath) {
const docExtension = Path.extname(docPath) const docExtension = Path.extname(docPath)
return VALID_ROOT_DOC_REGEXP.test(docExtension) return VALID_ROOT_DOC_REGEXP.test(docExtension)

View file

@ -1,53 +1,26 @@
/* eslint-disable const { expect } = require('chai')
camelcase,
max-len,
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS206: Consider reworking classes to avoid initClass
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { assert, expect } = require('chai')
const sinon = require('sinon') const sinon = require('sinon')
const modulePath = '../../../../app/src/Features/Project/ProjectEntityHandler' const modulePath = '../../../../app/src/Features/Project/ProjectEntityHandler'
const SandboxedModule = require('sandboxed-module') const SandboxedModule = require('sandboxed-module')
const Errors = require('../../../../app/src/Features/Errors/Errors') const Errors = require('../../../../app/src/Features/Errors/Errors')
describe('ProjectEntityHandler', function () { describe('ProjectEntityHandler', function () {
const project_id = '4eecb1c1bffa66588e0000a1' const projectId = '4eecb1c1bffa66588e0000a1'
const doc_id = '4eecb1c1bffa66588e0000a2' const docId = '4eecb1c1bffa66588e0000a2'
const folder_id = '4eecaffcbffa66588e000008'
const rootFolderId = '4eecaffcbffa66588e000007'
const userId = 1234
beforeEach(function () { beforeEach(function () {
let Project
this.TpdsUpdateSender = { this.TpdsUpdateSender = {
addDoc: sinon.stub().callsArg(1), addDoc: sinon.stub().callsArg(1),
addFile: sinon.stub().callsArg(1), addFile: sinon.stub().callsArg(1),
} }
this.ProjectModel = Project = (function () { this.ProjectModel = class Project {
Project = class Project { constructor(options) {
static initClass() { this._id = projectId
this.prototype.rootFolder = [this.rootFolder] this.name = 'project_name_here'
} this.rev = 0
this.rootFolder = [this.rootFolder]
constructor(options) {
this._id = project_id
this.name = 'project_name_here'
this.rev = 0
}
} }
Project.initClass() }
return Project
})()
this.project = new this.ProjectModel() this.project = new this.ProjectModel()
this.ProjectLocator = { findElement: sinon.stub() } this.ProjectLocator = { findElement: sinon.stub() }
@ -56,7 +29,7 @@ describe('ProjectEntityHandler', function () {
} }
this.callback = sinon.stub() this.callback = sinon.stub()
return (this.ProjectEntityHandler = SandboxedModule.require(modulePath, { this.ProjectEntityHandler = SandboxedModule.require(modulePath, {
requires: { requires: {
'../Docstore/DocstoreManager': (this.DocstoreManager = {}), '../Docstore/DocstoreManager': (this.DocstoreManager = {}),
'../../Features/DocumentUpdater/DocumentUpdaterHandler': this '../../Features/DocumentUpdater/DocumentUpdaterHandler': this
@ -68,7 +41,7 @@ describe('ProjectEntityHandler', function () {
'./ProjectGetter': (this.ProjectGetter = {}), './ProjectGetter': (this.ProjectGetter = {}),
'../ThirdPartyDataStore/TpdsUpdateSender': this.TpdsUpdateSender, '../ThirdPartyDataStore/TpdsUpdateSender': this.TpdsUpdateSender,
}, },
})) })
}) })
describe('getting folders, docs and files', function () { describe('getting folders, docs and files', function () {
@ -131,13 +104,11 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getAllDocs = sinon this.DocstoreManager.getAllDocs = sinon
.stub() .stub()
.callsArgWith(1, null, this.docs) .callsArgWith(1, null, this.docs)
this.ProjectEntityHandler.getAllDocs(project_id, this.callback) this.ProjectEntityHandler.getAllDocs(projectId, this.callback)
}) })
it('should get the doc lines and rev from the docstore', function () { it('should get the doc lines and rev from the docstore', function () {
this.DocstoreManager.getAllDocs this.DocstoreManager.getAllDocs.calledWith(projectId).should.equal(true)
.calledWith(project_id)
.should.equal(true)
}) })
it('should call the callback with the docs with the lines and rev included', function () { it('should call the callback with the docs with the lines and rev included', function () {
@ -163,7 +134,7 @@ describe('ProjectEntityHandler', function () {
describe('getAllFiles', function () { describe('getAllFiles', function () {
beforeEach(function () { beforeEach(function () {
this.callback = sinon.stub() this.callback = sinon.stub()
this.ProjectEntityHandler.getAllFiles(project_id, this.callback) this.ProjectEntityHandler.getAllFiles(projectId, this.callback)
}) })
it('should call the callback with the files', function () { it('should call the callback with the files', function () {
@ -197,7 +168,7 @@ describe('ProjectEntityHandler', function () {
) )
}) })
it('should call the callback with the path for each doc_id', function () { it('should call the callback with the path for each docId', function () {
this.expected = {} this.expected = {}
this.expected[this.doc1._id] = `/${this.doc1.name}` this.expected[this.doc1._id] = `/${this.doc1.name}`
this.expected[this.doc2._id] = `/folder1/${this.doc2.name}` this.expected[this.doc2._id] = `/folder1/${this.doc2.name}`
@ -211,7 +182,7 @@ describe('ProjectEntityHandler', function () {
}) })
it('should call the callback with the path for an existing doc id at the root level', function () { it('should call the callback with the path for an existing doc id at the root level', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id, projectId,
this.doc1._id, this.doc1._id,
this.callback this.callback
) )
@ -220,7 +191,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with the path for an existing doc id nested within a folder', function () { it('should call the callback with the path for an existing doc id nested within a folder', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id, projectId,
this.doc2._id, this.doc2._id,
this.callback this.callback
) )
@ -231,7 +202,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with a NotFoundError for a non-existing doc', function () { it('should call the callback with a NotFoundError for a non-existing doc', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id, projectId,
'non-existing-id', 'non-existing-id',
this.callback this.callback
) )
@ -242,7 +213,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with a NotFoundError for an existing file', function () { it('should call the callback with a NotFoundError for an existing file', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId( this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id, projectId,
this.file1._id, this.file1._id,
this.callback this.callback
) )
@ -255,21 +226,21 @@ describe('ProjectEntityHandler', function () {
describe('_getAllFolders', function () { describe('_getAllFolders', function () {
beforeEach(function () { beforeEach(function () {
this.callback = sinon.stub() this.callback = sinon.stub()
this.ProjectEntityHandler._getAllFolders(project_id, this.callback) this.ProjectEntityHandler._getAllFolders(projectId, this.callback)
}) })
it('should get the project without the docs lines', function () { it('should get the project without the docs lines', function () {
this.ProjectGetter.getProjectWithoutDocLines this.ProjectGetter.getProjectWithoutDocLines
.calledWith(project_id) .calledWith(projectId)
.should.equal(true) .should.equal(true)
}) })
it('should call the callback with the folders', function () { it('should call the callback with the folders', function () {
this.callback this.callback
.calledWith(null, { .calledWith(null, [
'/': this.project.rootFolder[0], { path: '/', folder: this.project.rootFolder[0] },
'/folder1': this.folder1, { path: '/folder1', folder: this.folder1 },
}) ])
.should.equal(true) .should.equal(true)
}) })
}) })
@ -285,10 +256,10 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with the folders', function () { it('should call the callback with the folders', function () {
this.callback this.callback
.calledWith(null, { .calledWith(null, [
'/': this.project.rootFolder[0], { path: '/', folder: this.project.rootFolder[0] },
'/folder1': this.folder1, { path: '/folder1', folder: this.folder1 },
}) ])
.should.equal(true) .should.equal(true)
}) })
}) })
@ -304,12 +275,12 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getDoc = sinon this.DocstoreManager.getDoc = sinon
.stub() .stub()
.callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges) .callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges)
this.ProjectEntityHandler.getDoc(project_id, doc_id, this.callback) this.ProjectEntityHandler.getDoc(projectId, docId, this.callback)
}) })
it('should call the docstore', function () { it('should call the docstore', function () {
this.DocstoreManager.getDoc this.DocstoreManager.getDoc
.calledWith(project_id, doc_id) .calledWith(projectId, docId)
.should.equal(true) .should.equal(true)
}) })
@ -332,15 +303,12 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getDoc = sinon this.DocstoreManager.getDoc = sinon
.stub() .stub()
.callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges) .callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges)
result = await this.ProjectEntityHandler.promises.getDoc( result = await this.ProjectEntityHandler.promises.getDoc(projectId, docId)
project_id,
doc_id
)
}) })
it('should call the docstore', function () { it('should call the docstore', function () {
this.DocstoreManager.getDoc this.DocstoreManager.getDoc
.calledWith(project_id, doc_id) .calledWith(projectId, docId)
.should.equal(true) .should.equal(true)
}) })

View file

@ -78,7 +78,7 @@ describe('ProjectEntityUpdateHandler', function () {
flushDocToMongo: sinon.stub().yields(), flushDocToMongo: sinon.stub().yields(),
updateProjectStructure: sinon.stub().yields(), updateProjectStructure: sinon.stub().yields(),
setDocument: sinon.stub(), setDocument: sinon.stub(),
resyncProjectHistory: sinon.stub(), resyncProjectHistory: sinon.stub().yields(),
deleteDoc: sinon.stub().yields(), deleteDoc: sinon.stub().yields(),
} }
this.fs = { this.fs = {
@ -118,7 +118,7 @@ describe('ProjectEntityUpdateHandler', function () {
replaceFileWithNew: sinon.stub(), replaceFileWithNew: sinon.stub(),
mkdirp: sinon.stub(), mkdirp: sinon.stub(),
moveEntity: sinon.stub(), moveEntity: sinon.stub(),
renameEntity: sinon.stub(), renameEntity: sinon.stub().yields(),
deleteEntity: sinon.stub(), deleteEntity: sinon.stub(),
replaceDocWithFile: sinon.stub(), replaceDocWithFile: sinon.stub(),
replaceFileWithDoc: sinon.stub(), replaceFileWithDoc: sinon.stub(),
@ -136,6 +136,11 @@ describe('ProjectEntityUpdateHandler', function () {
copyFile: sinon.stub(), copyFile: sinon.stub(),
uploadFileFromDisk: sinon.stub(), uploadFileFromDisk: sinon.stub(),
deleteFile: sinon.stub(), deleteFile: sinon.stub(),
_buildUrl: sinon
.stub()
.callsFake(
(projectId, fileId) => `www.filestore.test/${projectId}/${fileId}`
),
} }
this.FileWriter = { this.FileWriter = {
writeLinesToDisk: sinon.stub(), writeLinesToDisk: sinon.stub(),
@ -1940,10 +1945,6 @@ describe('ProjectEntityUpdateHandler', function () {
docs, docs,
files files
) )
this.FileStoreHandler._buildUrl = (projectId, fileId) =>
`www.filestore.test/${projectId}/${fileId}`
this.DocumentUpdaterHandler.resyncProjectHistory.yields()
this.ProjectEntityUpdateHandler.resyncProjectHistory( this.ProjectEntityUpdateHandler.resyncProjectHistory(
projectId, projectId,
this.callback this.callback
@ -1984,6 +1985,100 @@ describe('ProjectEntityUpdateHandler', function () {
this.callback.called.should.equal(true) this.callback.called.should.equal(true)
}) })
}) })
// Exercises resyncProjectHistory for a project whose entity list contains
// several entries sharing the same path.
describe('a project with duplicate filenames', function () {
beforeEach(function (done) {
this.ProjectGetter.getProject.yields(null, this.project)
// doc3 and doc5 repeat doc2's path; doc4's path already ends in a
// numbered " (22)" suffix
this.docs = [
{ doc: { _id: 'doc1' }, path: 'main.tex' },
{ doc: { _id: 'doc2' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc3' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc4' }, path: 'another dupe (22)' },
{ doc: { _id: 'doc5' }, path: 'a/b/c/duplicate.tex' },
]
// file3 repeats file2's path; file4 uses the same path as doc4 above
this.files = [
{ file: { _id: 'file1', hash: 'hash1' }, path: 'image.jpg' },
{ file: { _id: 'file2', hash: 'hash2' }, path: 'duplicate.jpg' },
{ file: { _id: 'file3', hash: 'hash3' }, path: 'duplicate.jpg' },
{ file: { _id: 'file4', hash: 'hash4' }, path: 'another dupe (22)' },
]
this.ProjectEntityHandler.getAllEntitiesFromProject.yields(
null,
this.docs,
this.files
)
// `done` is passed as the callback, so the assertions below only run
// after the resync has called back
this.ProjectEntityUpdateHandler.resyncProjectHistory(projectId, done)
})
it('renames the duplicate files', function () {
const renameEntity = this.ProjectEntityMongoUpdateHandler.renameEntity
// one rename per later occurrence of a path: doc3, doc5, file3 and file4
expect(renameEntity).to.have.callCount(4)
// the counter is appended after the full file name, extension included
expect(renameEntity).to.have.been.calledWith(
projectId,
'doc3',
'doc',
'duplicate.tex (1)'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'doc5',
'doc',
'duplicate.tex (2)'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'file3',
'file',
'duplicate.jpg (1)'
)
// an existing " (22)" suffix is incremented rather than given a new one
expect(renameEntity).to.have.been.calledWith(
projectId,
'file4',
'file',
'another dupe (23)'
)
})
it('tells the doc updater to resync the project', function () {
// expected payload: entity ids instead of full objects, with the
// renamed paths in place of the duplicated ones
const docs = [
{ doc: 'doc1', path: 'main.tex' },
{ doc: 'doc2', path: 'a/b/c/duplicate.tex' },
{ doc: 'doc3', path: 'a/b/c/duplicate.tex (1)' },
{ doc: 'doc4', path: 'another dupe (22)' },
{ doc: 'doc5', path: 'a/b/c/duplicate.tex (2)' },
]
// NOTE(review): presumably mirrors the URL format produced by the
// FileStoreHandler._buildUrl stub in the outer setup — confirm
const urlPrefix = `www.filestore.test/${projectId}`
const files = [
{
file: 'file1',
path: 'image.jpg',
url: `${urlPrefix}/file1`,
_hash: 'hash1',
},
{
file: 'file2',
path: 'duplicate.jpg',
url: `${urlPrefix}/file2`,
_hash: 'hash2',
},
{
file: 'file3',
path: 'duplicate.jpg (1)',
url: `${urlPrefix}/file3`,
_hash: 'hash3',
},
{
file: 'file4',
path: 'another dupe (23)',
url: `${urlPrefix}/file4`,
_hash: 'hash4',
},
]
expect(
this.DocumentUpdaterHandler.resyncProjectHistory
).to.have.been.calledWith(projectId, projectHistoryId, docs, files)
})
})
}) })
describe('_cleanUpEntity', function () { describe('_cleanUpEntity', function () {