Fix up duplicate filenames during resync (#5742)

* Fix up duplicate filenames during resync

* Unit test for project history resync with duplicate paths

* Decaf cleanup: unnecessary returns

* Decaf cleanup: remove initClass()

* Decaf cleanup: unused variables

* Decaf cleanup: camel case variables

* Support duplicate folder names when traversing project folders

We have some broken projects that contain duplicate folder names. In
order to fix them, we need to be able to find them in the project. The
use of an object to store folders by path meant that only one of the
duplicate folders would be found.

* Keep docupdater up to date when renaming during a resync

Co-authored-by: Eric Mc Sween <eric.mcsween@overleaf.com>
GitOrigin-RevId: bf3dcdc4c1d4f5418ff9f5e68b255438620349ca
This commit is contained in:
Brian Gough 2021-11-11 15:10:46 +00:00 committed by Copybot
parent 302a6fddc8
commit aeb2ce7435
4 changed files with 252 additions and 111 deletions

View file

@ -20,15 +20,11 @@ const ProjectEntityHandler = {
}
ProjectEntityHandler._getAllFolders(projectId, (error, folders) => {
if (folders == null) {
folders = {}
}
if (error != null) {
return callback(error)
}
const docs = {}
for (const folderPath in folders) {
const folder = folders[folderPath]
for (const { path: folderPath, folder } of folders) {
for (const doc of folder.docs || []) {
const content = docContents[doc._id.toString()]
if (content != null) {
@ -49,15 +45,11 @@ const ProjectEntityHandler = {
getAllFiles(projectId, callback) {
ProjectEntityHandler._getAllFolders(projectId, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) {
return callback(err)
}
const files = {}
for (const folderPath in folders) {
const folder = folders[folderPath]
for (const { path: folderPath, folder } of folders) {
for (const file of folder.fileRefs || []) {
if (file != null) {
files[path.join(folderPath, file.name)] = file
@ -83,16 +75,12 @@ const ProjectEntityHandler = {
getAllEntitiesFromProject(project, callback) {
ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) {
return callback(err)
}
const docs = []
const files = []
for (const folderPath in folders) {
const folder = folders[folderPath]
for (const { path: folderPath, folder } of folders) {
for (const doc of folder.docs || []) {
if (doc != null) {
docs.push({ path: path.join(folderPath, doc.name), doc })
@ -122,15 +110,11 @@ const ProjectEntityHandler = {
getAllDocPathsFromProject(project, callback) {
ProjectEntityHandler._getAllFoldersFromProject(project, (err, folders) => {
if (folders == null) {
folders = {}
}
if (err != null) {
return callback(err)
}
const docPath = {}
for (const folderPath in folders) {
const folder = folders[folderPath]
for (const { path: folderPath, folder } of folders) {
for (const doc of folder.docs || []) {
docPath[doc._id] = path.join(folderPath, doc.name)
}
@ -226,9 +210,9 @@ const ProjectEntityHandler = {
},
_getAllFoldersFromProject(project, callback) {
const folders = {}
const folders = []
function processFolder(basePath, folder) {
folders[basePath] = folder
folders.push({ path: basePath, folder })
for (const childFolder of folder.folders || []) {
if (childFolder.name != null) {
processFolder(path.join(basePath, childFolder.name), childFolder)

View file

@ -1355,25 +1355,34 @@ const ProjectEntityUpdateHandler = {
if (error != null) {
return callback(error)
}
docs = _.map(docs, doc => ({
doc: doc.doc._id,
path: doc.path,
}))
files = _.map(files, file => ({
file: file.file._id,
path: file.path,
url: FileStoreHandler._buildUrl(projectId, file.file._id),
_hash: file.file.hash,
}))
DocumentUpdaterHandler.resyncProjectHistory(
ProjectEntityUpdateHandler._checkFiletree(
projectId,
projectHistoryId,
docs,
files,
callback
[...docs, ...files],
error => {
if (error) {
return callback(error)
}
docs = _.map(docs, doc => ({
doc: doc.doc._id,
path: doc.path,
}))
files = _.map(files, file => ({
file: file.file._id,
path: file.path,
url: FileStoreHandler._buildUrl(projectId, file.file._id),
_hash: file.file.hash,
}))
DocumentUpdaterHandler.resyncProjectHistory(
projectId,
projectHistoryId,
docs,
files,
callback
)
}
)
}
)
@ -1381,6 +1390,91 @@ const ProjectEntityUpdateHandler = {
)
),
_checkFiletree(projectId, projectHistoryId, entities, callback) {
const renames = []
const paths = new Map()
for (const entity of entities) {
// if the path is invalid (i.e. not going to be accepted in
// project-history due to filename rules) we could also flag it for a
// rename here.
if (paths.has(entity.path)) {
renames.push(entity)
} else {
paths.set(entity.path, entity)
}
}
if (renames.length === 0) {
return callback()
}
logger.warn({ projectId, renames }, 'found conflict in filetree')
// find new names for duplicate files
for (const entity of renames) {
const {
newPath,
newName,
} = ProjectEntityUpdateHandler.findNextAvailableName(paths, entity)
logger.debug({ projectId, newName, newPath }, 'found new name')
entity.newPath = newPath
entity.newName = newName
}
// rename the duplicate files
const doRename = (entity, cb) => {
const entityId = entity.doc ? entity.doc._id : entity.file._id
const entityType = entity.doc ? 'doc' : 'file'
ProjectEntityMongoUpdateHandler.renameEntity(
projectId,
entityId,
entityType,
entity.newName,
(err, project, startPath, endPath, rev, changes) => {
if (err) {
return cb(err)
}
// update the renamed entity for the resync
entity.path = entity.newPath
if (entityType === 'doc') {
entity.doc.name = entity.newName
} else {
entity.file.name = entity.newName
}
delete entity.newPath
delete entity.newname
DocumentUpdaterHandler.updateProjectStructure(
projectId,
projectHistoryId,
null,
changes,
cb
)
}
)
}
async.eachSeries(renames, doRename, callback)
},
findNextAvailableName(allPaths, entity) {
const incrementReplacer = (match, p1) => {
return ' (' + (parseInt(p1, 10) + 1) + ')'
}
let candidatePath = entity.path
// if the filename was invalid we should normalise it here too. Currently
// this only handles renames in the same folder, so we will be out of luck
// if it is the folder name which in invalid. We could handle folder
// renames by returning the folders list from getAllEntitiesFromProject
do {
// does the filename look like "foo (1)" if so, increment the number in parentheses
if (/ \(\d+\)$/.test(candidatePath)) {
candidatePath = candidatePath.replace(/ \((\d+)\)$/, incrementReplacer)
} else {
// otherwise, add a ' (1)' suffix to the name
candidatePath = candidatePath + ' (1)'
}
} while (allPaths.has(candidatePath)) // keep going until the name is unique
// add the new name to the set
allPaths.set(candidatePath, entity)
return { newPath: candidatePath, newName: candidatePath.split('/').pop() }
},
isPathValidForRootDoc(docPath) {
const docExtension = Path.extname(docPath)
return VALID_ROOT_DOC_REGEXP.test(docExtension)

View file

@ -1,53 +1,26 @@
/* eslint-disable
camelcase,
max-len,
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS206: Consider reworking classes to avoid initClass
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { assert, expect } = require('chai')
const { expect } = require('chai')
const sinon = require('sinon')
const modulePath = '../../../../app/src/Features/Project/ProjectEntityHandler'
const SandboxedModule = require('sandboxed-module')
const Errors = require('../../../../app/src/Features/Errors/Errors')
describe('ProjectEntityHandler', function () {
const project_id = '4eecb1c1bffa66588e0000a1'
const doc_id = '4eecb1c1bffa66588e0000a2'
const folder_id = '4eecaffcbffa66588e000008'
const rootFolderId = '4eecaffcbffa66588e000007'
const userId = 1234
const projectId = '4eecb1c1bffa66588e0000a1'
const docId = '4eecb1c1bffa66588e0000a2'
beforeEach(function () {
let Project
this.TpdsUpdateSender = {
addDoc: sinon.stub().callsArg(1),
addFile: sinon.stub().callsArg(1),
}
this.ProjectModel = Project = (function () {
Project = class Project {
static initClass() {
this.prototype.rootFolder = [this.rootFolder]
}
constructor(options) {
this._id = project_id
this.name = 'project_name_here'
this.rev = 0
}
this.ProjectModel = class Project {
constructor(options) {
this._id = projectId
this.name = 'project_name_here'
this.rev = 0
this.rootFolder = [this.rootFolder]
}
Project.initClass()
return Project
})()
}
this.project = new this.ProjectModel()
this.ProjectLocator = { findElement: sinon.stub() }
@ -56,7 +29,7 @@ describe('ProjectEntityHandler', function () {
}
this.callback = sinon.stub()
return (this.ProjectEntityHandler = SandboxedModule.require(modulePath, {
this.ProjectEntityHandler = SandboxedModule.require(modulePath, {
requires: {
'../Docstore/DocstoreManager': (this.DocstoreManager = {}),
'../../Features/DocumentUpdater/DocumentUpdaterHandler': this
@ -68,7 +41,7 @@ describe('ProjectEntityHandler', function () {
'./ProjectGetter': (this.ProjectGetter = {}),
'../ThirdPartyDataStore/TpdsUpdateSender': this.TpdsUpdateSender,
},
}))
})
})
describe('getting folders, docs and files', function () {
@ -131,13 +104,11 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getAllDocs = sinon
.stub()
.callsArgWith(1, null, this.docs)
this.ProjectEntityHandler.getAllDocs(project_id, this.callback)
this.ProjectEntityHandler.getAllDocs(projectId, this.callback)
})
it('should get the doc lines and rev from the docstore', function () {
this.DocstoreManager.getAllDocs
.calledWith(project_id)
.should.equal(true)
this.DocstoreManager.getAllDocs.calledWith(projectId).should.equal(true)
})
it('should call the callback with the docs with the lines and rev included', function () {
@ -163,7 +134,7 @@ describe('ProjectEntityHandler', function () {
describe('getAllFiles', function () {
beforeEach(function () {
this.callback = sinon.stub()
this.ProjectEntityHandler.getAllFiles(project_id, this.callback)
this.ProjectEntityHandler.getAllFiles(projectId, this.callback)
})
it('should call the callback with the files', function () {
@ -197,7 +168,7 @@ describe('ProjectEntityHandler', function () {
)
})
it('should call the callback with the path for each doc_id', function () {
it('should call the callback with the path for each docId', function () {
this.expected = {}
this.expected[this.doc1._id] = `/${this.doc1.name}`
this.expected[this.doc2._id] = `/folder1/${this.doc2.name}`
@ -211,7 +182,7 @@ describe('ProjectEntityHandler', function () {
})
it('should call the callback with the path for an existing doc id at the root level', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id,
projectId,
this.doc1._id,
this.callback
)
@ -220,7 +191,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with the path for an existing doc id nested within a folder', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id,
projectId,
this.doc2._id,
this.callback
)
@ -231,7 +202,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with a NotFoundError for a non-existing doc', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id,
projectId,
'non-existing-id',
this.callback
)
@ -242,7 +213,7 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with a NotFoundError for an existing file', function () {
this.ProjectEntityHandler.getDocPathByProjectIdAndDocId(
project_id,
projectId,
this.file1._id,
this.callback
)
@ -255,21 +226,21 @@ describe('ProjectEntityHandler', function () {
describe('_getAllFolders', function () {
beforeEach(function () {
this.callback = sinon.stub()
this.ProjectEntityHandler._getAllFolders(project_id, this.callback)
this.ProjectEntityHandler._getAllFolders(projectId, this.callback)
})
it('should get the project without the docs lines', function () {
this.ProjectGetter.getProjectWithoutDocLines
.calledWith(project_id)
.calledWith(projectId)
.should.equal(true)
})
it('should call the callback with the folders', function () {
this.callback
.calledWith(null, {
'/': this.project.rootFolder[0],
'/folder1': this.folder1,
})
.calledWith(null, [
{ path: '/', folder: this.project.rootFolder[0] },
{ path: '/folder1', folder: this.folder1 },
])
.should.equal(true)
})
})
@ -285,10 +256,10 @@ describe('ProjectEntityHandler', function () {
it('should call the callback with the folders', function () {
this.callback
.calledWith(null, {
'/': this.project.rootFolder[0],
'/folder1': this.folder1,
})
.calledWith(null, [
{ path: '/', folder: this.project.rootFolder[0] },
{ path: '/folder1', folder: this.folder1 },
])
.should.equal(true)
})
})
@ -304,12 +275,12 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getDoc = sinon
.stub()
.callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges)
this.ProjectEntityHandler.getDoc(project_id, doc_id, this.callback)
this.ProjectEntityHandler.getDoc(projectId, docId, this.callback)
})
it('should call the docstore', function () {
this.DocstoreManager.getDoc
.calledWith(project_id, doc_id)
.calledWith(projectId, docId)
.should.equal(true)
})
@ -332,15 +303,12 @@ describe('ProjectEntityHandler', function () {
this.DocstoreManager.getDoc = sinon
.stub()
.callsArgWith(3, null, this.lines, this.rev, this.version, this.ranges)
result = await this.ProjectEntityHandler.promises.getDoc(
project_id,
doc_id
)
result = await this.ProjectEntityHandler.promises.getDoc(projectId, docId)
})
it('should call the docstore', function () {
this.DocstoreManager.getDoc
.calledWith(project_id, doc_id)
.calledWith(projectId, docId)
.should.equal(true)
})

View file

@ -78,7 +78,7 @@ describe('ProjectEntityUpdateHandler', function () {
flushDocToMongo: sinon.stub().yields(),
updateProjectStructure: sinon.stub().yields(),
setDocument: sinon.stub(),
resyncProjectHistory: sinon.stub(),
resyncProjectHistory: sinon.stub().yields(),
deleteDoc: sinon.stub().yields(),
}
this.fs = {
@ -118,7 +118,7 @@ describe('ProjectEntityUpdateHandler', function () {
replaceFileWithNew: sinon.stub(),
mkdirp: sinon.stub(),
moveEntity: sinon.stub(),
renameEntity: sinon.stub(),
renameEntity: sinon.stub().yields(),
deleteEntity: sinon.stub(),
replaceDocWithFile: sinon.stub(),
replaceFileWithDoc: sinon.stub(),
@ -136,6 +136,11 @@ describe('ProjectEntityUpdateHandler', function () {
copyFile: sinon.stub(),
uploadFileFromDisk: sinon.stub(),
deleteFile: sinon.stub(),
_buildUrl: sinon
.stub()
.callsFake(
(projectId, fileId) => `www.filestore.test/${projectId}/${fileId}`
),
}
this.FileWriter = {
writeLinesToDisk: sinon.stub(),
@ -1940,10 +1945,6 @@ describe('ProjectEntityUpdateHandler', function () {
docs,
files
)
this.FileStoreHandler._buildUrl = (projectId, fileId) =>
`www.filestore.test/${projectId}/${fileId}`
this.DocumentUpdaterHandler.resyncProjectHistory.yields()
this.ProjectEntityUpdateHandler.resyncProjectHistory(
projectId,
this.callback
@ -1984,6 +1985,100 @@ describe('ProjectEntityUpdateHandler', function () {
this.callback.called.should.equal(true)
})
})
describe('a project with duplicate filenames', function () {
beforeEach(function (done) {
this.ProjectGetter.getProject.yields(null, this.project)
this.docs = [
{ doc: { _id: 'doc1' }, path: 'main.tex' },
{ doc: { _id: 'doc2' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc3' }, path: 'a/b/c/duplicate.tex' },
{ doc: { _id: 'doc4' }, path: 'another dupe (22)' },
{ doc: { _id: 'doc5' }, path: 'a/b/c/duplicate.tex' },
]
this.files = [
{ file: { _id: 'file1', hash: 'hash1' }, path: 'image.jpg' },
{ file: { _id: 'file2', hash: 'hash2' }, path: 'duplicate.jpg' },
{ file: { _id: 'file3', hash: 'hash3' }, path: 'duplicate.jpg' },
{ file: { _id: 'file4', hash: 'hash4' }, path: 'another dupe (22)' },
]
this.ProjectEntityHandler.getAllEntitiesFromProject.yields(
null,
this.docs,
this.files
)
this.ProjectEntityUpdateHandler.resyncProjectHistory(projectId, done)
})
it('renames the duplicate files', function () {
const renameEntity = this.ProjectEntityMongoUpdateHandler.renameEntity
expect(renameEntity).to.have.callCount(4)
expect(renameEntity).to.have.been.calledWith(
projectId,
'doc3',
'doc',
'duplicate.tex (1)'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'doc5',
'doc',
'duplicate.tex (2)'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'file3',
'file',
'duplicate.jpg (1)'
)
expect(renameEntity).to.have.been.calledWith(
projectId,
'file4',
'file',
'another dupe (23)'
)
})
it('tells the doc updater to resync the project', function () {
const docs = [
{ doc: 'doc1', path: 'main.tex' },
{ doc: 'doc2', path: 'a/b/c/duplicate.tex' },
{ doc: 'doc3', path: 'a/b/c/duplicate.tex (1)' },
{ doc: 'doc4', path: 'another dupe (22)' },
{ doc: 'doc5', path: 'a/b/c/duplicate.tex (2)' },
]
const urlPrefix = `www.filestore.test/${projectId}`
const files = [
{
file: 'file1',
path: 'image.jpg',
url: `${urlPrefix}/file1`,
_hash: 'hash1',
},
{
file: 'file2',
path: 'duplicate.jpg',
url: `${urlPrefix}/file2`,
_hash: 'hash2',
},
{
file: 'file3',
path: 'duplicate.jpg (1)',
url: `${urlPrefix}/file3`,
_hash: 'hash3',
},
{
file: 'file4',
path: 'another dupe (23)',
url: `${urlPrefix}/file4`,
_hash: 'hash4',
},
]
expect(
this.DocumentUpdaterHandler.resyncProjectHistory
).to.have.been.calledWith(projectId, projectHistoryId, docs, files)
})
})
})
describe('_cleanUpEntity', function () {