Mirror of https://github.com/overleaf/overleaf.git, synced 2025-04-22 20:59:53 +00:00
Merge pull request #22091 from overleaf/jpa-bad-file-trees

[history-v1] back_fill_file_hash: gracefully handle bad file-trees

GitOrigin-RevId: 0419c06b1ccf827f4c6c5170978a38659435f26f

Commit 838ae23b52 (parent 2b1df12052)
2 changed files with 57 additions and 11 deletions
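What the change does: back_fill_file_hash previously let a malformed project file-tree abort the batch it was processing. With this commit, the findFiles traversal in findFileInBatch is moved after the deleted-file and project-blob handling and wrapped in try/catch; on failure the script increments a new STATS.badFileTrees counter and logs a 'bad file-tree' error with the project id, then carries on. The acceptance test adds a project whose rootFolder is an empty array to exercise that path and adjusts the expected stats. The first three hunks below come from the script itself; the remaining hunks come from its acceptance test.

A minimal, self-contained sketch of the pattern (illustrative only: the STATS, logger and findFiles names mirror the diff, but the bodies here are simplified stand-ins, not the actual script code):

// Sketch: walk a possibly malformed file-tree and degrade gracefully.
const STATS = { badFileTrees: 0 }
const logger = {
  // stand-in for the real logger used by the script
  error: (fields, msg) => console.error(msg, fields),
}

// Simplified stand-in for findFiles: throws a TypeError when the folder is
// missing, e.g. because project.rootFolder is an empty array.
function* findFiles(folder, prefix) {
  for (const fileRef of folder.fileRefs) {
    yield { fileId: fileRef._id, path: prefix + fileRef.name }
  }
  for (const child of folder.folders) {
    yield* findFiles(child, `${prefix}${child.name}/`)
  }
}

function* findFileInBatch(project) {
  try {
    yield* findFiles(project.rootFolder[0], '')
  } catch (err) {
    // Count and log the broken tree, then move on to the next project.
    STATS.badFileTrees++
    logger.error({ err, projectId: project._id }, 'bad file-tree')
  }
}

// With rootFolder: [] the traversal throws, the error is caught, and the
// caller simply receives no files for this project.
console.log([...findFileInBatch({ _id: 'bad', rootFolder: [] })], STATS)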
@@ -198,6 +198,7 @@ const STATS = {
   filesRetries: 0,
   filesFailed: 0,
   fileTreeUpdated: 0,
+  badFileTrees: 0,
   globalBlobsCount: 0,
   globalBlobsEgress: 0,
   projectDeleted: 0,
@@ -844,7 +845,6 @@ function* findFileInBatch(
     projectBlobs,
     projectBackedUpBlobs
   )
-  yield* findFiles(ctx, project.rootFolder[0], prefix, true)
   for (const fileId of projectDeletedFiles) {
     ctx.remainingQueueEntries++
     yield { ctx, cacheKey: fileId, fileId, path: '' }
@@ -860,6 +860,12 @@ function* findFileInBatch(
       hash: blob.getHash(),
     }
   }
+  try {
+    yield* findFiles(ctx, project.rootFolder[0], prefix, true)
+  } catch (err) {
+    STATS.badFileTrees++
+    logger.error({ err, projectId: projectIdS }, 'bad file-tree')
+  }
 }
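Everything from here on is in the script's acceptance test. It registers one extra project whose rootFolder is an empty array, which is exactly the "bad file-tree" case: project.rootFolder[0] is undefined, so the traversal throws and the new catch branch runs. The test also seeds a 'Hello 3' blob into that project's history, and the later assertions show the blob still gets backed up even though the tree cannot be walked. A hypothetical illustration of why the fixture breaks the traversal (the document shape is taken from the fixture; the access pattern is my assumption based on the script hunk above):

// Fixture shape from the test: a project with an empty rootFolder array.
const project = { _id: 'projectIdBadFileTree', rootFolder: [] }
const root = project.rootFolder[0] // undefined
// Reading root.fileRefs or root.folders throws a TypeError, which the
// script's new try/catch turns into STATS.badFileTrees++ plus a log line.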
@@ -104,12 +104,14 @@ describe('back_fill_file_hash script', function () {
   const projectIdHardDeleted = objectIdFromTime('2017-01-01T00:08:00Z')
   const projectIdNoOverleaf = objectIdFromTime('2017-01-01T00:09:00Z')
   const projectIdNoOverleafDeleted = objectIdFromTime('2017-01-01T00:10:00Z')
+  const projectIdBadFileTree = objectIdFromTime('2024-01-01T00:11:00Z')
   const historyId0 = 42 // stored as number is mongo
   const historyId1 = projectId1.toString()
   const historyId2 = projectId2.toString()
   const historyId3 = projectId3.toString()
   const historyIdDeleted0 = projectIdDeleted0.toString()
   const historyIdDeleted1 = projectIdDeleted1.toString()
+  const historyIdBadFileTree = projectIdBadFileTree.toString()
   const fileId0 = objectIdFromTime('2017-02-01T00:00:00Z')
   const fileId1 = objectIdFromTime('2017-02-01T00:01:00Z')
   const fileId2 = objectIdFromTime('2017-02-01T00:02:00Z')
@@ -129,6 +131,8 @@ describe('back_fill_file_hash script', function () {
   const hashTextBlob1 = gitBlobHashBuffer(contentTextBlob1)
   const contentTextBlob2 = Buffer.from('Hello 2')
   const hashTextBlob2 = gitBlobHashBuffer(contentTextBlob2)
+  const contentTextBlob3 = Buffer.from('Hello 3')
+  const hashTextBlob3 = gitBlobHashBuffer(contentTextBlob3)
   const deleteProjectsRecordId0 = new ObjectId()
   const deleteProjectsRecordId1 = new ObjectId()
   const deleteProjectsRecordId2 = new ObjectId()
@@ -182,6 +186,12 @@ describe('back_fill_file_hash script', function () {
     },
     // { historyId: historyIdDeleted0, fileId: fileIdDeleted3 }, // fileIdDeleted3 is dupe of fileIdDeleted2
     // { historyId: historyIdDeleted0, fileId: fileIdDeleted4 }, // already has hash
+    {
+      projectId: projectIdBadFileTree,
+      historyId: historyIdBadFileTree,
+      hash: hashTextBlob3,
+      content: contentTextBlob3,
+    },
   ]
   if (PRINT_IDS_AND_HASHES_FOR_DEBUGGING) {
     const fileIds = {
@@ -304,6 +314,11 @@ describe('back_fill_file_hash script', function () {
       _id: projectIdNoOverleaf,
       rootFolder: [{ fileRefs: [], folders: [] }],
     },
+    {
+      _id: projectIdBadFileTree,
+      rootFolder: [],
+      overleaf: { history: { id: historyIdBadFileTree } },
+    },
   ])
   await deletedProjectsCollection.insertMany([
     {
@@ -397,6 +412,7 @@ describe('back_fill_file_hash script', function () {
   await testProjects.createEmptyProject(historyId3)
   await testProjects.createEmptyProject(historyIdDeleted0)
   await testProjects.createEmptyProject(historyIdDeleted1)
+  await testProjects.createEmptyProject(historyIdBadFileTree)
 
   const blobStore0 = new BlobStore(historyId0.toString())
   await blobStore0.putString(contentTextBlob0.toString())
@@ -404,6 +420,8 @@ describe('back_fill_file_hash script', function () {
   await blobStore1.putString(contentTextBlob1.toString())
   const blobStore2 = new BlobStore(historyId2.toString())
   await blobStore2.putString(contentTextBlob2.toString())
+  const blobStoreBadFileTree = new BlobStore(historyIdBadFileTree.toString())
+  await blobStoreBadFileTree.putString(contentTextBlob3.toString())
 })
 
 beforeEach('populate filestore', async function () {
@@ -649,6 +667,11 @@ describe('back_fill_file_hash script', function () {
       _id: projectIdNoOverleaf,
       rootFolder: [{ fileRefs: [], folders: [] }],
     },
+    {
+      _id: projectIdBadFileTree,
+      rootFolder: [],
+      overleaf: { history: { id: historyIdBadFileTree } },
+    },
   ])
   expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
     {
@@ -802,6 +825,10 @@ describe('back_fill_file_hash script', function () {
       _id: projectId3,
       blobs: [binaryForGitBlobHash(gitBlobHash(fileId3))].sort(),
     },
+    {
+      _id: projectIdBadFileTree,
+      blobs: [binaryForGitBlobHash(hashTextBlob3)].sort(),
+    },
   ])
 })
 it('should process nothing on re-run', async function () {
@@ -809,9 +836,10 @@ describe('back_fill_file_hash script', function () {
   expect(rerun.stats).deep.equal({
     ...STATS_ALL_ZERO,
     // We still need to iterate over all the projects and blobs.
-    projects: 6,
-    blobs: 11,
-    backedUpBlobs: 11,
+    projects: 7,
+    blobs: 12,
+    backedUpBlobs: 12,
+    badFileTrees: 1,
   })
 })
 it('should have backed up all the files', async function () {
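Note on the re-run expectations above: as the script hunk shows, the catch branch only counts and logs, so nothing about the broken rootFolder is repaired and the bad tree is hit again on every pass. That is why badFileTrees is 1 even on the re-run, while the iteration-only counters (projects, blobs, backedUpBlobs) each grow by one for the extra fixture project and everything else stays at zero.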
@@ -904,6 +932,7 @@ describe('back_fill_file_hash script', function () {
   projectDeleted: 0,
   projectHardDeleted: 0,
   fileHardDeleted: 0,
+  badFileTrees: 0,
   mongoUpdates: 0,
   deduplicatedWriteToAWSLocalCount: 0,
   deduplicatedWriteToAWSLocalEgress: 0,
@@ -930,6 +959,7 @@ describe('back_fill_file_hash script', function () {
   projectDeleted: 0,
   projectHardDeleted: 0,
   fileHardDeleted: 0,
+  badFileTrees: 0,
   mongoUpdates: 6,
   deduplicatedWriteToAWSLocalCount: 0,
   deduplicatedWriteToAWSLocalEgress: 0,
@@ -943,8 +973,8 @@ describe('back_fill_file_hash script', function () {
   writeToGCSEgress: 4000096,
 }
 const STATS_UP_FROM_PROJECT1_ONWARD = {
-  projects: 4,
-  blobs: 1,
+  projects: 5,
+  blobs: 2,
   backedUpBlobs: 0,
   filesWithoutHash: 4,
   filesDuplicated: 0,
@@ -956,15 +986,16 @@ describe('back_fill_file_hash script', function () {
   projectDeleted: 0,
   projectHardDeleted: 0,
   fileHardDeleted: 0,
-  mongoUpdates: 7,
+  badFileTrees: 1,
+  mongoUpdates: 8,
   deduplicatedWriteToAWSLocalCount: 1,
   deduplicatedWriteToAWSLocalEgress: 30,
   deduplicatedWriteToAWSRemoteCount: 0,
   deduplicatedWriteToAWSRemoteEgress: 0,
-  readFromGCSCount: 5,
-  readFromGCSIngress: 103,
-  writeToAWSCount: 4,
-  writeToAWSEgress: 115,
+  readFromGCSCount: 6,
+  readFromGCSIngress: 110,
+  writeToAWSCount: 5,
+  writeToAWSEgress: 142,
   writeToGCSCount: 3,
   writeToGCSEgress: 72,
 }
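A quick sanity check on the deltas in this hunk (my reading of the fixture, not stated in the diff): the extra project with the broken tree adds one project and one blob (projects 4 -> 5, blobs 1 -> 2), one more read from GCS and one more write to AWS (readFromGCSCount 5 -> 6, writeToAWSCount 4 -> 5), and one more mongo update, presumably the write recording its backedUpBlobs entry (mongoUpdates 7 -> 8). readFromGCSIngress grows by 7 bytes (103 -> 110), matching the 7-byte 'Hello 3' blob, and writeToAWSEgress grows by 27 (115 -> 142), presumably the blob plus per-object overhead.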
@@ -1058,6 +1089,15 @@ describe('back_fill_file_hash script', function () {
   it('should print stats', function () {
     expect(output.stats).deep.equal(STATS_ALL)
   })
+  it('should have logged the bad file-tree', function () {
+    const line = output.result.stdout
+      .split('\n')
+      .find(l => l.includes('bad file-tree'))
+    expect(line).to.exist
+    expect(JSON.parse(line)).to.include({
+      projectId: projectIdBadFileTree.toString(),
+    })
+  })
   commonAssertions()
 })