diff --git a/services/history-v1/storage/scripts/back_fill_file_hash.mjs b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
index 859c286eb5..15c9e8508e 100644
--- a/services/history-v1/storage/scripts/back_fill_file_hash.mjs
+++ b/services/history-v1/storage/scripts/back_fill_file_hash.mjs
@@ -84,17 +84,11 @@ ObjectId.cacheHexString = true
 
 const COLLECT_BLOBS = process.argv.includes('blobs')
 
-// Time of closing the ticket for adding hashes: https://github.com/overleaf/internal/issues/464#issuecomment-492668129
-const ALL_PROJECTS_HAVE_FILE_HASHES_AFTER = new Date('2019-05-15T14:02:00Z')
 const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
 const BATCH_RANGE_START =
   process.env.BATCH_RANGE_START ||
   ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000).toString()
-const BATCH_RANGE_END =
-  process.env.BATCH_RANGE_END ||
-  ObjectId.createFromTime(
-    ALL_PROJECTS_HAVE_FILE_HASHES_AFTER.getTime() / 1000
-  ).toString()
+const BATCH_RANGE_END = process.env.BATCH_RANGE_END || new ObjectId().toString()
 // We need to control the start and end as ids of deleted projects are created at time of deletion.
 delete process.env.BATCH_RANGE_START
 delete process.env.BATCH_RANGE_END
diff --git a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
index b7f05738ff..6a2a4046f7 100644
--- a/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
+++ b/services/history-v1/test/acceptance/js/storage/back_fill_file_hash.test.mjs
@@ -168,7 +168,7 @@ describe('back_fill_file_hash script', function () {
     { projectId: projectId1, historyId: historyId1, fileId: fileId1 },
     { projectId: projectId1, historyId: historyId1, fileId: fileIdDeleted1 },
     // { historyId: historyId2, fileId: fileId2 }, // already has hash
-    // { historyId: historyId3, fileId: fileId3 }, // too new
+    { projectId: projectId3, historyId: historyId3, fileId: fileId3 },
     {
       projectId: projectIdDeleted0,
       historyId: historyIdDeleted0,
@@ -284,7 +284,7 @@ describe('back_fill_file_hash script', function () {
           fileRefs: [],
           folders: [
             {
-              fileRefs: [{ _id: fileId3, hash: gitBlobHash(fileId3) }],
+              fileRefs: [{ _id: fileId3 }],
               folders: [],
             },
           ],
@@ -782,6 +782,10 @@ describe('back_fill_file_hash script', function () {
           binaryForGitBlobHash(gitBlobHash(fileIdDeleted2)),
         ].sort(),
       },
+      {
+        _id: projectId3,
+        blobs: [binaryForGitBlobHash(gitBlobHash(fileId3))].sort(),
+      },
     ])
   })
   it('should process nothing on re-run', async function () {
@@ -789,9 +793,9 @@ describe('back_fill_file_hash script', function () {
     expect(rerun.stats).deep.equal({
       ...STATS_ALL_ZERO,
       // We still need to iterate over all the projects and blobs.
-      projects: 4,
-      blobs: 10,
-      backedUpBlobs: 10,
+      projects: 6,
+      blobs: 11,
+      backedUpBlobs: 11,
     })
   })
   it('should have backed up all the files', async function () {
@@ -923,10 +927,10 @@ describe('back_fill_file_hash script', function () {
    writeToGCSEgress: 4000096,
  }
  const STATS_UP_FROM_PROJECT1_ONWARD = {
-    projects: 2,
+    projects: 4,
    blobs: 1,
    backedUpBlobs: 0,
-    filesWithoutHash: 3,
+    filesWithoutHash: 4,
    filesDuplicated: 0,
    filesRetries: 0,
    filesFailed: 0,
@@ -936,17 +940,17 @@ describe('back_fill_file_hash script', function () {
    projectDeleted: 0,
    projectHardDeleted: 0,
    fileHardDeleted: 0,
-    mongoUpdates: 5,
+    mongoUpdates: 7,
    deduplicatedWriteToAWSLocalCount: 1,
    deduplicatedWriteToAWSLocalEgress: 30,
    deduplicatedWriteToAWSRemoteCount: 0,
    deduplicatedWriteToAWSRemoteEgress: 0,
-    readFromGCSCount: 4,
-    readFromGCSIngress: 79,
-    writeToAWSCount: 3,
-    writeToAWSEgress: 85,
-    writeToGCSCount: 2,
-    writeToGCSEgress: 48,
+    readFromGCSCount: 5,
+    readFromGCSIngress: 103,
+    writeToAWSCount: 4,
+    writeToAWSEgress: 115,
+    writeToGCSCount: 3,
+    writeToGCSEgress: 72,
  }
 
  function sumStats(a, b) {
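A minimal standalone sketch (not part of the patch) of the batch-range defaults this change relies on, assuming the `mongodb` driver's `ObjectId` API used by the script: the first four bytes of an ObjectId encode a Unix timestamp in seconds, so ids double as time-based range boundaries, and with the 2019 cutoff removed the default end of the range becomes a freshly generated id, i.e. roughly "now".

// sketch.mjs — illustrative only, assumes the `mongodb` driver's ObjectId API
import { ObjectId } from 'mongodb'

const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')

// Start of the range: an id whose embedded timestamp is the public launch date.
const rangeStart = ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000)

// End of the range: a freshly generated id, i.e. approximately the current time.
const rangeEnd = new ObjectId()

console.log(rangeStart.getTimestamp()) // 2012-01-01T00:00:00.000Z
console.log(rangeEnd.getTimestamp()) // approximately now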