Mirror of https://github.com/overleaf/overleaf.git
Merge pull request #22017 from overleaf/jpa-process-all-projects
[history-v1] back_fill_file_hash: process all projects

GitOrigin-RevId: 41c3cd59022bbac09552684ef2a99c58f2235ac3
parent 1fca37af61
commit 2e630e50dc
2 changed files with 19 additions and 21 deletions
@@ -84,17 +84,11 @@ ObjectId.cacheHexString = true
 
 const COLLECT_BLOBS = process.argv.includes('blobs')
 
-// Time of closing the ticket for adding hashes: https://github.com/overleaf/internal/issues/464#issuecomment-492668129
-const ALL_PROJECTS_HAVE_FILE_HASHES_AFTER = new Date('2019-05-15T14:02:00Z')
 const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')
 const BATCH_RANGE_START =
   process.env.BATCH_RANGE_START ||
   ObjectId.createFromTime(PUBLIC_LAUNCH_DATE.getTime() / 1000).toString()
-const BATCH_RANGE_END =
-  process.env.BATCH_RANGE_END ||
-  ObjectId.createFromTime(
-    ALL_PROJECTS_HAVE_FILE_HASHES_AFTER.getTime() / 1000
-  ).toString()
+const BATCH_RANGE_END = process.env.BATCH_RANGE_END || new ObjectId().toString()
 // We need to control the start and end as ids of deleted projects are created at time of deletion.
 delete process.env.BATCH_RANGE_START
 delete process.env.BATCH_RANGE_END
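For context on the hunk above: the batch range endpoints are ObjectId strings because project ids embed their creation time, so an id built from a date acts as a range boundary. A minimal standalone sketch, not part of the diff and assuming the mongodb driver's ObjectId, of how the old date-based upper bound compares with the new "now" upper bound:

// Sketch only: deriving batch boundaries from dates.
// ObjectId.createFromTime(seconds) builds an id whose embedded timestamp is
// the given Unix time, so it sorts before every id created after that time.
import { ObjectId } from 'mongodb'

const PUBLIC_LAUNCH_DATE = new Date('2012-01-01T00:00:00Z')

// Lower bound: an id stamped at the public launch date.
const start = ObjectId.createFromTime(
  PUBLIC_LAUNCH_DATE.getTime() / 1000
).toString()

// Old upper bound: an id stamped at the 2019 cut-off date.
const oldEnd = ObjectId.createFromTime(
  new Date('2019-05-15T14:02:00Z').getTime() / 1000
).toString()

// New upper bound: an id stamped "now", so every existing project is in range.
const newEnd = new ObjectId().toString()

console.log({ start, oldEnd, newEnd })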
@@ -168,7 +168,7 @@ describe('back_fill_file_hash script', function () {
     { projectId: projectId1, historyId: historyId1, fileId: fileId1 },
     { projectId: projectId1, historyId: historyId1, fileId: fileIdDeleted1 },
     // { historyId: historyId2, fileId: fileId2 }, // already has hash
-    // { historyId: historyId3, fileId: fileId3 }, // too new
+    { projectId: projectId3, historyId: historyId3, fileId: fileId3 },
     {
       projectId: projectIdDeleted0,
       historyId: historyIdDeleted0,
@@ -284,7 +284,7 @@ describe('back_fill_file_hash script', function () {
         fileRefs: [],
         folders: [
           {
-            fileRefs: [{ _id: fileId3, hash: gitBlobHash(fileId3) }],
+            fileRefs: [{ _id: fileId3 }],
             folders: [],
           },
         ],
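The fixture change above strips the pre-set hash from fileId3 so the script has to compute and back-fill it; the expected value comes from the test's gitBlobHash helper, whose body is not shown in this diff. A git blob hash is the SHA-1 of "blob <size>\0<content>", the same value `git hash-object` produces; a sketch of that computation in Node (the fixture's actual input content is not shown here):

import { createHash } from 'node:crypto'

// Git blob object hash: SHA-1 over "blob <byte length>\0" followed by the
// file content.
function gitBlobHashOf(content) {
  const body = Buffer.from(content)
  return createHash('sha1')
    .update(`blob ${body.byteLength}\0`)
    .update(body)
    .digest('hex')
}

// The empty file hashes to the well-known e69de29bb2d1d6434b8b29ae775ad8c2e48c5391.
console.log(gitBlobHashOf(''))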
@@ -782,6 +782,10 @@ describe('back_fill_file_hash script', function () {
           binaryForGitBlobHash(gitBlobHash(fileIdDeleted2)),
         ].sort(),
       },
+      {
+        _id: projectId3,
+        blobs: [binaryForGitBlobHash(gitBlobHash(fileId3))].sort(),
+      },
     ])
   })
   it('should process nothing on re-run', async function () {
@@ -789,9 +793,9 @@ describe('back_fill_file_hash script', function () {
     expect(rerun.stats).deep.equal({
       ...STATS_ALL_ZERO,
       // We still need to iterate over all the projects and blobs.
-      projects: 4,
-      blobs: 10,
-      backedUpBlobs: 10,
+      projects: 6,
+      blobs: 11,
+      backedUpBlobs: 11,
     })
   })
   it('should have backed up all the files', async function () {
@@ -923,10 +927,10 @@ describe('back_fill_file_hash script', function () {
     writeToGCSEgress: 4000096,
   }
   const STATS_UP_FROM_PROJECT1_ONWARD = {
-    projects: 2,
+    projects: 4,
     blobs: 1,
     backedUpBlobs: 0,
-    filesWithoutHash: 3,
+    filesWithoutHash: 4,
     filesDuplicated: 0,
     filesRetries: 0,
     filesFailed: 0,
@@ -936,17 +940,17 @@ describe('back_fill_file_hash script', function () {
     projectDeleted: 0,
     projectHardDeleted: 0,
     fileHardDeleted: 0,
-    mongoUpdates: 5,
+    mongoUpdates: 7,
     deduplicatedWriteToAWSLocalCount: 1,
     deduplicatedWriteToAWSLocalEgress: 30,
     deduplicatedWriteToAWSRemoteCount: 0,
     deduplicatedWriteToAWSRemoteEgress: 0,
-    readFromGCSCount: 4,
-    readFromGCSIngress: 79,
-    writeToAWSCount: 3,
-    writeToAWSEgress: 85,
-    writeToGCSCount: 2,
-    writeToGCSEgress: 48,
+    readFromGCSCount: 5,
+    readFromGCSIngress: 103,
+    writeToAWSCount: 4,
+    writeToAWSEgress: 115,
+    writeToGCSCount: 3,
+    writeToGCSEgress: 72,
   }
 
   function sumStats(a, b) {
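The trailing context above references sumStats, which the tests use to combine per-range stats objects into the totals they assert. Its body is not part of this diff; a plausible sketch, assuming both objects carry the same numeric counters (the real helper may differ):

// Hypothetical sketch: element-wise sum of two stats objects with matching keys.
function sumStats(a, b) {
  return Object.fromEntries(
    Object.entries(a).map(([key, value]) => [key, value + (b[key] ?? 0)])
  )
}

// Usage in the tests would look like sumStats(statsA, statsB) to build a
// combined expectation from the stats constants defined above.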