From 5dd40dc7cb1559aa847f74f4fef8855e5673e9f5 Mon Sep 17 00:00:00 2001 From: Henry Oswald Date: Tue, 14 Apr 2015 15:21:07 +0100 Subject: [PATCH] added migration 2 to clean up projects, removing the doc lines --- .../2_doc_lines_delete_from_project.coffee | 194 ++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 migrations/2_doc_lines_delete_from_project.coffee diff --git a/migrations/2_doc_lines_delete_from_project.coffee b/migrations/2_doc_lines_delete_from_project.coffee new file mode 100644 index 0000000000..49e7eb0918 --- /dev/null +++ b/migrations/2_doc_lines_delete_from_project.coffee @@ -0,0 +1,194 @@ +Settings = require "settings-sharelatex" +fs = require("fs") +mongojs = require("mongojs") +ObjectId = mongojs.ObjectId +console.log Settings.mongo.url +db = mongojs(Settings.mongo.url, ['projects', 'docs']) +_ = require("lodash") +async = require("async") +exec = require("child_process").exec + +finished_projects_path = "/home/sharelatex/finished-projects" +all_projects_path = "/home/sharelatex/all-projects" +unmigrated_docs_path = "/home/sharelatex/unmigrated" + +processedFiles = fs.readFileSync finished_projects_path + +printProgress = -> + exec "wc #{finished_projects_path}", (error, results) -> + setTimeout printProgress, 1000 * 30 + +checkIfFileHasBeenProccessed = (project_id, callback)-> + hasBeenProcessed = _.include processedFiles, project + callback null, hasBeenProcessed + # exec "grep #{project_id} #{finished_projects_path}", (error, results) -> + # hasBeenProcessed = _.include(results, project_id) + # callback(error, hasBeenProcessed) + +loadProjectIds = (callback)-> + console.log "loading project ids from #{all_projects_path}" + fs.readFile all_projects_path, "utf-8", (err, data)-> + ids = data.split("\n") + console.log "loaded #{ids.length} project ids from #{all_projects_path}" + callback err, ids + +getAndWriteProjectids = (callback)-> + console.log "finding all project id's - #{new Date().toString()}" + db.projects.find {}, {_id:1}, (err, ids)-> + console.log "total found projects in mongo #{ids.length} - #{new Date().toString()}" + ids = _.pluck ids, '_id' + ids = _.filter ids, (id)-> id? + fileData = ids.join("\n") + fs.writeFile all_projects_path, fileData, -> + callback(err, ids) + +markDocAsUnmigrated = (project_id, doc_id, callback)-> + console.log "#{project_id} #{doc_id} unmigrated" + markProjectAsProcessed project_id, (err)-> + fs.appendFile unmigrated_docs_path, "#{project_id} #{doc_id}\n", callback + +markUnmigratedDocs = (project_id, docs, callback)-> + console.log docs.length, project_id, "unmigrated" + jobs = _.map docs, (doc)-> + (cb)-> + markDocAsUnmigrated project_id, doc._id, cb + async.series jobs, callback + +getProjectIds = (callback)-> + exists = fs.existsSync all_projects_path + if exists + loadProjectIds callback + else + getAndWriteProjectids callback + +markProjectAsProcessed = (project_id, callback)-> + fs.appendFile finished_projects_path, "#{project_id}\n", callback + +getAllDocs = (project_id, callback = (error, docs) ->) -> + excludes = {} + for i in [0..12] + excludes["rootFolder#{Array(i).join(".folders")}.docs.lines"] = 0 + db.projects.findOne _id: ObjectId(project_id.toString()), excludes, (error, project) -> + return callback(error) if error? + if !project? + console.log "no such project #{project_id}" + return callback() + findAllDocsInProject project, (error, docs) -> + return callback(error) if error? + return callback null, docs, project + +findAllDocsInProject = (project, callback = (error, docs) ->) -> + callback null, _findAllDocsInFolder project.rootFolder[0] + +findDocInProject = (project, doc_id, callback = (error, doc, mongoPath) ->) -> + result = _findDocInFolder project.rootFolder[0], doc_id, "rootFolder.0" + if result? + callback null, result.doc, result.mongoPath + else + callback null, null, null + +_findDocInFolder = (folder = {}, doc_id, currentPath) -> + for doc, i in folder.docs or [] + if doc?._id? and doc._id.toString() == doc_id.toString() + return { + doc: doc + mongoPath: "#{currentPath}.docs.#{i}" + } + for childFolder, i in folder.folders or [] + result = _findDocInFolder childFolder, doc_id, "#{currentPath}.folders.#{i}" + return result if result? + + return null + +_findAllDocsInFolder = (folder = {}) -> + docs = folder.docs or [] + for childFolder in folder.folders or [] + docs = docs.concat _findAllDocsInFolder childFolder + return docs + +isDocInDocCollection = (doc, callback)-> + if !doc?._id? or doc._id.length == 0 + return callback(null, true) + db.docs.find({_id: ObjectId(doc._id+"")}, {_id: 1}).limit 1, (err, foundDocs)-> + exists = foundDocs.length > 0 + console.log doc._id, "Exits = "+exists + callback err, exists + +getWhichDocsCanBeDeleted = (docs, callback = (err, docsToBeDeleted, unmigratedDocs)->)-> + docsToBeDeleted = [] + unmigratedDocs = [] + + jobs = _.map docs, (doc)-> + return (cb)-> + isDocInDocCollection doc, (err, exists)-> + if exists + docsToBeDeleted.push doc + else + unmigratedDocs.push doc + cb(err) + async.series jobs, (err)-> + callback err, docsToBeDeleted, unmigratedDocs + +whipeDocLines = (project_id, mongoPath, callback)-> + update = + $unset: {} + update.$unset["#{mongoPath}.lines"] = "" + update.$unset["#{mongoPath}.rev"] = "" + console.log "project id = #{ObjectId(project_id+'')}", update + #callback() + db.projects.update _id: ObjectId(project_id+''), update, callback + + +removeDocLinesFromProject = (docs, project, callback)-> + jobs = _.map docs, (doc)-> + (cb)-> + findDocInProject project, doc._id, (err, doc, mongoPath)-> + whipeDocLines project._id, mongoPath, cb + async.parallelLimit jobs, 5, callback + +processNext = (project_id, callback)-> + if !project_id? or project_id.length == 0 + return callback() + checkIfFileHasBeenProccessed project_id, (err, hasBeenProcessed)-> + if hasBeenProcessed + console.log "#{project_id} already procssed, skipping" + return callback() + console.log "#{project_id} processing" + getAllDocs project_id, (err, docs, project)-> + if err? + console.error err, project_id, "could not get all docs" + return callback(err) + else + getWhichDocsCanBeDeleted docs, (err, docsToBeDeleted, unmigratedDocs)-> + if err? + console.error err, project_id, "could not save docs into mongo" + return callback(err) + markUnmigratedDocs project_id, unmigratedDocs, (err)-> + removeDocLinesFromProject docsToBeDeleted, project, (err)-> + if err? + return callback(err) + markProjectAsProcessed project_id, (err)-> + setTimeout( + -> callback(err) + ,5000) + +exports.migrate = (client, done = ->)-> + getProjectIds (err, ids)-> + printProgress() + jobs = _.map ids, (id)-> + return (cb)-> + processNext(id, cb) + async.series jobs, (err)-> + if err? + console.error err, "at end of jobs" + process.exit() + else + console.log "finished" + process.exit() + done(err) + + +exports.rollback = (next)-> + next() + +exports.migrate()