overleaf/migrations/1_move_doc_lines_to_doc_collection.coffee

165 lines
4.9 KiB
CoffeeScript
Raw Normal View History

2015-02-10 17:25:13 +00:00
Settings = require "settings-sharelatex"
bson = require('bson')
BSON = new bson()
2015-01-28 17:50:39 +00:00
fs = require("fs")
mongojs = require("mongojs")
ObjectId = mongojs.ObjectId
2015-02-20 15:40:11 +00:00
console.log Settings.mongo.url
2015-02-10 17:25:13 +00:00
db = mongojs(Settings.mongo.url, ['projects', 'docs'])
_ = require("lodash")
2015-01-28 17:50:39 +00:00
async = require("async")
exec = require("child_process").exec
2015-01-28 17:50:39 +00:00
# File that markProjectAsProcessed appends to: one migrated project id per line.
finished_projects_path = "/tmp/finished-projects"
# File holding the ids of every project to migrate, one per line; written by
# getAndWriteProjectids and read back by loadProjectIds.
all_projects_path = "/tmp/all-projects"
2015-02-20 15:40:11 +00:00
# File that markProjectAsToLargeAndFinished appends skipped (oversized) project ids to.
project_too_large_path = "/tmp/large_projects"
2015-01-28 17:50:39 +00:00
# Report migration progress and re-arm itself to report again 30 seconds
# after each report completes.
#
# Progress is the output of `wc` on the finished-projects file, which gains
# one line per processed project. The output used to be discarded, so nothing
# was ever printed; it is now logged.
printProgress = ->
  exec "wc #{finished_projects_path}", (error, results) ->
    # `wc` exits non-zero (e.g. when the file does not exist yet); in that case
    # there is nothing useful to print, but the timer must still be re-armed.
    if !error? and results
      console.log "progress (wc lines/words/bytes): #{results.trim()}"
    setTimeout printProgress, 1000 * 30
# Determine whether `project_id` is already listed in the finished-projects file.
#
# project_id - id to search for; it is interpolated into the shell command as-is.
# callback   - called with (error, hasBeenProcessed).
#
# grep exits with status 1 when no line matches, and exec reports every
# non-zero exit as an error. "Not processed yet" is an expected outcome, so
# that status is translated to a null error instead of being surfaced as a
# failure. Any other failure is still passed through as `error`.
checkIfFileHasBeenProccessed = (project_id, callback)->
  exec "grep #{project_id} #{finished_projects_path}", (error, results) ->
    error = null if error?.code == 1
    hasBeenProcessed = _.include(results, project_id)
    callback(error, hasBeenProcessed)
# Read the cached project id list from `all_projects_path`.
#
# callback - called with (err) if the file cannot be read, otherwise with
#            (null, ids) where ids is an array of the 24-character lines
#            found in the file.
loadProjectIds = (callback)->
  console.log "loading project ids from #{all_projects_path}"
  fs.readFile all_projects_path, "utf-8", (err, data)->
    # On a read failure `data` is undefined; report the error instead of
    # throwing a TypeError from data.split.
    return callback(err) if err?
    # Keep only lines of exactly 24 characters; this drops blank or truncated lines.
    ids = _.filter data.split("\n"), (id)-> id? and id.length == 24
    console.log "loaded #{ids.length} project ids from #{all_projects_path}"
    callback null, ids
# Fetch the _id of every project from mongo and cache the list, one id per
# line, in `all_projects_path`.
#
# callback - called with (err) if the query fails, otherwise with (null, ids)
#            where ids are the non-null _id values returned by the query.
getAndWriteProjectids = (callback)->
  console.log "finding all project id's - #{new Date().toString()}"
  db.projects.find {}, {_id:1}, (err, ids)->
    # `ids` is not usable when the query failed; bail out before touching it.
    return callback(err) if err?
    console.log "total found projects in mongo #{ids.length} - #{new Date().toString()}"
    ids = _.filter _.pluck(ids, '_id'), (id)-> id?
    fs.writeFile all_projects_path, ids.join("\n"), (writeErr)->
      # A failed write does not stop the run: the ids are already in memory.
      # It is logged rather than silently dropped.
      if writeErr?
        console.error writeErr, "could not write project ids to #{all_projects_path}"
      callback(null, ids)
2015-02-20 15:40:11 +00:00
# Record a project as finished and additionally list it in the
# too-large-projects file.
#
# project_id - id of the project being skipped.
# callback   - called with (err) if marking the project as processed fails,
#              otherwise with the result of appending to `project_too_large_path`.
markProjectAsToLargeAndFinished = (project_id, callback)->
  console.log "#{project_id} too large"
  markProjectAsProcessed project_id, (err)->
    # Previously this error was dropped and the second append ran regardless.
    return callback(err) if err?
    fs.appendFile project_too_large_path, "#{project_id}\n", callback
# Supply the list of project ids to migrate.
#
# If the cache file at `all_projects_path` exists, the ids are read from it;
# otherwise they are queried from mongo and the cache file is written.
#
# callback - forwarded unchanged to loadProjectIds / getAndWriteProjectids.
getProjectIds = (callback)->
  return loadProjectIds(callback) if fs.existsSync(all_projects_path)
  getAndWriteProjectids(callback)
2015-01-28 17:50:39 +00:00
# Append `project_id` as a new line to the finished-projects file.
#
# project_id - id to record.
# callback   - handed straight to fs.appendFile.
markProjectAsProcessed = (project_id, callback)->
  entry = "#{project_id}\n"
  fs.appendFile(finished_projects_path, entry, callback)
2015-01-28 17:50:39 +00:00
# Load a project from mongo and collect every doc stored in its folder tree.
#
# project_id - id of the project to load.
# callback   - called with (error) on failure, with no arguments when the
#              project does not exist, or with (null, docs) on success.
#              Projects whose BSON size exceeds 12,000,000 bytes are not
#              scanned; they are handed to markProjectAsToLargeAndFinished
#              together with this callback.
getAllDocs = (project_id, callback = (error, docs) ->) ->
  query = _id: ObjectId(project_id)
  db.projects.findOne query, (error, project) ->
    if error?
      return callback(error)
    unless project?
      console.log "no such project #{project_id}"
      return callback()
    projectBytes = BSON.calculateObjectSize(project)
    tooLarge = projectBytes > 12000000 #12mb
    return markProjectAsToLargeAndFinished(project_id, callback) if tooLarge
    findAllDocsInProject project, (error, docs) ->
      if error? then callback(error) else callback(null, docs)
# Collect every doc in a project's folder tree, starting at its first root folder.
#
# project  - project document; its docs are read from project.rootFolder[0].
# callback - called with (null, docs).
#
# A project without a rootFolder yields an empty list instead of throwing:
# the soak (`?[0]`) produces undefined, which _findAllDocsInFolder treats as
# an empty folder.
findAllDocsInProject = (project, callback = (error, docs) ->) ->
  callback null, _findAllDocsInFolder project.rootFolder?[0]
# Depth-first search for a doc by id inside a folder tree.
#
# folder      - folder object with optional `docs` and `folders` arrays.
# doc_id      - id to look for; compared via toString() on both sides.
# currentPath - dotted path of `folder`, used as the prefix of the result path.
#
# Returns { doc, mongoPath } for the first match, where mongoPath is
# currentPath extended with ".docs.<index>" (and ".folders.<index>" for each
# nested folder descended into), or null when no doc matches. Docs directly
# in a folder are checked before its sub-folders.
_findDocInFolder = (folder = {}, doc_id, currentPath) ->
  for candidate, position in folder.docs or []
    # Skip holes and docs that carry no _id.
    continue unless candidate?._id?
    if candidate._id.toString() == doc_id.toString()
      return { doc: candidate, mongoPath: "#{currentPath}.docs.#{position}" }

  for subFolder, position in folder.folders or []
    found = _findDocInFolder subFolder, doc_id, "#{currentPath}.folders.#{position}"
    return found if found?

  null
# Flatten a folder tree into a single list of docs.
#
# folder - folder object with optional `docs` and `folders` arrays
#          (defaults to an empty folder).
#
# Returns the folder's own docs followed by the docs of each sub-folder, in
# order, recursively. A folder without sub-folders returns its own `docs`
# array as-is.
_findAllDocsInFolder = (folder = {}) ->
  {docs, folders} = folder
  collected = docs or []
  for subFolder in (folders or [])
    collected = collected.concat _findAllDocsInFolder(subFolder)
  collected
# Insert one doc into the `docs` collection.
#
# project_id - owning project; a missing value is reported as "no project id".
# doc_id     - id of the doc; when missing, nothing is inserted and the
#              callback is called with no arguments.
# lines      - doc content; [""] is stored when missing.
# oldRev     - revision to store; 0 is stored when falsy.
# callback   - receives the result of db.docs.insert.
insertDocIntoDocCollection = (project_id, doc_id, lines, oldRev, callback)->
  return callback("no project id") unless project_id?
  return callback() unless doc_id?
  record =
    _id: ObjectId(doc_id.toString())
    lines: lines ? [""]
    project_id: ObjectId(project_id)
    rev: oldRev or 0
  db.docs.insert record, callback
2015-01-28 17:50:39 +00:00
# Insert every doc of a project into the `docs` collection, one after another.
#
# project_id - owning project id.
# docs       - docs to insert; null entries are logged and skipped.
# callback   - async.series completion callback.
saveDocsIntoMongo = (project_id, docs, callback)->
  DUPLICATE_KEY = 11000

  # Build the async job that inserts a single doc.
  makeJob = (doc)->
    (cb)->
      unless doc?
        console.error "null doc in project #{project_id}" #just skip it, not a big deal
        return cb()
      insertDocIntoDocCollection project_id, doc._id, doc.lines, doc.rev, (err)->
        # A duplicate key means the doc is already in the collection, so it is not an error.
        err = undefined if err?.code == DUPLICATE_KEY
        console.log("error inserting doc into doc collection", err) if err?
        cb(err)

  async.series _.map(docs, makeJob), callback
# Migrate a single project: skip it if it is already recorded as finished,
# otherwise copy its docs into the `docs` collection and record it as finished.
#
# project_id - id of the project to migrate.
# callback   - called with no arguments when the project is skipped, with the
#              error when loading or saving docs fails, or with the result of
#              markProjectAsProcessed once the project has been migrated.
processNext = (project_id, callback)->
  # Completion is deferred with setTimeout 0 so the callback runs on a later
  # tick (presumably to keep the call stack from growing across projects).
  finish = (err)->
    setTimeout (-> callback(err)), 0

  persistDocs = (docs)->
    saveDocsIntoMongo project_id, docs, (saveErr)->
      if saveErr?
        console.error saveErr, project_id, "could not save docs into mongo"
        return callback(saveErr)
      markProjectAsProcessed project_id, finish

  migrateProject = ->
    console.log "#{project_id} processing"
    getAllDocs project_id, (loadErr, docs)->
      if loadErr?
        console.error loadErr, project_id, "could not get all docs"
        return callback(loadErr)
      persistDocs docs

  # The error from the check is not inspected; only the flag decides what happens.
  checkIfFileHasBeenProccessed project_id, (checkErr, hasBeenProcessed)->
    return migrateProject() unless hasBeenProcessed
    console.log "#{project_id} already procssed, skipping"
    callback()
2015-02-20 15:40:11 +00:00
2015-01-28 17:50:39 +00:00
2015-02-10 17:25:13 +00:00
2015-02-20 15:40:11 +00:00
# Migration entry point: copy the docs of every project into the `docs`
# collection, processing projects one at a time.
#
# client - not used by this migration.
# done   - called with the error that stopped the migration, or with no error
#          once all projects have been processed.
exports.migrate = (client, done = ->)->
  getProjectIds (err, ids)->
    # Without the id list there is nothing to migrate. Previously this error
    # was ignored and the run reported "finished" with zero jobs.
    if err?
      console.error err, "could not get project ids"
      return done(err)
    printProgress()
    jobs = _.map ids, (id)->
      (cb)-> processNext(id, cb)
    async.series jobs, (err)->
      if err?
        console.error err, "at end of jobs"
      else
        console.log "finished"
      done(err)
2015-01-28 17:50:39 +00:00
2015-02-10 17:25:13 +00:00
# Rollback entry point: undoes nothing and immediately signals completion via `next`.
exports.rollback = (next)-> next()