use docHistoryStats to drive the packing script

This commit is contained in:
Brian Gough 2015-06-03 10:36:31 +01:00
parent 27a3511b37
commit 8668bb9be1

View file

@ -10,6 +10,7 @@ PackManager = require "./app/coffee/PackManager.coffee"
lineReader = require "line-reader"
cli = require "cli"
options = cli.parse {
'update': ['u', 'find documents to pack from database']
'dry-run': ['n', 'do not write to database'],
'fast': [false, 'no delays on writes']
}
@ -21,10 +22,16 @@ packDocHistory = (doc_id, callback) ->
util.log "starting pack operation for #{doc_id}"
getDocHistory doc_id, (err, docs) ->
return callback(err) if err?
origDocs = docs.length
origDocs = 0
origPacks = 0
for d in docs
if d.pack? then origPacks++ else origDocs++
PackManager.convertDocsToPacks docs, (err, packs) ->
return callback(err) if err?
util.log "docs #{origDocs} packs #{packs.length}"
total = 0
for p in packs
total = total + p.pack.length
util.log "docs #{origDocs} packs #{origPacks} => packs #{packs.length} of #{total} ops"
if packs.length
if options['dry-run']
util.log 'dry-run, skipping write packs'
@ -76,34 +83,52 @@ readFile = (file, callback) ->
.then () ->
callback(null, ids)
todoFile = cli.args[1]
doneFile = cli.args[2]
util.log "reading from #{todoFile}"
util.log "logging progress to #{doneFile}"
fs.appendFileSync doneFile, '# starting pack run at ' + new Date() + '\n'
shutdownRequested = false
process.on 'SIGINT', () ->
util.log "Gracefully shutting down from SIGINT"
shutdownRequested = true
readFile todoFile, (err, todo) ->
readFile doneFile, (err, done) ->
pending = _.difference todo, done
async.eachSeries pending, (doc_id, callback) ->
packDocHistory doc_id, (err, result) ->
if err?
console.log "ERROR:", err, result
return callback(err)
else if not options['dry-run']
fs.appendFileSync doneFile, doc_id + '\n'
if shutdownRequested
return callback('shutdown')
setTimeout () ->
callback(err, result)
, DOCUMENT_PACK_DELAY
, (err, results) ->
processUpdates = (pending) ->
async.eachSeries pending, (doc_id, callback) ->
packDocHistory doc_id, (err, result) ->
if err?
console.log 'error:', err
util.log 'closing db'
console.log "ERROR:", err, result
return callback(err)
else if not options['dry-run'] && doneFile?
fs.appendFileSync doneFile, doc_id + '\n'
if shutdownRequested
return callback('shutdown')
setTimeout () ->
callback(err, result)
, DOCUMENT_PACK_DELAY
, (err, results) ->
if err?
console.log 'error:', err
util.log 'closing db'
db.close()
if options['update']
util.log 'checking for updates'
db.docHistoryStats.find({
update_count: {$gt : PackManager.MIN_COUNT}
}).sort({
update_count:-1
}).limit 1000, (error, results) ->
if err?
utils.log 'error', error
db.close()
return
util.log "found #{results.length} documents to pack"
pending = _.pluck results, 'doc_id'
processUpdates pending
else
todoFile = cli.args[1]
doneFile = cli.args[2]
util.log "reading from #{todoFile}"
util.log "logging progress to #{doneFile}"
fs.appendFileSync doneFile, '# starting pack run at ' + new Date() + '\n'
readFile todoFile, (err, todo) ->
readFile doneFile, (err, done) ->
pending = _.difference todo, done
processUpdates pending