Merge pull request #64 from overleaf/sk-skip-failing-projects-in-swap-job

Skip failing projects in the swap job, and give up on the run after a certain number of failures
This commit is contained in:
Shane Kilkelly 2019-05-29 10:33:06 +01:00 committed by GitHub
commit b6fc48645a

View file

@ -14,6 +14,7 @@ import java.io.InputStream;
import java.sql.Timestamp; import java.sql.Timestamp;
import java.time.Duration; import java.time.Duration;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Timer; import java.util.Timer;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -105,6 +106,8 @@ public class SwapJobImpl implements SwapJob {
} }
private void doSwap_() { private void doSwap_() {
ArrayList<String> exceptionProjectNames = new ArrayList<String>();
Log.info("Running swap number {}", swaps.get() + 1); Log.info("Running swap number {}", swaps.get() + 1);
long totalSize = repoStore.totalSize(); long totalSize = repoStore.totalSize();
Log.info("Size is {}/{} (high)", totalSize, highWatermarkBytes); Log.info("Size is {}/{} (high)", totalSize, highWatermarkBytes);
@ -114,15 +117,40 @@ public class SwapJobImpl implements SwapJob {
return; return;
} }
int numProjects = dbStore.getNumProjects(); int numProjects = dbStore.getNumProjects();
// while we have too many projects on disk
while ( while (
(totalSize = repoStore.totalSize()) > lowWatermarkBytes && (totalSize = repoStore.totalSize()) > lowWatermarkBytes &&
(numProjects = dbStore.getNumUnswappedProjects()) > minProjects (numProjects = dbStore.getNumUnswappedProjects()) > minProjects
) { ) {
// check if we've had too many exceptions so far
if (exceptionProjectNames.size() >= 20) {
StringBuilder sb = new StringBuilder();
for (String s: exceptionProjectNames) {
sb.append(s);
sb.append(' ');
}
Log.error(
"Too many exceptions while running swap, giving up on this run: {}",
sb.toString()
);
break;
}
// get the oldest project and try to swap it
String projectName = dbStore.getOldestUnswappedProject(); String projectName = dbStore.getOldestUnswappedProject();
try { try {
evict(projectName); evict(projectName);
} catch (IOException e) { } catch (Throwable t) {
Log.warn("[{}] Exception while swapping, giving up", projectName, e); Log.warn("[{}] Exception while swapping, mark project and move on", projectName, t);
// NOTE: this is something of a hack. If a project fails to swap we get stuck in a
// loop where `dbStore.getOldestUnswappedProject()` gives the same failing project over and over again,
// which fills up the disk with errors. By touching the access time we can mark the project as a
// non-candidate for swapping. Ideally we should be checking the logs for these log events and fixing
// whatever is wrong with the project
dbStore.setLastAccessedTime(
projectName,
Timestamp.valueOf(LocalDateTime.now())
);
exceptionProjectNames.add(projectName);
} }
} }
if (totalSize > lowWatermarkBytes) { if (totalSize > lowWatermarkBytes) {