Hi Frank,
On Thu, 2021-11-04 at 13:21 -0400, Frank Ch. Eigler via Elfutils-devel wrote:
> commit 8ce18d0f8a5d7c00dc897f99751d575c8deeae82 (HEAD -> master)
> Author: Frank Ch. Eigler <fche@redhat.com>
> Date: Thu Nov 4 13:08:35 2021 -0400
>
> PR28514: debuginfod: limit groom operation times
>
> For large databases and many stale files, it was possible to starve
> rescan operations by numerous groom "nuke" (database delete ops).
> Under the theory that including new data is at least as important as
> aging old, we now impose a rough deadline on groom queries.
>
> In the process, we discovered that we were commiting some
> undefined-behaviour sqlite ops (deleting rows while iterating), which
> may explain some previous heisenbug occurrences. So the groom nuke
> operations are split into decision & action phases, with associated
> progress-tracking metrics.
>
> Testing the timeout facility requires hand-testing beyond the
> testsuite (since it requires LARGE databases to show measurable query
> times). So confirmed this part by hand.
Makes sense and looks good. Please push.
Thanks,
Mark
@@ -4,6 +4,8 @@ debuginfod-client: Default $DEBUGINFOD_URLS is computed from drop-in files
/etc/debuginfod/*.urls rather than hardcoded into the
/etc/profile.d/debuginfod* scripts.
+debuginfod: Limit the duration of groom ops roughly to rescan (-t) times.
+
Version 0.185
debuginfod-client: Simplify curl handle reuse so downloads which
@@ -1,3 +1,10 @@
+2021-11-04 Frank Ch. Eigler <fche@redhat.com>
+
+ PR28514
+ * debuginfod.cxx (groom): Rework into separate decision/action
+ phases. Add new metrics to monitor progress. Limit indefinite
+ operation times to avoid starving rescan.
+
2021-10-23 Frank Ch. Eigler <fche@redhat.com>
PR28240
@@ -3431,16 +3431,30 @@ void groom()
clock_gettime (CLOCK_MONOTONIC, &ts_start);
// scan for files that have disappeared
- sqlite_ps files (db, "check old files", "select s.mtime, s.file, f.name from "
- BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
- "where f.id = s.file");
- sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
- sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
- sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
- "where file = ? and mtime = ?");
+ sqlite_ps files (db, "check old files",
+ "select distinct s.mtime, s.file, f.name from "
+ BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
+ "where f.id = s.file");
+ // NB: Because _ftime_mtime_scanned can contain both F and
+ // R records for the same file, this query would return duplicates if the
+ // DISTINCT qualifier were not there.
files.reset();
+
+ // DECISION TIME - we enumerate stale fileids/mtimes
+ deque<pair<int64_t,int64_t> > stale_fileid_mtime;
+
+ time_t time_start = time(NULL);
while(1)
{
+ // PR28514: limit grooming iteration to O(rescan time), to avoid
+ // slow filesystem tests over many files locking out rescans for
+ // too long.
+ if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+ {
+ inc_metric("groomed_total", "decision", "aborted");
+ break;
+ }
+
if (interrupted) break;
int rc = files.step();
@@ -3458,19 +3472,67 @@ void groom()
if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) )
{
if (verbose > 2)
- obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
- files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
- files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
- files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+ stale_fileid_mtime.push_back(make_pair(fileid,mtime));
inc_metric("groomed_total", "decision", "stale");
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
}
else
inc_metric("groomed_total", "decision", "fresh");
+
if (sigusr1 != forced_rescan_count) // stop early if scan triggered
break;
}
files.reset();
+ // ACTION TIME
+
+ // Now that we know which file/mtime tuples are stale, actually do
+ // the deletion from the database. Doing this during the SELECT
+ // iteration above results in undefined behaviour in sqlite, as per
+ // https://www.sqlite.org/isolation.html
+
+ // We could shuffle stale_fileid_mtime[] here. It'd let aborted
+ // sequences of nuke operations resume at random locations, instead
+ // of just starting over. But it doesn't matter much either way,
+ // as long as we make progress.
+
+ sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+ sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+ sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
+ "where file = ? and mtime = ?");
+
+ while (! stale_fileid_mtime.empty())
+ {
+ auto stale = stale_fileid_mtime.front();
+ stale_fileid_mtime.pop_front();
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
+ // PR28514: limit grooming iteration to O(rescan time), to avoid
+ // slow nuke_* queries over many files locking out rescans for too
+ // long. We iterate over the files in random() sequence to avoid
+ // partial checks going over the same set.
+ if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
+ {
+ inc_metric("groomed_total", "action", "aborted");
+ break;
+ }
+
+ if (interrupted) break;
+
+ int64_t fileid = stale.first;
+ int64_t mtime = stale.second;
+ files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ inc_metric("groomed_total", "action", "cleaned");
+
+ if (sigusr1 != forced_rescan_count) // stop early if scan triggered
+ break;
+ }
+ stale_fileid_mtime.clear(); // no need for this any longer
+ set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+
// delete buildids with no references in _r_de or _f_de tables;
// cascades to _r_sref & _f_s records
sqlite_ps buildids_del (db, "nuke orphan buildids",
@@ -1,3 +1,8 @@
+2021-11-04 Frank Ch. Eigler <fche@redhat.com>
+
+ PR28514
+ * run-debuginfod-archive-groom.sh: Look for new groom metric.
+
2021-10-23 Frank Ch. Eigler <fche@redhat.com>
PR28240
@@ -150,6 +150,8 @@ kill -USR2 $PID1 # groom cycle
wait_ready $PORT1 'thread_work_total{role="groom"}' 2
# Expect 4 rpms containing 2 buildids to be deleted by the groom
wait_ready $PORT1 'groomed_total{decision="stale"}' 4
+# Expect no more groom actions pending
+wait_ready $PORT1 'thread_work_pending{role="groom"}' 0
rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests