PR27711

Message ID CAJXA7qh9xksDf7qzf19GKN5QFGcqYO3FqWbU28WBL=E3_u6ugQ@mail.gmail.com
State Superseded
Headers
Series PR27711 |

Commit Message

Noah Sanci July 1, 2021, 8:38 p.m. UTC
  }
@@ -293,7 +294,7 @@ kill -USR1 $PID1
 wait_ready $PORT1 'thread_work_total{role="traverse"}' 3
 wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
 wait_ready $PORT1 'thread_busy{role="scan"}' 0
-
+cp $DB $DB.backup
 # Rerun same tests for the prog2 binary
 filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v
debuginfo $BUILDID2 2>vlog`
 cmp $filename F/prog2
@@ -705,4 +706,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/"
 filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find
source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c`
 cmp $filename ${local_dir}/main.c

-exit 0
+########################################################################
+## PR27711
+# Test to ensure the -A removes files from the index using a given regex
+while true; do
+    PORT3=`expr '(' $RANDOM % 1000 ')' + 9000`
+    ss -atn | fgrep ":$PORT3" || break
+done
+env LD_LIBRARY_PATH=$ldpath
DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/"
${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0
--regex-groom --include="^$" --exclude=".*"  -d $DB.backup >
vlog$PORT3 2>&1 &
+PID4=$!
+wait_ready $PORT3 'ready' 1
+tempfiles vlog$PORT3
+errfiles vlog$PORT3
+
+kill -USR2 $PID4
+wait_ready $PORT3 'thread_work_total{role="groom"}' 1
+wait_ready $PORT3 'groom{statistic="archive d/e"}'  0
+wait_ready $PORT3 'groom{statistic="archive sdef"}' 0
+wait_ready $PORT3 'groom{statistic="archive sref"}' 0
+wait_ready $PORT3 'groom{statistic="buildids"}' 0
+wait_ready $PORT3 'groom{statistic="file d/e"}' 0
+wait_ready $PORT3 'groom{statistic="file s"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0
+
+kill $PID4
+exit 0;
  

Comments

Mark Wielaard July 7, 2021, 3:28 p.m. UTC | #1
Hi Noah,

If possible, please add a commit message and send the patch using git
send-email or git format-patch. Feel free to use the description of 
https://sourceware.org/bugzilla/show_bug.cgi?id=27711#c0 that Frank
wrote as commit message. It clearly explains the intention.

On Thu, 2021-07-01 at 16:38 -0400, Noah Sanci via Elfutils-devel wrote:
> diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
> index 286c910a..5afbafcd 100644
> --- a/debuginfod/ChangeLog
> +++ b/debuginfod/ChangeLog
> @@ -1,3 +1,8 @@
> +2021-07-01  Noah Sanci <nsanci@redhat.com>
> +       PR27711
> +       * debuginfod.cxx (groom): Allowed the use of regexes during the
> +       grooming process.

Slightly better would be to describe the changes. e.g.

  (options): Add --regex-groom, -r option.
  (regex_groom): New static bool defaults to false.
  (parse_opt): Handle 'r' option by setting regex_groom to true.
  (groom): Introduce and use reg_include and reg_exclude.

>  2021-06-03  Frank Ch. Eigler <fche@redhat.com>
> 
>         PR27863
> diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
> index 543044c6..4f7fd2d5 100644
> --- a/debuginfod/debuginfod.cxx
> +++ b/debuginfod/debuginfod.cxx
> @@ -360,6 +360,7 @@ static const struct argp_option options[] =
>     { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
>     { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
>     { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
> +   { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X
> arguments to groom the database.",0},
>  #define ARGP_KEY_FDCACHE_FDS 0x1001
>     { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number
> of archive files to keep in fdcache.", 0 },
>  #define ARGP_KEY_FDCACHE_MBS 0x1002
> @@ -407,6 +408,7 @@ static map<string,string> scan_archives;
>  static vector<string> extra_ddl;
>  static regex_t file_include_regex;
>  static regex_t file_exclude_regex;
> +static bool regex_groom = false;
>  static bool traverse_logical;
>  static long fdcache_fds;
>  static long fdcache_mbs;
> @@ -527,6 +529,9 @@ parse_opt (int key, char *arg,
>        if (rc != 0)
>          argp_failure(state, 1, EINVAL, "regular expression");
>        break;
> +    case 'r':
> +      regex_groom = true;
> +      break;
>      case ARGP_KEY_FDCACHE_FDS:
>        fdcache_fds = atol (arg);
>        break;
> @@ -3249,8 +3254,11 @@ void groom()
>        int64_t fileid = sqlite3_column_int64 (files, 1);
>        const char* filename = ((const char*) sqlite3_column_text
> (files, 2) ?: "");
>        struct stat s;
> +      bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
> +      bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
> +
>        rc = stat(filename, &s);
> -      if (rc < 0 || (mtime != (int64_t) s.st_mtime))
> +      if ( (regex_groom && reg_exclude && !reg_include) ||  rc < 0 ||
> (mtime != (int64_t) s.st_mtime) )
>          {

OK, so we groom the file as before when
  rc < 0 || (mtime != (int64_t) s.st_mtime)
but also (if -r is given) when the file matches the exclude regexp but
not the include one.

So if I read this right, an exclude regexp match means groom that file,
but an include regexp match means don't groom (unless it has
disappeared on its own).

>            if (verbose > 2)
>              obatched(clog) << "groom: forgetting file=" << filename
> << " mtime=" << mtime << endl;
> @@ -3261,7 +3269,6 @@ void groom()
>          }
>        else
>          inc_metric("groomed_total", "decision", "fresh");
> -
>        if (sigusr1 != forced_rescan_count) // stop early if scan triggered
>          break;
>      }

Spurious line removed?

> diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
> index 1ba42cf6..1adf703a 100644
> --- a/doc/debuginfod.8
> +++ b/doc/debuginfod.8
> @@ -159,6 +159,9 @@ scan, independent of the rescan time (including if
> it was zero),
>  interrupting a groom pass (if any).
> 
>  .TP
> +.B "\-r"
> +Apply the -I and -X during groom cycles, so that files excluded by
> the regexes are removed from the index. These parameters are in
> addition to what normally qualifies a file for grooming, not a
> replacement.
> +

OK. That matches my reading of the code. Good.

>  .B "\-g SECONDS" "\-\-groom\-time=SECONDS"
>  Set the groom time for the index database.  This is the amount of time
>  the grooming thread will wait after finishing a grooming pass before
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index d8fa97fa..346b9e6e 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,8 @@
> +2021-07-01  Noah Sanci <nsanci@redhat.com>
> +       PR2711
> +       * run-debuginfod-find.sh: Added test case for grooming the database
> +       using regexes.
> +
>  2021-06-16  Frank Ch. Eigler <fche@redhat.com>
> 
>         * run-debuginfod-find.sh: Fix intermittent groom/stale failure,
> diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
> index 456dc2f8..bd78bf46 100755
> --- a/tests/run-debuginfod-find.sh
> +++ b/tests/run-debuginfod-find.sh
> @@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
>  PID1=0
>  PID2=0
>  PID3=0
> +PID4=0
> 
>  cleanup()
>  {
> -  if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi
> -  if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi
> -  if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi
> -
> +  if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi
> +  if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
> +  if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
> +  if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi
>    rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree
> ${PWD}/.client_cache* ${PWD}/tmp*
>    exit_cleanup
>  }
> @@ -293,7 +294,7 @@ kill -USR1 $PID1
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 3
>  wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
>  wait_ready $PORT1 'thread_busy{role="scan"}' 0
> -
> +cp $DB $DB.backup
>  # Rerun same tests for the prog2 binary
>  filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v
> debuginfo $BUILDID2 2>vlog`
>  cmp $filename F/prog2
> @@ -705,4 +706,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/"
>  filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find
> source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c`
>  cmp $filename ${local_dir}/main.c
> 
> -exit 0
> +########################################################################
> +## PR27711
> +# Test to ensure the -A removes files from the index using a given regex
> +while true; do
> +    PORT3=`expr '(' $RANDOM % 1000 ')' + 9000`
> +    ss -atn | fgrep ":$PORT3" || break
> +done
> +env LD_LIBRARY_PATH=$ldpath
> DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/"
> ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0
> --regex-groom --include="^$" --exclude=".*"  -d $DB.backup >
> vlog$PORT3 2>&1 &
> +PID4=$!
> +wait_ready $PORT3 'ready' 1
> +tempfiles vlog$PORT3
> +errfiles vlog$PORT3
> +
> +kill -USR2 $PID4
> +wait_ready $PORT3 'thread_work_total{role="groom"}' 1
> +wait_ready $PORT3 'groom{statistic="archive d/e"}'  0
> +wait_ready $PORT3 'groom{statistic="archive sdef"}' 0
> +wait_ready $PORT3 'groom{statistic="archive sref"}' 0
> +wait_ready $PORT3 'groom{statistic="buildids"}' 0
> +wait_ready $PORT3 'groom{statistic="file d/e"}' 0
> +wait_ready $PORT3 'groom{statistic="file s"}' 0
> +wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0
> +wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0
> +
> +kill $PID4
> +exit 0;

OK, so this checks that nothing gets included (kept), because the
include regexp is ^$, and that everything gets excluded
(removed/groomed), because the exclude regexp is .*?

Thanks,

Mark
  

Patch

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 286c910a..5afbafcd 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,8 @@ 
+2021-07-01  Noah Sanci <nsanci@redhat.com>
+       PR27711
+       * debuginfod.cxx (groom): Allowed the use of regexes during the
+       grooming process.
+
 2021-06-03  Frank Ch. Eigler <fche@redhat.com>

        PR27863
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 543044c6..4f7fd2d5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -360,6 +360,7 @@  static const struct argp_option options[] =
    { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
    { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
    { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
+   { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X
arguments to groom the database.",0},
 #define ARGP_KEY_FDCACHE_FDS 0x1001
    { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number
of archive files to keep in fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_MBS 0x1002
@@ -407,6 +408,7 @@  static map<string,string> scan_archives;
 static vector<string> extra_ddl;
 static regex_t file_include_regex;
 static regex_t file_exclude_regex;
+static bool regex_groom = false;
 static bool traverse_logical;
 static long fdcache_fds;
 static long fdcache_mbs;
@@ -527,6 +529,9 @@  parse_opt (int key, char *arg,
       if (rc != 0)
         argp_failure(state, 1, EINVAL, "regular expression");
       break;
+    case 'r':
+      regex_groom = true;
+      break;
     case ARGP_KEY_FDCACHE_FDS:
       fdcache_fds = atol (arg);
       break;
@@ -3249,8 +3254,11 @@  void groom()
       int64_t fileid = sqlite3_column_int64 (files, 1);
       const char* filename = ((const char*) sqlite3_column_text
(files, 2) ?: "");
       struct stat s;
+      bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
+      bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
+
       rc = stat(filename, &s);
-      if (rc < 0 || (mtime != (int64_t) s.st_mtime))
+      if ( (regex_groom && reg_exclude && !reg_include) ||  rc < 0 ||
(mtime != (int64_t) s.st_mtime) )
         {
           if (verbose > 2)
             obatched(clog) << "groom: forgetting file=" << filename
<< " mtime=" << mtime << endl;
@@ -3261,7 +3269,6 @@  void groom()
         }
       else
         inc_metric("groomed_total", "decision", "fresh");
-
       if (sigusr1 != forced_rescan_count) // stop early if scan triggered
         break;
     }
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 1ba42cf6..1adf703a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -159,6 +159,9 @@  scan, independent of the rescan time (including if
it was zero),
 interrupting a groom pass (if any).

 .TP
+.B "\-r"
+Apply the -I and -X during groom cycles, so that files excluded by
the regexes are removed from the index. These parameters are in
addition to what normally qualifies a file for grooming, not a
replacement.
+
 .B "\-g SECONDS" "\-\-groom\-time=SECONDS"
 Set the groom time for the index database.  This is the amount of time
 the grooming thread will wait after finishing a grooming pass before
diff --git a/tests/ChangeLog b/tests/ChangeLog
index d8fa97fa..346b9e6e 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@ 
+2021-07-01  Noah Sanci <nsanci@redhat.com>
+       PR2711
+       * run-debuginfod-find.sh: Added test case for grooming the database
+       using regexes.
+
 2021-06-16  Frank Ch. Eigler <fche@redhat.com>

        * run-debuginfod-find.sh: Fix intermittent groom/stale failure,
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 456dc2f8..bd78bf46 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -36,13 +36,14 @@  export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
 PID1=0
 PID2=0
 PID3=0
+PID4=0

 cleanup()
 {
-  if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi
-  if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi
-  if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi
-
+  if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi
+  if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
+  if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
+  if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi
   rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree
${PWD}/.client_cache* ${PWD}/tmp*
   exit_cleanup