[Bug,debuginfod/28577] Make run-...-fd-prefetch-caches.sh test something

Message ID CAJXA7qhyJt+sGdp9zxGapxiOP95b+jz2cJC9NYBWyBw8SLHy5g@mail.gmail.com
State Committed
Headers
Series [Bug,debuginfod/28577] Make run-...-fd-prefetch-caches.sh test something |

Commit Message

Noah Sanci May 25, 2022, 6:44 p.m. UTC
  PR28577: Make run-debuginfod-fd-prefetch-caches.sh test
             something

Update to the run-debuginfod-fd-prefetch to make the test more sound.
  

Comments

Mark Wielaard May 27, 2022, 4:20 p.m. UTC | #1
Hi Noah,

On Wed, 2022-05-25 at 14:44 -0400, Noah Sanci via Elfutils-devel wrote:
> PR28577: Make run-debuginfod-fd-prefetch-caches.sh test
>              something
> 
> Update to the run-debuginfod-fd-prefetch to make the test more sound.

Thanks. And according to Evgeny this makes the test pass for him:
https://sourceware.org/bugzilla/show_bug.cgi?id=29180#c2

But I find it a little hard to review, it would have been helpful to
get a bit more comments/documentation what exactly is being tested now.

> diff --git a/tests/run-debuginfod-fd-prefetch-caches.sh b/tests/run-
> debuginfod-fd-prefetch-caches.sh
> index 66a7a083..50ffc4ac 100755
> --- a/tests/run-debuginfod-fd-prefetch-caches.sh
> +++ b/tests/run-debuginfod-fd-prefetch-caches.sh
> @@ -21,22 +21,22 @@
>  # for test case debugging, uncomment:
>  set -x
>  unset VALGRIND_CMD
> +mkdir R Z

OK, some boilerplate.

> -mkdir R
> -cp -rvp ${abs_srcdir}/debuginfod-rpms R
> -if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
> -    rm -vrf R/debuginfod-rpms/fedora31
> -fi

This gets moved to below.

> -FDCACHE_MBS=$((1/1024/1024))
> +FDCACHE_MBS=1
>  FDCACHE_FDS=1
> -PREFETCH_MBS=$((1/1024/1024))
> -PREFETCH_FDS=2
> +PREFETCH_MBS=1
> +PREFETCH_FDS=1
>  PREFETCH=1

OK, new presets.

>  # This variable is essential and ensures no time-race for claiming
> ports occurs
>  # set base to a unique multiple of 100 not used in any other 'run-
> debuginfod-*' test
>  base=8800
>  get_ports
> +cp -rvp ${abs_srcdir}/debuginfod-rpms R
> +if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
> +    rm -vrf R/debuginfod-rpms/fedora31
> +fi
> +cp -rvp ${abs_srcdir}/debuginfod-tars Z

OK, put rpms under R, tars under Z.
 
>  DB=${PWD}/.debuginfod_tmp.sqlite
>  tempfiles $DB
> @@ -47,88 +47,68 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
>  ######
>  rm -rf $DEBUGINFOD_CACHE_PATH
>  rm -rf $DB
> +
>  # Set mb values to ensure the caches certainly have enough space
>  # To store the test files
>  env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d $DB \
> -    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-fds=$PREFETCH_FDS --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R \
> -    --fdcache-mbs=50  --fdcache-prefetch-mbs=50 \
> -    --fdcache-prefetch=$PREFETCH  > vlog$PORT1 2>&1 &
> +    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-fds=$PREFETCH_FDS  -vvvvv -g 0 -t 0 -R R \
> +    -Z .tar.bz2=bzcat Z  --fdcache-mbs=50  --fdcache-prefetch-mbs=100 \
> +    --fdcache-prefetch=$PREFETCH -F > vlog$PORT1 2>&1 &

Now run with --fdcache-fds=1 --fdcache-mbs=50 and
-fdcache-prefetch-fds=1 fdcache-prefetch=1 -fdcache-prefetch-mbs=100

It is these last 3 we want to test.

>  PID1=$!
>  tempfiles vlog$PORT1
>  errfiles vlog$PORT1
> +
>  # Server must become ready
>  wait_ready $PORT1 'ready' 1
>  ########################################################################
> -cp -rvp ${abs_srcdir}/debuginfod-rpms R
> -if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
> -    rm -vrf R/debuginfod-rpms/fedora31
> -fi
>  kill -USR1 $PID1
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
> +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
> +wait_ready $PORT1 'thread_busy{role="scan"}' 0

OK, boilerplate to wait for initial scan.

>  # All rpms need to be in the index, except the dummy permission-000 one
>  rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
>  wait_ready $PORT1 'scanned_files_total{source=".rpm archive"}' $rpms
> -kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs
> -# Wait till both files are in the index and scan/index fully finished
> +kill -USR1 $PID1 
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
> -export DEBUGINFOD_URLS="http://127.0.0.1:"$PORT1
> +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
> +wait_ready $PORT1 'thread_busy{role="scan"}' 0

Do another scan and wait for it to be ready.

> -archive_hits=5
> -SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
> -for i in $archive_hits
> -do
> -  archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
> -done
> +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/
> +# load prefetch cache
> +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb

OK, which particular file is this?

>  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> -regex="fdcache_op_count\{op=\"enqueue\"\} ([0-9]+).*fdcache_op_count\{op=\"evict\"\} ([0-9]+).*"
> -enqueue=0
> -if [[ $metrics =~ $regex ]]
> -then
> -   enqueue=${BASH_REMATCH[1]}
> -   evict=${BASH_REMATCH[2]}
> +regex="fdcache_prefetch_count ([0-9])+"
> +# Check to see if prefetch cache is loaded
> +if [[ $metrics =~ $regex ]]; then
> +  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
> +    err
> +  fi

Why the -ne 1 ? What number are we expecting and why?

>  else
> -   err
> -fi
> -# This is an ad-hoc test than only works when FDCACHE_FDS < "archive_test"s above (5)
> -# otherwise there will be no eviction
> -if [[ $(( $enqueue-$FDCACHE_FDS )) -ne $evict ]]
> -then
> -   err
> -fi
> -# Test prefetch cache
> -archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
> +  err
> +fi 
> +# Ensure that searching for the source prefetched finds it in the prefetch cache
> +testrun ${abs_top_builddir}/debuginfod/debuginfod-find source cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb /usr/src/debug/hello.c
>  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> -regex="fdcache_prefetch_count ([0-9])+"
> -pf_count=0
> -for i in $FDCACHE_FDS
> -do
> -  if [[ $metrics =~ $regex ]]; then
> -
> -    if [[ $(( $pf_count+$PREFETCH )) -ne ${BASH_REMATCH[1]} ]]; then
> -      err
> -    else
> -      pf_count=${BASH_REMATCH[1]}
> -    fi
> -
> -  else
> +regex="fdcache_op_count\{op=\"prefetch_access\"\} ([0-9])+"
> +if [[ $metrics =~ $regex ]]; then
> +  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
>      err
>    fi
> -done
> +else
> +  err
> +fi

Again -ne 1? Shouldn't it be different from the above now?
 
>  kill $PID1
>  wait $PID1
>  PID1=0
>  
> -
>  #########
> -# Test mb limit on fd and prefetch cache
> +# Test mb limit on fd cache
>  #########
> -
>  rm -rf $DEBUGINFOD_CACHE_PATH
>  rm -rf $DB
>  env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d $DB \
> -    --fdcache-mbs=1 --fdcache-prefetch-mbs=1 --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R \
> -    > vlog2$PORT1 2>&1 &
> +    --fdcache-mbs=$FDCACHE_MBS --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R > vlog2$PORT1 2>&1 &
>  PID1=$!

OK start a fresh debuginfod with -fdcache-mbs=1 --fdcache-mintmp 0
Is the fdcache-mintmp 0 meant to clear the cache immediately?

>  tempfiles vlog2$PORT1
>  errfiles vlog2$PORT1
> @@ -139,26 +119,26 @@ wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
>  # All rpms need to be in the index, except the dummy permission-000 one
>  rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
>  wait_ready $PORT1 'scanned_files_total{source=".rpm archive"}' $rpms
> -kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs
> -# Wait till both files are in the index and scan/index fully finished
> +kill -USR1 $PID1
>  wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
> +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
> +wait_ready $PORT1 'thread_busy{role="scan"}' 0

OK, boilerplate again for triggering and waiting for a new scan to
complete.

> -archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
> +# This many archives cause the fd cache to be loaded with just over 1mb of
> +# files. So we expect the 1mb cap off
> +SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
>  archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
>  archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
>  archive_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
>  archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
>  archive_test f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2 /usr/src/debug/hello-1.0/hello.c $SHA
>  archive_test bbbf92ebee5228310e398609c23c2d7d53f6e2f9 /usr/src/debug/hello-1.0/hello.c $SHA
> -archive_test d44d42cbd7d915bc938c81333a21e355a6022fb7 /usr/src/debug/hello-1.0/hello.c $SHA

OK, test that the hello.c source file is found in the scanned archives,
that the fdcache is used and that the sha1sum checks out.

>  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> -regex="fdcache_bytes ([0-9]+).*fdcache_prefetch_bytes ([0-9]+)"
> +regex="fdcache_bytes ([0-9]+)"
>  mb=$((1024*1024))
>  if [[ $metrics =~ $regex ]]; then
>    fdbytes=${BASH_REMATCH[1]}
> -  pfbytes=${BASH_REMATCH[2]}
> -  if [ $fdbytes -gt $mb ] || [ $pfbytes -gt $mb ]; then
> +  if [ $fdbytes -gt $mb ] ; then
>      err
>    fi

OK, check that the prefetch cache isn't bigger than 10MB. Why that
number?

Thanks,

Mark
  
Mark Wielaard May 30, 2022, 3:36 p.m. UTC | #2
Hi Noah,

Hope you don't mind me adding elfutils-devel back to the CC.

On Fri, 2022-05-27 at 15:32 -0400, Noah Sanci wrote:
> On Fri, May 27, 2022 at 12:29 PM Mark Wielaard <mark@klomp.org>
> wrote:
> 
> > On Wed, 2022-05-25 at 14:44 -0400, Noah Sanci via Elfutils-devel
> > wrote:
> > > PR28577: Make run-debuginfod-fd-prefetch-caches.sh test
> > >              something
> > > 
> > > Update to the run-debuginfod-fd-prefetch to make the test more
> > > sound.
> > 
> > Thanks. And according to Evgeny this makes the test pass for him:
> > https://sourceware.org/bugzilla/show_bug.cgi?id=29180#c2
> 
> Good to hear.
> > But I find it a little hard to review, it would have been helpful
> > to
> > get a bit more comments/documentation what exactly is being tested
> > now.
> 
> Fair, I should have made what was being tested/why clearer.

Thanks. I also understand now that I read the test logic the wrong way
around.

> > > diff --git a/tests/run-debuginfod-fd-prefetch-caches.sh
> > > b/tests/run-
> > > debuginfod-fd-prefetch-caches.sh
> > > index 66a7a083..50ffc4ac 100755
> > > --- a/tests/run-debuginfod-fd-prefetch-caches.sh
> > > +++ b/tests/run-debuginfod-fd-prefetch-caches.sh
> > > @@ -21,22 +21,22 @@
> > >  # for test case debugging, uncomment:
> > >  set -x
> > >  unset VALGRIND_CMD
> > > +mkdir R Z
> > 
> > OK, some boilerplate.
> > > -mkdir R
> > > -cp -rvp ${abs_srcdir}/debuginfod-rpms R
> > > -if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
> > > -    rm -vrf R/debuginfod-rpms/fedora31
> > > -fi
> > 
> > This gets moved to below.
> 
> The two above changes should look better now
> 
> > >  DB=${PWD}/.debuginfod_tmp.sqlite
> > >  tempfiles $DB
> > > @@ -47,88 +47,68 @@ export
> > > DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
> > >  ######
> > >  rm -rf $DEBUGINFOD_CACHE_PATH
> > >  rm -rf $DB
> > > +
> > >  # Set mb values to ensure the caches certainly have enough space
> > >  # To store the test files
> > >  env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS=
> > > ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d
> > > $DB \
> > > -    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-
> > > fds=$PREFETCH_FDS --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R \
> > > -    --fdcache-mbs=50  --fdcache-prefetch-mbs=50 \
> > > -    --fdcache-prefetch=$PREFETCH  > vlog$PORT1 2>&1 &
> > > +    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-
> > > fds=$PREFETCH_FDS  -vvvvv -g 0 -t 0 -R R \
> > > +    -Z .tar.bz2=bzcat Z  --fdcache-mbs=50  --fdcache-prefetch-
> > > mbs=100 \
> > > +    --fdcache-prefetch=$PREFETCH -F > vlog$PORT1 2>&1 &
> > 
> > Now run with --fdcache-fds=1 --fdcache-mbs=50 and
> > -fdcache-prefetch-fds=1 fdcache-prefetch=1 -fdcache-prefetch-
> > mbs=100
> > 
> > It is these last 3 we want to test.
> 
> This particular instance of debuginfod was intended to test
> fdcache-fds and prefetch-fds.
> The test is just testing prefetch-fds. fdcache-fds was tested in a
> different diff, but because the
> fdcache is just the basic lru the test was removed. Mb values are set
> high enough so that no
> files will be excluded for being too large. Maybe there should be a
> test prior to this for just
> fdcache-prefetch as it determines how much is prefetched.

OK. I think no extra test is needed.

> > >  PID1=$!
> > >  tempfiles vlog$PORT1
> > >  errfiles vlog$PORT1
> > > +
> > >  # Server must become ready
> > >  wait_ready $PORT1 'ready' 1
> > >  ################################################################
> > > ########
> > > -cp -rvp ${abs_srcdir}/debuginfod-rpms R
> > > -if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
> > > -    rm -vrf R/debuginfod-rpms/fedora31
> > > -fi
> > >  kill -USR1 $PID1
> > >  wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
> > > +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
> > > +wait_ready $PORT1 'thread_busy{role="scan"}' 0
> > 
> > OK, boilerplate to wait for initial scan.
> 
> Yes, Frank told me that just checking traverse is insufficient to
> conclude a
> scan has completed.
> > > -archive_hits=5
> > > -SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
> > > -for i in $archive_hits
> > > -do
> > > -  archive_test c36708a78618d597dee15d0dc989f093ca5f9120
> > > /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
> > > -done
> > > +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/
> > > +# load prefetch cache
> > > +testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo
> > > cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb
> > 
> > OK, which particular file is this?
> 
> pk.pkg.tar/usr/src/debug/hello.c, now elaborated in comment

Thanks.

> > >  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> > > -regex="fdcache_op_count\{op=\"enqueue\"\} ([0-
> > > 9]+).*fdcache_op_count\{op=\"evict\"\} ([0-9]+).*"
> > > -enqueue=0
> > > -if [[ $metrics =~ $regex ]]
> > > -then
> > > -   enqueue=${BASH_REMATCH[1]}
> > > -   evict=${BASH_REMATCH[2]}
> > > +regex="fdcache_prefetch_count ([0-9])+"
> > > +# Check to see if prefetch cache is loaded
> > > +if [[ $metrics =~ $regex ]]; then
> > > +  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
> > > +    err
> > > +  fi
> > 
> > Why the -ne 1 ? What number are we expecting and why?
> 
> Ensuring that prefetch is loaded, but not loaded more than it is
> intended to hold.
> debuginfod will try to load 2 files into prefetch, so check to ensure
> that only 1 is loaded.
> Explained in comment.

Right. I misread the test condition. This makes sense now.

> > >  else
> > > -   err
> > > -fi
> > > -# This is an ad-hoc test than only works when FDCACHE_FDS <
> > > "archive_test"s above (5)
> > > -# otherwise there will be no eviction
> > > -if [[ $(( $enqueue-$FDCACHE_FDS )) -ne $evict ]]
> > > -then
> > > -   err
> > > -fi
> > > -# Test prefetch cache
> > > -archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30
> > > /usr/src/debug/hello-1.0/hello.c $SHA
> > > +  err
> > > +fi
> > > +# Ensure that searching for the source prefetched finds it in
> > > the prefetch cache
> > > +testrun ${abs_top_builddir}/debuginfod/debuginfod-find source
> > > cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb /usr/src/debug/hello.c
> > >  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> > > -regex="fdcache_prefetch_count ([0-9])+"
> > > -pf_count=0
> > > -for i in $FDCACHE_FDS
> > > -do
> > > -  if [[ $metrics =~ $regex ]]; then
> > > -
> > > -    if [[ $(( $pf_count+$PREFETCH )) -ne ${BASH_REMATCH[1]} ]];
> > > then
> > > -      err
> > > -    else
> > > -      pf_count=${BASH_REMATCH[1]}
> > > -    fi
> > > -
> > > -  else
> > > +regex="fdcache_op_count\{op=\"prefetch_access\"\} ([0-9])+"
> > > +if [[ $metrics =~ $regex ]]; then
> > > +  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
> > >      err
> > >    fi
> > > -done
> > > +else
> > > +  err
> > > +fi
> > 
> > Again -ne 1? Shouldn't it be different from the above now?
> 
> Now we're seeing if debuginfod is actually using the prefetch cache
> by checking the access count.
> We're searching for the 1 file we loaded before so we expect 1
> access.
> Explained in comment

Thanks.

> > >  metrics=$(curl http://127.0.0.1:$PORT1/metrics)
> > > -regex="fdcache_bytes ([0-9]+).*fdcache_prefetch_bytes ([0-9]+)"
> > > +regex="fdcache_bytes ([0-9]+)"
> > >  mb=$((1024*1024))
> > >  if [[ $metrics =~ $regex ]]; then
> > >    fdbytes=${BASH_REMATCH[1]}
> > > -  pfbytes=${BASH_REMATCH[2]}
> > > -  if [ $fdbytes -gt $mb ] || [ $pfbytes -gt $mb ]; then
> > > +  if [ $fdbytes -gt $mb ] ; then
> > >      err
> > >    fi
> > 
> > OK, check that the prefetch cache isn't bigger than 10MB. Why that
> > number?
> 
> mb should equal 1mb in bytes as this is the maximum space allocated
> to both the prefetch and fd cache. and curling metrics returns memory
> used in bytes

Thanks. I was off by a factor 10. Sorry.

> Please find the new patch attached.

Thanks for adding all the comments. Looks good.

Cheers,

Mark
  
Mark Wielaard June 3, 2022, 2:49 p.m. UTC | #3
Hi Noah,

On Mon, 2022-05-30 at 17:36 +0200, Mark Wielaard wrote:
> > Please find the new patch attached.
> 
> Thanks for adding all the comments. Looks good.

The only issue is that although it seems to work well on Fedora I am
having problems getting to test to work on an older CentOS 7 install.

It always seems to fail at this point:

 export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/
 # load prefetch cache with debuginfod-tars/usr/src/debug/hello.c
 testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb
 metrics=$(curl http://127.0.0.1:$PORT1/metrics)
 regex="fdcache_prefetch_count ([0-9])+"
 # Check to see if prefetch cache is maximally loaded. Note fdcache-prefetch (2) > prefetch-fds (1),
# so the debuginfod will try to load the prefetch cache with 2 files. We want prefetch-fds to cap that
# off
if [[ $metrics =~ $regex ]]; then
  if [[ ${BASH_REMATCH[1]} -ne $PREFETCH_FDS ]]; then
    err
  fi
else
  err
fi

Because I am getting:

++ curl http://127.0.0.1:8819/metrics
  % Total    % Received % Xferd  Average
Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left 
 Speed
^M  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:
--:--     0^M100  5793  100  5793    0     0  7532k      0 --:--:-- --
:--:-- --:--:-- 5657k
+ metrics='fdcache_bytes 0
fdcache_count 0
fdcache_op_count{op="emerg-flush"} 3
fdcache_op_count{op="enqueue"} 1
fdcache_op_count{op="evict"} 1
fdcache_op_count{op="prefetch_enqueue"} 1
fdcache_op_count{op="prefetch_evict"} 1
fdcache_op_count{op="probe_miss"} 2
fdcache_prefetch_bytes 0
fdcache_prefetch_count 0
[...]

So no bytes are prefetched at all.

I am not sure why this is happening. Do you see any obvious issue that
would explain this happening on centos7?

Thanks,

Mark
  
Mark Wielaard June 3, 2022, 3:02 p.m. UTC | #4
Hi Noah,

Adding the mailinglist back to CC so other can jump in if there is
anything obvious from the logs.

On Fri, 2022-06-03 at 10:51 -0400, Noah Sanci wrote:
> Nothing immediately jumps out, would you mind sending the entire log?

Attached.

Cheers,

Mark
  
Mark Wielaard June 3, 2022, 4:22 p.m. UTC | #5
Hi,

On Fri, 2022-06-03 at 17:02 +0200, Mark Wielaard wrote:
> On Fri, 2022-06-03 at 10:51 -0400, Noah Sanci wrote:
> > Nothing immediately jumps out, would you mind sending the entire
> > log?
> 
> Attached.

And Noah figured it out. The machine where it failed has /tmp 92% full
and the default mintmp is 25%. Which triggers an fdcache emergency
flush. So with this it PASSes everywhere:

diff --git a/tests/run-debuginfod-fd-prefetch-caches.sh b/tests/run-debuginfod-fd-prefetch-caches.sh
index 8778d3a2..a538cd48 100755
--- a/tests/run-debuginfod-fd-prefetch-caches.sh
+++ b/tests/run-debuginfod-fd-prefetch-caches.sh
@@ -50,10 +50,12 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
 rm -rf $DEBUGINFOD_CACHE_PATH
 rm -rf $DB
 # Testing prefetch fd maximum (Set mb maximums to be beyond consideration)
+# Set --fdcache-mintmp=0 so we don't accidentially trigger an fdcache
+# emergency flush for filling tmpdir
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d $DB \
     --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-fds=$PREFETCH_FDS  -vvvvv -g 0 -t 0 \
     -Z .tar.bz2=bzcat Z  --fdcache-mbs=100  --fdcache-prefetch-mbs=100 \
-    --fdcache-prefetch=$PREFETCH > vlog$PORT1 2>&1 &
+    --fdcache-mintmp=0 --fdcache-prefetch=$PREFETCH > vlog$PORT1 2>&1 &
 PID1=$!
 tempfiles vlog$PORT1
 errfiles vlog$PORT1

Pushed with that change.

Thanks,

Mark
  

Patch

From b84dd9b2b36812ea299ff189c7e5557863dd82d2 Mon Sep 17 00:00:00 2001
From: Noah Sanci <nsanci@redhat.com>
Date: Fri, 13 May 2022 14:32:27 -0400
Subject: [PATCH] PR28577: Make run-debuginfod-fd-prefetch-caches.sh test 
  something

Update to the run-debuginfod-fd-prefetch to make the test more
sound.
---
 tests/run-debuginfod-fd-prefetch-caches.sh | 116 +++++++++------------
 1 file changed, 48 insertions(+), 68 deletions(-)

diff --git a/tests/run-debuginfod-fd-prefetch-caches.sh b/tests/run-debuginfod-fd-prefetch-caches.sh
index 66a7a083..50ffc4ac 100755
--- a/tests/run-debuginfod-fd-prefetch-caches.sh
+++ b/tests/run-debuginfod-fd-prefetch-caches.sh
@@ -21,22 +21,22 @@ 
 # for test case debugging, uncomment:
 set -x
 unset VALGRIND_CMD
+mkdir R Z
 
-mkdir R
-cp -rvp ${abs_srcdir}/debuginfod-rpms R
-if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
-    rm -vrf R/debuginfod-rpms/fedora31
-fi
-
-FDCACHE_MBS=$((1/1024/1024))
+FDCACHE_MBS=1
 FDCACHE_FDS=1
-PREFETCH_MBS=$((1/1024/1024))
-PREFETCH_FDS=2
+PREFETCH_MBS=1
+PREFETCH_FDS=1
 PREFETCH=1
 # This variable is essential and ensures no time-race for claiming ports occurs
 # set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test
 base=8800
 get_ports
+cp -rvp ${abs_srcdir}/debuginfod-rpms R
+if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
+    rm -vrf R/debuginfod-rpms/fedora31
+fi
+cp -rvp ${abs_srcdir}/debuginfod-tars Z
 
 DB=${PWD}/.debuginfod_tmp.sqlite
 tempfiles $DB
@@ -47,88 +47,68 @@  export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
 ######
 rm -rf $DEBUGINFOD_CACHE_PATH
 rm -rf $DB
+
 # Set mb values to ensure the caches certainly have enough space
 # To store the test files
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d $DB \
-    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-fds=$PREFETCH_FDS --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R \
-    --fdcache-mbs=50  --fdcache-prefetch-mbs=50 \
-    --fdcache-prefetch=$PREFETCH  > vlog$PORT1 2>&1 &
+    --fdcache-fds=$FDCACHE_FDS --fdcache-prefetch-fds=$PREFETCH_FDS  -vvvvv -g 0 -t 0 -R R \
+    -Z .tar.bz2=bzcat Z  --fdcache-mbs=50  --fdcache-prefetch-mbs=100 \
+    --fdcache-prefetch=$PREFETCH -F > vlog$PORT1 2>&1 &
 PID1=$!
 tempfiles vlog$PORT1
 errfiles vlog$PORT1
+
 # Server must become ready
 wait_ready $PORT1 'ready' 1
 ########################################################################
-cp -rvp ${abs_srcdir}/debuginfod-rpms R
-if [ "$zstd" = "false" ]; then  # nuke the zstd fedora 31 ones
-    rm -vrf R/debuginfod-rpms/fedora31
-fi
 kill -USR1 $PID1
 wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
 # All rpms need to be in the index, except the dummy permission-000 one
 rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
 wait_ready $PORT1 'scanned_files_total{source=".rpm archive"}' $rpms
-kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs
-# Wait till both files are in the index and scan/index fully finished
+kill -USR1 $PID1 
 wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
-export DEBUGINFOD_URLS="http://127.0.0.1:"$PORT1
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
 
-archive_hits=5
-SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
-for i in $archive_hits
-do
-  archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
-done
+export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/
+# load prefetch cache
+testrun ${abs_top_builddir}/debuginfod/debuginfod-find debuginfo cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb
 metrics=$(curl http://127.0.0.1:$PORT1/metrics)
-regex="fdcache_op_count\{op=\"enqueue\"\} ([0-9]+).*fdcache_op_count\{op=\"evict\"\} ([0-9]+).*"
-enqueue=0
-if [[ $metrics =~ $regex ]]
-then
-   enqueue=${BASH_REMATCH[1]}
-   evict=${BASH_REMATCH[2]}
+regex="fdcache_prefetch_count ([0-9])+"
+# Check to see if prefetch cache is loaded
+if [[ $metrics =~ $regex ]]; then
+  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
+    err
+  fi
 else
-   err
-fi
-# This is an ad-hoc test than only works when FDCACHE_FDS < "archive_test"s above (5)
-# otherwise there will be no eviction
-if [[ $(( $enqueue-$FDCACHE_FDS )) -ne $evict ]]
-then
-   err
-fi
-# Test prefetch cache
-archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
+  err
+fi 
+# Ensure that searching for the source prefetched finds it in the prefetch cache
+testrun ${abs_top_builddir}/debuginfod/debuginfod-find source cee13b2ea505a7f37bd20d271c6bc7e5f8d2dfcb /usr/src/debug/hello.c
 metrics=$(curl http://127.0.0.1:$PORT1/metrics)
-regex="fdcache_prefetch_count ([0-9])+"
-pf_count=0
-for i in $FDCACHE_FDS
-do
-  if [[ $metrics =~ $regex ]]; then
-
-    if [[ $(( $pf_count+$PREFETCH )) -ne ${BASH_REMATCH[1]} ]]; then
-      err
-    else
-      pf_count=${BASH_REMATCH[1]}
-    fi
-
-  else
+regex="fdcache_op_count\{op=\"prefetch_access\"\} ([0-9])+"
+if [[ $metrics =~ $regex ]]; then
+  if [[  ${BASH_REMATCH[1]} -ne 1 ]]; then
     err
   fi
-done
+else
+  err
+fi
 
 kill $PID1
 wait $PID1
 PID1=0
 
-
 #########
-# Test mb limit on fd and prefetch cache
+# Test mb limit on fd cache
 #########
-
 rm -rf $DEBUGINFOD_CACHE_PATH
 rm -rf $DB
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT1 -d $DB \
-    --fdcache-mbs=1 --fdcache-prefetch-mbs=1 --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R \
-    > vlog2$PORT1 2>&1 &
+    --fdcache-mbs=$FDCACHE_MBS --fdcache-mintmp 0 -vvvvv -g 0 -t 0 -R R > vlog2$PORT1 2>&1 &
 PID1=$!
 tempfiles vlog2$PORT1
 errfiles vlog2$PORT1
@@ -139,26 +119,26 @@  wait_ready $PORT1 'thread_work_total{role="traverse"}' 1
 # All rpms need to be in the index, except the dummy permission-000 one
 rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
 wait_ready $PORT1 'scanned_files_total{source=".rpm archive"}' $rpms
-kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->srefs
-# Wait till both files are in the index and scan/index fully finished
+kill -USR1 $PID1
 wait_ready $PORT1 'thread_work_total{role="traverse"}' 2
+wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
+wait_ready $PORT1 'thread_busy{role="scan"}' 0
 
-archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
+# This many archives cause the fd cache to be loaded with just over 1mb of
+# files. So we expect the 1mb cap off
+SHA=f4a1a8062be998ae93b8f1cd744a398c6de6dbb1
 archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
 archive_test c36708a78618d597dee15d0dc989f093ca5f9120 /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
 archive_test 41a236eb667c362a1c4196018cc4581e09722b1b /usr/src/debug/hello2-1.0-2.x86_64/hello.c $SHA
 archive_test bc1febfd03ca05e030f0d205f7659db29f8a4b30 /usr/src/debug/hello-1.0/hello.c $SHA
 archive_test f0aa15b8aba4f3c28cac3c2a73801fefa644a9f2 /usr/src/debug/hello-1.0/hello.c $SHA
 archive_test bbbf92ebee5228310e398609c23c2d7d53f6e2f9 /usr/src/debug/hello-1.0/hello.c $SHA
-archive_test d44d42cbd7d915bc938c81333a21e355a6022fb7 /usr/src/debug/hello-1.0/hello.c $SHA
-
 metrics=$(curl http://127.0.0.1:$PORT1/metrics)
-regex="fdcache_bytes ([0-9]+).*fdcache_prefetch_bytes ([0-9]+)"
+regex="fdcache_bytes ([0-9]+)"
 mb=$((1024*1024))
 if [[ $metrics =~ $regex ]]; then
   fdbytes=${BASH_REMATCH[1]}
-  pfbytes=${BASH_REMATCH[2]}
-  if [ $fdbytes -gt $mb ] || [ $pfbytes -gt $mb ]; then
+  if [ $fdbytes -gt $mb ] ; then
     err
   fi
 else
-- 
2.36.1