diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9da9ac51eccd..71097b80cfe1 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -550,7 +550,7 @@ riscv*)
 	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
 	extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o"
 	extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o sifive-vector-builtins-bases.o andes-vector-builtins-bases.o"
-	extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o"
+	extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o"
 	d_target_objs="riscv-d.o"
 	extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h riscv_th_vector.h sifive_vector.h andes_vector.h"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-bclr-lowest-set-bit.cc b/gcc/config/riscv/riscv-bclr-lowest-set-bit.cc
new file mode 100644
index 000000000000..51bd4301ce7a
--- /dev/null
+++ b/gcc/config/riscv/riscv-bclr-lowest-set-bit.cc
@@ -0,0 +1,306 @@
+/* Convert clear lowest bit set idiom into a more efficient
+   bclr sequence when possible.
+
+   Copyright (C) 2018-2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "backend.h"
+#include "df.h"
+#include "tree-pass.h"
+
+/* x & (x - 1) clears the lowest set bit in x.  If we have a ctz (x) nearby,
+   then we can use a bclr with the bit position defined by the output of
+   the ctz.  */
+
+namespace {
+
+const pass_data pass_data_bclr_lowest_set_bit =
+{
+  RTL_PASS, /* type */
+  "bclr_lowest_set_bit", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_bclr_lowest_set_bit : public rtl_opt_pass
+{
+public:
+  pass_bclr_lowest_set_bit (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_bclr_lowest_set_bit, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      /* This uses ctz and bclr, so we need ZBB and ZBS
+	 instructions.  */
+      return TARGET_ZBB && TARGET_ZBS && optimize > 0;
+    }
+  virtual unsigned int execute (function *);
+
+private:
+}; // class pass_bclr_lowest_set_bit
+
+/* Look at real insns before START to see if any of them compute ctz (src).
+   If so, return the insn that is a ctz, otherwise return NULL.  Look no
+   further that LIMIT real statements and do not leave this basic block.  */
+
+rtx_insn *
+find_prior_ctz (rtx_insn *start, rtx src, int limit)
+{
+  rtx_insn *prev = start;
+  while (prev && limit > 0)
+    {
+      prev = prev_nonnote_nondebug_insn_bb (prev);
+      limit--;
+
+      if (prev)
+	{
+	  rtx set = single_set (prev);
+	  if (!set)
+	    continue;
+
+	  /* A ctz of a SI object on rv64 will have an
+	     SUBREG argument.  */
+	  if (GET_CODE (SET_SRC (set)) == CTZ
+	      && (XEXP (SET_SRC (set), 0) == src
+		  || (SUBREG_P (XEXP (SET_SRC (set), 0))
+		      && SUBREG_REG (XEXP (SET_SRC (set), 0)) == src)))
+	    {
+	      /* We've found a CTZ, make sure neither the input nor
+		 the output change between the CTZ and START.  */
+	      if (reg_set_between_p (src, prev, start)
+		  || reg_set_between_p (SET_DEST (set), prev, start))
+		return NULL;
+
+	      /* Everything looks good.  */
+	      return prev;
+	    }
+	}
+    }
+  return NULL;
+}
+
+/* Look at real insns after START to see if any of them compute ctz (src).
+   If so, return the insn that is a ctz, otherwise return NULL.  Look no
+   further that LIMIT real statements and do not leave this basic block.  */
+
+rtx_insn *
+find_later_ctz (rtx_insn *start, rtx src, int limit)
+{
+  rtx_insn *next = start;
+  while (next && limit > 0)
+    {
+      next = next_nonnote_nondebug_insn_bb (next);
+      limit--;
+
+      if (next)
+	{
+	  rtx set = single_set (next);
+	  if (!set)
+	    continue;
+
+	  /* A ctz of a SI object on rv64 will have an
+	     SUBREG argument.  */
+	  if (GET_CODE (SET_SRC (set)) == CTZ
+	      && (XEXP (SET_SRC (set), 0) == src
+		  || (SUBREG_P (XEXP (SET_SRC (set), 0))
+		      && SUBREG_REG (XEXP (SET_SRC (set), 0)) == src)))
+	    {
+	      /* We've found a CTZ.  The CTZ is going to be moved, so
+		 we need to verify its input doesn't change between
+		 START and NEXT.  We also have to verify that its 
+		 destination is unused between those points.  */
+	      if (reg_set_between_p (XEXP (SET_SRC (set), 0), start, next)
+		  || reg_used_between_p (SET_DEST (set), start, next))
+		return NULL;
+	      
+	      return next;
+	    }
+	}
+    }
+  return NULL;
+}
+
+/* So the basic idea here is to find x & (x - 1) idioms which clear the
+   lowest set bit.  If there is a nearby ctz (x), then we can profitably
+   use a ctz+bclr sequence instead, essentially replacing the addi+and
+   with bclr.  This should often be more efficient and save space. 
+
+   We don't do this in gimple because the cost model would reject as the
+   optimized for appear more expensive from its costing model.
+
+   Combine won't work as there's no data dependency between the
+   ctz and the x & (x - 1) idiom.
+
+   Peepholing doesn't work consistently because we're constantly
+   inserting unrelated instructions between the two components of
+   the x & (x - 1) idiom.  We'd have to match the ctz in various
+   positions as well as deal with random insns the scheduler puts
+   in the middle of the key instrutions.
+
+   So, this mini pass to optimize this scenario.  */
+
+unsigned int
+pass_bclr_lowest_set_bit::execute (function *fn)
+{
+  basic_block bb;
+
+  /* Scan all the blocks, once.  */
+  FOR_ALL_BB_FN (bb, fn)
+    {
+      rtx_insn *insn;
+      /* Scan all the insns once.  */
+      FOR_BB_INSNS (bb, insn)
+	{
+	  /* Ignore as much as we can.  */
+	  if (!NONDEBUG_INSN_P (insn)
+	      || JUMP_P (insn)
+	      || CALL_P (insn))
+	    continue;
+
+	  rtx dec_set = single_set (insn);
+	  if (!dec_set)
+	    continue;
+
+	  rtx dec_src = SET_SRC (dec_set);
+	  rtx dec_dest = SET_DEST (dec_set);
+
+	  /* For a 32 bit object on rv64, the decrement will
+	     be wrapped by a SIGN_EXTEND.  Strip it.  */
+	  if (GET_CODE (dec_src) == SIGN_EXTEND)
+	    dec_src = XEXP (dec_src, 0);
+
+	  /* Verify it's res = x - 1, if not proceed to the next insn.  */
+	  if (!dec_set
+	      || !REG_P (dec_dest)
+	      || GET_CODE (dec_src) != PLUS
+	      || (XEXP (dec_src, 1) != CONSTM1_RTX (GET_MODE (dec_src))))
+	    continue;
+
+	  /* Get the value being decremented.  Note it might be
+	     wrapped by a SUBREG which we strip.  */
+	  dec_src = XEXP (dec_src, 0);
+	  if (SUBREG_P (dec_src))
+	    dec_src = SUBREG_REG (dec_src);
+
+	  /* So we've found dest = src - 1;  Now look at the next
+	     real insn and see if it's dest2 = (dest & src).  */
+	  rtx_insn *next = next_nonnote_nondebug_insn_bb (insn);
+	  if (!next)
+	    continue;
+
+	  rtx and_set = single_set (next);
+
+	  if (!and_set)
+	    continue;
+
+	  rtx and_src = SET_SRC (and_set);
+	  rtx and_dest = SET_DEST (and_set);
+	  if (!and_set
+	      || !REG_P (and_dest)
+	      || GET_CODE (and_src) != AND)
+	    continue;
+
+	  rtx and_op0 = XEXP (and_src, 0);
+	  rtx and_op1 = XEXP (and_src, 1);
+
+	  if (dec_dest != and_op0 && dec_dest != and_op1)
+	    continue;
+
+	  if (dec_src != and_op0 && dec_src != and_op1)
+	    continue;
+
+	  /* We've found x & (x - 1).  Now look for a suitable ctz nearby.  */
+
+	  rtx_insn *prior_ctz = find_prior_ctz (insn, dec_src, 10);
+	  if (prior_ctz)
+	    {
+	      rtx prior_ctz_output = SET_DEST (single_set (prior_ctz));
+
+	      /* Create a pattern for the variable bit clear idiom.  */
+	      rtx pat = gen_rtx_ROTATE (GET_MODE (dec_dest),
+				        GEN_INT (-2),
+					gen_lowpart (QImode, prior_ctz_output));
+	      pat = gen_rtx_AND (GET_MODE (dec_dest), pat, dec_src);
+
+	      /* Slam that pattern in as the SET_SRC of the original AND.  */
+	      SET_SRC (and_set) = pat;
+	      INSN_CODE (next) = -1;
+	      df_insn_rescan (next);
+
+	      /* Start next loop iteration.  */
+	      continue;
+	    }
+
+	  /* Typically in cases where we can optimize, we'll find a REG->REG
+	     copy into DEC_SRC immediately before INSN.  Look for it.  */
+	  rtx_insn *prev = prev_nonnote_nondebug_insn_bb (insn);
+	  rtx copy = NULL_RTX;
+	  if (prev
+	      && (copy = single_set (prev)) != NULL_RTX
+	      && SET_SRC (copy) == dec_src)
+	    dec_src = SET_DEST (copy);
+	  
+	  /* We didn't find a CTZ before INSN.  So look after NEXT.
+	     This case is more complex as we have to move insns around.  */
+	  rtx_insn *later_ctz = find_later_ctz (next, dec_src, 10);
+	  if (later_ctz)
+	    {
+	      /* Remove the CTZ from the stream and reemit it immediately
+		 after NEXT.  */
+	      df_insn_delete (later_ctz);
+	      remove_insn (later_ctz);
+	      SET_PREV_INSN (later_ctz) = NULL;
+	      SET_NEXT_INSN (later_ctz) = NULL;
+	      df_insn_rescan (emit_insn_after (PATTERN (later_ctz), next));
+
+	      /* Now construct the bclr insn and add it to the stream.  */
+	      rtx later_ctz_output = SET_DEST (single_set (later_ctz));
+	      rtx pat = gen_rtx_ROTATE (GET_MODE (dec_dest),
+				        GEN_INT (-2),
+					gen_lowpart (QImode, later_ctz_output));
+	      pat = gen_rtx_AND (GET_MODE (dec_dest), pat, dec_src);
+	      pat = gen_rtx_SET (and_dest, pat);
+	      df_insn_rescan (emit_insn_after (pat, NEXT_INSN (next)));
+	    }
+	}
+    }
+
+  return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_bclr_lowest_set_bit (gcc::context *ctxt)
+{
+  return new pass_bclr_lowest_set_bit (ctxt);
+}
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index bc803c4678ed..5aa41228e1fe 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -17,6 +17,7 @@
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
+INSERT_PASS_AFTER (pass_combine, 1, pass_bclr_lowest_set_bit);
 INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
 INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
 INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 46b256d63539..2d3fd0e07626 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -207,6 +207,7 @@ rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
+rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
 
 
 /* Routines implemented in riscv-string.c.  */
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index d86dc7443658..b53a2dff2cf7 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -131,6 +131,11 @@ riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+riscv-bclr-lowest-set-bit.o: $(srcdir)/config/riscv/riscv-bclr-lowest-set-bit.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H)
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
 riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc \
   $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H)
 	$(COMPILE) $<
diff --git a/gcc/testsuite/gcc.target/riscv/bclr-lowest-set-bit-1.c b/gcc/testsuite/gcc.target/riscv/bclr-lowest-set-bit-1.c
new file mode 100644
index 000000000000..fa201cec3590
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bclr-lowest-set-bit-1.c
@@ -0,0 +1,128 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int foo1_res1;
+int foo1_res2;
+void foo1(unsigned int x)
+{
+  unsigned int tz = __builtin_ctz (x);
+  unsigned int t = x - 1UL;
+  foo1_res1 = t & x;
+  foo1_res2 = tz;
+}
+
+int foo2_res1;
+int foo2_res2;
+void foo2(unsigned int x)
+{
+  unsigned int t = x - 1UL;
+  unsigned int tz = __builtin_ctz (x);
+  foo2_res1 = t & x;
+  foo2_res2 = tz;
+}
+
+int foo3_res1;
+int foo3_res2;
+void foo3(unsigned int x)
+{
+  unsigned int t = x - 1UL;
+  foo3_res1 = t & x;
+  unsigned int tz = __builtin_ctz (x);
+  foo3_res2 = tz;
+}
+
+unsigned int foo4_res1;
+unsigned int foo4_res2;
+void foo4(unsigned int x)
+{
+  unsigned int tz = __builtin_ctz (x);
+  unsigned int t = x - 1UL;
+  foo4_res1 = t & x;
+  foo4_res2 = tz;
+}
+
+unsigned int foo5_res1;
+unsigned int foo5_res2;
+void foo5(unsigned int x)
+{
+  unsigned int t = x - 1UL;
+  unsigned int tz = __builtin_ctz (x);
+  foo5_res1 = t & x;
+  foo5_res2 = tz;
+}
+
+unsigned int foo6_res1;
+unsigned int foo6_res2;
+void foo6(unsigned int x)
+{
+  unsigned int t = x - 1UL;
+  foo6_res1 = t & x;
+  unsigned int tz = __builtin_ctzl (x);
+  foo6_res2 = tz;
+}
+
+long foo7_res1;
+long foo7_res2;
+void foo7(unsigned long x)
+{
+  unsigned long tz = __builtin_ctzl (x);
+  unsigned long t = x - 1UL;
+  foo7_res1 = t & x;
+  foo7_res2 = tz;
+}
+
+long foo8_res1;
+long foo8_res2;
+void foo8(unsigned long x)
+{
+  unsigned long t = x - 1UL;
+  unsigned long tz = __builtin_ctzl (x);
+  foo8_res1 = t & x;
+  foo8_res2 = tz;
+}
+
+long foo9_res1;
+long foo9_res2;
+void foo9(unsigned long x)
+{
+  unsigned long t = x - 1UL;
+  foo9_res1 = t & x;
+  unsigned long tz = __builtin_ctzl (x);
+  foo9_res2 = tz;
+}
+
+unsigned long foo10_res1;
+unsigned long foo10_res2;
+void foo10(unsigned long x)
+{
+  unsigned long tz = __builtin_ctzl (x);
+  unsigned long t = x - 1UL;
+  foo10_res1 = t & x;
+  foo10_res2 = tz;
+}
+
+unsigned long foo11_res1;
+unsigned long foo11_res2;
+void foo11(unsigned long x)
+{
+  unsigned long t = x - 1UL;
+  unsigned long tz = __builtin_ctzl (x);
+  foo11_res1 = t & x;
+  foo11_res2 = tz;
+}
+
+unsigned long foo12_res1;
+unsigned long foo12_res2;
+void foo12(unsigned long x)
+{
+  unsigned long t = x - 1UL;
+  foo12_res1 = t & x;
+  unsigned long tz = __builtin_ctzl (x);
+  foo12_res2 = tz;
+}
+
+/* { dg-final { scan-assembler-not "\\sand\\s" } } */
+/* { dg-final { scan-assembler-times "\\sbclr\\s" 12 } } */
+
