[v7,24/34] Import float comparison from the CM0 library

Message ID 20221031154529.3627576-25-gnu@danielengel.com
State New
Delegated to: Richard Earnshaw
Series libgcc: Thumb-1 Floating-Point Assembly for Cortex M0

Commit Message

Daniel Engel Oct. 31, 2022, 3:45 p.m. UTC
  These functions are significantly smaller and faster than the wrapper
functions and soft-float implementation they replace.  Using the first
comparison operator (e.g. '<=') in a program costs about 70 bytes up front,
but each additional operator adds just 4 bytes.
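
As a rough illustration of the size claim (hypothetical file name, assuming
something like 'arm-none-eabi-gcc -mcpu=cortex-m0 -Os'):

    /* fcmp_size.c: each operator pulls in one small entry stub
       (e.g. __aeabi_fcmple) on top of the shared comparison core.  */
    int le(float a, float b) { return a <= b; }  /* first use: ~70 bytes */
    int lt(float a, float b) { return a <  b; }  /* +4 bytes */
    int ge(float a, float b) { return a >= b; }  /* +4 bytes */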

NOTE: It seems that the __aeabi_cfcmp*() routines formerly in bpabi-v6m.S
were not well tested, as they returned wrong results for the 'C' flag.
The replacement functions are fully tested.
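
For reference, the expected flag contract ('Z' set only when ordered and
equal; 'C' clear only when ordered and $r0 < $r1) is consumed by
compiler-generated code roughly like this (a sketch, not part of the patch):

    bl      __aeabi_cfcmple    @ compare $r0 ? $r1
    bls     1f                 @ taken iff ordered and ($r0 <= $r1)
    ...                        @ fall through: $r0 > $r1, or unordered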

gcc/libgcc/ChangeLog:
2022-10-09 Daniel Engel <gnu@danielengel.com>

	* config/arm/eabi/fcmp.S (__cmpsf2, __eqsf2, __gesf2,
	__aeabi_fcmpne, __aeabi_fcmpun): Added new functions.
	(__aeabi_fcmpeq, __aeabi_fcmpne, __aeabi_fcmplt, __aeabi_fcmple,
	 __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_cfcmple, __aeabi_cfcmpeq,
	 __aeabi_cfrcmple): Replaced with branches to __internal_cmpsf2().
	* config/arm/eabi/fplib.h: New file with fcmp-specific constants
	and general build configuration macros.
	* config/arm/lib1funcs.S: #include eabi/fplib.h (v6m only).
	* config/arm/t-elf (LIB1ASMFUNCS): Added _internal_cmpsf2,
	_arm_cfcmpeq, _arm_cfcmple, _arm_cfrcmple, _arm_fcmpeq,
	_arm_fcmpge, _arm_fcmpgt, _arm_fcmple, _arm_fcmplt, _arm_fcmpne,
	_arm_eqsf2, and _arm_gesf2.
---
 libgcc/config/arm/eabi/fcmp.S  | 643 +++++++++++++++++++++++++++++----
 libgcc/config/arm/eabi/fplib.h |  83 +++++
 libgcc/config/arm/lib1funcs.S  |   1 +
 libgcc/config/arm/t-elf        |  18 +
 4 files changed, 681 insertions(+), 64 deletions(-)
 create mode 100644 libgcc/config/arm/eabi/fplib.h
  

Patch

diff --git a/libgcc/config/arm/eabi/fcmp.S b/libgcc/config/arm/eabi/fcmp.S
index 96d627f1fea..0c813fae8c5 100644
--- a/libgcc/config/arm/eabi/fcmp.S
+++ b/libgcc/config/arm/eabi/fcmp.S
@@ -1,8 +1,7 @@ 
-/* Miscellaneous BPABI functions.  Thumb-1 implementation, suitable for ARMv4T,
-   ARMv6-M and ARMv8-M Baseline like ISA variants.
+/* fcmp.S: Thumb-1 optimized 32-bit float comparison
 
-   Copyright (C) 2006-2020 Free Software Foundation, Inc.
-   Contributed by CodeSourcery.
+   Copyright (C) 2018-2022 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (gnu@danielengel.com)
 
    This file is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the
@@ -24,66 +23,582 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 
+// The various compare functions in this file all expect to tail call __cmpsf2()
+//  with flags set for a particular comparison mode.  The __internal_cmpsf2()
+//  symbol itself is unambiguous, but there is a remote risk that the linker
+//  will prefer some other symbol in place of __cmpsf2().  Importing an archive
+//  file that also exports __cmpsf2() will throw an error in this case.
+// As a workaround, this block configures __cmpsf2() for compilation twice.
+// The first version configures __internal_cmpsf2() as a WEAK standalone symbol,
+//  and the second exports __cmpsf2() and __internal_cmpsf2() normally.
+// A small bonus: programs not using __cmpsf2() itself will be slightly smaller.
+// 'L_internal_cmpsf2' should appear before 'L_arm_cmpsf2' in LIB1ASMFUNCS.
+#if defined(L_arm_cmpsf2) || defined(L_internal_cmpsf2)
+
+#define CMPSF2_SECTION .text.sorted.libgcc.fcmp.cmpsf2
+
+// int __cmpsf2(float, float)
+// <https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html>
+// Returns the three-way comparison result of $r0 with $r1:
+//  * +1 if ($r0 > $r1), or either argument is NAN
+//  *  0 if ($r0 == $r1)
+//  * -1 if ($r0 < $r1)
+// Uses $r2, $r3, and $ip as scratch space.
+#ifdef L_arm_cmpsf2
+FUNC_START_SECTION cmpsf2 CMPSF2_SECTION
+FUNC_ALIAS lesf2 cmpsf2
+FUNC_ALIAS ltsf2 cmpsf2
+    CFI_START_FUNCTION
+
+        // Assumption: The 'libgcc' functions should raise exceptions.
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_RAISE_EXCEPTIONS + FCMP_3WAY)
+
+    // int,int __internal_cmpsf2(float, float, int)
+    // Internal function expects a set of control flags in $r2.
+    // If ordered, returns a comparison type { 0, 1, 2 } in $r3
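+    // For reference, the control word packs these fields (see eabi/fplib.h):
+    //  * bit [0]   FCMP_3WAY: return the raw three-way result { -1, 0, +1 }
+    //  * bits[3:1] acceptance mask: FCMP_LT / FCMP_EQ / FCMP_GT
+    //  * bit [4]   FCMP_RAISE_EXCEPTIONS: also trap quiet NANs (TRAP_NANS)
+    //  * bits[7:5] FCMP_UN_*: the result to substitute when unordered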
+    FUNC_ENTRY internal_cmpsf2
+
+#else /* L_internal_cmpsf2 */
+    WEAK_START_SECTION internal_cmpsf2 CMPSF2_SECTION
+    CFI_START_FUNCTION
+
+#endif
+
+        // When operand signs are considered, the comparison result falls
+        //  within one of the following quadrants:
+        //
+        // $r0  $r1  $r0-$r1* flags  result
+        //  +    +      >      C=0     GT
+        //  +    +      =      Z=1     EQ
+        //  +    +      <      C=1     LT
+        //  +    -      >      C=1     GT
+        //  +    -      =      C=1     GT
+        //  +    -      <      C=1     GT
+        //  -    +      >      C=0     LT
+        //  -    +      =      C=0     LT
+        //  -    +      <      C=0     LT
+        //  -    -      >      C=0     LT
+        //  -    -      =      Z=1     EQ
+        //  -    -      <      C=1     GT
+        //
+        // *When interpreted as a subtraction of unsigned integers
+        //
+        // From the table, it is clear that in the presence of any negative
+        //  operand, the natural result simply needs to be reversed.
+        // Save the 'N' flag for later use.
+        movs    r3,     r0
+        orrs    r3,     r1
+        mov     ip,     r3
+
+        // Keep the absolute value of the second argument for NAN testing.
+        lsls    r3,     r1,     #1
+
+        // With the absolute value of the second argument safely stored,
+        //  recycle $r1 to calculate the difference of the arguments.
+        subs    r1,     r0,     r1
+
+        // Save the 'C' flag for use later.
+        // Effectively shifts all the flags 1 bit left.
+        adcs    r2,     r2
+
+        // Absolute value of the first argument.
+        lsls    r0,     #1
+
+        // Identify the largest absolute value between the two arguments.
+        cmp     r0,     r3
+        bhs     LLSYM(__fcmp_sorted)
+
+        // Keep the larger absolute value for NAN testing.
+        // NOTE: When the arguments are respectively a signaling NAN and a
+        //  quiet NAN, the quiet NAN has precedence.  This has consequences
+        //  if TRAP_NANS is enabled, but the flags indicate that exceptions
+        //  for quiet NANs should be suppressed.  After the signaling NAN is
+        //  discarded, no exception is raised, although it should have been.
+        // This could be avoided by using a fifth register to save both
+        //  arguments until the signaling bit can be tested, but that seems
+        //  like an excessive amount of ugly code for an ambiguous case.
+        movs    r0,     r3
+
+    LLSYM(__fcmp_sorted):
+        // If $r3 is NAN, the result is unordered.
+        movs    r3,     #255
+        lsls    r3,     #24
+        cmp     r0,     r3
+        bhi     LLSYM(__fcmp_unordered)
+
+        // Positive and negative zero must be considered equal.
+        // If the larger absolute value is +/-0, both must have been +/-0.
+        subs    r3,     r0,     #0
+        beq     LLSYM(__fcmp_zero)
+
+        // Test for regular equality.
+        subs    r3,     r1,     #0
+        beq     LLSYM(__fcmp_zero)
+
+        // Isolate the saved 'C', and invert if either argument was negative.
+        // Remembering that the original subtraction was $r0 - $r1,
+        //  the result will be 1 if 'C' was set (gt), or 0 for not 'C' (lt).
+        lsls    r3,     r2,     #31
+        add     r3,     ip
+        lsrs    r3,     #31
+
+        // HACK: Force the 'C' bit clear,
+        //  since bit[30] of $r3 may vary with the operands.
+        adds    r3,     #0
+
+    LLSYM(__fcmp_zero):
+        // After everything is combined, the temp result will be
+        //  2 (gt), 1 (eq), or 0 (lt).
+        adcs    r3,     r3
+
+        // Short-circuit return if the 3-way comparison flag is set.
+        // Otherwise, shifts the condition mask into bits[2:0].
+        lsrs    r2,     #2
+        bcs     LLSYM(__fcmp_return)
+
+        // If the bit corresponding to the comparison result is set in the
+        //  acceptance mask, a '1' will fall out into the result.
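+        // For example, __aeabi_fcmpge() passes FCMP_GE == (FCMP_EQ | FCMP_GT),
+        //  which by this point occupies bits[2:1] of $r2.  An 'eq' result
+        //  ($r3 == 1) shifts the FCMP_EQ bit down into bit[0], so a '1'
+        //  falls out below; an 'lt' result ($r3 == 0) leaves bit[0] clear.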
+        movs    r0,     #1
+        lsrs    r2,     r3
+        ands    r0,     r2
+        RET
+
+    LLSYM(__fcmp_unordered):
+        // Set up the requested UNORDERED result.
+        // Remember the shift in the flags (above).
+        lsrs    r2,     #6
+
+  #if defined(TRAP_EXCEPTIONS) && TRAP_EXCEPTIONS
+        // TODO: ... The
+
+
+  #endif
+
+  #if defined(TRAP_NANS) && TRAP_NANS
+        // Always raise an exception if FCMP_RAISE_EXCEPTIONS was specified.
+        bcs     LLSYM(__fcmp_trap)
+
+        // If FCMP_NO_EXCEPTIONS was specified, no exceptions on quiet NANs.
+        // The comparison flags are moot, so $r1 can serve as scratch space.
+        lsrs    r1,     r0,     #24
+        bcs     LLSYM(__fcmp_return2)
+
+    LLSYM(__fcmp_trap):
+        // Restore the NAN (sans sign) for an argument to the exception.
+        // As an IRQ, the handler restores all registers, including $r3.
+        // NOTE: The service handler may not return.
+        lsrs    r0,     #1
+        movs    r3,     #(UNORDERED_COMPARISON)
+        svc     #(SVC_TRAP_NAN)
+  #endif
+
+    LLSYM(__fcmp_return2):
+        // HACK: Work around result register mapping.
+        // This could probably be eliminated by remapping the flags register.
+        movs    r3,     r2
+
+    LLSYM(__fcmp_return):
+        // Finish setting up the result.
+        // Constant subtraction allows a negative result while keeping the
+        //  $r2 flag control word within 8 bits, particularly for FCMP_UN*.
+        // This operation also happens to set the 'Z' and 'C' flags correctly
+        //  per the requirements of __aeabi_cfcmple() et al.
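+        // For example: 'eq' arrives as $r3 == 1, giving $r0 == 0 with both
+        //  'Z' and 'C' set; 'lt' arrives as $r3 == 0, so the subtraction
+        //  borrows and clears 'C'; 'gt' (and FCMP_UN_POSITIVE's unordered
+        //  result, $r3 == 2) leaves 'C' set with 'Z' clear.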
+        subs    r0,     r3,     #1
+        RET
+
+    CFI_END_FUNCTION
+FUNC_END internal_cmpsf2
+
 #ifdef L_arm_cmpsf2
+FUNC_END ltsf2
+FUNC_END lesf2
+FUNC_END cmpsf2
+#endif
+
+#endif /* L_arm_cmpsf2 || L_internal_cmpsf2 */
+
+
+#ifdef L_arm_eqsf2
+
+// int __eqsf2(float, float)
+// <https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html>
+// Returns the three-way comparison result of $r0 with $r1:
+//  * -1 if ($r0 < $r1)
+//  *  0 if ($r0 == $r1)
+//  * +1 if ($r0 > $r1), or either argument is NAN
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION eqsf2 .text.sorted.libgcc.fcmp.eqsf2
+FUNC_ALIAS nesf2 eqsf2
+    CFI_START_FUNCTION
+
+        // __eqsf2() is a quiet comparison: no exceptions on quiet NANs.
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_NO_EXCEPTIONS + FCMP_3WAY)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END nesf2
+FUNC_END eqsf2
+
+#endif /* L_arm_eqsf2 */
+
+
+#ifdef L_arm_gesf2
+
+// int __gesf2(float, float)
+// <https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html>
+// Returns the three-way comparison result of $r0 with $r1:
+//  * -1 if ($r0 < $r1), or either argument is NAN
+//  *  0 if ($r0 == $r1)
+//  * +1 if ($r0 > $r1)
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION gesf2 .text.sorted.libgcc.fcmp.gesf2
+FUNC_ALIAS gtsf2 gesf2
+    CFI_START_FUNCTION
+
+        // Assumption: The 'libgcc' functions should raise exceptions.
+        movs    r2,     #(FCMP_UN_NEGATIVE + FCMP_RAISE_EXCEPTIONS + FCMP_3WAY)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END gtsf2
+FUNC_END gesf2
+
+#endif /* L_arm_gesf2 */
+
+
+#ifdef L_arm_fcmpeq
+
+// int __aeabi_fcmpeq(float, float)
+// Returns '1' in $r0 if ($r0 == $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmpeq .text.sorted.libgcc.fcmp.fcmpeq
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_EQ)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmpeq
+
+#endif /* L_arm_fcmpeq */
+
+
+#ifdef L_arm_fcmpne
+
+// int __aeabi_fcmpne(float, float) [non-standard]
+// Returns '1' in $r0 if ($r0 != $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmpne .text.sorted.libgcc.fcmp.fcmpne
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_NE)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmpne
+
+#endif /* L_arm_fcmpne */
+
+
+#ifdef L_arm_fcmplt
+
+// int __aeabi_fcmplt(float, float)
+// Returns '1' in $r0 if ($r0 < $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmplt .text.sorted.libgcc.fcmp.fcmplt
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_RAISE_EXCEPTIONS + FCMP_LT)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmplt
+
+#endif /* L_arm_fcmplt */
+
+
+#ifdef L_arm_fcmple
+
+// int __aeabi_fcmple(float, float)
+// Returns '1' in $r0 if ($r0 <= $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmple .text.sorted.libgcc.fcmp.fcmple
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_RAISE_EXCEPTIONS + FCMP_LE)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmple
+
+#endif /* L_arm_fcmple */
+
+
+#ifdef L_arm_fcmpge
+
+// int __aeabi_fcmpge(float, float)
+// Returns '1' in $r0 if ($r0 >= $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmpge .text.sorted.libgcc.fcmp.fcmpge
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_RAISE_EXCEPTIONS + FCMP_GE)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmpge
+
+#endif /* L_arm_fcmpge */
+
+
+#ifdef L_arm_fcmpgt
+
+// int __aeabi_fcmpgt(float, float)
+// Returns '1' in $r0 if ($r0 > $r1) (ordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmpgt .text.sorted.libgcc.fcmp.fcmpgt
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_RAISE_EXCEPTIONS + FCMP_GT)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END aeabi_fcmpgt
+
+#endif /* L_arm_fcmpgt */
+
+
+#ifdef L_arm_unordsf2
+
+// int __aeabi_fcmpun(float, float)
+// Returns '1' in $r0 if $r0 and $r1 are unordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION aeabi_fcmpun .text.sorted.libgcc.fcmp.fcmpun
+FUNC_ALIAS unordsf2 aeabi_fcmpun
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_NO_EXCEPTIONS)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+FUNC_END unordsf2
+FUNC_END aeabi_fcmpun
+
+#endif /* L_arm_unordsf2 */
+
+
+#if defined(L_arm_cfcmple) || defined(L_arm_cfrcmple) || \
+   (defined(L_arm_cfcmpeq) && defined(TRAP_NANS) && TRAP_NANS)
+
+#if defined(L_arm_cfcmple)
+  #define CFCMPLE_NAME aeabi_cfcmple
+  #define CFCMPLE_SECTION .text.sorted.libgcc.fcmp.cfcmple
+#elif defined(L_arm_cfrcmple)
+  #define CFCMPLE_NAME aeabi_cfrcmple
+  #define CFCMPLE_SECTION .text.sorted.libgcc.fcmp.cfrcmple
+#else
+  #define CFCMPLE_NAME aeabi_cfcmpeq
+  #define CFCMPLE_SECTION .text.sorted.libgcc.fcmp.cfcmpeq
+#endif
+
+// void __aeabi_cfcmple(float, float)
+// void __aeabi_cfrcmple(float, float)
+// void __aeabi_cfcmpeq(float, float)
+// __aeabi_cfrcmple() first reverses the order of the input arguments.
+// __aeabi_cfcmpeq() is an alias of __aeabi_cfcmple() if the library
+//  does not support signaling NAN exceptions.
+// Three-way compare of $r0 ? $r1, with result in the status flags:
+//  * 'Z' is set only when the operands are ordered and equal.
+//  * 'C' is clear only when the operands are ordered and $r0 < $r1.
+// Preserves all core registers except $ip, $lr, and the CPSR.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION CFCMPLE_NAME CFCMPLE_SECTION
+
+  // __aeabi_cfcmpeq() is defined separately when TRAP_NANS is enabled.
+  #if defined(L_arm_cfcmple) && !(defined(TRAP_NANS) && TRAP_NANS)
+    FUNC_ALIAS aeabi_cfcmpeq aeabi_cfcmple
+  #endif
+
+    CFI_START_FUNCTION
+
+      #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        push    { r0 - r3, rT, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 24
+                .cfi_rel_offset r0, 0
+                .cfi_rel_offset r1, 4
+                .cfi_rel_offset r2, 8
+                .cfi_rel_offset r3, 12
+                .cfi_rel_offset rT, 16
+                .cfi_rel_offset lr, 20
+      #else
+        push    { r0 - r3, lr }
+                .cfi_remember_state
+                .cfi_adjust_cfa_offset 20
+                .cfi_rel_offset r0, 0
+                .cfi_rel_offset r1, 4
+                .cfi_rel_offset r2, 8
+                .cfi_rel_offset r3, 12
+                .cfi_rel_offset lr, 16
+      #endif
+
+  #ifdef L_arm_cfcmple
+        // Even though the result in $r0 will be discarded, the 3-way
+        //  subtraction of '1' that generates this result happens to
+        //  set 'C' and 'Z' perfectly.  Unordered results group with '>'.
+        // This happens to be the same control word as __cmpsf2(), meaning
+        //  that __cmpsf2() is a potential direct branch target.  However,
+        //  the choice to set a redundant control word and branch to
+        //  __internal_cmpsf2() makes this compiled object more robust
+        //  against linking with 'foreign' __cmpsf2() implementations.
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_RAISE_EXCEPTIONS + FCMP_3WAY)
+  #elif defined(L_arm_cfrcmple)
+        // Instead of reversing the order of the operands, it's slightly
+        //  faster to invert the result.  But for that to fully work,
+        //  the sense of NAN must be pre-inverted.
+        movs    r2,     #(FCMP_UN_NEGATIVE + FCMP_NO_EXCEPTIONS + FCMP_3WAY)
+  #else /* L_arm_cfcmpeq */
+        // Same as __aeabi_cfcmple(), except no exceptions on quiet NAN.
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_NO_EXCEPTIONS + FCMP_3WAY)
+  #endif
+
+        bl      SYM(__internal_cmpsf2)
+
+  #ifdef L_arm_cfrcmple
+        // Instead of reversing the order of the operands, it's slightly
+        //  faster to invert the result.  Since __internal_cmpsf2() sets
+        //  its flags by subtracting '1' from $r3, the reversed flags may be
+        //  obtained simply by subtracting $r3 from '1'.
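+        // For example: an ordered 'gt' ($r3 == 2) becomes (1 - 2) == -1,
+        //  borrowing to clear 'C' as required for the reversed 'lt' result;
+        //  'eq' ($r3 == 1) becomes 0, setting both 'Z' and 'C'; unordered
+        //  (pre-inverted to $r3 == 0) becomes +1, with 'C' set and 'Z' clear.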
+        movs    r1,    #1
+        subs    r1,    r3
+  #endif /* L_arm_cfrcmple */
+
+        // Clean up all working registers.
+      #if defined(DOUBLE_ALIGN_STACK) && DOUBLE_ALIGN_STACK
+        pop     { r0 - r3, rT, pc }
+                .cfi_restore_state
+      #else
+        pop     { r0 - r3, pc }
+                .cfi_restore_state
+      #endif
+
+    CFI_END_FUNCTION
+
+  #if defined(L_arm_cfcmple) && !(defined(TRAP_NANS) && TRAP_NANS)
+    FUNC_END aeabi_cfcmpeq
+  #endif
+
+FUNC_END CFCMPLE_NAME
+
+#endif /* L_arm_cfcmple || L_arm_cfrcmple || L_arm_cfcmpeq */
+
+
+// C99 libm functions
+#ifndef __GNUC__
+
+// int isgreaterf(float, float)
+// Returns '1' in $r0 if ($r0 > $r1) and both $r0 and $r1 are ordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION isgreaterf .text.sorted.libgcc.fcmp.isgtf
+MATH_ALIAS isgreaterf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_GT)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END isgreaterf
+FUNC_END isgreaterf
+
+
+// int isgreaterequalf(float, float)
+// Returns '1' in $r0 if ($r0 >= $r1) and both $r0 and $r1 are ordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION isgreaterequalf .text.sorted.libgcc.fcmp.isgef
+MATH_ALIAS isgreaterequalf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_GT + FCMP_EQ)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END isgreaterequalf
+FUNC_END isgreaterequalf
+
+
+// int islessf(float, float)
+// Returns '1' in $r0 if ($r0 < $r1) and both $r0 and $r1 are ordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION islessf .text.sorted.libgcc.fcmp.isltf
+MATH_ALIAS islessf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_LT)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END islessf
+FUNC_END islessf
+
+
+// int islessequalf(float, float)
+// Returns '1' in $r0 if ($r0 <= $r1) and both $r0 and $r1 are ordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION islessequalf .text.sorted.libgcc.fcmp.islef
+MATH_ALIAS islessequalf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_LT + FCMP_EQ)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END islessequalf
+FUNC_END islessequalf
+
+
+// int islessgreaterf(float, float)
+// Returns '1' in $r0 if ($r0 != $r1) and both $r0 and $r1 are ordered.
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION islessgreaterf .text.sorted.libgcc.fcmp.isnef
+MATH_ALIAS islessgreaterf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_ZERO + FCMP_NO_EXCEPTIONS + FCMP_LT + FCMP_GT)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END islessgreaterf
+FUNC_END islessgreaterf
+
+
+// int isunorderedf(float, float)
+// Returns '1' in $r0 if either $r0 or $r1 is NAN (unordered).
+// Uses $r2, $r3, and $ip as scratch space.
+// Same parent section as __cmpsf2() to keep tail call branch within range.
+FUNC_START_SECTION isunorderedf .text.sorted.libgcc.fcmp.isunf
+MATH_ALIAS isunorderedf
+    CFI_START_FUNCTION
+
+        movs    r2,     #(FCMP_UN_POSITIVE + FCMP_NO_EXCEPTIONS)
+        b       SYM(__internal_cmpsf2)
+
+    CFI_END_FUNCTION
+MATH_END isunorderedf
+FUNC_END isunorderedf
 
-FUNC_START aeabi_cfrcmple
-
-	mov	ip, r0
-	movs	r0, r1
-	mov	r1, ip
-	b	6f
-
-FUNC_START aeabi_cfcmpeq
-FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
-
-	@ The status-returning routines are required to preserve all
-	@ registers except ip, lr, and cpsr.
-6:	push	{r0, r1, r2, r3, r4, lr}
-	bl	__lesf2
-	@ Set the Z flag correctly, and the C flag unconditionally.
-	cmp	r0, #0
-	@ Clear the C flag if the return value was -1, indicating
-	@ that the first operand was smaller than the second.
-	bmi	1f
-	movs	r1, #0
-	cmn	r0, r1
-1:
-	pop	{r0, r1, r2, r3, r4, pc}
-
-	FUNC_END aeabi_cfcmple
-	FUNC_END aeabi_cfcmpeq
-	FUNC_END aeabi_cfrcmple
-
-FUNC_START	aeabi_fcmpeq
-
-	push	{r4, lr}
-	bl	__eqsf2
-	negs	r0, r0
-	adds	r0, r0, #1
-	pop	{r4, pc}
-
-	FUNC_END aeabi_fcmpeq
-
-.macro COMPARISON cond, helper, mode=sf2
-FUNC_START	aeabi_fcmp\cond
-
-	push	{r4, lr}
-	bl	__\helper\mode
-	cmp	r0, #0
-	b\cond	1f
-	movs	r0, #0
-	pop	{r4, pc}
-1:
-	movs	r0, #1
-	pop	{r4, pc}
-
-	FUNC_END aeabi_fcmp\cond
-.endm
-
-COMPARISON lt, le
-COMPARISON le, le
-COMPARISON gt, ge
-COMPARISON ge, ge
-
-#endif /* L_arm_cmpsf2 */
+#endif /* !__GNUC__ */
 
diff --git a/libgcc/config/arm/eabi/fplib.h b/libgcc/config/arm/eabi/fplib.h
new file mode 100644
index 00000000000..91cc1dde0d7
--- /dev/null
+++ b/libgcc/config/arm/eabi/fplib.h
@@ -0,0 +1,83 @@ 
+/* fplib.h: Thumb-1 optimized floating point library configuration
+
+   Copyright (C) 2018-2022 Free Software Foundation, Inc.
+   Contributed by Daniel Engel, Senva Inc (gnu@danielengel.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#ifndef __FPLIB_H
+#define __FPLIB_H
+
+/* Enable exception interrupt handler.
+   Exception implementation is opportunistic, and not fully tested.  */
+#define TRAP_EXCEPTIONS (0)
+#define EXCEPTION_CODES (0)
+
+/* Perform extra checks to avoid modifying the sign bit of NANs */
+#define STRICT_NANS (0)
+
+/* Trap signaling NANs regardless of context. */
+#define TRAP_NANS (0)
+
+/* TODO: Define service numbers according to the handler requirements */
+#define SVC_TRAP_NAN (0)
+#define SVC_FP_EXCEPTION (0)
+#define SVC_DIVISION_BY_ZERO (0)
+
+/* Push extra registers when required for 64-bit stack alignment */
+#define DOUBLE_ALIGN_STACK (1)
+
+/* Manipulate *div0() parameters to meet the ARM runtime ABI specification. */
+#define PEDANTIC_DIV0 (1)
+
+/* Define various exception codes.  These don't map to anything in particular */
+#define SUBTRACTED_INFINITY (20)
+#define INFINITY_TIMES_ZERO (21)
+#define DIVISION_0_BY_0 (22)
+#define DIVISION_INF_BY_INF (23)
+#define UNORDERED_COMPARISON (24)
+#define CAST_OVERFLOW (25)
+#define CAST_INEXACT (26)
+#define CAST_UNDEFINED (27)
+
+/* Exception control for quiet NANs.
+   If TRAP_NANS support is enabled, signaling NANs always raise exceptions. */
+#define FCMP_RAISE_EXCEPTIONS   16
+#define FCMP_NO_EXCEPTIONS      0
+
+/* The bit indexes in these assignments are significant.  See implementation.
+   They are shared publicly for eventual use by newlib.  */
+#define FCMP_3WAY           (1)
+#define FCMP_LT             (2)
+#define FCMP_EQ             (4)
+#define FCMP_GT             (8)
+
+#define FCMP_GE             (FCMP_EQ | FCMP_GT)
+#define FCMP_LE             (FCMP_LT | FCMP_EQ)
+#define FCMP_NE             (FCMP_LT | FCMP_GT)
+
+/* These flags affect the result of unordered comparisons.  See implementation.  */
+#define FCMP_UN_THREE       (128)
+#define FCMP_UN_POSITIVE    (64)
+#define FCMP_UN_ZERO        (32)
+#define FCMP_UN_NEGATIVE    (0)
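+
+/* Derived note: these select the unordered result returned by the three-way
+   comparison core: FCMP_UN_THREE -> +3, FCMP_UN_POSITIVE -> +1,
+   FCMP_UN_ZERO -> 0, FCMP_UN_NEGATIVE -> -1.  */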
+
+#endif /* __FPLIB_H */
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index 4d460a77332..188d9d7ff47 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -2010,6 +2010,7 @@  LSYM(Lchange_\register):
 #include "bpabi.S"
 #else /* NOT_ISA_TARGET_32BIT */
 #include "bpabi-v6m.S"
+#include "eabi/fplib.h"
 #include "eabi/fcmp.S"
 #endif /* NOT_ISA_TARGET_32BIT */
 #include "eabi/lcmp.S"
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf
index eb1acd8d5a2..e69579e16dd 100644
--- a/libgcc/config/arm/t-elf
+++ b/libgcc/config/arm/t-elf
@@ -30,6 +30,7 @@  LIB1ASMFUNCS += \
 ifeq (__ARM_ARCH_ISA_THUMB 1,$(ARM_ISA)$(THUMB1_ISA))
 # Group 0B: WEAK overridable function objects built for v6m only.
 LIB1ASMFUNCS += \
+	_internal_cmpsf2 \
 	_muldi3 \
 	
 endif
@@ -80,6 +81,23 @@  LIB1ASMFUNCS += \
 	_arm_negsf2 \
 	_arm_unordsf2 \
 
+ifeq (__ARM_ARCH_ISA_THUMB 1,$(ARM_ISA)$(THUMB1_ISA))
+# Group 2B: Single precision function objects built for v6m only.
+LIB1ASMFUNCS += \
+        _arm_cfcmpeq \
+        _arm_cfcmple \
+        _arm_cfrcmple \
+        _arm_fcmpeq \
+        _arm_fcmpge \
+        _arm_fcmpgt \
+        _arm_fcmple \
+        _arm_fcmplt \
+        _arm_fcmpne \
+        _arm_eqsf2 \
+        _arm_gesf2 \
+
+endif
+
 
 # Group 3: Double precision floating point function objects.
 LIB1ASMFUNCS += \