diff --git a/ADVISORIES b/ADVISORIES
new file mode 100644
index 0000000000..d4e33f2df3
--- /dev/null
+++ b/ADVISORIES
@@ -0,0 +1,2 @@
+For the GNU C Library Security Advisories, see the git master branch:
+https://sourceware.org/git/?p=glibc.git;a=tree;f=advisories;hb=HEAD
diff --git a/Makefile b/Makefile
index 7052b46df8..2e351c0321 100644
--- a/Makefile
+++ b/Makefile
@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
 	$(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \
 	$(evaluate-test)
 
+# Link libc.a as a whole to verify that it does not contain multiple
+# definitions of any symbols.
+tests-special += $(objpfx)link-static-libc.out
+$(objpfx)link-static-libc.out:
+	$(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
+	$(evaluate-test)
+
 # Print test summary for tests in $1 .sum file;
 # $2 is optional test identifier.
 # Fail if there are unexpected failures in the test results.
diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001
deleted file mode 100644
index 3d19c91b6a..0000000000
--- a/advisories/GLIBC-SA-2023-0001
+++ /dev/null
@@ -1,14 +0,0 @@
-printf: incorrect output for integers with thousands separator and width field
-
-When the printf family of functions is called with a format specifier
-that uses an <apostrophe> (enable grouping) and a minimum width
-specifier, the resulting output could be larger than reasonably expected
-by a caller that computed a tight bound on the buffer size.  The
-resulting larger than expected output could result in a buffer overflow
-in the printf family of functions.
-
-CVE-Id: CVE-2023-25139
-Public-Date: 2023-02-02
-Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37)
-Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38)
-Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4)
diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002
deleted file mode 100644
index 5122669a64..0000000000
--- a/advisories/GLIBC-SA-2023-0002
+++ /dev/null
@@ -1,15 +0,0 @@
-getaddrinfo: Stack read overflow in no-aaaa mode
-
-If the system is configured in no-aaaa mode via /etc/resolv.conf,
-getaddrinfo is called for the AF_UNSPEC address family, and a DNS
-response is received over TCP that is larger than 2048 bytes,
-getaddrinfo may potentially disclose stack contents via the returned
-address data, or crash.
-
-CVE-Id: CVE-2023-4527
-Public-Date: 2023-09-12
-Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36)
-Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39)
-Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113)
-Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38)
-Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19)
diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003
deleted file mode 100644
index d3aef80348..0000000000
--- a/advisories/GLIBC-SA-2023-0003
+++ /dev/null
@@ -1,15 +0,0 @@
-getaddrinfo: Potential use-after-free
-
-When an NSS plugin only implements the _gethostbyname2_r and
-_getcanonname_r callbacks, getaddrinfo could use memory that was freed
-during buffer resizing, potentially causing a crash or read or write to
-arbitrary memory.
-
-CVE-Id: CVE-2023-4806
-Public-Date: 2023-09-12
-Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39)
-Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420)
-Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270)
-Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115)
-Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39)
-Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20)
diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004
deleted file mode 100644
index 5286a7aa54..0000000000
--- a/advisories/GLIBC-SA-2023-0004
+++ /dev/null
@@ -1,16 +0,0 @@
-tunables: local privilege escalation through buffer overflow
-
-If a tunable of the form NAME=NAME=VAL is passed in the environment of a
-setuid program and NAME is valid, it may result in a buffer overflow,
-which could be exploited to achieve escalated privileges.  This flaw was
-introduced in glibc 2.34.
-
-CVE-Id: CVE-2023-4911
-Public-Date: 2023-10-03
-Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34)
-Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39)
-Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423)
-Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274)
-Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118)
-Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45)
-Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27)
diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005
deleted file mode 100644
index cc4eb90b82..0000000000
--- a/advisories/GLIBC-SA-2023-0005
+++ /dev/null
@@ -1,18 +0,0 @@
-getaddrinfo: DoS due to memory leak
-
-The fix for CVE-2023-4806 introduced a memory leak when an application
-calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED
-flags set.
-
-CVE-Id: CVE-2023-5156
-Public-Date: 2023-09-25
-Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420)
-Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270)
-Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115)
-Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39)
-Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20)
-Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421)
-Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272)
-Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116)
-Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42)
-Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24)
diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001
deleted file mode 100644
index 28931c75ae..0000000000
--- a/advisories/GLIBC-SA-2024-0001
+++ /dev/null
@@ -1,15 +0,0 @@
-syslog: Heap buffer overflow in __vsyslog_internal
-
-__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER
-containing a long program name failed to update the required buffer
-size, leading to the allocation and overflow of a too-small buffer on
-the heap.
-
-CVE-Id: CVE-2023-6246
-Public-Date: 2024-01-30
-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
-Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39)
-Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42)
-Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57)
-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
-Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126)
diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002
deleted file mode 100644
index 940bfcf2fc..0000000000
--- a/advisories/GLIBC-SA-2024-0002
+++ /dev/null
@@ -1,15 +0,0 @@
-syslog: Heap buffer overflow in __vsyslog_internal
-
-__vsyslog_internal used the return value of snprintf/vsnprintf to
-calculate buffer sizes for memory allocation.  If these functions (for
-any reason) failed and returned -1, the resulting buffer would be too
-small to hold output.
-
-CVE-Id: CVE-2023-6779
-Public-Date: 2024-01-30
-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
-Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39)
-Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43)
-Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58)
-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
-Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127)
diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003
deleted file mode 100644
index b43a5150ab..0000000000
--- a/advisories/GLIBC-SA-2024-0003
+++ /dev/null
@@ -1,13 +0,0 @@
-syslog: Integer overflow in __vsyslog_internal
-
-__vsyslog_internal calculated a buffer size by adding two integers, but
-did not first check if the addition would overflow.
-
-CVE-Id: CVE-2023-6780
-Public-Date: 2024-01-30
-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37)
-Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39)
-Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44)
-Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59)
-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16)
-Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128)
diff --git a/advisories/README b/advisories/README
deleted file mode 100644
index 94e68b1350..0000000000
--- a/advisories/README
+++ /dev/null
@@ -1,73 +0,0 @@
-GNU C Library Security Advisory Format
-======================================
-
-Security advisories in this directory follow a simple git commit log
-format, with a heading and free-format description augmented with tags
-to allow parsing key information.  References to code changes are
-specific to the glibc repository and follow a specific format:
-
-  Tag-name: <commit-ref> (release-version)
-
-The <commit-ref> indicates a specific commit in the repository.  The
-release-version indicates the publicly consumable release in which this
-commit is known to exist.  The release-version is derived from the
-git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and
-is of the form 2.34-NNN.  If the -NNN suffix is absent, it means that
-the change is in that release tarball, otherwise the change is on the
-release/2.YY/master branch and not in any released tarball.
-
-The following tags are currently being used:
-
-CVE-Id:
-This is the CVE-Id assigned under the CVE Program
-(https://www.cve.org/).
-
-Public-Date:
-The date this issue became publicly known.
-
-Vulnerable-Commit:
-The commit that introduced this vulnerability.  There could be multiple
-entries, one for each release branch in the glibc repository; the
-release-version portion of this tag should tell you which branch this is
-on.
-
-Fix-Commit:
-The commit that fixed this vulnerability.  There could be multiple
-entries for each release branch in the glibc repository, indicating that
-all of those commits contributed to fixing that issue in each of those
-branches.
-
-Adding an Advisory
-------------------
-
-An advisory for a CVE needs to be added on the master branch in two steps:
-
-1. Add the text of the advisory without any Fix-Commit tags along with
-   the fix for the CVE.  Add the Vulnerable-Commit tag, if applicable.
-   The advisories directory does not exist in release branches, so keep
-   the advisory text commit distinct from the code changes, to ease
-   backports.  Ask for the GLIBC-SA advisory number from the security
-   team.
-
-2. Finish all backports on release branches and then back on the msater
-   branch, add all commit refs to the advisory using the Fix-Commit
-   tags.  Don't bother adding the release-version subscript since the
-   next step will overwrite it.
-
-3. Run the process-advisories.sh script in the scripts directory on the
-   advisory:
-
-     scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN
-
-   (replace YYYY-NNNN with the actual advisory number).
-
-4. Verify the updated advisory and push the result.
-
-Getting a NEWS snippet from advisories
---------------------------------------
-
-Run:
-
-  scripts/process-advisories.sh news
-
-and copy the content into the NEWS file.
diff --git a/bits/wordsize.h b/bits/wordsize.h
index 14edae3a11..53013a9275 100644
--- a/bits/wordsize.h
+++ b/bits/wordsize.h
@@ -21,7 +21,9 @@
 #define __WORDSIZE32_PTRDIFF_LONG
 
 /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in
-   struct lastlog and struct utmp{,x} on 64-bit ports.  This may be done in
+   struct lastlog and struct utmp{,x}.  This may be done in
    order to make 64-bit ports compatible with 32-bit ports.  Set to 0 for
-   64-bit ports where the time types are 64-bits or for any 32-bit ports.  */
+   64-bit ports where the time types are 64-bits and new 32-bit ports
+   where time_t is 64 bits, and there is no companion architecture with
+   32-bit time_t.  */
 #define __WORDSIZE_TIME64_COMPAT32
diff --git a/config.h.in b/config.h.in
index 44a34072a4..1e647de585 100644
--- a/config.h.in
+++ b/config.h.in
@@ -141,6 +141,9 @@
 /* LOONGARCH floating-point ABI for ld.so.  */
 #undef LOONGARCH_ABI_FRLEN
 
+/* Define whether ARM used hard-float and support VFPvX-D32.  */
+#undef HAVE_ARM_PCS_VFP_D32
+
 /* Linux specific: minimum supported kernel version.  */
 #undef	__LINUX_KERNEL_VERSION
 
@@ -283,6 +286,9 @@
 /* Define if x86 ISA level should be included in shared libraries.  */
 #undef INCLUDE_X86_ISA_LEVEL
 
+/* The x86 ISA level.  1 for baseline.  Undefined on non-x86.  */
+#undef MINIMUM_X86_ISA_LEVEL
+
 /* Define if -msahf is enabled by default on x86.  */
 #undef HAVE_X86_LAHF_SAHF
 
diff --git a/configure b/configure
index 59ff1e415d..432e40a592 100755
--- a/configure
+++ b/configure
@@ -653,7 +653,7 @@ LIBGD
 libc_cv_cc_loop_to_function
 libc_cv_cc_submachine
 libc_cv_cc_nofma
-libc_cv_mtls_dialect_gnu2
+libc_cv_mtls_descriptor
 libc_cv_has_glob_dat
 libc_cv_fpie
 libc_cv_z_execstack
@@ -4760,6 +4760,9 @@ libc_config_ok=no
 # whether to use such directories.
 with_fp_cond=1
 
+# A preconfigure script may define another name to TLS descriptor variant
+mtls_descriptor=gnu2
+
 if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
 then
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
@@ -7006,9 +7009,9 @@ fi
 printf "%s\n" "$libc_cv_has_glob_dat" >&6; }
 
 
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5
-printf %s "checking for -mtls-dialect=gnu2... " >&6; }
-if test ${libc_cv_mtls_dialect_gnu2+y}
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5
+printf %s "checking for tls descriptor support... " >&6; }
+if test ${libc_cv_mtls_descriptor+y}
 then :
   printf %s "(cached) " >&6
 else $as_nop
@@ -7019,25 +7022,25 @@ void foo (void)
   i = 10;
 }
 EOF
-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
-		   conftest.c -o conftest 1>&5'
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
+		   -shared conftest.c -o conftest 1>&5'
   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
   (eval $ac_try) 2>&5
   ac_status=$?
   printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; }
 then
-  libc_cv_mtls_dialect_gnu2=yes
+  libc_cv_mtls_descriptor=$mtls_descriptor
 else
-  libc_cv_mtls_dialect_gnu2=no
+  libc_cv_mtls_descriptor=no
 fi
 rm -f conftest*
 fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5
-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5
+printf "%s\n" "$libc_cv_mtls_descriptor" >&6; }
 
 config_vars="$config_vars
-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2"
+have-mtls-descriptor = $libc_cv_mtls_descriptor"
 
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5
 printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; }
diff --git a/configure.ac b/configure.ac
index 65799e5685..bdc385d03c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -442,6 +442,9 @@ libc_config_ok=no
 # whether to use such directories.
 with_fp_cond=1
 
+# A preconfigure script may define another name to TLS descriptor variant
+mtls_descriptor=gnu2
+
 dnl Let sysdeps/*/preconfigure act here.
 LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
 
@@ -1287,7 +1290,7 @@ fi
 rm -f conftest*])
 AC_SUBST(libc_cv_has_glob_dat)
 
-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2,
+AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor,
 [dnl
 cat > conftest.c <<EOF
 __thread int i;
@@ -1296,16 +1299,16 @@ void foo (void)
   i = 10;
 }
 EOF
-if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
-		   conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
+		   -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
 then
-  libc_cv_mtls_dialect_gnu2=yes
+  libc_cv_mtls_descriptor=$mtls_descriptor
 else
-  libc_cv_mtls_dialect_gnu2=no
+  libc_cv_mtls_descriptor=no
 fi
 rm -f conftest*])
-AC_SUBST(libc_cv_mtls_dialect_gnu2)
-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2])
+AC_SUBST(libc_cv_mtls_descriptor)
+LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor])
 
 dnl clang emits an warning for a double alias redirection, to warn the
 dnl original symbol is sed even when weak definition overrides it.
diff --git a/elf/Makefile b/elf/Makefile
index 5d78b659ce..a50a988e73 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -170,6 +170,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os))
 CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines))
 
 # Add the requested compiler flags to the early startup code.
+CFLAGS-dl-misc.os += $(rtld-early-cflags)
 CFLAGS-dl-printf.os += $(rtld-early-cflags)
 CFLAGS-dl-setup_hash.os += $(rtld-early-cflags)
 CFLAGS-dl-sysdep.os += $(rtld-early-cflags)
@@ -424,6 +425,7 @@ tests += \
   tst-glibc-hwcaps-prepend \
   tst-global1 \
   tst-global2 \
+  tst-gnu2-tls2 \
   tst-initfinilazyfail \
   tst-initorder \
   tst-initorder2 \
@@ -846,6 +848,9 @@ modules-names += \
   tst-filterobj-flt \
   tst-finilazyfailmod \
   tst-globalmod2 \
+  tst-gnu2-tls2mod0 \
+  tst-gnu2-tls2mod1 \
+  tst-gnu2-tls2mod2 \
   tst-initlazyfailmod \
   tst-initorder2a \
   tst-initorder2b \
@@ -995,13 +1000,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\
 # For +depfiles in Makerules.
 extra-test-objs += tst-auditmod17.os
 
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
 tests += tst-gnu2-tls1
 modules-names += tst-gnu2-tls1mod
 $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
 tst-gnu2-tls1mod.so-no-z-defs = yes
-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
-endif # $(have-mtls-dialect-gnu2)
+CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor)
+endif # $(have-mtls-descriptor)
 
 ifeq (yes,$(have-protected-data))
 modules-names += tst-protected1moda tst-protected1modb
@@ -2968,11 +2973,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \
 $(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
 			    $(objpfx)tst-audit-tlsdesc-mod2.so \
 			    $(shared-thread-library)
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
 # The test is valid for all TLS types, but we want to exercise GNU2
 # TLS if possible.
-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
+CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
 endif
 $(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
 $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
@@ -3044,8 +3049,18 @@ $(objpfx)tst-tlsgap.out: \
   $(objpfx)tst-tlsgap-mod0.so \
   $(objpfx)tst-tlsgap-mod1.so \
   $(objpfx)tst-tlsgap-mod2.so
-ifeq (yes,$(have-mtls-dialect-gnu2))
-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
+
+$(objpfx)tst-gnu2-tls2: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2.out: \
+  $(objpfx)tst-gnu2-tls2mod0.so \
+  $(objpfx)tst-gnu2-tls2mod1.so \
+  $(objpfx)tst-gnu2-tls2mod2.so
+
+ifneq (no,$(have-mtls-descriptor))
+CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
 endif
diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
index 7345ebc4e5..aaf67b87e8 100644
--- a/elf/dl-diagnostics.c
+++ b/elf/dl-diagnostics.c
@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ)
   _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
   _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
   _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
+  _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3));
+  _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4));
   _dl_diagnostics_print_labeled_string
     ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
   _dl_diagnostics_print_labeled_value
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 2f502c8b0d..451932dd03 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr;
 size_t _dl_phnum;
 uint64_t _dl_hwcap;
 uint64_t _dl_hwcap2;
+uint64_t _dl_hwcap3;
+uint64_t _dl_hwcap4;
 
 enum dso_sort_algorithm _dl_dso_sort_algo;
 
diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
index 03e1a68675..614ac9c047 100644
--- a/elf/dl-tunables.c
+++ b/elf/dl-tunables.c
@@ -32,6 +32,7 @@
 #include <ldsodefs.h>
 #include <array_length.h>
 #include <dl-minimal-malloc.h>
+#include <dl-symbol-redir-ifunc.h>
 
 #define TUNABLES_INTERNAL 1
 #include "dl-tunables.h"
@@ -223,6 +224,7 @@ parse_tunables_string (const char *valstring, struct tunable_toset_t *tunables)
 	    {
 	      tunables[ntunables++] =
 		(struct tunable_toset_t) { cur, value, p - value };
+
 	      break;
 	    }
 	}
@@ -234,23 +236,27 @@ parse_tunables_string (const char *valstring, struct tunable_toset_t *tunables)
 static void
 parse_tunables (const char *valstring)
 {
-  struct tunable_toset_t tunables[tunables_list_size];
-  int ntunables = parse_tunables_string (valstring, tunables);
-  if (ntunables == -1)
+  struct tunable_toset_t tunables[tunables_list_size] = { 0 };
+  if (parse_tunables_string (valstring, tunables) == -1)
     {
       _dl_error_printf (
         "WARNING: ld.so: invalid GLIBC_TUNABLES `%s': ignored.\n", valstring);
       return;
     }
 
-  for (int i = 0; i < ntunables; i++)
-    if (!tunable_initialize (tunables[i].t, tunables[i].value,
-			     tunables[i].len))
-      _dl_error_printf ("WARNING: ld.so: invalid GLIBC_TUNABLES value `%.*s' "
-		       "for option `%s': ignored.\n",
-		       (int) tunables[i].len,
-		       tunables[i].value,
-		       tunables[i].t->name);
+  for (int i = 0; i < tunables_list_size; i++)
+    {
+      if (tunables[i].t == NULL)
+	continue;
+
+      if (!tunable_initialize (tunables[i].t, tunables[i].value,
+			       tunables[i].len))
+	_dl_error_printf ("WARNING: ld.so: invalid GLIBC_TUNABLES value `%.*s' "
+			  "for option `%s': ignored.\n",
+			  (int) tunables[i].len,
+			  tunables[i].value,
+			  tunables[i].t->name);
+    }
 }
 
 /* Initialize the tunables list from the environment.  For now we only use the
diff --git a/elf/elf.h b/elf/elf.h
index 455731663c..1c394c64cd 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1234,6 +1234,10 @@ typedef struct
 #define AT_RSEQ_FEATURE_SIZE	27	/* rseq supported feature size.  */
 #define AT_RSEQ_ALIGN	28		/* rseq allocation alignment.  */
 
+/* More machine-dependent hints about processor capabilities.  */
+#define AT_HWCAP3	29		/* extension of AT_HWCAP.  */
+#define AT_HWCAP4	30		/* extension of AT_HWCAP.  */
+
 #define AT_EXECFN	31		/* Filename of executable.  */
 
 /* Pointer to the global system page used for system calls and other
diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c
new file mode 100644
index 0000000000..7ac04d7f33
--- /dev/null
+++ b/elf/tst-gnu2-tls2.c
@@ -0,0 +1,122 @@
+/* Test TLSDESC relocation.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+#include <support/check.h>
+#include <support/test-driver.h>
+#include "tst-gnu2-tls2.h"
+
+#ifndef IS_SUPPORTED
+# define IS_SUPPORTED() true
+#endif
+
+/* An architecture can define it to clobber caller-saved registers in
+   malloc below to verify that the implicit TLSDESC call won't change
+   caller-saved registers.  */
+#ifndef PREPARE_MALLOC
+# define PREPARE_MALLOC()
+#endif
+
+extern void * __libc_malloc (size_t);
+
+size_t malloc_counter = 0;
+
+void *
+malloc (size_t n)
+{
+  PREPARE_MALLOC ();
+  malloc_counter++;
+  return __libc_malloc (n);
+}
+
+static void *mod[3];
+#ifndef MOD
+# define MOD(i) "tst-gnu2-tls2mod" #i ".so"
+#endif
+static const char *modname[3] = { MOD(0), MOD(1), MOD(2) };
+#undef MOD
+
+static void
+open_mod (int i)
+{
+  mod[i] = xdlopen (modname[i], RTLD_LAZY);
+  printf ("open %s\n", modname[i]);
+}
+
+static void
+close_mod (int i)
+{
+  xdlclose (mod[i]);
+  mod[i] = NULL;
+  printf ("close %s\n", modname[i]);
+}
+
+static void
+access_mod (int i, const char *sym)
+{
+  struct tls var = { -1, -1, -1, -1 };
+  struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym);
+  /* Check that our malloc is called.  */
+  malloc_counter = 0;
+  struct tls *p = f (&var);
+  TEST_VERIFY (malloc_counter != 0);
+  printf ("access %s: %s() = %p\n", modname[i], sym, p);
+  TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0);
+  ++(p->a);
+}
+
+static void *
+start (void *arg)
+{
+  /* The DTV generation is at the last dlopen of mod0 and the
+     entry for mod1 is NULL.  */
+
+  open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS.  */
+
+  /* Force the slow path in GNU2 TLS descriptor call.  */
+  access_mod (1, "apply_tls");
+
+  return arg;
+}
+
+static int
+do_test (void)
+{
+  if (!IS_SUPPORTED ())
+    return EXIT_UNSUPPORTED;
+
+  open_mod (0);
+  open_mod (1);
+  open_mod (2);
+  close_mod (0);
+  close_mod (1); /* Create modid gap at mod1.  */
+  open_mod (0); /* Reuse modid of mod0, bump generation count.  */
+
+  /* Create a thread where DTV of mod1 is NULL.  */
+  pthread_t t = xpthread_create (NULL, start, NULL);
+  xpthread_join (t);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
new file mode 100644
index 0000000000..1ade8151e2
--- /dev/null
+++ b/elf/tst-gnu2-tls2.h
@@ -0,0 +1,40 @@
+/* Test TLSDESC relocation.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+
+struct tls
+{
+  int64_t a, b, c, d;
+};
+
+extern struct tls *apply_tls (struct tls *);
+
+/* An architecture can define them to verify that clobber caller-saved
+   registers aren't changed by the implicit TLSDESC call.  */
+#ifndef INIT_TLSDESC_CALL
+# define INIT_TLSDESC_CALL()
+#endif
+
+#ifndef BEFORE_TLSDESC_CALL
+# define BEFORE_TLSDESC_CALL()
+#endif
+
+#ifndef AFTER_TLSDESC_CALL
+# define AFTER_TLSDESC_CALL()
+#endif
diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
new file mode 100644
index 0000000000..3fe3c14277
--- /dev/null
+++ b/elf/tst-gnu2-tls2mod0.c
@@ -0,0 +1,32 @@
+/* DSO used by tst-gnu2-tls2.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2.h>
+
+__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
+
+struct tls *
+apply_tls (struct tls *p)
+{
+  INIT_TLSDESC_CALL ();
+  BEFORE_TLSDESC_CALL ();
+  tls_var0 = *p;
+  struct tls *ret = &tls_var0;
+  AFTER_TLSDESC_CALL ();
+  return ret;
+}
diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
new file mode 100644
index 0000000000..e210538468
--- /dev/null
+++ b/elf/tst-gnu2-tls2mod1.c
@@ -0,0 +1,32 @@
+/* DSO used by tst-gnu2-tls2.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2.h>
+
+__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
+
+struct tls *
+apply_tls (struct tls *p)
+{
+  INIT_TLSDESC_CALL ();
+  BEFORE_TLSDESC_CALL ();
+  tls_var1[1] = *p;
+  struct tls *ret = &tls_var1[1];
+  AFTER_TLSDESC_CALL ();
+  return ret;
+}
diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
new file mode 100644
index 0000000000..6d3031dc5f
--- /dev/null
+++ b/elf/tst-gnu2-tls2mod2.c
@@ -0,0 +1,32 @@
+/* DSO used by tst-gnu2-tls2.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <tst-gnu2-tls2.h>
+
+__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
+
+struct tls *
+apply_tls (struct tls *p)
+{
+  INIT_TLSDESC_CALL ();
+  BEFORE_TLSDESC_CALL ();
+  tls_var2 = *p;
+  struct tls *ret = &tls_var2;
+  AFTER_TLSDESC_CALL ();
+  return ret;
+}
diff --git a/elf/tst-tunables.c b/elf/tst-tunables.c
index 095b5c81d9..dff34ed748 100644
--- a/elf/tst-tunables.c
+++ b/elf/tst-tunables.c
@@ -17,6 +17,10 @@
    <https://www.gnu.org/licenses/>.  */
 
 #include <array_length.h>
+/* The test uses the tunable_list size, which is only exported for
+   ld.so.  This will result in a copy of tunable_list, which is ununsed by
+   the test itself.  */
+#define TUNABLES_INTERNAL 1
 #include <dl-tunables.h>
 #include <getopt.h>
 #include <intprops.h>
@@ -24,12 +28,13 @@
 #include <stdlib.h>
 #include <support/capture_subprocess.h>
 #include <support/check.h>
+#include <support/support.h>
 
 static int restart;
 #define CMDLINE_OPTIONS \
   { "restart", no_argument, &restart, 1 },
 
-static const struct test_t
+static struct test_t
 {
   const char *name;
   const char *value;
@@ -284,6 +289,29 @@ static const struct test_t
     0,
     0,
   },
+  /* Also check for repeated tunables with a count larger than the total number
+     of tunables.  */
+  {
+    "GLIBC_TUNABLES",
+    NULL,
+    2,
+    0,
+    0,
+  },
+  {
+    "GLIBC_TUNABLES",
+    NULL,
+    1,
+    0,
+    0,
+  },
+  {
+    "GLIBC_TUNABLES",
+    NULL,
+    0,
+    0,
+    0,
+  },
 };
 
 static int
@@ -327,6 +355,37 @@ do_test (int argc, char *argv[])
     spargv[i] = NULL;
   }
 
+  /* Create a tunable line with the duplicate values with a total number
+     larger than the different number of tunables.  */
+  {
+    enum { tunables_list_size = array_length (tunable_list) };
+    const char *value = "";
+    for (int i = 0; i < tunables_list_size; i++)
+      value = xasprintf ("%sglibc.malloc.check=2%c",
+			 value,
+			 i == (tunables_list_size - 1) ? '\0' : ':');
+    tests[33].value = value;
+  }
+  /* Same as before, but the last tunable values is differen than the
+     rest.  */
+  {
+    enum { tunables_list_size = array_length (tunable_list) };
+    const char *value = "";
+    for (int i = 0; i < tunables_list_size - 1; i++)
+      value = xasprintf ("%sglibc.malloc.check=2:", value);
+    value = xasprintf ("%sglibc.malloc.check=1", value);
+    tests[34].value = value;
+  }
+  /* Same as before, but with an invalid last entry.  */
+  {
+    enum { tunables_list_size = array_length (tunable_list) };
+    const char *value = "";
+    for (int i = 0; i < tunables_list_size - 1; i++)
+      value = xasprintf ("%sglibc.malloc.check=2:", value);
+    value = xasprintf ("%sglibc.malloc.check=1=1", value);
+    tests[35].value = value;
+  }
+
   for (int i = 0; i < array_length (tests); i++)
     {
       snprintf (nteststr, sizeof nteststr, "%d", i);
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index ea019ce5c0..7196a8744b 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared))
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
 	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
-	bug-iconv13 bug-iconv14 bug-iconv15
+	bug-iconv13 bug-iconv14 bug-iconv15 \
+	tst-iconv-iso-2022-cn-ext
 ifeq ($(have-thread-library),yes)
 tests += bug-iconv3
 endif
@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
 			  $(addprefix $(objpfx),$(modules.so))
 $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
 			  $(addprefix $(objpfx),$(modules.so))
+$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \
+					$(addprefix $(objpfx),$(modules.so))
 
 $(objpfx)iconv-test.out: run-iconv-test.sh \
 			 $(addprefix $(objpfx), $(gconv-modules)) \
diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c
index b34c8a36f4..cce29b1969 100644
--- a/iconvdata/iso-2022-cn-ext.c
+++ b/iconvdata/iso-2022-cn-ext.c
@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
 	      {								      \
 		const char *escseq;					      \
 									      \
+		if (outptr + 4 > outend)				      \
+		  {							      \
+		    result = __GCONV_FULL_OUTPUT;			      \
+		    break;						      \
+		  }							      \
+									      \
 		assert (used == CNS11643_2_set); /* XXX */		      \
 		escseq = "*H";						      \
 		*outptr++ = ESC;					      \
@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
 	      {								      \
 		const char *escseq;					      \
 									      \
+		if (outptr + 4 > outend)				      \
+		  {							      \
+		    result = __GCONV_FULL_OUTPUT;			      \
+		    break;						      \
+		  }							      \
+									      \
 		assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \
 		escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \
 		*outptr++ = ESC;					      \
diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c
new file mode 100644
index 0000000000..96a8765fd5
--- /dev/null
+++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c
@@ -0,0 +1,128 @@
+/* Verify ISO-2022-CN-EXT does not write out of the bounds.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <errno.h>
+#include <iconv.h>
+#include <sys/mman.h>
+
+#include <support/xunistd.h>
+#include <support/check.h>
+#include <support/support.h>
+
+/* The test sets up a two memory page buffer with the second page marked
+   PROT_NONE to trigger a fault if the conversion writes beyond the exact
+   expected amount.  Then we carry out various conversions and precisely
+   place the start of the output buffer in order to trigger a SIGSEGV if the
+   process writes anywhere between 1 and page sized bytes more (only one
+   PROT_NONE page is setup as a canary) than expected.  These tests exercise
+   all three of the cases in ISO-2022-CN-EXT where the converter must switch
+   character sets and may run out of buffer space while doing the
+   operation.  */
+
+static int
+do_test (void)
+{
+  iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8");
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+  char *ntf;
+  size_t ntfsize;
+  char *outbufbase;
+  {
+    int pgz = getpagesize ();
+    TEST_VERIFY_EXIT (pgz > 0);
+    ntfsize = 2 * pgz;
+
+    ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE
+		 | MAP_ANONYMOUS, -1);
+    xmprotect (ntf + pgz, pgz, PROT_NONE);
+
+    outbufbase = ntf + pgz;
+  }
+
+  /* Check if SOdesignation escape sequence does not trigger an OOB write.  */
+  {
+    char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2";
+
+    for (int i = 0; i < 9; i++)
+      {
+	char *inp = inbuf;
+	size_t inleft = sizeof (inbuf) - 1;
+
+	char *outp = outbufbase - i;
+	size_t outleft = i;
+
+	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+			  == (size_t) -1);
+	TEST_COMPARE (errno, E2BIG);
+
+	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+      }
+  }
+
+  /* Same as before for SS2designation.  */
+  {
+    char inbuf[] = "㴽 \xe3\xb4\xbd";
+
+    for (int i = 0; i < 14; i++)
+      {
+	char *inp = inbuf;
+	size_t inleft = sizeof (inbuf) - 1;
+
+	char *outp = outbufbase - i;
+	size_t outleft = i;
+
+	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+			  == (size_t) -1);
+	TEST_COMPARE (errno, E2BIG);
+
+	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+      }
+  }
+
+  /* Same as before for SS3designation.  */
+  {
+    char inbuf[] = "劄 \xe5\x8a\x84";
+
+    for (int i = 0; i < 14; i++)
+      {
+	char *inp = inbuf;
+	size_t inleft = sizeof (inbuf) - 1;
+
+	char *outp = outbufbase - i;
+	size_t outleft = i;
+
+	TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft)
+			  == (size_t) -1);
+	TEST_COMPARE (errno, E2BIG);
+
+	TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0);
+      }
+  }
+
+  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
+
+  xmunmap (ntf, ntfsize);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/login/Makefile b/login/Makefile
index 1e22008a61..f91190e3dc 100644
--- a/login/Makefile
+++ b/login/Makefile
@@ -44,7 +44,9 @@ subdir-dirs = programs
 vpath %.c programs
 
 tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \
-  tst-pututxline-lockfail tst-pututxline-cache
+  tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64
+
+CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64
 
 # Empty compatibility library for old binaries.
 extra-libs      := libutil
diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c
new file mode 100644
index 0000000000..7a581a4c12
--- /dev/null
+++ b/login/tst-utmp-size-64.c
@@ -0,0 +1,2 @@
+/* The on-disk layout must not change in time64 mode.  */
+#include "tst-utmp-size.c"
diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c
new file mode 100644
index 0000000000..1b7f7ff042
--- /dev/null
+++ b/login/tst-utmp-size.c
@@ -0,0 +1,33 @@
+/* Check expected sizes of struct utmp, struct utmpx, struct lastlog.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <utmp.h>
+#include <utmpx.h>
+#include <utmp-size.h>
+
+static int
+do_test (void)
+{
+  _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size");
+  _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size");
+  _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE,
+                  "struct lastlog size");
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/manual/stdbit.texi b/manual/stdbit.texi
index fe41c671d8..6c75ed9a20 100644
--- a/manual/stdbit.texi
+++ b/manual/stdbit.texi
@@ -32,7 +32,13 @@ and @code{unsigned long long int}.  In addition, there is a
 corresponding type-generic macro (not listed below), named the same as
 the functions but without any suffix such as @samp{_uc}.  The
 type-generic macro can only be used with an argument of an unsigned
-integer type with a width of 8, 16, 32 or 64 bits.
+integer type with a width of 8, 16, 32 or 64 bits, or when using
+a compiler with support for
+@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}},
+etc.@:, built-in functions such as GCC 14.1 or later
+any unsigned integer type those built-in functions support.
+In GCC 14.1 that includes support for @code{unsigned __int128} and
+@code{unsigned _BitInt(@var{n})} if supported by the target.
 
 @deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x})
 @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x})
diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
index 0c6e46f15c..01d554af9c 100644
--- a/nscd/netgroupcache.c
+++ b/nscd/netgroupcache.c
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/mman.h>
+#include <scratch_buffer.h>
 
 #include "../nss/netgroup.h"
 #include "nscd.h"
@@ -65,6 +66,16 @@ struct dataset
   char strdata[0];
 };
 
+/* Send a notfound response to FD.  Always returns -1 to indicate an
+   ephemeral error.  */
+static time_t
+send_notfound (int fd)
+{
+  if (fd != -1)
+    TEMP_FAILURE_RETRY (send (fd, &notfound, sizeof (notfound), MSG_NOSIGNAL));
+  return -1;
+}
+
 /* Sends a notfound message and prepares a notfound dataset to write to the
    cache.  Returns true if there was enough memory to allocate the dataset and
    returns the dataset in DATASETP, total bytes to write in TOTALP and the
@@ -83,8 +94,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
   total = sizeof (notfound);
   timeout = time (NULL) + db->negtimeout;
 
-  if (fd != -1)
-    TEMP_FAILURE_RETRY (send (fd, &notfound, total, MSG_NOSIGNAL));
+  send_notfound (fd);
 
   dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1);
   /* If we cannot permanently store the result, so be it.  */
@@ -109,11 +119,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req,
   return cacheable;
 }
 
+struct addgetnetgrentX_scratch
+{
+  /* This is the result that the caller should use.  It can be NULL,
+     point into buffer, or it can be in the cache.  */
+  struct dataset *dataset;
+
+  struct scratch_buffer buffer;
+
+  /* Used internally in addgetnetgrentX as a staging area.  */
+  struct scratch_buffer tmp;
+
+  /* Number of bytes in buffer that are actually used.  */
+  size_t buffer_used;
+};
+
+static void
+addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch)
+{
+  scratch->dataset = NULL;
+  scratch_buffer_init (&scratch->buffer);
+  scratch_buffer_init (&scratch->tmp);
+
+  /* Reserve space for the header.  */
+  scratch->buffer_used = sizeof (struct dataset);
+  static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space),
+		 "initial buffer space");
+  memset (scratch->tmp.data, 0, sizeof (struct dataset));
+}
+
+static void
+addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch)
+{
+  scratch_buffer_free (&scratch->buffer);
+  scratch_buffer_free (&scratch->tmp);
+}
+
+/* Copy LENGTH bytes from S into SCRATCH.  Returns NULL if SCRATCH
+   could not be resized, otherwise a pointer to the copy.  */
+static char *
+addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch,
+			  const char *s, size_t length)
+{
+  while (true)
+    {
+      size_t remaining = scratch->buffer.length - scratch->buffer_used;
+      if (remaining >= length)
+	break;
+      if (!scratch_buffer_grow_preserve (&scratch->buffer))
+	return NULL;
+    }
+  char *copy = scratch->buffer.data + scratch->buffer_used;
+  memcpy (copy, s, length);
+  scratch->buffer_used += length;
+  return copy;
+}
+
+/* Copy S into SCRATCH, including its null terminator.  Returns false
+   if SCRATCH could not be resized.  */
+static bool
+addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s)
+{
+  if (s == NULL)
+    s = "";
+  return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL;
+}
+
+/* Caller must initialize and free *SCRATCH.  If the return value is
+   negative, this function has sent a notfound response.  */
 static time_t
 addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
 		 const char *key, uid_t uid, struct hashentry *he,
-		 struct datahead *dh, struct dataset **resultp,
-		 void **tofreep)
+		 struct datahead *dh, struct addgetnetgrentX_scratch *scratch)
 {
   if (__glibc_unlikely (debug_level > 0))
     {
@@ -132,14 +209,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
 
   char *key_copy = NULL;
   struct __netgrent data;
-  size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len);
-  size_t buffilled = sizeof (*dataset);
-  char *buffer = NULL;
   size_t nentries = 0;
   size_t group_len = strlen (key) + 1;
   struct name_list *first_needed
     = alloca (sizeof (struct name_list) + group_len);
-  *tofreep = NULL;
 
   if (netgroup_database == NULL
       && !__nss_database_get (nss_database_netgroup, &netgroup_database))
@@ -147,12 +220,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
       /* No such service.  */
       cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
 			       &key_copy);
-      goto writeout;
+      goto maybe_cache_add;
     }
 
   memset (&data, '\0', sizeof (data));
-  buffer = xmalloc (buflen);
-  *tofreep = buffer;
   first_needed->next = first_needed;
   memcpy (first_needed->name, key, group_len);
   data.needed_groups = first_needed;
@@ -195,8 +266,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
 		while (1)
 		  {
 		    int e;
-		    status = getfct.f (&data, buffer + buffilled,
-				       buflen - buffilled - req->key_len, &e);
+		    status = getfct.f (&data, scratch->tmp.data,
+				       scratch->tmp.length, &e);
 		    if (status == NSS_STATUS_SUCCESS)
 		      {
 			if (data.type == triple_val)
@@ -204,68 +275,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
 			    const char *nhost = data.val.triple.host;
 			    const char *nuser = data.val.triple.user;
 			    const char *ndomain = data.val.triple.domain;
-
-			    size_t hostlen = strlen (nhost ?: "") + 1;
-			    size_t userlen = strlen (nuser ?: "") + 1;
-			    size_t domainlen = strlen (ndomain ?: "") + 1;
-
-			    if (nhost == NULL || nuser == NULL || ndomain == NULL
-				|| nhost > nuser || nuser > ndomain)
-			      {
-				const char *last = nhost;
-				if (last == NULL
-				    || (nuser != NULL && nuser > last))
-				  last = nuser;
-				if (last == NULL
-				    || (ndomain != NULL && ndomain > last))
-				  last = ndomain;
-
-				size_t bufused
-				  = (last == NULL
-				     ? buffilled
-				     : last + strlen (last) + 1 - buffer);
-
-				/* We have to make temporary copies.  */
-				size_t needed = hostlen + userlen + domainlen;
-
-				if (buflen - req->key_len - bufused < needed)
-				  {
-				    buflen += MAX (buflen, 2 * needed);
-				    /* Save offset in the old buffer.  We don't
-				       bother with the NULL check here since
-				       we'll do that later anyway.  */
-				    size_t nhostdiff = nhost - buffer;
-				    size_t nuserdiff = nuser - buffer;
-				    size_t ndomaindiff = ndomain - buffer;
-
-				    char *newbuf = xrealloc (buffer, buflen);
-				    /* Fix up the triplet pointers into the new
-				       buffer.  */
-				    nhost = (nhost ? newbuf + nhostdiff
-					     : NULL);
-				    nuser = (nuser ? newbuf + nuserdiff
-					     : NULL);
-				    ndomain = (ndomain ? newbuf + ndomaindiff
-					       : NULL);
-				    *tofreep = buffer = newbuf;
-				  }
-
-				nhost = memcpy (buffer + bufused,
-						nhost ?: "", hostlen);
-				nuser = memcpy ((char *) nhost + hostlen,
-						nuser ?: "", userlen);
-				ndomain = memcpy ((char *) nuser + userlen,
-						  ndomain ?: "", domainlen);
-			      }
-
-			    char *wp = buffer + buffilled;
-			    wp = memmove (wp, nhost ?: "", hostlen);
-			    wp += hostlen;
-			    wp = memmove (wp, nuser ?: "", userlen);
-			    wp += userlen;
-			    wp = memmove (wp, ndomain ?: "", domainlen);
-			    wp += domainlen;
-			    buffilled = wp - buffer;
+			    if (!(addgetnetgrentX_append (scratch, nhost)
+				  && addgetnetgrentX_append (scratch, nuser)
+				  && addgetnetgrentX_append (scratch, ndomain)))
+			      return send_notfound (fd);
 			    ++nentries;
 			  }
 			else
@@ -317,8 +330,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
 		      }
 		    else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
 		      {
-			buflen *= 2;
-			*tofreep = buffer = xrealloc (buffer, buflen);
+			if (!scratch_buffer_grow (&scratch->tmp))
+			  return send_notfound (fd);
 		      }
 		    else if (status == NSS_STATUS_RETURN
 			     || status == NSS_STATUS_NOTFOUND
@@ -348,13 +361,20 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
     {
       cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout,
 			       &key_copy);
-      goto writeout;
+      goto maybe_cache_add;
     }
 
-  total = buffilled;
+  /* Capture the result size without the key appended.   */
+  total = scratch->buffer_used;
+
+  /* Make a copy of the key.  The scratch buffer must not move after
+     this point.  */
+  key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len);
+  if (key_copy == NULL)
+    return send_notfound (fd);
 
   /* Fill in the dataset.  */
-  dataset = (struct dataset *) buffer;
+  dataset = scratch->buffer.data;
   timeout = datahead_init_pos (&dataset->head, total + req->key_len,
 			       total - offsetof (struct dataset, resp),
 			       he == NULL ? 0 : dh->nreloads + 1,
@@ -363,11 +383,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
   dataset->resp.version = NSCD_VERSION;
   dataset->resp.found = 1;
   dataset->resp.nresults = nentries;
-  dataset->resp.result_len = buffilled - sizeof (*dataset);
-
-  assert (buflen - buffilled >= req->key_len);
-  key_copy = memcpy (buffer + buffilled, key, req->key_len);
-  buffilled += req->key_len;
+  dataset->resp.result_len = total - sizeof (*dataset);
 
   /* Now we can determine whether on refill we have to create a new
      record or not.  */
@@ -398,7 +414,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
     if (__glibc_likely (newp != NULL))
       {
 	/* Adjust pointer into the memory block.  */
-	key_copy = (char *) newp + (key_copy - buffer);
+	key_copy = (char *) newp + (key_copy - (char *) dataset);
 
 	dataset = memcpy (newp, dataset, total + req->key_len);
 	cacheable = true;
@@ -410,14 +426,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
   }
 
   if (he == NULL && fd != -1)
-    {
-      /* We write the dataset before inserting it to the database
-	 since while inserting this thread might block and so would
-	 unnecessarily let the receiver wait.  */
-    writeout:
+    /* We write the dataset before inserting it to the database since
+       while inserting this thread might block and so would
+       unnecessarily let the receiver wait.  */
       writeall (fd, &dataset->resp, dataset->head.recsize);
-    }
 
+ maybe_cache_add:
   if (cacheable)
     {
       /* If necessary, we also propagate the data to disk.  */
@@ -441,7 +455,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
     }
 
  out:
-  *resultp = dataset;
+  scratch->dataset = dataset;
 
   return timeout;
 }
@@ -462,6 +476,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
   if (user != NULL)
     key = strchr (key, '\0') + 1;
   const char *domain = *key++ ? key : NULL;
+  struct addgetnetgrentX_scratch scratch;
+
+  addgetnetgrentX_scratch_init (&scratch);
 
   if (__glibc_unlikely (debug_level > 0))
     {
@@ -477,12 +494,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
 							    group, group_len,
 							    db, uid);
   time_t timeout;
-  void *tofree;
   if (result != NULL)
-    {
-      timeout = result->head.timeout;
-      tofree = NULL;
-    }
+    timeout = result->head.timeout;
   else
     {
       request_header req_get =
@@ -491,7 +504,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
 	  .key_len = group_len
 	};
       timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL,
-				 &result, &tofree);
+				 &scratch);
+      result = scratch.dataset;
+      if (timeout < 0)
+	goto out;
     }
 
   struct indataset
@@ -502,24 +518,26 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
       = (struct indataset *) mempool_alloc (db,
 					    sizeof (*dataset) + req->key_len,
 					    1);
-  struct indataset dataset_mem;
   bool cacheable = true;
   if (__glibc_unlikely (dataset == NULL))
     {
       cacheable = false;
-      dataset = &dataset_mem;
+      /* The alloca is safe because nscd_run_worker verfies that
+	 key_len is not larger than MAXKEYLEN.  */
+      dataset = alloca (sizeof (*dataset) + req->key_len);
     }
 
   datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len,
 		     sizeof (innetgroup_response_header),
-		     he == NULL ? 0 : dh->nreloads + 1, result->head.ttl);
+		     he == NULL ? 0 : dh->nreloads + 1,
+		     result == NULL ? db->negtimeout : result->head.ttl);
   /* Set the notfound status and timeout based on the result from
      getnetgrent.  */
-  dataset->head.notfound = result->head.notfound;
+  dataset->head.notfound = result == NULL || result->head.notfound;
   dataset->head.timeout = timeout;
 
   dataset->resp.version = NSCD_VERSION;
-  dataset->resp.found = result->resp.found;
+  dataset->resp.found = result != NULL && result->resp.found;
   /* Until we find a matching entry the result is 0.  */
   dataset->resp.result = 0;
 
@@ -567,7 +585,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
       goto out;
     }
 
-  if (he == NULL)
+  /* addgetnetgrentX may have already sent a notfound response.  Do
+     not send another one.  */
+  if (he == NULL && dataset->resp.found)
     {
       /* We write the dataset before inserting it to the database
 	 since while inserting this thread might block and so would
@@ -601,7 +621,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req,
     }
 
  out:
-  free (tofree);
+  addgetnetgrentX_scratch_free (&scratch);
   return timeout;
 }
 
@@ -611,11 +631,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req,
 			const char *key, uid_t uid, struct hashentry *he,
 			struct datahead *dh)
 {
-  struct dataset *ignore;
-  void *tofree;
-  time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh,
-				    &ignore, &tofree);
-  free (tofree);
+  struct addgetnetgrentX_scratch scratch;
+  addgetnetgrentX_scratch_init (&scratch);
+  time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch);
+  addgetnetgrentX_scratch_free (&scratch);
+  if (timeout < 0)
+    timeout = 0;
   return timeout;
 }
 
@@ -659,5 +680,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he,
       .key_len = he->len
     };
 
-  return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh);
+  time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner,
+				he, dh);
+  if (timeout < 0)
+    timeout = 0;
+  return timeout;
 }
diff --git a/stdlib/Makefile b/stdlib/Makefile
index d587f054d1..9898cc5d8a 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -308,6 +308,7 @@ tests := \
   tst-setcontext10 \
   tst-setcontext11 \
   tst-stdbit-Wconversion \
+  tst-stdbit-builtins \
   tst-stdc_bit_ceil \
   tst-stdc_bit_floor \
   tst-stdc_bit_width \
diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h
index f334eb174d..2801590c63 100644
--- a/stdlib/stdbit.h
+++ b/stdlib/stdbit.h
@@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_leading_zeros(x)				\
+#if __glibc_has_builtin (__builtin_stdc_leading_zeros)
+# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x))
+#else
+# define stdc_leading_zeros(x)				\
   (stdc_leading_zeros_ull (x)				\
    - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline unsigned int
@@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_leading_ones_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_leading_ones(x)					\
+#if __glibc_has_builtin (__builtin_stdc_leading_ones)
+# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x))
+#else
+# define stdc_leading_ones(x)					\
   (stdc_leading_ones_ull ((unsigned long long int) (x)		\
 			  << 8 * (sizeof (0ULL) - sizeof (x))))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline unsigned int
@@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_trailing_zeros(x)				\
+#if __glibc_has_builtin (__builtin_stdc_trailing_zeros)
+# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x))
+#else
+# define stdc_trailing_zeros(x)				\
   (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x)	\
    : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x)	\
    : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x))	\
    : stdc_trailing_zeros_uc (__pacify_uint8 (x)))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
 static __always_inline unsigned int
@@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
+#if __glibc_has_builtin (__builtin_stdc_trailing_ones)
+# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x))
+#else
+# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
 static __always_inline unsigned int
@@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_first_leading_zero(x)			\
+#if __glibc_has_builtin (__builtin_stdc_first_leading_zero)
+# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x))
+#else
+# define stdc_first_leading_zero(x)			\
   (sizeof (x) == 8 ? stdc_first_leading_zero_ull (x)	\
    : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x)	\
    : sizeof (x) == 2 ? stdc_first_leading_zero_us (__pacify_uint16 (x))	\
    : stdc_first_leading_zero_uc (__pacify_uint8 (x)))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline unsigned int
@@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_first_leading_one(x)			\
+#if __glibc_has_builtin (__builtin_stdc_first_leading_one)
+# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x))
+#else
+# define stdc_first_leading_one(x)			\
   (sizeof (x) == 8 ? stdc_first_leading_one_ull (x)	\
    : sizeof (x) == 4 ? stdc_first_leading_one_ui (x)	\
    : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x))	\
    : stdc_first_leading_one_uc (__pacify_uint8 (x)))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline unsigned int
@@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_first_trailing_zero(x)			\
+#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero)
+# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x))
+#else
+# define stdc_first_trailing_zero(x)			\
   (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x)	\
    : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x)	\
    : sizeof (x) == 2 ? stdc_first_trailing_zero_us (__pacify_uint16 (x)) \
    : stdc_first_trailing_zero_uc (__pacify_uint8 (x)))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
 static __always_inline unsigned int
@@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_first_trailing_one(x)			\
+#if __glibc_has_builtin (__builtin_stdc_first_trailing_one)
+# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x))
+#else
+# define stdc_first_trailing_one(x)			\
   (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x)	\
    : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x)	\
    : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x))	\
    : stdc_first_trailing_one_uc (__pacify_uint8 (x)))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll)
 static __always_inline unsigned int
@@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_count_zeros_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_count_zeros(x)				\
+#if __glibc_has_builtin (__builtin_stdc_count_zeros)
+# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x))
+#else
+# define stdc_count_zeros(x)				\
   (stdc_count_zeros_ull (x)				\
    - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x))))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
 static __always_inline unsigned int
@@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_count_ones_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_count_ones(x) (stdc_count_ones_ull (x))
+#if __glibc_has_builtin (__builtin_stdc_count_ones)
+# define stdc_count_ones(x) (__builtin_stdc_count_ones (x))
+#else
+# define stdc_count_ones(x) (stdc_count_ones_ull (x))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll)
 static __always_inline unsigned int
@@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x)
 __extension__
 extern bool stdc_has_single_bit_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_has_single_bit(x)				\
+#if __glibc_has_builtin (__builtin_stdc_has_single_bit)
+# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x))
+#else
+# define stdc_has_single_bit(x)				\
   ((bool) (sizeof (x) <= sizeof (unsigned int)		\
 	   ? stdc_has_single_bit_ui (x)			\
 	   : stdc_has_single_bit_ull (x)))
+#endif
 
 static __always_inline bool
 __hsb64_inline (uint64_t __x)
@@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x)
 __extension__
 extern unsigned int stdc_bit_width_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_bit_width(x) (stdc_bit_width_ull (x))
+#if __glibc_has_builtin (__builtin_stdc_bit_width)
+# define stdc_bit_width(x) (__builtin_stdc_bit_width (x))
+#else
+# define stdc_bit_width(x) (stdc_bit_width_ull (x))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline unsigned int
@@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x)
 __extension__
 extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
+#if __glibc_has_builtin (__builtin_stdc_bit_floor)
+# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x))
+#else
+# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline uint64_t
@@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x)
 __extension__
 extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x)
      __THROW __attribute_const__;
-#define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
+#if __glibc_has_builtin (__builtin_stdc_bit_ceil)
+# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x))
+#else
+# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x))
+#endif
 
 #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll)
 static __always_inline uint64_t
diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c
new file mode 100644
index 0000000000..536841ca8a
--- /dev/null
+++ b/stdlib/tst-stdbit-builtins.c
@@ -0,0 +1,778 @@
+/* Test <stdbit.h> type-generic macros with compiler __builtin_stdc_* support.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdbit.h>
+#include <limits.h>
+#include <support/check.h>
+
+#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \
+    && __glibc_has_builtin (__builtin_stdc_leading_ones) \
+    && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \
+    && __glibc_has_builtin (__builtin_stdc_trailing_ones) \
+    && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \
+    && __glibc_has_builtin (__builtin_stdc_first_leading_one) \
+    && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \
+    && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \
+    && __glibc_has_builtin (__builtin_stdc_count_zeros) \
+    && __glibc_has_builtin (__builtin_stdc_count_ones) \
+    && __glibc_has_builtin (__builtin_stdc_has_single_bit) \
+    && __glibc_has_builtin (__builtin_stdc_bit_width) \
+    && __glibc_has_builtin (__builtin_stdc_bit_floor) \
+    && __glibc_has_builtin (__builtin_stdc_bit_ceil)
+
+# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__)
+#  define BITINT_MAXWIDTH __BITINT_MAXWIDTH__
+# endif
+
+typedef unsigned char uc;
+typedef unsigned short us;
+typedef unsigned int ui;
+typedef unsigned long int ul;
+typedef unsigned long long int ull;
+
+# define expr_has_type(e, t) _Generic (e, default : 0, t : 1)
+
+static int
+do_test (void)
+{
+  TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_leading_zeros ((us) ~0U), 0);
+  TEST_COMPARE (stdc_leading_zeros (~0U), 0);
+  TEST_COMPARE (stdc_leading_zeros (~0UL), 0);
+  TEST_COMPARE (stdc_leading_zeros (~0ULL), 0);
+  TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2);
+  TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4);
+  TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6);
+  TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8);
+  TEST_COMPARE (stdc_leading_zeros (512ULL),
+		sizeof (long long int) * CHAR_BIT - 10);
+  TEST_COMPARE (stdc_leading_ones ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_leading_ones ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_leading_ones (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1);
+  TEST_COMPARE (stdc_leading_ones (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1);
+  TEST_COMPARE (stdc_leading_ones (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1);
+  TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT);
+  TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2);
+  TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4);
+  TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6);
+  TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8);
+  TEST_COMPARE (stdc_leading_ones (~512ULL),
+		sizeof (long long int) * CHAR_BIT - 10);
+  TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros ((us) 0), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0);
+  TEST_COMPARE (stdc_trailing_zeros (~0U), 0);
+  TEST_COMPARE (stdc_trailing_zeros (~0UL), 0);
+  TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0);
+  TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1);
+  TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3);
+  TEST_COMPARE (stdc_trailing_zeros (32U), 5);
+  TEST_COMPARE (stdc_trailing_zeros (128UL), 7);
+  TEST_COMPARE (stdc_trailing_zeros (512ULL), 9);
+  TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_ones ((uc) 5), 1);
+  TEST_COMPARE (stdc_trailing_ones ((us) 15), 4);
+  TEST_COMPARE (stdc_trailing_ones (127U), 7);
+  TEST_COMPARE (stdc_trailing_ones (511UL), 9);
+  TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2),
+		sizeof (long long int) * CHAR_BIT - 2);
+  TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (0U), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (0UL), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (0ULL), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0);
+  TEST_COMPARE (stdc_first_leading_zero (~0U), 0);
+  TEST_COMPARE (stdc_first_leading_zero (~0UL), 0);
+  TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0);
+  TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1);
+  TEST_COMPARE (stdc_first_leading_zero ((us) ~15U),
+		sizeof (short) * CHAR_BIT - 3);
+  TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5);
+  TEST_COMPARE (stdc_first_leading_zero (~255UL),
+		sizeof (long int) * CHAR_BIT - 7);
+  TEST_COMPARE (stdc_first_leading_zero (~1023ULL),
+		sizeof (long long int) * CHAR_BIT - 9);
+  TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0UL), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1);
+  TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1);
+  TEST_COMPARE (stdc_first_leading_one (~0U), 1);
+  TEST_COMPARE (stdc_first_leading_one (~0UL), 1);
+  TEST_COMPARE (stdc_first_leading_one (~0ULL), 1);
+  TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1);
+  TEST_COMPARE (stdc_first_leading_one ((us) 9),
+		sizeof (short) * CHAR_BIT - 3);
+  TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5);
+  TEST_COMPARE (stdc_first_leading_one (130UL),
+		sizeof (long int) * CHAR_BIT - 7);
+  TEST_COMPARE (stdc_first_leading_one (512ULL),
+		sizeof (long long int) * CHAR_BIT - 9);
+  TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (0U), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (0UL), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0);
+  TEST_COMPARE (stdc_first_trailing_zero (~0U), 0);
+  TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0);
+  TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0);
+  TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1);
+  TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5);
+  TEST_COMPARE (stdc_first_trailing_zero (63U), 7);
+  TEST_COMPARE (stdc_first_trailing_zero (128UL), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (511ULL), 10);
+  TEST_COMPARE (stdc_first_trailing_one ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1);
+  TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~0U), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~0UL), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1);
+  TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3);
+  TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6);
+  TEST_COMPARE (stdc_first_trailing_one (127U), 1);
+  TEST_COMPARE (stdc_first_trailing_one (511UL), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13);
+  TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1);
+  TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1);
+  TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1);
+  TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0);
+  TEST_COMPARE (stdc_count_zeros (~0U), 0);
+  TEST_COMPARE (stdc_count_zeros (~0UL), 0);
+  TEST_COMPARE (stdc_count_zeros (~0ULL), 0);
+  TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1);
+  TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3);
+  TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4);
+  TEST_COMPARE (stdc_count_zeros (~1315UL), 5);
+  TEST_COMPARE (stdc_count_zeros (3363ULL),
+		sizeof (long long int) * CHAR_BIT - 6);
+  TEST_COMPARE (stdc_count_ones ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_count_ones ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_count_ones (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1);
+  TEST_COMPARE (stdc_count_ones (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1);
+  TEST_COMPARE (stdc_count_ones (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1);
+  TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT);
+  TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1);
+  TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3);
+  TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4);
+  TEST_COMPARE (stdc_count_ones (1315UL), 5);
+  TEST_COMPARE (stdc_count_ones (~3363ULL),
+		sizeof (long long int) * CHAR_BIT - 6);
+  TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0U), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1);
+  TEST_COMPARE (stdc_has_single_bit ((us) 8), 1);
+  TEST_COMPARE (stdc_has_single_bit (32U), 1);
+  TEST_COMPARE (stdc_has_single_bit (128UL), 1);
+  TEST_COMPARE (stdc_has_single_bit (512ULL), 1);
+  TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0);
+  TEST_COMPARE (stdc_has_single_bit ((us) 96), 0);
+  TEST_COMPARE (stdc_has_single_bit (513U), 0);
+  TEST_COMPARE (stdc_has_single_bit (1022UL), 0);
+  TEST_COMPARE (stdc_has_single_bit (12ULL), 0);
+  TEST_COMPARE (stdc_bit_width ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1);
+  TEST_COMPARE (stdc_bit_width ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1);
+  TEST_COMPARE (stdc_bit_width (0U), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1);
+  TEST_COMPARE (stdc_bit_width (0UL), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1);
+  TEST_COMPARE (stdc_bit_width (0ULL), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1);
+  TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT);
+  TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT);
+  TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT);
+  TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT);
+  TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1);
+  TEST_COMPARE (stdc_bit_width ((uc) 6), 3);
+  TEST_COMPARE (stdc_bit_width ((us) 12U), 4);
+  TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)),
+		sizeof (short) * CHAR_BIT - 5);
+  TEST_COMPARE (stdc_bit_width (137U), 8);
+  TEST_COMPARE (stdc_bit_width (269U), 9);
+  TEST_COMPARE (stdc_bit_width (39UL), 6);
+  TEST_COMPARE (stdc_bit_width (~0UL >> 2), sizeof (long int) * CHAR_BIT - 2);
+  TEST_COMPARE (stdc_bit_width (1023ULL), 10);
+  TEST_COMPARE (stdc_bit_width (1024ULL), 11);
+  TEST_COMPARE (stdc_bit_floor ((uc) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1);
+  TEST_COMPARE (stdc_bit_floor ((us) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1);
+  TEST_COMPARE (stdc_bit_floor (0U), 0U);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1);
+  TEST_COMPARE (stdc_bit_floor (0UL), 0UL);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1);
+  TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1);
+  TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_floor ((us) ~0U),
+		(1U << (sizeof (short) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_floor (~0UL),
+		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_floor (~0ULL),
+		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_floor ((uc) 4), 4);
+  TEST_COMPARE (stdc_bit_floor ((uc) 7), 4);
+  TEST_COMPARE (stdc_bit_floor ((us) 8U), 8);
+  TEST_COMPARE (stdc_bit_floor ((us) 31U), 16);
+  TEST_COMPARE (stdc_bit_floor (137U), 128U);
+  TEST_COMPARE (stdc_bit_floor (269U), 256U);
+  TEST_COMPARE (stdc_bit_floor (511UL), 256UL);
+  TEST_COMPARE (stdc_bit_floor (512UL), 512UL);
+  TEST_COMPARE (stdc_bit_floor (513UL), 512ULL);
+  TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL);
+  TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1);
+  TEST_COMPARE (stdc_bit_ceil ((us) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1);
+  TEST_COMPARE (stdc_bit_ceil (0U), 1U);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1);
+  TEST_COMPARE (stdc_bit_ceil (0UL), 1UL);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1);
+  TEST_COMPARE (stdc_bit_ceil (0ULL), 1ULL);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1);
+  TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0);
+  TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0);
+  TEST_COMPARE (stdc_bit_ceil (~0U), 0U);
+  TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL);
+  TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL);
+  TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
+		(1U << (sizeof (short) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)),
+		(1U << (sizeof (short) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (~0U >> 1),
+		(1U << (sizeof (int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)),
+		(1U << (sizeof (int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
+		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (~0UL >> 1),
+		(1UL << (sizeof (long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (1ULL
+			       << (sizeof (long long int) * CHAR_BIT - 1)),
+		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1),
+		(1ULL << (sizeof (long long int) * CHAR_BIT - 1)));
+  TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1);
+  TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2);
+  TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4);
+  TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4);
+  TEST_COMPARE (stdc_bit_ceil (5U), 8U);
+  TEST_COMPARE (stdc_bit_ceil (269U), 512U);
+  TEST_COMPARE (stdc_bit_ceil (511UL), 512UL);
+  TEST_COMPARE (stdc_bit_ceil (512UL), 512UL);
+  TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL);
+  TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL);
+# ifdef __SIZEOF_INT128__
+  TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned __int128) 0), ui),
+		1);
+  TEST_COMPARE (stdc_leading_zeros (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui),
+		1);
+  TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui),
+		1);
+  TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1);
+  TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128)
+							 0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1);
+  TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui),
+		1);
+  TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_count_ones ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned __int128) 0), ui),
+		1);
+  TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0),
+		_Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0);
+  TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1);
+  TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0),
+		sizeof (__int128) * CHAR_BIT);
+  TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0),
+			       unsigned __int128), 1);
+  TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0)
+		!= ((unsigned __int128) 1) << (sizeof (__int128)
+					       * CHAR_BIT - 1), 0);
+  TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0),
+			       unsigned __int128), 1);
+  TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0);
+  TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1)
+		!= ((unsigned __int128) 1) << (sizeof (__int128)
+					       * CHAR_BIT - 1), 0);
+  TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0);
+# endif
+  uc a = 0;
+  TEST_COMPARE (stdc_bit_width (a++), 0);
+  TEST_COMPARE (a, 1);
+  ull b = 0;
+  TEST_COMPARE (stdc_bit_width (b++), 0);
+  TEST_COMPARE (b, 1);
+  TEST_COMPARE (stdc_bit_floor (a++), 1);
+  TEST_COMPARE (a, 2);
+  TEST_COMPARE (stdc_bit_floor (b++), 1);
+  TEST_COMPARE (b, 2);
+  TEST_COMPARE (stdc_bit_ceil (a++), 2);
+  TEST_COMPARE (a, 3);
+  TEST_COMPARE (stdc_bit_ceil (b++), 2);
+  TEST_COMPARE (b, 3);
+  TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2);
+  TEST_COMPARE (a, 4);
+  TEST_COMPARE (stdc_leading_zeros (b++),
+		sizeof (long long int) * CHAR_BIT - 2);
+  TEST_COMPARE (b, 4);
+  TEST_COMPARE (stdc_leading_ones (a++), 0);
+  TEST_COMPARE (a, 5);
+  TEST_COMPARE (stdc_leading_ones (b++), 0);
+  TEST_COMPARE (b, 5);
+  TEST_COMPARE (stdc_trailing_zeros (a++), 0);
+  TEST_COMPARE (a, 6);
+  TEST_COMPARE (stdc_trailing_zeros (b++), 0);
+  TEST_COMPARE (b, 6);
+  TEST_COMPARE (stdc_trailing_ones (a++), 0);
+  TEST_COMPARE (a, 7);
+  TEST_COMPARE (stdc_trailing_ones (b++), 0);
+  TEST_COMPARE (b, 7);
+  TEST_COMPARE (stdc_first_leading_zero (a++), 1);
+  TEST_COMPARE (a, 8);
+  TEST_COMPARE (stdc_first_leading_zero (b++), 1);
+  TEST_COMPARE (b, 8);
+  TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3);
+  TEST_COMPARE (a, 9);
+  TEST_COMPARE (stdc_first_leading_one (b++),
+		sizeof (long long int) * CHAR_BIT - 3);
+  TEST_COMPARE (b, 9);
+  TEST_COMPARE (stdc_first_trailing_zero (a++), 2);
+  TEST_COMPARE (a, 10);
+  TEST_COMPARE (stdc_first_trailing_zero (b++), 2);
+  TEST_COMPARE (b, 10);
+  TEST_COMPARE (stdc_first_trailing_one (a++), 2);
+  TEST_COMPARE (a, 11);
+  TEST_COMPARE (stdc_first_trailing_one (b++), 2);
+  TEST_COMPARE (b, 11);
+  TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3);
+  TEST_COMPARE (a, 12);
+  TEST_COMPARE (stdc_count_zeros (b++),
+		sizeof (long long int) * CHAR_BIT - 3);
+  TEST_COMPARE (b, 12);
+  TEST_COMPARE (stdc_count_ones (a++), 2);
+  TEST_COMPARE (a, 13);
+  TEST_COMPARE (stdc_count_ones (b++), 2);
+  TEST_COMPARE (b, 13);
+  TEST_COMPARE (stdc_has_single_bit (a++), 0);
+  TEST_COMPARE (a, 14);
+  TEST_COMPARE (stdc_has_single_bit (b++), 0);
+  TEST_COMPARE (b, 14);
+# ifdef BITINT_MAXWIDTH
+#  if BITINT_MAXWIDTH >= 64
+  TEST_COMPARE (stdc_leading_zeros (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1);
+  TEST_COMPARE (stdc_leading_zeros (1uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1);
+  TEST_COMPARE (stdc_leading_ones (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1);
+  TEST_COMPARE (stdc_leading_ones (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones (1uwb), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (1uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (1uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1);
+  TEST_COMPARE (stdc_count_zeros (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1);
+  TEST_COMPARE (stdc_count_zeros (1uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1);
+  TEST_COMPARE (stdc_count_ones (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1);
+  TEST_COMPARE (stdc_count_ones (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1);
+  TEST_COMPARE (stdc_has_single_bit (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1);
+  TEST_COMPARE (stdc_bit_width (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1);
+  TEST_COMPARE (stdc_bit_width (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1);
+  TEST_COMPARE (stdc_bit_floor (0uwb), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1);
+  TEST_COMPARE (stdc_bit_floor (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1);
+  TEST_COMPARE (stdc_bit_ceil (0uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1);
+  TEST_COMPARE (stdc_bit_ceil (1uwb), 1);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1);
+  unsigned _BitInt(1) c = 0;
+  TEST_COMPARE (stdc_bit_floor (c++), 0);
+  TEST_COMPARE (c, 1);
+  TEST_COMPARE (stdc_bit_floor (c++), 1);
+  TEST_COMPARE (c, 0);
+  TEST_COMPARE (stdc_bit_ceil (c++), 1);
+  TEST_COMPARE (c, 1);
+  TEST_COMPARE (stdc_bit_ceil (c++), 1);
+  TEST_COMPARE (c, 0);
+#  endif
+#  if BITINT_MAXWIDTH >= 512
+  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9);
+  TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10);
+  TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9);
+  TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10);
+  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8);
+  TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9);
+  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8);
+  TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2),
+		373 - 2);
+  TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512))
+							0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373))
+							0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511),
+		512 - 8);
+  TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023),
+		373 - 9);
+  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512))
+						       0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373))
+						       0), ui), 1);
+  TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1);
+  TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1);
+  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8);
+  TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9);
+  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
+							  _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned
+							  _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9);
+  TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10);
+  TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512))
+							0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373))
+							0), ui), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(512)) 0), 1);
+  TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1);
+  TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175),
+		176);
+  TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311),
+		312);
+  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5);
+  TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6);
+  TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5);
+  TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6);
+  TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0),
+			       _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0),
+			       _Bool), 1);
+  TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279),
+		0);
+  TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0);
+  TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0),
+			       ui), 1);
+  TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512);
+  TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373);
+  TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405),
+		405 + 10);
+  TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242),
+		242 + 11);
+  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0),
+			       unsigned _BitInt(512)), 1);
+  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0),
+			       unsigned _BitInt(373)), 1);
+  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0)
+		!= ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
+  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0)
+		!= ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
+  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405)
+		!= (((unsigned _BitInt(512)) 256) << 405), 0);
+  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242)
+		!= (((unsigned _BitInt(512)) 512) << 242), 0);
+  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0),
+			       unsigned _BitInt(512)), 1);
+  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0);
+  TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0),
+			       unsigned _BitInt(373)), 1);
+  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0);
+  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1))
+		!= ((unsigned _BitInt(512)) 1) << (512 - 1), 0);
+  TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1)
+		!= ((unsigned _BitInt(373)) 1) << (373 - 1), 0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405)
+		!= (((unsigned _BitInt(512)) 512) << 405), 0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242)
+		!= (((unsigned _BitInt(512)) 1024) << 242), 0);
+  TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
+		0);
+  TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0)
+		!= ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
+							       - 1), 0);
+  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511)
+				<< 405)
+		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0);
+  TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
+				<< 405)
+		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
+  TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0);
+  TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0,
+		0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1)
+			       << (BITINT_MAXWIDTH - 1))
+		!= ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH
+							       - 1), 0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512)
+			       << 405)
+		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0);
+  TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513)
+			       << 405)
+		!= (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0);
+#  endif
+# endif
+  return 0;
+}
+#else
+static int
+do_test (void)
+{
+  return 0;
+}
+#endif
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
old mode 100644
new mode 100755
index ca57edce47..9606137e8d
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -325,9 +325,10 @@ then :
   printf %s "(cached) " >&6
 else $as_nop
   cat > conftest.s <<\EOF
-        ptrue p0.b
+	.arch armv8.2-a+sve
+	ptrue p0.b
 EOF
-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5'
+if { ac_try='${CC-cc} -c conftest.s 1>&5'
   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
   (eval $ac_try) 2>&5
   ac_status=$?
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 27874eceb4..56d12d661d 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
 # Check if asm support armv8.2-a+sve
 AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl
 cat > conftest.s <<\EOF
-        ptrue p0.b
+	.arch armv8.2-a+sve
+	ptrue p0.b
 EOF
-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then
+if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then
   libc_cv_aarch64_sve_asm=yes
 else
   libc_cv_aarch64_sve_asm=no
diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h
index 77a782422a..5f2da91ebb 100644
--- a/sysdeps/aarch64/cpu-features.h
+++ b/sysdeps/aarch64/cpu-features.h
@@ -71,6 +71,7 @@ struct cpu_features
   /* Currently, the GLIBC memory tagging tunable only defines 8 bits.  */
   uint8_t mte_state;
   bool sve;
+  bool prefer_sve_ifuncs;
   bool mops;
 };
 
diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c
index a8eabb5e71..0a86c9823a 100644
--- a/sysdeps/aarch64/fpu/acos_advsimd.c
+++ b/sysdeps/aarch64/fpu/acos_advsimd.c
@@ -40,8 +40,8 @@ static const struct data
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
-#define Oneu (0x3ff0000000000000)
-#define Small (0x3e50000000000000) /* 2^-53.  */
+#define Oneu 0x3ff0000000000000
+#define Small 0x3e50000000000000 /* 2^-53.  */
 
 #if WANT_SIMD_EXCEPT
 static float64x2_t VPCS_ATTR NOINLINE
diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c
index 141646e954..2de6eff407 100644
--- a/sysdeps/aarch64/fpu/asin_advsimd.c
+++ b/sysdeps/aarch64/fpu/asin_advsimd.c
@@ -39,8 +39,8 @@ static const struct data
 };
 
 #define AllMask v_u64 (0xffffffffffffffff)
-#define One (0x3ff0000000000000)
-#define Small (0x3e50000000000000) /* 2^-12.  */
+#define One 0x3ff0000000000000
+#define Small 0x3e50000000000000 /* 2^-12.  */
 
 #if WANT_SIMD_EXCEPT
 static float64x2_t VPCS_ATTR NOINLINE
diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
index 09a4c559b8..04fa71fa37 100644
--- a/sysdeps/aarch64/fpu/atan2_sve.c
+++ b/sysdeps/aarch64/fpu/atan2_sve.c
@@ -37,9 +37,6 @@ static const struct data
   .pi_over_2 = 0x1.921fb54442d18p+0,
 };
 
-/* Useful constants.  */
-#define SignMask sv_u64 (0x8000000000000000)
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
 static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
@@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   svbool_t cmp_y = zeroinfnan (iy, pg);
   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
 
-  svuint64_t sign_x = svand_x (pg, ix, SignMask);
-  svuint64_t sign_y = svand_x (pg, iy, SignMask);
-  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
-
   svfloat64_t ax = svabs_x (pg, x);
   svfloat64_t ay = svabs_x (pg, y);
+  svuint64_t iax = svreinterpret_u64 (ax);
+  svuint64_t iay = svreinterpret_u64 (ay);
+
+  svuint64_t sign_x = sveor_x (pg, ix, iax);
+  svuint64_t sign_y = sveor_x (pg, iy, iay);
+  svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
 
-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
   /* Set up z for call to atan.  */
@@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   svfloat64_t z = svdiv_x (pg, n, d);
 
   /* Work out the correct shift.  */
-  svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+  svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1));
+  shift = svsel (pred_aygtax, sv_f64 (1.0), shift);
+  shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift)));
   shift = svmul_x (pg, shift, data_ptr->pi_over_2);
 
   /* Use split Estrin scheme for P(z^2) with deg(P)=19.  */
@@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
   ret = svadd_m (pg, ret, shift);
 
   /* Account for the sign of x and y.  */
-  ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
-
   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
-    return special_case (y, x, ret, cmp_xy);
-
-  return ret;
+    return special_case (
+	y, x,
+	svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)),
+	cmp_xy);
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
 }
diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
index b92f83cdea..9ea197147c 100644
--- a/sysdeps/aarch64/fpu/atan2f_sve.c
+++ b/sysdeps/aarch64/fpu/atan2f_sve.c
@@ -32,10 +32,8 @@ static const struct data
   .pi_over_2 = 0x1.921fb6p+0f,
 };
 
-#define SignMask sv_u32 (0x80000000)
-
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static inline svfloat32_t
+static svfloat32_t NOINLINE
 special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 	      const svbool_t cmp)
 {
@@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   svbool_t cmp_y = zeroinfnan (iy, pg);
   svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
 
-  svuint32_t sign_x = svand_x (pg, ix, SignMask);
-  svuint32_t sign_y = svand_x (pg, iy, SignMask);
-  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
-
   svfloat32_t ax = svabs_x (pg, x);
   svfloat32_t ay = svabs_x (pg, y);
+  svuint32_t iax = svreinterpret_u32 (ax);
+  svuint32_t iay = svreinterpret_u32 (ay);
+
+  svuint32_t sign_x = sveor_x (pg, ix, iax);
+  svuint32_t sign_y = sveor_x (pg, iy, iay);
+  svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
 
-  svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
   svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
 
   /* Set up z for call to atan.  */
@@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   svfloat32_t z = svdiv_x (pg, n, d);
 
   /* Work out the correct shift.  */
-  svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
-  shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+  svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1));
+  shift = svsel (pred_aygtax, sv_f32 (1.0), shift);
+  shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift)));
   shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
 
-  /* Use split Estrin scheme for P(z^2) with deg(P)=7.  */
+  /* Use pure Estrin scheme for P(z^2) with deg(P)=7.  */
   svfloat32_t z2 = svmul_x (pg, z, z);
   svfloat32_t z4 = svmul_x (pg, z2, z2);
   svfloat32_t z8 = svmul_x (pg, z4, z4);
@@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
   ret = svadd_m (pg, ret, shift);
 
   /* Account for the sign of x and y.  */
-  ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
 
   if (__glibc_unlikely (svptest_any (pg, cmp_xy)))
-    return special_case (y, x, ret, cmp_xy);
+    return special_case (
+	y, x,
+	svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)),
+	cmp_xy);
 
-  return ret;
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
 }
diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c
index 2897e8b909..3924c9ce44 100644
--- a/sysdeps/aarch64/fpu/cos_advsimd.c
+++ b/sysdeps/aarch64/fpu/cos_advsimd.c
@@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x)
        special-case handler later.  */
     r = vbslq_f64 (cmp, v_f64 (1.0), r);
 #else
-  cmp = vcageq_f64 (d->range_val, x);
-  cmp = vceqzq_u64 (cmp); /* cmp = ~cmp.  */
+  cmp = vcageq_f64 (x, d->range_val);
   r = x;
 #endif
 
diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c
index 60abc8dfcf..d0c285b03a 100644
--- a/sysdeps/aarch64/fpu/cosf_advsimd.c
+++ b/sysdeps/aarch64/fpu/cosf_advsimd.c
@@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
        special-case handler later.  */
     r = vbslq_f32 (cmp, v_f32 (1.0f), r);
 #else
-  cmp = vcageq_f32 (d->range_val, x);
-  cmp = vceqzq_u32 (cmp); /* cmp = ~cmp.  */
+  cmp = vcageq_f32 (x, d->range_val);
   r = x;
 #endif
 
diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c
index fe7149b191..eeb31ca839 100644
--- a/sysdeps/aarch64/fpu/exp10_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp10_advsimd.c
@@ -57,7 +57,7 @@ const static struct data
 # define BigBound v_u64 (0x4070000000000000)  /* asuint64 (0x1p8).  */
 # define Thres v_u64 (0x2070000000000000)     /* BigBound - TinyBound.  */
 
-static inline float64x2_t VPCS_ATTR
+static float64x2_t VPCS_ATTR NOINLINE
 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
 {
   /* If fenv exceptions are to be triggered correctly, fall back to the scalar
@@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
 # define SpecialBias1 v_u64 (0x7000000000000000)  /* 0x1p769.  */
 # define SpecialBias2 v_u64 (0x3010000000000000)  /* 0x1p-254.  */
 
-static float64x2_t VPCS_ATTR NOINLINE
+static inline float64x2_t VPCS_ATTR
 special_case (float64x2_t s, float64x2_t y, float64x2_t n,
 	      const struct data *d)
 {
diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c
index 7ee0c90948..ab117b69da 100644
--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c
@@ -25,7 +25,8 @@
 static const struct data
 {
   float32x4_t poly[5];
-  float32x4_t shift, log10_2, log2_10_hi, log2_10_lo;
+  float32x4_t log10_2_and_inv, shift;
+
 #if !WANT_SIMD_EXCEPT
   float32x4_t scale_thresh;
 #endif
@@ -38,9 +39,9 @@ static const struct data
   .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
 	    V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
   .shift = V4 (0x1.8p23f),
-  .log10_2 = V4 (0x1.a934fp+1),
-  .log2_10_hi = V4 (0x1.344136p-2),
-  .log2_10_lo = V4 (-0x1.ec10cp-27),
+
+  /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0.  */
+  .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
 #if !WANT_SIMD_EXCEPT
   .scale_thresh = V4 (ScaleBound)
 #endif
@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
 #if WANT_SIMD_EXCEPT
   /* asuint(x) - TinyBound >= BigBound - TinyBound.  */
   uint32x4_t cmp = vcgeq_u32 (
-      vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
-		 TinyBound),
-      Thres);
+      vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
   float32x4_t xm = x;
   /* If any lanes are special, mask them with 1 and retain a copy of x to allow
      special case handler to fix special lanes later. This is only necessary if
      fenv exceptions are to be triggered correctly.  */
   if (__glibc_unlikely (v_any_u32 (cmp)))
-    x = vbslq_f32 (cmp, v_f32 (1), x);
+    x = v_zerofy_f32 (x, cmp);
 #endif
 
   /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
      with poly(r) in [1/sqrt(2), sqrt(2)] and
      x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2].  */
-  float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2);
+  float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
   float32x4_t n = vsubq_f32 (z, d->shift);
-  float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi);
-  r = vfmsq_f32 (r, n, d->log2_10_lo);
+  float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
+  r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
   uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
 
   float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c
index 391a93180c..ae1e63d503 100644
--- a/sysdeps/aarch64/fpu/exp2_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp2_advsimd.c
@@ -24,6 +24,7 @@
 #define IndexMask (N - 1)
 #define BigBound 1022.0
 #define UOFlowBound 1280.0
+#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
 
 static const struct data
 {
@@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i)
 
 #if WANT_SIMD_EXCEPT
 
-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511).  */
 # define Thres 0x2080000000000000     /* asuint64(512.0) - TinyBound.  */
 
 /* Call scalar exp2 as a fallback.  */
 static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
 {
-  return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff));
+  return v_call_f64 (exp2, x, y, is_special);
 }
 
 #else
@@ -65,7 +65,7 @@ special_case (float64x2_t x)
 # define SpecialBias1 0x7000000000000000 /* 0x1p769.  */
 # define SpecialBias2 0x3010000000000000 /* 0x1p-254.  */
 
-static float64x2_t VPCS_ATTR
+static inline float64x2_t VPCS_ATTR
 special_case (float64x2_t s, float64x2_t y, float64x2_t n,
 	      const struct data *d)
 {
@@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
 #if WANT_SIMD_EXCEPT
   uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
   cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
-  /* If any special case (inf, nan, small and large x) is detected,
-     fall back to scalar for all lanes.  */
-  if (__glibc_unlikely (v_any_u64 (cmp)))
-    return special_case (x);
+  /* Mask special lanes and retain a copy of x for passing to special-case
+     handler.  */
+  float64x2_t xc = x;
+  x = v_zerofy_f64 (x, cmp);
 #else
   cmp = vcagtq_f64 (x, d->scale_big_bound);
 #endif
@@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
   float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
   y = vmulq_f64 (r, y);
 
-#if !WANT_SIMD_EXCEPT
   if (__glibc_unlikely (v_any_u64 (cmp)))
+#if !WANT_SIMD_EXCEPT
     return special_case (s, y, n, d);
+#else
+    return special_case (xc, vfmaq_f64 (s, s, y), cmp);
 #endif
   return vfmaq_f64 (s, s, y);
 }
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index 9a5a523a10..8a686e3e05 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -20,6 +20,8 @@
 #include "sv_math.h"
 #include "poly_sve_f32.h"
 
+#define Thres 0x1.5d5e2ap+6f
+
 static const struct data
 {
   float poly[5];
@@ -33,7 +35,7 @@ static const struct data
   .shift = 0x1.903f8p17f,
   /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
      correctly by FEXPA.  */
-  .thres = 0x1.5d5e2ap+6f,
+  .thres = Thres,
 };
 
 static svfloat32_t NOINLINE
diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c
index fd215f1d2c..5e3a9a0d44 100644
--- a/sysdeps/aarch64/fpu/exp_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp_advsimd.c
@@ -54,7 +54,7 @@ const static volatile struct
 # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9).  */
 # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound.  */
 
-static inline float64x2_t VPCS_ATTR
+static float64x2_t VPCS_ATTR NOINLINE
 special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
 {
   /* If fenv exceptions are to be triggered correctly, fall back to the scalar
@@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
 # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769.  */
 # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254.  */
 
-static float64x2_t VPCS_ATTR NOINLINE
+static inline float64x2_t VPCS_ATTR
 special_case (float64x2_t s, float64x2_t y, float64x2_t n)
 {
   /* 2^(n/N) may overflow, break it up into s1*s2.  */
diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
index 0b85bd06f3..3628398674 100644
--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
@@ -23,7 +23,7 @@
 static const struct data
 {
   float64x2_t poly[11];
-  float64x2_t invln2, ln2_lo, ln2_hi, shift;
+  float64x2_t invln2, ln2, shift;
   int64x2_t exponent_bias;
 #if WANT_SIMD_EXCEPT
   uint64x2_t thresh, tiny_bound;
@@ -38,8 +38,7 @@ static const struct data
 	    V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
 	    V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
   .invln2 = V2 (0x1.71547652b82fep0),
-  .ln2_hi = V2 (0x1.62e42fefa39efp-1),
-  .ln2_lo = V2 (0x1.abc9e3b39803fp-56),
+  .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
   .shift = V2 (0x1.8p52),
   .exponent_bias = V2 (0x3ff0000000000000),
 #if WANT_SIMD_EXCEPT
@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
     x = v_zerofy_f64 (x, special);
 #else
   /* Large input, NaNs and Infs.  */
-  uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound));
+  uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
 #endif
 
   /* Reduce argument to smaller range:
@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
      where 2^i is exact because i is an integer.  */
   float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
   int64x2_t i = vcvtq_s64_f64 (n);
-  float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi);
-  f = vfmsq_f64 (f, n, d->ln2_lo);
+  float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
+  f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
 
   /* Approximate expm1(f) using polynomial.
      Taylor expansion for expm1(x) has the form:
diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
index 8d4c9a2193..93db200f61 100644
--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
@@ -23,7 +23,8 @@
 static const struct data
 {
   float32x4_t poly[5];
-  float32x4_t invln2, ln2_lo, ln2_hi, shift;
+  float32x4_t invln2_and_ln2;
+  float32x4_t shift;
   int32x4_t exponent_bias;
 #if WANT_SIMD_EXCEPT
   uint32x4_t thresh;
@@ -34,9 +35,8 @@ static const struct data
   /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2].  */
   .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
 	    V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
-  .invln2 = V4 (0x1.715476p+0f),
-  .ln2_hi = V4 (0x1.62e4p-1f),
-  .ln2_lo = V4 (0x1.7f7d1cp-20f),
+  /* Stores constants: invln2, ln2_hi, ln2_lo, 0.  */
+  .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
   .shift = V4 (0x1.8p23f),
   .exponent_bias = V4 (0x3f800000),
 #if !WANT_SIMD_EXCEPT
@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
     x = v_zerofy_f32 (x, special);
 #else
   /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf.  */
-  uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound));
+  uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
 #endif
 
   /* Reduce argument to smaller range:
@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
      and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
      exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
      where 2^i is exact because i is an integer.  */
-  float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift);
+  float32x4_t j = vsubq_f32 (
+      vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
   int32x4_t i = vcvtq_s32_f32 (j);
-  float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi);
-  f = vfmsq_f32 (f, j, d->ln2_lo);
+  float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
+  f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
 
   /* Approximate expm1(f) using polynomial.
      Taylor expansion for expm1(x) has the form:
diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c
index 067ae79613..21df61728c 100644
--- a/sysdeps/aarch64/fpu/log_advsimd.c
+++ b/sysdeps/aarch64/fpu/log_advsimd.c
@@ -58,8 +58,13 @@ lookup (uint64x2_t i)
   uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
   float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
   float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+#if __BYTE_ORDER == __LITTLE_ENDIAN
   e.invc = vuzp1q_f64 (e0, e1);
   e.logc = vuzp2q_f64 (e0, e1);
+#else
+  e.invc = vuzp1q_f64 (e1, e0);
+  e.logc = vuzp2q_f64 (e1, e0);
+#endif
   return e;
 }
 
diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c
index efce183e86..a0d9d3b819 100644
--- a/sysdeps/aarch64/fpu/sin_advsimd.c
+++ b/sysdeps/aarch64/fpu/sin_advsimd.c
@@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
   r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x);
 #else
   r = x;
-  cmp = vcageq_f64 (d->range_val, x);
-  cmp = vceqzq_u64 (cmp); /* cmp = ~cmp.  */
+  cmp = vcageq_f64 (x, d->range_val);
 #endif
 
   /* n = rint(|x|/pi).  */
diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c
index 60cf3f2ca1..375dfc3331 100644
--- a/sysdeps/aarch64/fpu/sinf_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinf_advsimd.c
@@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
   r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
 #else
   r = x;
-  cmp = vcageq_f32 (d->range_val, x);
-  cmp = vceqzq_u32 (cmp); /* cmp = ~cmp.  */
+  cmp = vcageq_f32 (x, d->range_val);
 #endif
 
   /* n = rint(|x|/pi) */
diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c
index d7e5ba7b1a..0459821ab2 100644
--- a/sysdeps/aarch64/fpu/tan_advsimd.c
+++ b/sysdeps/aarch64/fpu/tan_advsimd.c
@@ -23,7 +23,7 @@
 static const struct data
 {
   float64x2_t poly[9];
-  float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift;
+  float64x2_t half_pi, two_over_pi, shift;
 #if !WANT_SIMD_EXCEPT
   float64x2_t range_val;
 #endif
@@ -34,8 +34,7 @@ static const struct data
 	    V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9),
 	    V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11),
 	    V2 (0x1.4e4fd14147622p-12) },
-  .half_pi_hi = V2 (0x1.921fb54442d18p0),
-  .half_pi_lo = V2 (0x1.1a62633145c07p-54),
+  .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
   .two_over_pi = V2 (0x1.45f306dc9c883p-1),
   .shift = V2 (0x1.8p52),
 #if !WANT_SIMD_EXCEPT
@@ -56,15 +55,15 @@ special_case (float64x2_t x)
 
 /* Vector approximation for double-precision tan.
    Maximum measured error is 3.48 ULP:
-   __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
-				 want -0x1.f6ccd8ecf7deap+37.   */
+   _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+				      want -0x1.f6ccd8ecf7deap+37.  */
 float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
 {
   const struct data *dat = ptr_barrier (&data);
-  /* Our argument reduction cannot calculate q with sufficient accuracy for very
-     large inputs. Fall back to scalar routine for all lanes if any are too
-     large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny
-     input to avoid underflow.  */
+  /* Our argument reduction cannot calculate q with sufficient accuracy for
+     very large inputs. Fall back to scalar routine for all lanes if any are
+     too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
+     tiny input to avoid underflow.  */
 #if WANT_SIMD_EXCEPT
   uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
   /* iax - tiny_bound > range_val - tiny_bound.  */
@@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
   /* Use q to reduce x to r in [-pi/4, pi/4], by:
      r = x - q * pi/2, in extended precision.  */
   float64x2_t r = x;
-  r = vfmsq_f64 (r, q, dat->half_pi_hi);
-  r = vfmsq_f64 (r, q, dat->half_pi_lo);
+  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0);
+  r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1);
   /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
      formula.  */
   r = vmulq_n_f64 (r, 0.5);
@@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
      and reciprocity around pi/2:
      tan(x) = 1 / (tan(pi/2 - x))
      to assemble result using change-of-sign and conditional selection of
-     numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */
+     numerator/denominator, dependent on odd/even-ness of q (hence quadrant).
+   */
   float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p);
   float64x2_t d = vaddq_f64 (p, p);
 
   uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
 
 #if !WANT_SIMD_EXCEPT
-  uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val));
+  uint64x2_t special = vcageq_f64 (x, dat->range_val);
   if (__glibc_unlikely (v_any_u64 (special)))
     return special_case (x);
 #endif
diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c
index 1f16103f8a..5a7489390a 100644
--- a/sysdeps/aarch64/fpu/tanf_advsimd.c
+++ b/sysdeps/aarch64/fpu/tanf_advsimd.c
@@ -23,7 +23,8 @@
 static const struct data
 {
   float32x4_t poly[6];
-  float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift;
+  float32x4_t pi_consts;
+  float32x4_t shift;
 #if !WANT_SIMD_EXCEPT
   float32x4_t range_val;
 #endif
@@ -31,10 +32,9 @@ static const struct data
   /* Coefficients generated using FPMinimax.  */
   .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f),
 	    V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) },
-  .neg_half_pi_1 = V4 (-0x1.921fb6p+0f),
-  .neg_half_pi_2 = V4 (0x1.777a5cp-25f),
-  .neg_half_pi_3 = V4 (0x1.ee59dap-50f),
-  .two_over_pi = V4 (0x1.45f306p-1f),
+  /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi.  */
+  .pi_consts
+  = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f },
   .shift = V4 (0x1.8p+23f),
 #if !WANT_SIMD_EXCEPT
   .range_val = V4 (0x1p15f),
@@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d)
 {
   float32x4_t z2 = vmulq_f32 (z, z);
 #if WANT_SIMD_EXCEPT
-  /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions
-     are to be triggered correctly, sidestep this by fixing such lanes to 0.  */
+  /* Tiny z (<= 0x1p-31) will underflow when calculating z^4.
+     If fp exceptions are to be triggered correctly,
+     sidestep this by fixing such lanes to 0.  */
   uint32x4_t will_uflow
-    = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
+      = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
   if (__glibc_unlikely (v_any_u32 (will_uflow)))
     z2 = vbslq_f32 (will_uflow, v_f32 (0), z2);
 #endif
@@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x)
 #endif
 
   /* n = rint(x/(pi/2)).  */
-  float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x);
+  float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
   float32x4_t n = vsubq_f32 (q, d->shift);
   /* Determine if x lives in an interval, where |tan(x)| grows to infinity.  */
   uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
 
   /* r = x - n * (pi/2)  (range reduction into -pi./4 .. pi/4).  */
   float32x4_t r;
-  r = vfmaq_f32 (x, d->neg_half_pi_1, n);
-  r = vfmaq_f32 (r, d->neg_half_pi_2, n);
-  r = vfmaq_f32 (r, d->neg_half_pi_3, n);
+  r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
+  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
+  r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
 
   /* If x lives in an interval, where |tan(x)|
      - is finite, then use a polynomial approximation of the form
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index c52860efb2..61dc40088f 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -36,5 +36,7 @@
     MTE_ENABLED ();							      \
   bool __attribute__((unused)) sve =					      \
     GLRO(dl_aarch64_cpu_features).sve;					      \
+  bool __attribute__((unused)) prefer_sve_ifuncs =			      \
+    GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs;			      \
   bool __attribute__((unused)) mops =					      \
     GLRO(dl_aarch64_cpu_features).mops;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index d12eccfca5..ce53567dab 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -47,7 +47,7 @@ select_memcpy_ifunc (void)
     {
       if (IS_A64FX (midr))
 	return __memcpy_a64fx;
-      return __memcpy_sve;
+      return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic;
     }
 
   if (IS_THUNDERX (midr))
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index 2081eeb4d4..fe95037be3 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -47,7 +47,7 @@ select_memmove_ifunc (void)
     {
       if (IS_A64FX (midr))
 	return __memmove_a64fx;
-      return __memmove_sve;
+      return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic;
     }
 
   if (IS_THUNDERX (midr))
diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S
index 81748bdbce..e125a5ed85 100644
--- a/sysdeps/aarch64/multiarch/memset_generic.S
+++ b/sysdeps/aarch64/multiarch/memset_generic.S
@@ -33,3 +33,7 @@
 #endif
 
 #include <../memset.S>
+
+#if IS_IN (rtld)
+strong_alias (memset, __memset_generic)
+#endif
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
index d9bd1f8558..19657b627b 100644
--- a/sysdeps/aarch64/preconfigure
+++ b/sysdeps/aarch64/preconfigure
@@ -2,5 +2,6 @@ case "$machine" in
 aarch64*)
 	base_machine=aarch64
 	machine=aarch64
+	mtls_descriptor=desc
 	;;
 esac
diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h
new file mode 100644
index 0000000000..a247fcd3da
--- /dev/null
+++ b/sysdeps/arc/utmp-size.h
@@ -0,0 +1,3 @@
+/* arc has less padding than other architectures with 64-bit time_t.  */
+#define UTMP_SIZE 392
+#define LASTLOG_SIZE 296
diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile
index d5cea717a9..619474eca9 100644
--- a/sysdeps/arm/Makefile
+++ b/sysdeps/arm/Makefile
@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os
 lib-noranlib: $(objpfx)libgcc-stubs.a
 
 ifeq ($(build-shared),yes)
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
 tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy
 modules-names += tst-armtlsdesclocmod
 modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod
 CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic=
 CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic=
-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2
-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2
-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2
+CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor)
 LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now
 tst-armtlsdescloc-ENV = LD_BIND_NOW=1
 tst-armtlsdescextnow-ENV = LD_BIND_NOW=1
diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/arm/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
index 35e2918922..4ef4d46cbd 100644
--- a/sysdeps/arm/configure
+++ b/sysdeps/arm/configure
@@ -187,6 +187,38 @@ else
 default-abi = soft"
 fi
 
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5
+printf %s "checking whether VFP supports 32 registers... " >&6; }
+if test ${libc_cv_arm_pcs_vfp_d32+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+void foo (void)
+{
+  asm volatile ("vldr d16,=17" : : : "d16");
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  libc_cv_arm_pcs_vfp_d32=yes
+else $as_nop
+  libc_cv_arm_pcs_vfp_d32=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5
+printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; }
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
+then
+  printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h
+
+fi
+
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
 printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
 if test ${libc_cv_arm_pcrel_movw+y}
diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
index 5172e30bbe..cd00ddc9d9 100644
--- a/sysdeps/arm/configure.ac
+++ b/sysdeps/arm/configure.ac
@@ -21,6 +21,21 @@ else
   LIBC_CONFIG_VAR([default-abi], [soft])
 fi
 
+AC_CACHE_CHECK([whether VFP supports 32 registers],
+		libc_cv_arm_pcs_vfp_d32, [
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+void foo (void)
+{
+  asm volatile ("vldr d16,=17" : : : "d16");
+}
+]])],
+                [libc_cv_arm_pcs_vfp_d32=yes],
+                [libc_cv_arm_pcs_vfp_d32=no])])
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
+then
+  AC_DEFINE(HAVE_ARM_PCS_VFP_D32)
+fi
+
 AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
 	       libc_cv_arm_pcrel_movw, [
 cat > conftest.s <<\EOF
diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h
index b857bbc868..dd1a0f6b6e 100644
--- a/sysdeps/arm/dl-machine.h
+++ b/sysdeps/arm/dl-machine.h
@@ -139,7 +139,6 @@ _start:\n\
 _dl_start_user:\n\
 	adr	r6, .L_GET_GOT\n\
 	add	sl, sl, r6\n\
-	ldr	r4, [sl, r4]\n\
 	@ save the entry point in another register\n\
 	mov	r6, r0\n\
 	@ get the original arg count\n\
diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
index 764c56e70f..ada106521d 100644
--- a/sysdeps/arm/dl-tlsdesc.S
+++ b/sysdeps/arm/dl-tlsdesc.S
@@ -19,6 +19,7 @@
 #include <sysdep.h>
 #include <arm-features.h>
 #include <tls.h>
+#include <rtld-global-offsets.h>
 #include "tlsdesc.h"
 
 	.text
@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp)
 	.align 2
 _dl_tlsdesc_dynamic:
 	/* Our calling convention is to clobber r0, r1 and the processor
-	   flags.  All others that are modified must be saved */
-	eabi_save ({r2,r3,r4,lr})
-	push	{r2,r3,r4,lr}
-	cfi_adjust_cfa_offset (16)
+	   flags.  All others that are modified must be saved.  r5 is
+	   used as the hwcap value to avoid reload after __tls_get_addr
+	   call.  If required we will save the vector register on the slow
+	   path.  */
+	eabi_save ({r2,r3,r4,r5,ip,lr})
+	push	{r2,r3,r4,r5,ip,lr}
+	cfi_adjust_cfa_offset (24)
 	cfi_rel_offset (r2,0)
 	cfi_rel_offset (r3,4)
 	cfi_rel_offset (r4,8)
-	cfi_rel_offset (lr,12)
+	cfi_rel_offset (r5,12)
+	cfi_rel_offset (ip,16)
+	cfi_rel_offset (lr,20)
+
 	ldr	r1, [r0] /* td */
 	GET_TLS (lr)
 	mov	r4, r0 /* r4 = tp */
@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic:
 	rsbne	r0, r4, r3
 	bne	2f
 1:	mov	r0, r1
+
+	/* Load the hwcap to check for vector support.  */
+	ldr     r2, 3f
+	ldr     r1, .Lrtld_global_ro
+0:	add     r2, pc, r2
+	ldr     r2, [r2, r1]
+	ldr     r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+
+#ifdef __SOFTFP__
+	tst     r5, #HWCAP_ARM_VFP
+	beq     .Lno_vfp
+#endif
+
+	/* Store the VFP registers.  Don't use VFP instructions directly
+	   because this code is used in non-VFP multilibs.  */
+#define VFP_STACK_REQ (32*8 + 8)
+	sub	sp, sp, VFP_STACK_REQ
+	cfi_adjust_cfa_offset (VFP_STACK_REQ)
+	mov	r3, sp
+	.inst	0xeca30b20	/* vstmia r3!, {d0-d15} */
+	tst	r5, #HWCAP_ARM_VFPD32
+	beq	4f
+	.inst	0xece30b20	/* vstmia r3!, {d16-d31}  */
+	/* Store the floating-point status register.  */
+4:	.inst	0xeef12a10	/* vmrs	r2, fpscr */
+	str	r2, [r3]
+.Lno_vfp:
 	bl	__tls_get_addr
 	rsb	r0, r4, r0
+#ifdef __SOFTFP__
+	tst     r5, #HWCAP_ARM_VFP
+	beq     2f
+#endif
+	mov	r3, sp
+	.inst	0xecb30b20	/* vldmia r3!, {d0-d15}  */
+	tst	r5, #HWCAP_ARM_VFPD32
+	beq	5f
+	.inst	0xecf30b20	/* vldmia r3!, {d16-d31}  */
+	ldr	r4, [r3]
+5:	.inst	0xeee14a10	/* vmsr	fpscr, r4  */
+	add	sp, sp, VFP_STACK_REQ
+	cfi_adjust_cfa_offset (-VFP_STACK_REQ)
+
 2:
 #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
      || defined (ARM_ALWAYS_BX))
-	pop	{r2,r3,r4, lr}
-	cfi_adjust_cfa_offset (-16)
+	pop	{r2,r3,r4,r5,ip, lr}
+	cfi_adjust_cfa_offset (-20)
 	cfi_restore (lr)
+	cfi_restore (ip)
+	cfi_restore (r5)
 	cfi_restore (r4)
 	cfi_restore (r3)
 	cfi_restore (r2)
 	bx	lr
 #else
-	pop	{r2,r3,r4, pc}
+	pop	{r2,r3,r4,r5,ip, pc}
 #endif
 	eabi_fnend
 	cfi_endproc
 	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+
+3:      .long   _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
+.Lrtld_global_ro:
+	.long   C_SYMBOL_NAME(_rtld_global_ro)(GOT)
 #endif /* SHARED */
diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h
new file mode 100644
index 0000000000..e413ac21fb
--- /dev/null
+++ b/sysdeps/arm/tst-gnu2-tls2.h
@@ -0,0 +1,128 @@
+/* Test TLSDESC relocation.  ARM version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <sys/auxv.h>
+#include <string.h>
+#include <stdlib.h>
+#include <endian.h>
+
+#ifndef __SOFTFP__
+
+# ifdef HAVE_ARM_PCS_VFP_D32
+#  define SAVE_VFP_D32					\
+      asm volatile ("vldr d16,=17" : : : "d16");	\
+      asm volatile ("vldr d17,=18" : : : "d17");	\
+      asm volatile ("vldr d18,=19" : : : "d18");	\
+      asm volatile ("vldr d19,=20" : : : "d19");	\
+      asm volatile ("vldr d20,=21" : : : "d20");	\
+      asm volatile ("vldr d21,=22" : : : "d21");	\
+      asm volatile ("vldr d22,=23" : : : "d22");	\
+      asm volatile ("vldr d23,=24" : : : "d23");	\
+      asm volatile ("vldr d24,=25" : : : "d24");	\
+      asm volatile ("vldr d25,=26" : : : "d25");	\
+      asm volatile ("vldr d26,=27" : : : "d26");	\
+      asm volatile ("vldr d27,=28" : : : "d27");	\
+      asm volatile ("vldr d28,=29" : : : "d28");	\
+      asm volatile ("vldr d29,=30" : : : "d29");	\
+      asm volatile ("vldr d30,=31" : : : "d30");	\
+      asm volatile ("vldr d31,=32" : : : "d31");
+# else
+#  define SAVE_VFP_D32
+# endif
+
+# define INIT_TLSDESC_CALL()				\
+  unsigned long hwcap = getauxval (AT_HWCAP)
+
+/* Set each vector register to a value from 1 to 32 before the TLS access,
+   dump to memory after TLS access, and compare with the expected values.  */
+
+# define BEFORE_TLSDESC_CALL()				\
+  if (hwcap & HWCAP_ARM_VFP)				\
+    {							\
+      asm volatile ("vldr  d0,=1" : : : "d0");		\
+      asm volatile ("vldr  d1,=2" : : : "d1");		\
+      asm volatile ("vldr  d2,=3" : : : "d1");		\
+      asm volatile ("vldr  d3,=4" : : : "d3");		\
+      asm volatile ("vldr  d4,=5" : : : "d4");		\
+      asm volatile ("vldr  d5,=6" : : : "d5");		\
+      asm volatile ("vldr  d6,=7" : : : "d6");		\
+      asm volatile ("vldr  d7,=8" : : : "d7");		\
+      asm volatile ("vldr  d8,=9" : : : "d8");		\
+      asm volatile ("vldr  d9,=10" : : : "d9");		\
+      asm volatile ("vldr d10,=11" : : : "d10");	\
+      asm volatile ("vldr d11,=12" : : : "d11");	\
+      asm volatile ("vldr d12,=13" : : : "d12");	\
+      asm volatile ("vldr d13,=14" : : : "d13");	\
+      asm volatile ("vldr d14,=15" : : : "d14");	\
+      asm volatile ("vldr d15,=16" : : : "d15");	\
+    }							\
+  if (hwcap & HWCAP_ARM_VFPD32)				\
+    {							\
+      SAVE_VFP_D32					\
+    }
+
+# define VFP_STACK_REQ (16*8)
+# if __BYTE_ORDER == __BIG_ENDIAN
+#  define DISP 7
+# else
+#  define DISP 0
+# endif
+
+# ifdef HAVE_ARM_PCS_VFP_D32
+#  define CHECK_VFP_D32							\
+      char vfp[VFP_STACK_REQ];						\
+      asm volatile ("vstmia %0, {d16-d31}\n"				\
+		    :							\
+		    : "r" (vfp)						\
+		    : "memory");					\
+									\
+      char expected[VFP_STACK_REQ] = { 0 };				\
+      for (int i = 0; i < 16; ++i)					\
+	expected[i * 8 + DISP] = i + 17;				\
+									\
+      if (memcmp (vfp, expected, VFP_STACK_REQ) != 0)			\
+        abort ();
+# else
+#  define CHECK_VFP_D32
+# endif
+
+# define AFTER_TLSDESC_CALL()						\
+  if (hwcap & HWCAP_ARM_VFP)						\
+    {									\
+      char vfp[VFP_STACK_REQ];						\
+      asm volatile ("vstmia %0, {d0-d15}\n"				\
+		    :							\
+		    : "r" (vfp)						\
+		    : "memory");					\
+									\
+      char expected[VFP_STACK_REQ] = { 0 };				\
+      for (int i = 0; i < 16; ++i)					\
+	expected[i * 8 + DISP] = i + 1;					\
+									\
+      if (memcmp (vfp, expected, VFP_STACK_REQ) != 0)			\
+        abort ();							\
+    }									\
+  if (hwcap & HWCAP_ARM_VFPD32)						\
+    {									\
+      CHECK_VFP_D32							\
+    }
+
+#endif /* __SOFTFP__ */
+
+#include_next <tst-gnu2-tls2.h>
diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/arm/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/csky/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/csky/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 117c901ccc..50f58a60e3 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -646,6 +646,8 @@ struct rtld_global_ro
   /* Mask for more hardware capabilities that are available on some
      platforms.  */
   EXTERN uint64_t _dl_hwcap2;
+  EXTERN uint64_t _dl_hwcap3;
+  EXTERN uint64_t _dl_hwcap4;
 
   EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
 
diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h
new file mode 100644
index 0000000000..89dbe878b0
--- /dev/null
+++ b/sysdeps/generic/utmp-size.h
@@ -0,0 +1,23 @@
+/* Expected sizes of utmp-related structures stored in files.  64-bit version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Expected size, in bytes, of struct utmp and struct utmpx.  */
+#define UTMP_SIZE 400
+
+/* Expected size, in bytes, of struct lastlog.  */
+#define LASTLOG_SIZE 296
diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/hppa/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index fc1ef96587..50d74fe6e9 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n",
 		  {
 		    td->arg = _dl_make_tlsdesc_dynamic
 		      (sym_map, sym->st_value + (ElfW(Word))td->arg);
-		    td->entry = _dl_tlsdesc_dynamic;
+		    td->entry = GLRO(dl_x86_tlsdesc_dynamic);
 		  }
 		else
 #  endif
diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..3627028577
--- /dev/null
+++ b/sysdeps/i386/dl-tlsdesc-dynamic.h
@@ -0,0 +1,190 @@
+/* Thread-local storage handling in the ELF dynamic linker.  i386 version.
+   Copyright (C) 2004-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#undef REGISTER_SAVE_AREA
+
+#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multiple of 16
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+# ifdef USE_FNSAVE
+#  error USE_FNSAVE shouldn't be defined
+# endif
+# ifdef USE_FXSAVE
+/* Use fxsave to save all registers.  */
+#  define REGISTER_SAVE_AREA	512
+# endif
+#else
+# ifdef USE_FNSAVE
+/* Use fnsave to save x87 FPU stack registers.  */
+#  define REGISTER_SAVE_AREA	108
+# else
+#  ifndef USE_FXSAVE
+#   error USE_FXSAVE must be defined
+#  endif
+/* Use fxsave to save all registers.  Add 12 bytes to align the stack
+   to 16 bytes.  */
+#  define REGISTER_SAVE_AREA	(512 + 12)
+# endif
+#endif
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,@function
+
+     /* This function is used for symbols that need dynamic TLS.
+
+	%eax points to the TLS descriptor, such that 0(%eax) points to
+	_dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
+	tlsdesc_dynamic_arg object.  It must return in %eax the offset
+	between the thread pointer and the object denoted by the
+	argument, without clobbering any registers.
+
+	The assembly code that follows is a rendition of the following
+	C code, hand-optimized a little bit.
+
+ptrdiff_t
+__attribute__ ((__regparm__ (1)))
+_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+{
+  struct tlsdesc_dynamic_arg *td = tdp->arg;
+  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+  if (__builtin_expect (td->gen_count <= dtv[0].counter
+			&& (dtv[td->tlsinfo.ti_module].pointer.val
+			    != TLS_DTV_UNALLOCATED),
+			1))
+    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+      - __thread_pointer;
+
+  return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+}
+*/
+	cfi_startproc
+	.align 16
+_dl_tlsdesc_dynamic:
+	/* Like all TLS resolvers, preserve call-clobbered registers.
+	   We need two scratch regs anyway.  */
+	subl	$32, %esp
+	cfi_adjust_cfa_offset (32)
+	movl	%ecx, 20(%esp)
+	movl	%edx, 24(%esp)
+	movl	TLSDESC_ARG(%eax), %eax
+	movl	%gs:DTV_OFFSET, %edx
+	movl	TLSDESC_GEN_COUNT(%eax), %ecx
+	cmpl	(%edx), %ecx
+	ja	2f
+	movl	TLSDESC_MODID(%eax), %ecx
+	movl	(%edx,%ecx,8), %edx
+	cmpl	$-1, %edx
+	je	2f
+	movl	TLSDESC_MODOFF(%eax), %eax
+	addl	%edx, %eax
+1:
+	movl	20(%esp), %ecx
+	subl	%gs:0, %eax
+	movl	24(%esp), %edx
+	addl	$32, %esp
+	cfi_adjust_cfa_offset (-32)
+	ret
+	.p2align 4,,7
+2:
+	cfi_adjust_cfa_offset (32)
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	movl	%ebx, -28(%esp)
+	movl	%esp, %ebx
+	cfi_def_cfa_register(%ebx)
+	and	$-STATE_SAVE_ALIGNMENT, %esp
+#endif
+#ifdef REGISTER_SAVE_AREA
+	subl	$REGISTER_SAVE_AREA, %esp
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+#else
+# if !DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true
+# endif
+	/* Allocate stack space of the required size to save the state.  */
+	LOAD_PIC_REG (cx)
+	subl	RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp
+#endif
+#ifdef USE_FNSAVE
+	fnsave	(%esp)
+#elif defined USE_FXSAVE
+	fxsave	(%esp)
+#else
+	/* Save the argument for ___tls_get_addr in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	/* Clear the XSAVE Header.  */
+# ifdef USE_XSAVE
+	movl	%edx, (512)(%esp)
+	movl	%edx, (512 + 4 * 1)(%esp)
+	movl	%edx, (512 + 4 * 2)(%esp)
+	movl	%edx, (512 + 4 * 3)(%esp)
+# endif
+	movl	%edx, (512 + 4 * 4)(%esp)
+	movl	%edx, (512 + 4 * 5)(%esp)
+	movl	%edx, (512 + 4 * 6)(%esp)
+	movl	%edx, (512 + 4 * 7)(%esp)
+	movl	%edx, (512 + 4 * 8)(%esp)
+	movl	%edx, (512 + 4 * 9)(%esp)
+	movl	%edx, (512 + 4 * 10)(%esp)
+	movl	%edx, (512 + 4 * 11)(%esp)
+	movl	%edx, (512 + 4 * 12)(%esp)
+	movl	%edx, (512 + 4 * 13)(%esp)
+	movl	%edx, (512 + 4 * 14)(%esp)
+	movl	%edx, (512 + 4 * 15)(%esp)
+# ifdef USE_XSAVE
+	xsave	(%esp)
+# else
+	xsavec	(%esp)
+# endif
+	/* Restore the argument for ___tls_get_addr in EAX.  */
+	movl	%ecx, %eax
+#endif
+	call	HIDDEN_JUMPTARGET (___tls_get_addr)
+	/* Get register content back.  */
+#ifdef USE_FNSAVE
+	frstor	(%esp)
+#elif defined USE_FXSAVE
+	fxrstor	(%esp)
+#else
+	/* Save and retore ___tls_get_addr return value stored in EAX.  */
+	movl	%eax, %ecx
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	xrstor	(%esp)
+	movl	%ecx, %eax
+#endif
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	mov	%ebx, %esp
+	cfi_def_cfa_register(%esp)
+	movl	-28(%esp), %ebx
+	cfi_restore(%ebx)
+#else
+	addl	$REGISTER_SAVE_AREA, %esp
+	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+#endif
+	jmp	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
index 90d93caa0c..f002feee56 100644
--- a/sysdeps/i386/dl-tlsdesc.S
+++ b/sysdeps/i386/dl-tlsdesc.S
@@ -18,8 +18,27 @@
 
 #include <sysdep.h>
 #include <tls.h>
+#include <cpu-features-offsets.h>
+#include <features-offsets.h>
 #include "tlsdesc.h"
 
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+   __tls_get_addr may be called with 4-byte stack alignment.  Although
+   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+   that stack will be always aligned at 16 bytes.  */
+# define DL_STACK_ALIGNMENT 4
+#endif
+
+/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align
+   stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr.  */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
+
 	.text
 
      /* This function is used to compute the TP offset for symbols in
@@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak:
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
 #ifdef SHARED
-	.hidden _dl_tlsdesc_dynamic
-	.global	_dl_tlsdesc_dynamic
-	.type	_dl_tlsdesc_dynamic,@function
-
-     /* This function is used for symbols that need dynamic TLS.
-
-	%eax points to the TLS descriptor, such that 0(%eax) points to
-	_dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct
-	tlsdesc_dynamic_arg object.  It must return in %eax the offset
-	between the thread pointer and the object denoted by the
-	argument, without clobbering any registers.
-
-	The assembly code that follows is a rendition of the following
-	C code, hand-optimized a little bit.
-
-ptrdiff_t
-__attribute__ ((__regparm__ (1)))
-_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
-{
-  struct tlsdesc_dynamic_arg *td = tdp->arg;
-  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
-  if (__builtin_expect (td->gen_count <= dtv[0].counter
-			&& (dtv[td->tlsinfo.ti_module].pointer.val
-			    != TLS_DTV_UNALLOCATED),
-			1))
-    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
-      - __thread_pointer;
-
-  return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
-}
-*/
-	cfi_startproc
-	.align 16
-_dl_tlsdesc_dynamic:
-	/* Like all TLS resolvers, preserve call-clobbered registers.
-	   We need two scratch regs anyway.  */
-	subl	$28, %esp
-	cfi_adjust_cfa_offset (28)
-	movl	%ecx, 20(%esp)
-	movl	%edx, 24(%esp)
-	movl	TLSDESC_ARG(%eax), %eax
-	movl	%gs:DTV_OFFSET, %edx
-	movl	TLSDESC_GEN_COUNT(%eax), %ecx
-	cmpl	(%edx), %ecx
-	ja	.Lslow
-	movl	TLSDESC_MODID(%eax), %ecx
-	movl	(%edx,%ecx,8), %edx
-	cmpl	$-1, %edx
-	je	.Lslow
-	movl	TLSDESC_MODOFF(%eax), %eax
-	addl	%edx, %eax
-.Lret:
-	movl	20(%esp), %ecx
-	subl	%gs:0, %eax
-	movl	24(%esp), %edx
-	addl	$28, %esp
-	cfi_adjust_cfa_offset (-28)
-	ret
-	.p2align 4,,7
-.Lslow:
-	cfi_adjust_cfa_offset (28)
-	call	HIDDEN_JUMPTARGET (___tls_get_addr)
-	jmp	.Lret
-	cfi_endproc
-	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# define USE_FNSAVE
+# define MINIMUM_ALIGNMENT	4
+# define STATE_SAVE_ALIGNMENT	4
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fnsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef MINIMUM_ALIGNMENT
+# undef USE_FNSAVE
+
+# define MINIMUM_ALIGNMENT	16
+
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT	16
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_FXSAVE
+
+# define USE_XSAVE
+# define STATE_SAVE_ALIGNMENT	64
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVE
+
+# define USE_XSAVEC
+# define STATE_SAVE_ALIGNMENT	64
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsavec
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVEC
 #endif /* SHARED */
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 84e6686eba..f2139fc172 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -1232,6 +1232,7 @@ ldouble: 6
 
 Function: "hypot":
 double: 1
+float: 1
 float128: 1
 ldouble: 1
 
diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S
index 3e26f112d6..79856d498a 100644
--- a/sysdeps/i386/i586/memcpy.S
+++ b/sysdeps/i386/i586/memcpy.S
@@ -26,7 +26,7 @@
 #define LEN	SRC+4
 
         .text
-#if defined PIC && IS_IN (libc)
+#if defined SHARED && IS_IN (libc)
 ENTRY (__memcpy_chk)
 	movl	12(%esp), %eax
 	cmpl	%eax, 16(%esp)
diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S
index f230359ad6..effd958120 100644
--- a/sysdeps/i386/i686/memmove.S
+++ b/sysdeps/i386/i686/memmove.S
@@ -29,7 +29,7 @@
 #define SRC	DEST+4
 #define LEN	SRC+4
 
-#if defined PIC && IS_IN (libc)
+#if defined SHARED && IS_IN (libc)
 ENTRY_CHK (__memmove_chk)
 	movl	12(%esp), %eax
 	cmpl	%eax, 16(%esp)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index f02f5a6df7..ab06771ea0 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -27,7 +27,7 @@
 #define LEN	CHR+4
 
         .text
-#if defined PIC && IS_IN (libc)
+#if defined SHARED && IS_IN (libc)
 ENTRY_CHK (__memset_chk)
 	movl	12(%esp), %eax
 	cmpl	%eax, 16(%esp)
diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c
index ef7bbbe792..20bfdf3af3 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-c.c
+++ b/sysdeps/i386/i686/multiarch/memrchr-c.c
@@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t);
 #endif
 
 #include "string/memrchr.c"
+strong_alias (__memrchr_ia32, __GI___memrchr)
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
index d9dae04171..e123f87435 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S
@@ -720,5 +720,4 @@ L(ret_null):
 	ret
 
 END (__memrchr_sse2)
-strong_alias (__memrchr_sse2, __GI___memrchr)
 #endif
diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c
index 9f05485236..7c0395fbb5 100644
--- a/sysdeps/loongarch/fpu/e_scalbf.c
+++ b/sysdeps/loongarch/fpu/e_scalbf.c
@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn)
 
   return x;
 }
-libm_alias_finite (__ieee754_scalb, __scalb)
+libm_alias_finite (__ieee754_scalbf, __scalbf)
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index fe863e1ba4..01762ef526 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -1,52 +1,52 @@
 ifeq ($(subdir),string)
 sysdep_routines += \
-  strlen-aligned \
-  strlen-lsx \
-  strlen-lasx \
-  strnlen-aligned \
-  strnlen-lsx \
-  strnlen-lasx \
+  memchr-aligned \
+  memchr-lasx \
+  memchr-lsx \
+  memcmp-aligned \
+  memcmp-lasx \
+  memcmp-lsx \
+  memcpy-aligned \
+  memcpy-unaligned \
+  memmove-lasx \
+  memmove-lsx \
+  memmove-unaligned \
+  memrchr-generic \
+  memrchr-lasx \
+  memrchr-lsx \
+  memset-aligned \
+  memset-lasx \
+  memset-lsx \
+  memset-unaligned \
+  rawmemchr-aligned \
+  rawmemchr-lasx \
+  rawmemchr-lsx \
+  stpcpy-aligned \
+  stpcpy-lasx \
+  stpcpy-lsx \
+  stpcpy-unaligned \
   strchr-aligned \
-  strchr-lsx \
   strchr-lasx \
-  strrchr-aligned \
-  strrchr-lsx \
-  strrchr-lasx \
+  strchr-lsx \
   strchrnul-aligned \
-  strchrnul-lsx \
   strchrnul-lasx \
+  strchrnul-lsx \
   strcmp-aligned \
   strcmp-lsx \
-  strncmp-aligned \
-  strncmp-lsx \
   strcpy-aligned \
-  strcpy-unaligned \
-  strcpy-lsx \
   strcpy-lasx \
-  stpcpy-aligned \
-  stpcpy-unaligned \
-  stpcpy-lsx \
-  stpcpy-lasx \
-  memcpy-aligned \
-  memcpy-unaligned \
-  memmove-unaligned \
-  memmove-lsx \
-  memmove-lasx \
-  rawmemchr-aligned \
-  rawmemchr-lsx \
-  rawmemchr-lasx \
-  memchr-aligned \
-  memchr-lsx \
-  memchr-lasx \
-  memrchr-generic \
-  memrchr-lsx \
-  memrchr-lasx \
-  memset-aligned \
-  memset-unaligned \
-  memset-lsx \
-  memset-lasx \
-  memcmp-aligned \
-  memcmp-lsx \
-  memcmp-lasx \
+  strcpy-lsx \
+  strcpy-unaligned \
+  strlen-aligned \
+  strlen-lasx \
+  strlen-lsx \
+  strncmp-aligned \
+  strncmp-lsx \
+  strnlen-aligned \
+  strnlen-lasx \
+  strnlen-lsx \
+  strrchr-aligned \
+  strrchr-lasx \
+  strrchr-lsx \
 # sysdep_routines
 endif
diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/m68k/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h
new file mode 100644
index 0000000000..5946685819
--- /dev/null
+++ b/sysdeps/m68k/utmp-size.h
@@ -0,0 +1,3 @@
+/* m68k has 2-byte alignment.  */
+#define UTMP_SIZE 382
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/microblaze/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/microblaze/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h
index 57f0f2a22f..30dd3fd85d 100644
--- a/sysdeps/mips/bits/wordsize.h
+++ b/sysdeps/mips/bits/wordsize.h
@@ -19,11 +19,7 @@
 
 #define __WORDSIZE			_MIPS_SZPTR
 
-#if _MIPS_SIM == _ABI64
-# define __WORDSIZE_TIME64_COMPAT32	1
-#else
-# define __WORDSIZE_TIME64_COMPAT32	0
-#endif
+#define __WORDSIZE_TIME64_COMPAT32	1
 
 #if __WORDSIZE == 32
 #define __WORDSIZE32_SIZE_ULONG		0
diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps
index 78969745b2..933aba4735 100644
--- a/sysdeps/mips/mips64/libm-test-ulps
+++ b/sysdeps/mips/mips64/libm-test-ulps
@@ -1066,17 +1066,17 @@ double: 1
 ldouble: 1
 
 Function: "j0":
-double: 2
+double: 3
 float: 9
 ldouble: 2
 
 Function: "j0_downward":
-double: 5
+double: 6
 float: 9
 ldouble: 9
 
 Function: "j0_towardzero":
-double: 6
+double: 7
 float: 9
 ldouble: 9
 
@@ -1146,6 +1146,7 @@ float: 6
 ldouble: 8
 
 Function: "log":
+double: 1
 float: 1
 ldouble: 1
 
diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/mips/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/nios2/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/nios2/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h
new file mode 100644
index 0000000000..6b3653aa4d
--- /dev/null
+++ b/sysdeps/or1k/utmp-size.h
@@ -0,0 +1,3 @@
+/* or1k has less padding than other architectures with 64-bit time_t.  */
+#define UTMP_SIZE 392
+#define LASTLOG_SIZE 296
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index a76bb6e5b0..8cf00aa7e3 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -38,6 +38,10 @@
        needed.
   */
 
+/* The total number of available bits (including those prior to
+   _DL_HWCAP_FIRST).  Some of these bits might not be used.  */
+#define _DL_HWCAP_COUNT         128
+
 #ifndef PROCINFO_CLASS
 # define PROCINFO_CLASS
 #endif
@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
 #if !defined PROCINFO_DECL && defined SHARED
   ._dl_powerpc_cap_flags
 #else
-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
+PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15]
 #endif
 #ifndef PROCINFO_DECL
 = {
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
index 68f4241095..b36697ba44 100644
--- a/sysdeps/powerpc/dl-procinfo.h
+++ b/sysdeps/powerpc/dl-procinfo.h
@@ -22,22 +22,23 @@
 #include <ldsodefs.h>
 #include <sysdep.h>	/* This defines the PPC_FEATURE[2]_* macros.  */
 
-/* The total number of available bits (including those prior to
-   _DL_HWCAP_FIRST).  Some of these bits might not be used.  */
-#define _DL_HWCAP_COUNT		64
+/* Feature masks are all 32-bits in size.  */
+#define _DL_HWCAP_SIZE		32
 
-/* Features started at bit 31 and decremented as new features were added.  */
-#define _DL_HWCAP_LAST		31
+/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings.  */
+#define _DL_HWCAP2_OFFSET	_DL_HWCAP_SIZE
 
-/* AT_HWCAP2 features started at bit 31 and decremented as new features were
-   added.  HWCAP2 feature bits start at bit 0.  */
-#define _DL_HWCAP2_LAST		31
+/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings.  */
+#define _DL_HWCAP3_OFFSET	(_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE)
+
+/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings.  */
+#define _DL_HWCAP4_OFFSET	(_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE)
 
 /* These bits influence library search.  */
 #define HWCAP_IMPORTANT		(PPC_FEATURE_HAS_ALTIVEC \
 				+ PPC_FEATURE_HAS_DFP)
 
-#define _DL_PLATFORMS_COUNT	16
+#define _DL_PLATFORMS_COUNT	17
 
 #define _DL_FIRST_PLATFORM	32
 /* Mask to filter out platforms.  */
@@ -61,6 +62,7 @@
 #define PPC_PLATFORM_POWER8		13
 #define PPC_PLATFORM_POWER9		14
 #define PPC_PLATFORM_POWER10		15
+#define PPC_PLATFORM_POWER11		16
 
 static inline const char *
 __attribute__ ((unused))
@@ -88,6 +90,11 @@ _dl_string_platform (const char *str)
 	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10;
 	      str++;
 	    }
+	  else if (str[1] == '1')
+	    {
+	      ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11;
+	      str++;
+	    }
 	  else
 	    return -1;
 	  break;
@@ -187,21 +194,42 @@ _dl_procinfo (unsigned int type, unsigned long int word)
     case AT_HWCAP:
       _dl_printf ("AT_HWCAP:            ");
 
-      for (int i = 0; i <= _DL_HWCAP_LAST; ++i)
+      for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
        if (word & (1 << i))
          _dl_printf (" %s", _dl_hwcap_string (i));
       break;
     case AT_HWCAP2:
       {
-       unsigned int offset = _DL_HWCAP_LAST + 1;
 
        _dl_printf ("AT_HWCAP2:           ");
 
-        /* We have to go through them all because the kernel added the
-          AT_HWCAP2 features starting with the high bits.  */
-       for (int i = 0; i <= _DL_HWCAP2_LAST; ++i)
-         if (word & (1 << i))
-           _dl_printf (" %s", _dl_hwcap_string (offset + i));
+       /* We have to go through them all because the kernel added the
+	  AT_HWCAP2 features starting with the high bits.  */
+       for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
+	 if (word & (1 << i))
+	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i));
+       break;
+      }
+    case AT_HWCAP3:
+      {
+       _dl_printf ("AT_HWCAP3:           ");
+
+       /* We have to go through them all because the kernel added the
+	  AT_HWCAP3 features starting with the high bits.  */
+       for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
+	 if (word & (1 << i))
+	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i));
+       break;
+      }
+    case AT_HWCAP4:
+      {
+       _dl_printf ("AT_HWCAP4:           ");
+
+       /* We have to go through them all because the kernel added the
+	  AT_HWCAP4 features starting with the high bits.  */
+       for (int i = 0; i <= _DL_HWCAP_SIZE; ++i)
+	 if (word & (1 << i))
+	   _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i));
        break;
       }
     case AT_L1I_CACHEGEOMETRY:
diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c
index 76344f285a..f6fede15a7 100644
--- a/sysdeps/powerpc/hwcapinfo.c
+++ b/sysdeps/powerpc/hwcapinfo.c
@@ -31,7 +31,7 @@ void
 __tcb_parse_hwcap_and_convert_at_platform (void)
 {
 
-  uint64_t h1, h2;
+  uint64_t h1, h2, h3, h4;
 
   /* Read AT_PLATFORM string from auxv and convert it to a number.  */
   __tcb.at_platform = _dl_string_platform (GLRO (dl_platform));
@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
   /* Read HWCAP and HWCAP2 from auxv.  */
   h1 = GLRO (dl_hwcap);
   h2 = GLRO (dl_hwcap2);
+  h3 = GLRO (dl_hwcap3);
+  h4 = GLRO (dl_hwcap4);
 
   /* hwcap contains only the latest supported ISA, the code checks which is
      and fills the previous supported ones.  */
@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
   else if (h1 & PPC_FEATURE_POWER5)
     h1 |= PPC_FEATURE_POWER4;
 
-  uint64_t array_hwcaps[] = { h1, h2 };
+  uint64_t array_hwcaps[] = { h1, h2, h3, h4 };
   init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps);
 
   /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
      we can read both in a single load later.  */
   __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff);
-  __tcb.hwcap_extn = 0x0;
+
+  /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that
+     we can read both in a single load later.  */
+  __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff);
 
 }
 #if IS_IN (rtld)
diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h
index 04ca9debf0..6993fb6b29 100644
--- a/sysdeps/powerpc/powerpc32/bits/wordsize.h
+++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h
@@ -2,10 +2,9 @@
 
 #if defined __powerpc64__
 # define __WORDSIZE	64
-# define __WORDSIZE_TIME64_COMPAT32	1
 #else
 # define __WORDSIZE	32
-# define __WORDSIZE_TIME64_COMPAT32	0
 # define __WORDSIZE32_SIZE_ULONG	0
 # define __WORDSIZE32_PTRDIFF_LONG	0
 #endif
+#define __WORDSIZE_TIME64_COMPAT32	1
diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies
new file mode 100644
index 0000000000..051cbe0f79
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power10/fpu
+powerpc/powerpc32/power10
diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000..58edb2861d
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
new file mode 100644
index 0000000000..c70f0428ba
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies
new file mode 100644
index 0000000000..de481d1c13
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/be/power10/fpu
+powerpc/powerpc64/be/power10
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
new file mode 100644
index 0000000000..dff0e13064
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/fpu
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000..c3f259e009
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
new file mode 100644
index 0000000000..9491a394c9
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h
index 04ca9debf0..6993fb6b29 100644
--- a/sysdeps/powerpc/powerpc64/bits/wordsize.h
+++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h
@@ -2,10 +2,9 @@
 
 #if defined __powerpc64__
 # define __WORDSIZE	64
-# define __WORDSIZE_TIME64_COMPAT32	1
 #else
 # define __WORDSIZE	32
-# define __WORDSIZE_TIME64_COMPAT32	0
 # define __WORDSIZE32_SIZE_ULONG	0
 # define __WORDSIZE32_PTRDIFF_LONG	0
 #endif
+#define __WORDSIZE_TIME64_COMPAT32	1
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index c6682f3445..2b6f5d2b08 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr)
 static inline Elf64_Addr
 elf_machine_load_address (void) __attribute__ ((const));
 
+#ifndef __PCREL__
 static inline Elf64_Addr
 elf_machine_load_address (void)
 {
@@ -105,6 +106,24 @@ elf_machine_dynamic (void)
   /* Then subtract off the load address offset.  */
   return runtime_dynamic - elf_machine_load_address() ;
 }
+#else /* __PCREL__ */
+/* In PCREL mode, r2 may have been clobbered.  Rely on relative
+   relocations instead.  */
+
+static inline ElfW(Addr)
+elf_machine_load_address (void)
+{
+  extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
+  return (ElfW(Addr)) &__ehdr_start;
+}
+
+static inline ElfW(Addr)
+elf_machine_dynamic (void)
+{
+  extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
+  return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
+}
+#endif /* __PCREL__ */
 
 /* The PLT uses Elf64_Rela relocs.  */
 #define elf_machine_relplt elf_machine_rela
diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies
new file mode 100644
index 0000000000..e18182dcc1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/le/power10/fpu
+powerpc/powerpc64/le/power10
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
new file mode 100644
index 0000000000..e41bd55684
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/fpu
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000..c838d50931
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
new file mode 100644
index 0000000000..687248c3c2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
index 77465d9133..65d3e69303 100644
--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
+++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
@@ -36,9 +36,11 @@ compute_level (void)
     return 9;
   if (strcmp (platform, "power10") == 0)
     return 10;
+  if (strcmp (platform, "power11") == 0)
+    return 11;
   printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
-  /* Assume that the new platform supports POWER10.  */
-  return 10;
+  /* Assume that the new platform supports POWER11.  */
+  return 11;
 }
 
 static int
diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure
index 4de94089a3..9e5a07ab6d 100644
--- a/sysdeps/powerpc/preconfigure
+++ b/sysdeps/powerpc/preconfigure
@@ -58,7 +58,7 @@ fi
 
     ;;
 
-  a2|970|power[4-9]|power5x|power6+|power10)
+  a2|970|power[4-9]|power5x|power6+|power10|power11)
     submachine=${archcpu}
     if test ${libc_cv_cc_submachine+y}
 then :
diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac
index 6c63bd8257..14b6dafd4a 100644
--- a/sysdeps/powerpc/preconfigure.ac
+++ b/sysdeps/powerpc/preconfigure.ac
@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in
     AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
     ;;
 
-  a2|970|power[[4-9]]|power5x|power6+|power10)
+  a2|970|power[[4-9]]|power5x|power6+|power10|power11)
     submachine=${archcpu}
     AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
     ;;
diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/powerpc/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c
index 3030660e5f..94ad6281bc 100644
--- a/sysdeps/pthread/tst-cancel30.c
+++ b/sysdeps/pthread/tst-cancel30.c
@@ -18,6 +18,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <errno.h>
 #include <support/check.h>
 #include <support/xstdio.h>
 #include <support/xthread.h>
@@ -46,13 +47,19 @@ tf (void *arg)
 
   /* Wait indefinitely for cancellation, which only works if asynchronous
      cancellation is enabled.  */
-#if defined SYS_ppoll || defined SYS_ppoll_time64
-# ifndef SYS_ppoll_time64
-#  define SYS_ppoll_time64 SYS_ppoll
+#ifdef SYS_ppoll_time64
+  long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
+  (void) ret;
+# ifdef SYS_ppoll
+  if (ret == -1 && errno == ENOSYS)
+    syscall (SYS_ppoll, NULL, 0, NULL, NULL);
 # endif
-  syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
 #else
+# ifdef SYS_ppoll
+  syscall (SYS_ppoll, NULL, 0, NULL, NULL);
+# else
   for (;;);
+# endif
 #endif
 
   return 0;
diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/riscv/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/sh/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/sh/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h
index 4bbd2e63b4..a2e79e0fa9 100644
--- a/sysdeps/sparc/sparc32/bits/wordsize.h
+++ b/sysdeps/sparc/sparc32/bits/wordsize.h
@@ -1,6 +1,6 @@
 /* Determine the wordsize from the preprocessor defines.  */
 
 #define __WORDSIZE	32
-#define __WORDSIZE_TIME64_COMPAT32	0
+#define __WORDSIZE_TIME64_COMPAT32	1
 #define __WORDSIZE32_SIZE_ULONG	0
 #define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h
index 2f66f10d72..ea103e5970 100644
--- a/sysdeps/sparc/sparc64/bits/wordsize.h
+++ b/sysdeps/sparc/sparc64/bits/wordsize.h
@@ -2,10 +2,9 @@
 
 #if defined __arch64__ || defined __sparcv9
 # define __WORDSIZE	64
-# define __WORDSIZE_TIME64_COMPAT32	1
 #else
 # define __WORDSIZE	32
-# define __WORDSIZE_TIME64_COMPAT32	0
 # define __WORDSIZE32_SIZE_ULONG	0
 # define __WORDSIZE32_PTRDIFF_LONG	0
 #endif
+#define __WORDSIZE_TIME64_COMPAT32	1
diff --git a/sysdeps/sparc/sparc64/rtld-memset.c b/sysdeps/sparc/sparc64/rtld-memset.c
index 55f3835790..a19202a620 100644
--- a/sysdeps/sparc/sparc64/rtld-memset.c
+++ b/sysdeps/sparc/sparc64/rtld-memset.c
@@ -1 +1,4 @@
 #include <string/memset.c>
+#if IS_IN(rtld)
+strong_alias (memset, __memset_ultra1)
+#endif
diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/sparc/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index b1a3f673f0..c0b047bc0d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -21,6 +21,7 @@
 #include <sys/auxv.h>
 #include <elf/dl-hwcaps.h>
 #include <sys/prctl.h>
+#include <sys/utsname.h>
 #include <dl-tunables-parse.h>
 
 #define DCZID_DZP_MASK (1 << 4)
@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu)
   return UINT64_MAX;
 }
 
+#if __LINUX_KERNEL_VERSION < 0x060200
+
+/* Return true if we prefer using SVE in string ifuncs.  Old kernels disable
+   SVE after every system call which results in unnecessary traps if memcpy
+   uses SVE.  This is true for kernels between 4.15.0 and before 6.2.0, except
+   for 5.14.0 which was patched.  For these versions return false to avoid using
+   SVE ifuncs.
+   Parse the kernel version into a 24-bit kernel.major.minor value without
+   calling any library functions.  If uname() is not supported or if the version
+   format is not recognized, assume the kernel is modern and return true.  */
+
+static inline bool
+prefer_sve_ifuncs (void)
+{
+  struct utsname buf;
+  const char *p = &buf.release[0];
+  int kernel = 0;
+  int val;
+
+  if (__uname (&buf) < 0)
+    return true;
+
+  for (int shift = 16; shift >= 0; shift -= 8)
+    {
+      for (val = 0; *p >= '0' && *p <= '9'; p++)
+	val = val * 10 + *p - '0';
+      kernel |= (val & 255) << shift;
+      if (*p++ != '.')
+	break;
+    }
+
+  if (kernel >= 0x060200 || kernel == 0x050e00)
+    return true;
+  if (kernel >= 0x040f00)
+    return false;
+  return true;
+}
+
+#endif
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features)
   /* Check if SVE is supported.  */
   cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
 
+  cpu_features->prefer_sve_ifuncs = cpu_features->sve;
+
+#if __LINUX_KERNEL_VERSION < 0x060200
+  if (cpu_features->sve)
+    cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs ();
+#endif
+
   /* Check if MOPS is supported.  */
   cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS;
 }
diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
index e3d758b163..ea2a58ecb1 100644
--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
+++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
   GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
   GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
   GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
+  GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3];
+  GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4];
   GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
   GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
   _dl_random = (void *) auxv_values[AT_RANDOM];
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
index ad3692d738..e1b14e9eb3 100644
--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
@@ -197,6 +197,8 @@ _dl_show_auxv (void)
 	  [AT_SYSINFO_EHDR - 2] =	{ "SYSINFO_EHDR:      0x", hex },
 	  [AT_RANDOM - 2] =		{ "RANDOM:            0x", hex },
 	  [AT_HWCAP2 - 2] =		{ "HWCAP2:            0x", hex },
+	  [AT_HWCAP3 - 2] =		{ "HWCAP3:            0x", hex },
+	  [AT_HWCAP4 - 2] =		{ "HWCAP4:            0x", hex },
 	  [AT_MINSIGSTKSZ - 2] =	{ "MINSIGSTKSZ:       ", dec },
 	  [AT_L1I_CACHESIZE - 2] =	{ "L1I_CACHESIZE:     ", dec },
 	  [AT_L1I_CACHEGEOMETRY - 2] =	{ "L1I_CACHEGEOMETRY: 0x", hex },
diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
new file mode 100644
index 0000000000..6ecbfe7c86
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define __WORDSIZE			32
+#define __WORDSIZE_TIME64_COMPAT32	1
+#define __WORDSIZE32_SIZE_ULONG		0
+#define __WORDSIZE32_PTRDIFF_LONG	0
diff --git a/sysdeps/unix/sysv/linux/mips/clone3.S b/sysdeps/unix/sysv/linux/mips/clone3.S
index e9fec2fa47..481b8ae963 100644
--- a/sysdeps/unix/sysv/linux/mips/clone3.S
+++ b/sysdeps/unix/sysv/linux/mips/clone3.S
@@ -37,11 +37,6 @@
 
 	.text
 	.set		nomips16
-#if _MIPS_SIM == _ABIO32
-# define EXTRA_LOCALS 1
-#else
-# define EXTRA_LOCALS 0
-#endif
 #define FRAMESZ ((NARGSAVE*SZREG)+ALSZ)&ALMASK
 GPOFF= FRAMESZ-(1*SZREG)
 NESTED(__clone3, SZREG, sp)
@@ -68,8 +63,31 @@ NESTED(__clone3, SZREG, sp)
 	beqz	a0, L(error)	/* No NULL cl_args pointer.  */
 	beqz	a2, L(error)	/* No NULL function pointer.  */
 
+#if _MIPS_SIM == _ABIO32
+	/* Both stack and stack_size on clone_args are defined as uint64_t, and
+	   there is no need to handle values larger than to 32 bits for o32.  */
+# if __BYTE_ORDER == __BIG_ENDIAN
+#  define CL_STACKPOINTER_OFFSET  44
+#  define CL_STACKSIZE_OFFSET     52
+# else
+#  define CL_STACKPOINTER_OFFSET  40
+#  define CL_STACKSIZE_OFFSET     48
+# endif
+
+	/* For o32 we need to setup a minimal stack frame to allow cprestore
+	   on __thread_start_clone3.  Also there is no guarantee by kABI that
+	   $8 will be preserved after syscall execution (so we need to save it
+	   on the provided stack).  */
+	lw	t0, CL_STACKPOINTER_OFFSET(a0)	/* Load the stack pointer.  */
+	lw	t1, CL_STACKSIZE_OFFSET(a0)	/* Load the stack_size.  */
+	addiu	t1, -32				/* Update the stack size.  */
+	addu	t2, t1, t0			/* Calculate the thread stack.  */
+	sw	a3, 0(t2)			/* Save argument pointer.  */
+	sw	t1, CL_STACKSIZE_OFFSET(a0)	/* Save the new stack size.  */
+#else
 	move	$8, a3		/* a3 is set to 0/1 for syscall success/error
 				   while a4/$8 is returned unmodified.  */
+#endif
 
 	/* Do the system call, the kernel expects:
 	   v0: system call number
@@ -125,7 +143,11 @@ L(thread_start_clone3):
 
 	/* Restore the arg for user's function.  */
 	move		t9, a2		/* Function pointer.  */
+#if _MIPS_SIM == _ABIO32
+	PTR_L		a0, 0(sp)
+#else
 	move		a0, $8		/* Argument pointer.  */
+#endif
 
 	/* Call the user's function.  */
 	jal		t9
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
index 04ca9debf0..6993fb6b29 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h
@@ -2,10 +2,9 @@
 
 #if defined __powerpc64__
 # define __WORDSIZE	64
-# define __WORDSIZE_TIME64_COMPAT32	1
 #else
 # define __WORDSIZE	32
-# define __WORDSIZE_TIME64_COMPAT32	0
 # define __WORDSIZE32_SIZE_ULONG	0
 # define __WORDSIZE32_PTRDIFF_LONG	0
 #endif
+#define __WORDSIZE_TIME64_COMPAT32	1
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
index 8e8a5ec2ea..a947d62db6 100644
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[])
      which are set by __tcb_parse_hwcap_and_convert_at_platform.  */
   cpu_features->hwcap = hwcaps[0];
   cpu_features->hwcap2 = hwcaps[1];
+  cpu_features->hwcap3 = hwcaps[2];
+  cpu_features->hwcap4 = hwcaps[3];
   /* Default is to use aligned memory access on optimized function unless
      tunables is enable, since for this case user can explicit disable
      unaligned optimizations.  */
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
index 1294f0b601..e9eb6a13c8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
@@ -26,6 +26,8 @@ struct cpu_features
   bool use_cached_memopt;
   unsigned long int hwcap;
   unsigned long int hwcap2;
+  unsigned long int hwcap3;
+  unsigned long int hwcap4;
 };
 
 static const char hwcap_names[] = {
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index a4705daf1c..6a00cd88cd 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv,
       case AT_HWCAP2:
 	_dl_hwcap2 = (unsigned long int) av->a_un.a_val;
 	break;
+      case AT_HWCAP3:
+	_dl_hwcap3 = (unsigned long int) av->a_un.a_val;
+	break;
+      case AT_HWCAP4:
+	_dl_hwcap4 = (unsigned long int) av->a_un.a_val;
+	break;
       case AT_PLATFORM:
 	_dl_platform = (void *) av->a_un.a_val;
 	break;
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
index 4c882ef2ee..a7a863242c 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
@@ -53,6 +53,7 @@ ENTRY(__clone)
 	br	%r14
 error:
 	lhi	%r2,-EINVAL
+	lm	%r6,%r7,24(%r15)	/* Load registers.  */
 	j	SYSCALL_ERROR_LABEL
 PSEUDO_END (__clone)
 
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
index 4eb104be71..c552a6b8de 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
@@ -54,6 +54,7 @@ ENTRY(__clone)
 	br	%r14
 error:
 	lghi	%r2,-EINVAL
+	lmg	%r6,%r7,48(%r15)	/* Restore registers.  */
 	jg	SYSCALL_ERROR_LABEL
 PSEUDO_END (__clone)
 
diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
index dfb884568d..72a3360550 100644
--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
+++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
@@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void)
   return r == -1 ? r : cpu;
 }
 
-#ifdef RSEQ_SIG
 int
 sched_getcpu (void)
 {
   int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
   return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu ();
 }
-#else /* RSEQ_SIG */
-int
-sched_getcpu (void)
-{
-  return vsyscall_sched_getcpu ();
-}
-#endif /* RSEQ_SIG */
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
index 7562875ee2..ea103e5970 100644
--- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
+++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h
@@ -2,10 +2,9 @@
 
 #if defined __arch64__ || defined __sparcv9
 # define __WORDSIZE	64
-# define __WORDSIZE_TIME64_COMPAT32	1
 #else
 # define __WORDSIZE	32
 # define __WORDSIZE32_SIZE_ULONG	0
 # define __WORDSIZE32_PTRDIFF_LONG	0
-# define __WORDSIZE_TIME64_COMPAT32	0
 #endif
+#define __WORDSIZE_TIME64_COMPAT32	1
diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c
index c6e5e66289..778d1e3354 100644
--- a/sysdeps/unix/sysv/linux/timespec_get.c
+++ b/sysdeps/unix/sysv/linux/timespec_get.c
@@ -5,7 +5,7 @@
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
-   version 2.1 of the License.
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c
index 5acebe2a2c..2eef9e512c 100644
--- a/sysdeps/unix/sysv/linux/timespec_getres.c
+++ b/sysdeps/unix/sysv/linux/timespec_getres.c
@@ -5,7 +5,7 @@
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
-   version 2.1 of the License.
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c
index 470676ab2b..2bc7124983 100644
--- a/sysdeps/unix/sysv/linux/tst-clone.c
+++ b/sysdeps/unix/sysv/linux/tst-clone.c
@@ -16,12 +16,16 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-/* BZ #2386 */
+/* BZ #2386, BZ #31402 */
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sched.h>
+#include <stackinfo.h>  /* For _STACK_GROWS_{UP,DOWN}.  */
+#include <support/check.h>
+
+volatile unsigned v = 0xdeadbeef;
 
 int child_fn(void *arg)
 {
@@ -30,22 +34,67 @@ int child_fn(void *arg)
 }
 
 static int
-do_test (void)
+__attribute__((noinline))
+do_clone (int (*fn)(void *), void *stack)
 {
   int result;
+  unsigned int a = v;
+  unsigned int b = v;
+  unsigned int c = v;
+  unsigned int d = v;
+  unsigned int e = v;
+  unsigned int f = v;
+  unsigned int g = v;
+  unsigned int h = v;
+  unsigned int i = v;
+  unsigned int j = v;
+  unsigned int k = v;
+  unsigned int l = v;
+  unsigned int m = v;
+  unsigned int n = v;
+  unsigned int o = v;
+
+  result = clone (fn, stack, 0, NULL);
+
+  /* Check that clone does not clobber call-saved registers.  */
+  TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v
+	       && g == v && h == v && i == v && j == v && k == v && l == v
+	       && m == v && n == v && o == v);
+
+  return result;
+}
+
+static void
+__attribute__((noinline))
+do_test_single (int (*fn)(void *), void *stack)
+{
+  printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack);
+  errno = 0;
+
+  int result = do_clone (fn, stack);
+
+  TEST_COMPARE (errno, EINVAL);
+  TEST_COMPARE (result, -1);
+}
 
-  result = clone (child_fn, NULL, 0, NULL);
+static int
+do_test (void)
+{
+  char st[128 * 1024] __attribute__ ((aligned));
+  void *stack = NULL;
+#if _STACK_GROWS_DOWN
+  stack = st + sizeof (st);
+#elif _STACK_GROWS_UP
+  stack = st;
+#else
+# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+#endif
 
-  if (errno != EINVAL || result != -1)
-    {
-      printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n",
-              result, errno, EINVAL);
-      return 1;
-    }
+  do_test_single (child_fn, NULL);
+  do_test_single (NULL, stack);
+  do_test_single (NULL, NULL);
 
-  puts ("All OK");
   return 0;
 }
 
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
index 4223feb95f..9a1e7aa646 100644
--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
 $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
 	cp $< $@
 endif
+
+ifeq (yes,$(have-mamx-tile))
+tests += \
+  tst-gnu2-tls2-amx \
+# tests
+
+modules-names += \
+  tst-gnu2-tls2-amx-mod0 \
+  tst-gnu2-tls2-amx-mod1 \
+  tst-gnu2-tls2-amx-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-amx.out: \
+  $(objpfx)tst-gnu2-tls2-amx-mod0.so \
+  $(objpfx)tst-gnu2-tls2-amx-mod1.so \
+  $(objpfx)tst-gnu2-tls2-amx-mod2.so
+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
+
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+endif
+
 endif # $(subdir) == elf
 
 ifneq ($(enable-cet),no)
diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
index 2f511321ad..ef4631bf4b 100644
--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
+++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
@@ -20,3 +20,8 @@
 # define ARCH_SHSTK_SHSTK		0x1
 # define ARCH_SHSTK_WRSS		0x2
 #endif
+
+#ifndef ARCH_GET_XCOMP_PERM
+# define ARCH_GET_XCOMP_PERM		0x1022
+# define ARCH_REQ_XCOMP_PERM		0x1023
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
new file mode 100644
index 0000000000..2e0c7b91b7
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
new file mode 100644
index 0000000000..b8a8ccf1c1
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod1.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
new file mode 100644
index 0000000000..cdf4a8f363
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
new file mode 100644
index 0000000000..ae4dd82556
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
@@ -0,0 +1,83 @@
+/* Test TLSDESC relocation with AMX.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+#include <asm/prctl.h>
+#include <support/check.h>
+#include "tst-gnu2-tls2-amx.h"
+
+extern int arch_prctl (int, ...);
+
+#define X86_XSTATE_TILECFG_ID	17
+#define X86_XSTATE_TILEDATA_ID	18
+
+/* Initialize tile config.  */
+__attribute__ ((noinline, noclone))
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+  int i;
+  tileinfo->palette_id = 1;
+  tileinfo->start_row = 0;
+
+  tileinfo->colsb[0] = MAX_ROWS;
+  tileinfo->rows[0] = MAX_ROWS;
+
+  for (i = 1; i < 4; ++i)
+  {
+    tileinfo->colsb[i] = MAX_COLS;
+    tileinfo->rows[i] = MAX_ROWS;
+  }
+
+  _tile_loadconfig (tileinfo);
+}
+
+static bool
+enable_amx (void)
+{
+  uint64_t bitmask;
+  if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
+    return false;
+
+  if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
+    return false;
+
+  if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
+    return false;
+
+  /* Load tile configuration.  */
+  __tilecfg tile_data = { 0 };
+  init_tile_config (&tile_data);
+
+  return true;
+}
+
+/* An architecture can define it to clobber caller-saved registers in
+   malloc below to verify that the implicit TLSDESC call won't change
+   caller-saved registers.  */
+static void
+clear_tile_register (void)
+{
+  _tile_zero (2);
+}
+
+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
+#define IS_SUPPORTED()	enable_amx ()
+#define PREPARE_MALLOC() clear_tile_register ()
+
+#include <elf/tst-gnu2-tls2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
new file mode 100644
index 0000000000..1845a3caba
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
@@ -0,0 +1,63 @@
+/* Test TLSDESC relocation with AMX.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdint.h>
+#include <string.h>
+#include <x86intrin.h>
+#include <support/check.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+
+typedef struct __tile_config
+{
+  uint8_t palette_id;
+  uint8_t start_row;
+  uint8_t reserved_0[14];
+  uint16_t colsb[16];
+  uint8_t rows[16];
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Initialize int8_t buffer */
+static inline void
+init_buffer (int8_t *buf, int8_t value)
+{
+  int rows, colsb, i, j;
+  rows  = MAX_ROWS;
+  colsb = MAX_COLS;
+
+  for (i = 0; i < rows; i++)
+    for (j = 0; j < colsb; j++)
+      buf[i * colsb + j] = value;
+}
+
+#define BEFORE_TLSDESC_CALL()					\
+  int8_t src[MAX];						\
+  int8_t res[MAX];						\
+  /* Initialize src with data  */				\
+  init_buffer (src, 2);						\
+  /* Load tile rows from memory.  */				\
+  _tile_loadd (2, src, STRIDE);
+
+#define AFTER_TLSDESC_CALL()					\
+  /* Store the tile data to memory.  */				\
+  _tile_stored (2, res, STRIDE);				\
+  _tile_release ();						\
+  TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 4d50b327b5..5311b594af 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -1,5 +1,5 @@
 ifeq ($(subdir),csu)
-gen-as-const-headers += cpu-features-offsets.sym
+gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym
 endif
 
 ifeq ($(subdir),elf)
@@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags)
 CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector)
 
 tests += \
-  tst-get-cpu-features \
-  tst-get-cpu-features-static \
   tst-cpu-features-cpuinfo \
   tst-cpu-features-cpuinfo-static \
   tst-cpu-features-supports \
   tst-cpu-features-supports-static \
+  tst-get-cpu-features \
+  tst-get-cpu-features-static \
   tst-hwcap-tunables \
 # tests
 tests-static += \
-  tst-get-cpu-features-static \
   tst-cpu-features-cpuinfo-static \
   tst-cpu-features-supports-static \
+  tst-get-cpu-features-static \
 # tests-static
 ifeq (yes,$(have-ifunc))
 ifeq (yes,$(have-gcc-ifunc))
@@ -86,6 +86,11 @@ endif
 tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F
 tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV)
 tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
+
+CFLAGS-tst-gnu2-tls2.c += -msse
+CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell
+CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell
+CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell
 endif
 
 ifeq ($(subdir),math)
diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h
index 70f652bca1..3f40aa76f9 100644
--- a/sysdeps/x86/bits/wordsize.h
+++ b/sysdeps/x86/bits/wordsize.h
@@ -8,10 +8,9 @@
 #define __WORDSIZE32_PTRDIFF_LONG	0
 #endif
 
+#define __WORDSIZE_TIME64_COMPAT32 1
+
 #ifdef __x86_64__
-# define __WORDSIZE_TIME64_COMPAT32	1
 /* Both x86-64 and x32 use the 64-bit system call interface.  */
 # define __SYSCALL_WORDSIZE		64
-#else
-# define __WORDSIZE_TIME64_COMPAT32	0
 #endif
diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure
index 1f4c2d67fd..d28d9bcb29 100644
--- a/sysdeps/x86/configure
+++ b/sysdeps/x86/configure
@@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; }
   if test $libc_cv_have_x86_lahf_sahf = yes; then
     printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h
 
+    ISAFLAG="-DHAVE_X86_LAHF_SAHF"
   fi
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5
 printf %s "checking for MOVBE instruction support... " >&6; }
@@ -120,8 +121,47 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; }
   if test $libc_cv_have_x86_movbe = yes; then
     printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h
 
+    ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
   fi
+
+  # Check for ISA level support.
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5
+printf %s "checking for ISA level support... " >&6; }
+if test ${libc_cv_have_x86_isa_level+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat > conftest.c <<EOF
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL >= 4
+libc_cv_have_x86_isa_level=4
+#elif MINIMUM_X86_ISA_LEVEL == 3
+libc_cv_have_x86_isa_level=3
+#elif MINIMUM_X86_ISA_LEVEL == 2
+libc_cv_have_x86_isa_level=2
+#else
+libc_cv_have_x86_isa_level=baseline
+#endif
+EOF
+		 eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
+		 rm -rf conftest*
 fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5
+printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; }
+else
+  libc_cv_have_x86_isa_level=baseline
+fi
+if test $libc_cv_have_x86_isa_level = baseline; then
+  printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h
+
+else
+  printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL $libc_cv_have_x86_isa_level" >>confdefs.h
+
+fi
+config_vars="$config_vars
+have-x86-isa-level = $libc_cv_have_x86_isa_level"
+config_vars="$config_vars
+x86-isa-level-3-or-above = 3 4"
 config_vars="$config_vars
 enable-x86-isa-level = $libc_cv_include_x86_isa_level"
 
diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac
index 437a50623b..5b0acd03d2 100644
--- a/sysdeps/x86/configure.ac
+++ b/sysdeps/x86/configure.ac
@@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then
     fi])
   if test $libc_cv_have_x86_lahf_sahf = yes; then
     AC_DEFINE(HAVE_X86_LAHF_SAHF)
+    ISAFLAG="-DHAVE_X86_LAHF_SAHF"
   fi
   AC_CACHE_CHECK([for MOVBE instruction support],
 		 libc_cv_have_x86_movbe, [dnl
@@ -81,8 +82,36 @@ if test $libc_cv_include_x86_isa_level = yes; then
     fi])
   if test $libc_cv_have_x86_movbe = yes; then
     AC_DEFINE(HAVE_X86_MOVBE)
+    ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE"
   fi
+
+  # Check for ISA level support.
+  AC_CACHE_CHECK([for ISA level support],
+		 libc_cv_have_x86_isa_level, [dnl
+cat > conftest.c <<EOF
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL >= 4
+libc_cv_have_x86_isa_level=4
+#elif MINIMUM_X86_ISA_LEVEL == 3
+libc_cv_have_x86_isa_level=3
+#elif MINIMUM_X86_ISA_LEVEL == 2
+libc_cv_have_x86_isa_level=2
+#else
+libc_cv_have_x86_isa_level=baseline
+#endif
+EOF
+		 eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level`
+		 rm -rf conftest*])
+else
+  libc_cv_have_x86_isa_level=baseline
+fi
+if test $libc_cv_have_x86_isa_level = baseline; then
+  AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1)
+else
+  AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level)
 fi
+LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level])
+LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4])
 LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level])
 
 dnl Static PIE is supported.
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index 6a8fd29813..21fc88d651 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -3,3 +3,4 @@
 #include <ldsodefs.h>
 
 XSAVE_STATE_SIZE_OFFSET	offsetof (struct cpu_features, xsave_state_size)
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 25e6622a79..3d7c2819d7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -18,6 +18,7 @@
 
 #include <dl-hwcap.h>
 #include <libc-pointer-arith.h>
+#include <isa-level.h>
 #include <get-isa-level.h>
 #include <cacheinfo.h>
 #include <dl-cacheinfo.h>
@@ -27,8 +28,13 @@
 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
   attribute_hidden;
 
-#if defined SHARED && defined __x86_64__
-# include <dl-plt-rewrite.h>
+#if defined SHARED
+extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
+extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
+extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;
+
+# ifdef __x86_64__
+#  include <dl-plt-rewrite.h>
 
 static void
 TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
@@ -47,6 +53,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
 		 : plt_rewrite_jmp);
     }
 }
+# else
+extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
+# endif
+#endif
+
+#ifdef __x86_64__
+extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
+extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
+extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
 #endif
 
 #ifdef __LP64__
@@ -293,8 +308,10 @@ update_active (struct cpu_features *cpu_features)
 	  __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
 	  if (ebx != 0)
 	    {
+	      /* NB: On AMX capable processors, ebx always includes AMX
+		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -306,6 +323,11 @@ update_active (struct cpu_features *cpu_features)
 		{
 		  unsigned int xstate_comp_offsets[32];
 		  unsigned int xstate_comp_sizes[32];
+#ifdef __x86_64__
+		  unsigned int xstate_amx_comp_offsets[32];
+		  unsigned int xstate_amx_comp_sizes[32];
+		  unsigned int amx_ecx;
+#endif
 		  unsigned int i;
 
 		  xstate_comp_offsets[0] = 0;
@@ -313,16 +335,39 @@ update_active (struct cpu_features *cpu_features)
 		  xstate_comp_offsets[2] = 576;
 		  xstate_comp_sizes[0] = 160;
 		  xstate_comp_sizes[1] = 256;
+#ifdef __x86_64__
+		  xstate_amx_comp_offsets[0] = 0;
+		  xstate_amx_comp_offsets[1] = 160;
+		  xstate_amx_comp_offsets[2] = 576;
+		  xstate_amx_comp_sizes[0] = 160;
+		  xstate_amx_comp_sizes[1] = 256;
+#endif
 
 		  for (i = 2; i < 32; i++)
 		    {
-		      if ((STATE_SAVE_MASK & (1 << i)) != 0)
+		      if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
 			{
 			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
-			  xstate_comp_sizes[i] = eax;
+#ifdef __x86_64__
+			  /* Include this in xsave_state_full_size.  */
+			  amx_ecx = ecx;
+			  xstate_amx_comp_sizes[i] = eax;
+			  if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
+			    {
+			      /* Exclude this from xsave_state_size.  */
+			      ecx = 0;
+			      xstate_comp_sizes[i] = 0;
+			    }
+			  else
+#endif
+			    xstate_comp_sizes[i] = eax;
 			}
 		      else
 			{
+#ifdef __x86_64__
+			  amx_ecx = 0;
+			  xstate_amx_comp_sizes[i] = 0;
+#endif
 			  ecx = 0;
 			  xstate_comp_sizes[i] = 0;
 			}
@@ -335,6 +380,15 @@ update_active (struct cpu_features *cpu_features)
 			  if ((ecx & (1 << 1)) != 0)
 			    xstate_comp_offsets[i]
 			      = ALIGN_UP (xstate_comp_offsets[i], 64);
+#ifdef __x86_64__
+			  xstate_amx_comp_offsets[i]
+			    = (xstate_amx_comp_offsets[i - 1]
+			       + xstate_amx_comp_sizes[i - 1]);
+			  if ((amx_ecx & (1 << 1)) != 0)
+			    xstate_amx_comp_offsets[i]
+			      = ALIGN_UP (xstate_amx_comp_offsets[i],
+					  64);
+#endif
 			}
 		    }
 
@@ -343,8 +397,23 @@ update_active (struct cpu_features *cpu_features)
 		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
 		  if (size)
 		    {
+#ifdef __x86_64__
+		      unsigned int amx_size
+			= (xstate_amx_comp_offsets[31]
+			   + xstate_amx_comp_sizes[31]);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
+				    64);
+		      /* Set xsave_state_full_size to the compact AMX
+			 state size for XSAVEC.  NB: xsave_state_full_size
+			 is only used in _dl_tlsdesc_dynamic_xsave and
+			 _dl_tlsdesc_dynamic_xsavec.  */
+		      cpu_features->xsave_state_full_size = amx_size;
+#endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
@@ -1130,6 +1199,45 @@ no_cpuid:
 	       TUNABLE_CALLBACK (set_x86_shstk));
 #endif
 
+  if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
+      || (GLRO(dl_x86_cpu_features).xsave_state_size != 0))
+    {
+      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
+	{
+#ifdef __x86_64__
+	  GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
+#endif
+#ifdef SHARED
+	  GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
+#endif
+	}
+      else
+	{
+#ifdef __x86_64__
+	  GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
+#endif
+#ifdef SHARED
+	  GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
+#endif
+	}
+    }
+  else
+    {
+#ifdef __x86_64__
+      GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
+# ifdef SHARED
+      GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+# endif
+#else
+# ifdef SHARED
+      if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
+	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
+      else
+	GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
+# endif
+#endif
+    }
+
 #ifdef SHARED
 # ifdef __x86_64__
   TUNABLE_GET (plt_rewrite, tunable_val_t *,
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index d5101615e3..5a98f70364 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   long int data = -1;
   long int shared = -1;
   long int shared_per_thread = -1;
-  long int core = -1;
   unsigned int threads = 0;
   unsigned long int level1_icache_size = -1;
   unsigned long int level1_icache_linesize = -1;
@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   if (cpu_features->basic.kind == arch_kind_intel)
     {
       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
-      core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
       shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
       shared_per_thread = shared;
 
@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
 	= handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
       level1_dcache_linesize
 	= handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
-      level2_cache_size = core;
+      level2_cache_size
+	= handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
       level2_cache_assoc
 	= handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
       level2_cache_linesize
@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
       level4_cache_size
 	= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
 
-      get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+      get_common_cache_info (&shared, &shared_per_thread, &threads,
+			     level2_cache_size);
     }
   else if (cpu_features->basic.kind == arch_kind_zhaoxin)
     {
       data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
-      core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
       shared_per_thread = shared;
 
@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
       level1_dcache_size = data;
       level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
       level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
-      level2_cache_size = core;
+      level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
       level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
       level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
       level3_cache_size = shared;
       level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
       level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
 
-      get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+      get_common_cache_info (&shared, &shared_per_thread, &threads,
+			     level2_cache_size);
     }
   else if (cpu_features->basic.kind == arch_kind_amd)
     {
       data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
-      core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
       shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
 
       level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
       level1_dcache_size = data;
       level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
       level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
-      level2_cache_size = core;
+      level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);;
       level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
       level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
       level3_cache_size = shared;
@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
       if (shared <= 0)
         {
            /* No shared L3 cache.  All we have is the L2 cache.  */
-           shared = core;
+           shared = level2_cache_size;
         }
       else if (cpu_features->basic.family < 0x17)
         {
            /* Account for exclusive L2 and L3 caches.  */
-           shared += core;
+           shared += level2_cache_size;
         }
 
       shared_per_thread = shared;
@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
   if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
     rep_movsb_threshold = 2112;
 
+   /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
+      cases slower than the vectorized path (and for some alignments,
+      it is really slow, check BZ #30994).  */
+  if (cpu_features->basic.kind == arch_kind_amd)
+    rep_movsb_threshold = non_temporal_threshold;
+
   /* The default threshold to use Enhanced REP STOSB.  */
   unsigned long int rep_stosb_threshold = 2048;
 
@@ -1016,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
      minimum value is fixed.  */
   rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
 				     long int, NULL);
+  if (cpu_features->basic.kind == arch_kind_amd
+      && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
+    /* For AMD Zen3+ architecture, the performance of the vectorized loop is
+       slightly better than ERMS.  */
+    rep_stosb_threshold = SIZE_MAX;
 
   TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
   TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
@@ -1028,16 +1038,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
 			   SIZE_MAX);
 
   unsigned long int rep_movsb_stop_threshold;
-  /* ERMS feature is implemented from AMD Zen3 architecture and it is
-     performing poorly for data above L2 cache size. Henceforth, adding
-     an upper bound threshold parameter to limit the usage of Enhanced
-     REP MOVSB operations and setting its value to L2 cache size.  */
-  if (cpu_features->basic.kind == arch_kind_amd)
-    rep_movsb_stop_threshold = core;
   /* Setting the upper bound of ERMS to the computed value of
-     non-temporal threshold for architectures other than AMD.  */
-  else
-    rep_movsb_stop_threshold = non_temporal_threshold;
+     non-temporal threshold for all architectures.  */
+  rep_movsb_stop_threshold = non_temporal_threshold;
 
   cpu_features->data_cache_size = data;
   cpu_features->shared_cache_size = shared;
diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c
index ee957b4d70..5920d4b320 100644
--- a/sysdeps/x86/dl-procinfo.c
+++ b/sysdeps/x86/dl-procinfo.c
@@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9]
 #else
 ,
 #endif
+
+#if defined SHARED && !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL
+  ._dl_x86_tlsdesc_dynamic
+# else
+PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic
+# endif
+# ifndef PROCINFO_DECL
+= NULL
+# endif
+# ifdef PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym
similarity index 89%
rename from sysdeps/x86_64/features-offsets.sym
rename to sysdeps/x86/features-offsets.sym
index 9e4be3393a..77e990c705 100644
--- a/sysdeps/x86_64/features-offsets.sym
+++ b/sysdeps/x86/features-offsets.sym
@@ -3,4 +3,6 @@
 #include <ldsodefs.h>
 
 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
+#ifdef __x86_64__
 RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
+#endif
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index b9bf3115b6..cd7bd27cf3 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -934,6 +934,8 @@ struct cpu_features
   /* The full state size for XSAVE when XSAVEC is disabled by
 
      GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
+
+     and the AMX state size when XSAVEC is available.
    */
   unsigned int xsave_state_full_size;
   /* Data cache size for use in memory and string routines, typically
diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h
index 11fe1ca90c..2c7f74212b 100644
--- a/sysdeps/x86/isa-level.h
+++ b/sysdeps/x86/isa-level.h
@@ -61,8 +61,10 @@
 # define __X86_ISA_V4 0
 #endif
 
-#define MINIMUM_X86_ISA_LEVEL                                                 \
+#ifndef MINIMUM_X86_ISA_LEVEL
+# define MINIMUM_X86_ISA_LEVEL                                                 \
   (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4)
+#endif
 
 /* Depending on the minimum ISA level, a feature check result can be a
    compile-time constant.. */
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 85d0a8c943..7359149e17 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -21,14 +21,118 @@
 
 #include <sysdeps/generic/sysdep.h>
 
+/* The extended state feature IDs in the state component bitmap.  */
+#define X86_XSTATE_X87_ID	0
+#define X86_XSTATE_SSE_ID	1
+#define X86_XSTATE_AVX_ID	2
+#define X86_XSTATE_BNDREGS_ID	3
+#define X86_XSTATE_BNDCFG_ID	4
+#define X86_XSTATE_K_ID		5
+#define X86_XSTATE_ZMM_H_ID	6
+#define X86_XSTATE_ZMM_ID	7
+#define X86_XSTATE_PKRU_ID	9
+#define X86_XSTATE_TILECFG_ID	17
+#define X86_XSTATE_TILEDATA_ID	18
+#define X86_XSTATE_APX_F_ID	19
+
+#ifdef __x86_64__
 /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
-   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
+   because MOV, instead of PUSH, is used to save registers onto stack.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDI
+   |                  |<- RSI
+   |                  |<- RDX
+   |                  |<- RCX
+   |                  |<- RAX
+   +==================+<- RSP aligned at 64 bytes
+
+ */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
+   RDI, RSI and RBX values on stack by xsave.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- RDI saved in the red zone
+   |                  |<- RSI saved in the red zone
+   |                  |<- RBX saved in the red zone
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R11
+   |                  |<- R10
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDX
+   |                  |<- RCX
+   +==================+<- RSP aligned at 64 bytes
+
+   Define the total register save area size for all integer registers by
+   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
+   stack without adjusting stack pointer first, using the red-zone.  */
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
+   registers are mutually exclusive.  */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID) 	\
+   | (1 << X86_XSTATE_ZMM_ID)		\
+   | (1 << X86_XSTATE_APX_F_ID))
+
+/* AMX state mask.  */
+# define AMX_STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
+
+/* States to be included in xsave_state_full_size.  */
+# define FULL_STATE_SAVE_MASK		\
+  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
+   uses PUSH to save registers onto stack, use 0 here.  */
+# define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
+
+/* Save SSE, AVX, AXV512, mask and bound registers.   */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID))
+
+/* States to be included in xsave_state_size.  */
+# define FULL_STATE_SAVE_MASK		STATE_SAVE_MASK
+#endif
 
-/* Save SSE, AVX, AVX512, mask and bound registers.  */
-#define STATE_SAVE_MASK \
-  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
+   Compiler assumes that all registers, including AMX and x87 FPU
+   stack registers, are unchanged after CALL, except for EFLAGS and
+   RAX/EAX.  */
+#define TLSDESC_CALL_STATE_SAVE_MASK	\
+  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
 
 /* Constants for bits in __x86_string_control:  */
 
diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c
new file mode 100644
index 0000000000..de900a423b
--- /dev/null
+++ b/sysdeps/x86/tst-gnu2-tls2.c
@@ -0,0 +1,20 @@
+#ifndef __x86_64__
+#include <sys/platform/x86.h>
+
+#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2)
+#endif
+
+/* Clear XMM0...XMM7  */
+#define PREPARE_MALLOC()				\
+{							\
+  asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" );	\
+  asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" );	\
+  asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" );	\
+  asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" );	\
+  asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" );	\
+  asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" );	\
+  asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" );	\
+  asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" );	\
+}
+
+#include <elf/tst-gnu2-tls2.c>
diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h
new file mode 100644
index 0000000000..8f21ebe1b6
--- /dev/null
+++ b/sysdeps/x86/utmp-size.h
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 90f4ecfd26..0ede447405 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt
 endif
 
 ifeq ($(subdir),csu)
-gen-as-const-headers += features-offsets.sym link-defines.sym
+gen-as-const-headers += link-defines.sym
 endif
 
 ifeq ($(subdir),gmon)
@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
 $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
 endif
 
+test-internal-extras += tst-gnu2-tls2mod1
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
@@ -250,6 +252,10 @@ sysdep-dl-routines += dl-cet
 
 tests += \
   tst-cet-legacy-1 \
+  tst-cet-legacy-10 \
+  tst-cet-legacy-10-static \
+  tst-cet-legacy-10a \
+  tst-cet-legacy-10a-static \
   tst-cet-legacy-1a \
   tst-cet-legacy-2 \
   tst-cet-legacy-2a \
@@ -261,15 +267,11 @@ tests += \
   tst-cet-legacy-8 \
   tst-cet-legacy-9 \
   tst-cet-legacy-9-static \
-  tst-cet-legacy-10 \
-  tst-cet-legacy-10-static \
-  tst-cet-legacy-10a \
-  tst-cet-legacy-10a-static \
 # tests
 tests-static += \
-  tst-cet-legacy-9-static \
   tst-cet-legacy-10-static \
   tst-cet-legacy-10a-static \
+  tst-cet-legacy-9-static \
 # tests-static
 tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd)
 
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index 418cc4a9b8..04a534fa12 100755
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -134,6 +134,34 @@ fi
 config_vars="$config_vars
 enable-cet = $enable_cet"
 
+# Check if -mamx-tile works properly.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
+printf %s "checking whether -mamx-tile works properly... " >&6; }
+if test ${libc_cv_x86_have_amx_tile+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+	       libc_cv_x86_have_amx_tile=no
+	       if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+		 if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+		   libc_cv_x86_have_amx_tile=yes
+	         fi
+	       fi
+	       rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
+config_vars="$config_vars
+have-mamx-tile = $libc_cv_x86_have_amx_tile"
+
 test -n "$critic_missing" && as_fn_error $? "
 *** $critic_missing" "$LINENO" 5
 
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index d1f803c02e..c714c47351 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
 fi
 LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
 
+# Check if -mamx-tile works properly.
+AC_CACHE_CHECK(whether -mamx-tile works properly,
+	       libc_cv_x86_have_amx_tile, [dnl
+cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+	       libc_cv_x86_have_amx_tile=no
+	       if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
+		 if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+		   libc_cv_x86_have_amx_tile=yes
+	         fi
+	       fi
+	       rm -rf conftest*])
+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
+
 test -n "$critic_missing" && AC_MSG_ERROR([
 *** $critic_missing])
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 6d605d0d32..ff5d45f7cb 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 			   int lazy, int profile)
 {
   Elf64_Addr *got;
-  extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
-  extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
-  extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
       /* Identify this shared object.  */
       *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
 
-      const struct cpu_features* cpu_features = __get_cpu_features ();
-
 #ifdef SHARED
       /* The got[2] entry contains the address of a function which gets
 	 called to get the address of a so far unresolved function and
@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 end in this function.  */
       if (__glibc_unlikely (profile))
 	{
+	  const struct cpu_features* cpu_features = __get_cpu_features ();
 	  if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
 	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
 	  else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
 	     the resolved address.  */
-	  if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
-	      || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
-	    *(ElfW(Addr) *) (got + 2)
-	      = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
-		 ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
-		 : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
-	  else
-	    *(ElfW(Addr) *) (got + 2)
-	      = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
+	  *(ElfW(Addr) *) (got + 2)
+	    = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve);
 	}
     }
 
@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n",
 		  {
 		    td->arg = _dl_make_tlsdesc_dynamic
 		      (sym_map, sym->st_value + reloc->r_addend);
-		    td->entry = _dl_tlsdesc_dynamic;
+		    td->entry = GLRO(dl_x86_tlsdesc_dynamic);
 		  }
 		else
 #  endif
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
index 4d1d790fbb..06637a8154 100644
--- a/sysdeps/x86_64/dl-procinfo.c
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -41,5 +41,21 @@
 
 #include <sysdeps/x86/dl-procinfo.c>
 
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+  ._dl_x86_64_runtime_resolve
+# else
+PROCINFO_CLASS void * _dl_x86_64_runtime_resolve
+# endif
+# ifndef PROCINFO_DECL
+= NULL
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
 #undef PROCINFO_DECL
 #undef PROCINFO_CLASS
diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
new file mode 100644
index 0000000000..9f02cfc3eb
--- /dev/null
+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
@@ -0,0 +1,166 @@
+/* Thread-local storage handling in the ELF dynamic linker.  x86_64 version.
+   Copyright (C) 2004-2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef SECTION
+# define SECTION(p)	p
+#endif
+
+#undef REGISTER_SAVE_AREA
+#undef LOCAL_STORAGE_AREA
+#undef BASE
+
+#include "dl-trampoline-state.h"
+
+	.section SECTION(.text),"ax",@progbits
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,@function
+
+     /* %rax points to the TLS descriptor, such that 0(%rax) points to
+	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
+	tlsdesc_dynamic_arg object.  It must return in %rax the offset
+	between the thread pointer and the object denoted by the
+	argument, without clobbering any registers.
+
+	The assembly code that follows is a rendition of the following
+	C code, hand-optimized a little bit.
+
+ptrdiff_t
+_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
+{
+  struct tlsdesc_dynamic_arg *td = tdp->arg;
+  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+  if (__builtin_expect (td->gen_count <= dtv[0].counter
+			&& (dtv[td->tlsinfo.ti_module].pointer.val
+			    != TLS_DTV_UNALLOCATED),
+			1))
+    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
+      - __thread_pointer;
+
+  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
+}
+*/
+	cfi_startproc
+	.align 16
+_dl_tlsdesc_dynamic:
+	_CET_ENDBR
+	/* Preserve call-clobbered registers that we modify.
+	   We need two scratch regs anyway.  */
+	movq	%rsi, -16(%rsp)
+	mov	%fs:DTV_OFFSET, %RSI_LP
+	movq	%rdi, -8(%rsp)
+	movq	TLSDESC_ARG(%rax), %rdi
+	movq	(%rsi), %rax
+	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
+	ja	2f
+	movq	TLSDESC_MODID(%rdi), %rax
+	salq	$4, %rax
+	movq	(%rax,%rsi), %rax
+	cmpq	$-1, %rax
+	je	2f
+	addq	TLSDESC_MODOFF(%rdi), %rax
+1:
+	movq	-16(%rsp), %rsi
+	sub	%fs:0, %RAX_LP
+	movq	-8(%rsp), %rdi
+	ret
+2:
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	movq	%rbx, -24(%rsp)
+	mov	%RSP_LP, %RBX_LP
+	cfi_def_cfa_register(%rbx)
+	and	$-STATE_SAVE_ALIGNMENT, %RSP_LP
+#endif
+#ifdef REGISTER_SAVE_AREA
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	/* STATE_SAVE_OFFSET has space for 8 integer registers.  But we
+	   need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus
+	   RBX above.  */
+	sub	$(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP
+# else
+	sub	$REGISTER_SAVE_AREA, %RSP_LP
+	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+# endif
+#else
+	/* Allocate stack space of the required size to save the state.  */
+	sub	_rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
+#endif
+	/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
+	   r10 and r11.  */
+	movq	%rcx, REGISTER_SAVE_RCX(%rsp)
+	movq	%rdx, REGISTER_SAVE_RDX(%rsp)
+	movq	%r8, REGISTER_SAVE_R8(%rsp)
+	movq	%r9, REGISTER_SAVE_R9(%rsp)
+	movq	%r10, REGISTER_SAVE_R10(%rsp)
+	movq	%r11, REGISTER_SAVE_R11(%rsp)
+#ifdef USE_FXSAVE
+	fxsave	STATE_SAVE_OFFSET(%rsp)
+#else
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	/* Clear the XSAVE Header.  */
+# ifdef USE_XSAVE
+	movq	%rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+# endif
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+	movq	%rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+# ifdef USE_XSAVE
+	xsave	STATE_SAVE_OFFSET(%rsp)
+# else
+	xsavec	STATE_SAVE_OFFSET(%rsp)
+# endif
+#endif
+	/* %rdi already points to the tlsinfo data structure.  */
+	call	HIDDEN_JUMPTARGET (__tls_get_addr)
+	# Get register content back.
+#ifdef USE_FXSAVE
+	fxrstor	STATE_SAVE_OFFSET(%rsp)
+#else
+	/* Save and retore __tls_get_addr return value stored in RAX.  */
+	mov	%RAX_LP, %RCX_LP
+	movl	$TLSDESC_CALL_STATE_SAVE_MASK, %eax
+	xorl	%edx, %edx
+	xrstor	STATE_SAVE_OFFSET(%rsp)
+	mov	%RCX_LP, %RAX_LP
+#endif
+	movq	REGISTER_SAVE_R11(%rsp), %r11
+	movq	REGISTER_SAVE_R10(%rsp), %r10
+	movq	REGISTER_SAVE_R9(%rsp), %r9
+	movq	REGISTER_SAVE_R8(%rsp), %r8
+	movq	REGISTER_SAVE_RDX(%rsp), %rdx
+	movq	REGISTER_SAVE_RCX(%rsp), %rcx
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+	mov	%RBX_LP, %RSP_LP
+	cfi_def_cfa_register(%rsp)
+	movq	-24(%rsp), %rbx
+	cfi_restore(%rbx)
+#else
+	add	$REGISTER_SAVE_AREA, %RSP_LP
+	cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA)
+#endif
+	jmp	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index f748af2ece..057a10862a 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -18,7 +18,20 @@
 
 #include <sysdep.h>
 #include <tls.h>
+#include <cpu-features-offsets.h>
+#include <features-offsets.h>
+#include <isa-level.h>
 #include "tlsdesc.h"
+#include "dl-trampoline-save.h"
+
+/* Area on stack to save and restore registers used for parameter
+   passing when calling _dl_tlsdesc_dynamic.  */
+#define REGISTER_SAVE_RCX	0
+#define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
+#define REGISTER_SAVE_R8	(REGISTER_SAVE_RDX + 8)
+#define REGISTER_SAVE_R9	(REGISTER_SAVE_R8 + 8)
+#define REGISTER_SAVE_R10	(REGISTER_SAVE_R9 + 8)
+#define REGISTER_SAVE_R11	(REGISTER_SAVE_R10 + 8)
 
 	.text
 
@@ -67,80 +80,26 @@ _dl_tlsdesc_undefweak:
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 
 #ifdef SHARED
-	.hidden _dl_tlsdesc_dynamic
-	.global	_dl_tlsdesc_dynamic
-	.type	_dl_tlsdesc_dynamic,@function
-
-     /* %rax points to the TLS descriptor, such that 0(%rax) points to
-	_dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct
-	tlsdesc_dynamic_arg object.  It must return in %rax the offset
-	between the thread pointer and the object denoted by the
-	argument, without clobbering any registers.
-
-	The assembly code that follows is a rendition of the following
-	C code, hand-optimized a little bit.
-
-ptrdiff_t
-_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
-{
-  struct tlsdesc_dynamic_arg *td = tdp->arg;
-  dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
-  if (__builtin_expect (td->gen_count <= dtv[0].counter
-			&& (dtv[td->tlsinfo.ti_module].pointer.val
-			    != TLS_DTV_UNALLOCATED),
-			1))
-    return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset
-      - __thread_pointer;
-
-  return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer;
-}
-*/
-	cfi_startproc
-	.align 16
-_dl_tlsdesc_dynamic:
-	_CET_ENDBR
-	/* Preserve call-clobbered registers that we modify.
-	   We need two scratch regs anyway.  */
-	movq	%rsi, -16(%rsp)
-	mov	%fs:DTV_OFFSET, %RSI_LP
-	movq	%rdi, -8(%rsp)
-	movq	TLSDESC_ARG(%rax), %rdi
-	movq	(%rsi), %rax
-	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
-	ja	.Lslow
-	movq	TLSDESC_MODID(%rdi), %rax
-	salq	$4, %rax
-	movq	(%rax,%rsi), %rax
-	cmpq	$-1, %rax
-	je	.Lslow
-	addq	TLSDESC_MODOFF(%rdi), %rax
-.Lret:
-	movq	-16(%rsp), %rsi
-	sub	%fs:0, %RAX_LP
-	movq	-8(%rsp), %rdi
-	ret
-.Lslow:
-	/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
-	   r10 and r11.  Also, align the stack, that's off by 8 bytes.	*/
-	subq	$72, %rsp
-	cfi_adjust_cfa_offset (72)
-	movq	%rdx, 8(%rsp)
-	movq	%rcx, 16(%rsp)
-	movq	%r8, 24(%rsp)
-	movq	%r9, 32(%rsp)
-	movq	%r10, 40(%rsp)
-	movq	%r11, 48(%rsp)
-	/* %rdi already points to the tlsinfo data structure.  */
-	call	HIDDEN_JUMPTARGET (__tls_get_addr)
-	movq	8(%rsp), %rdx
-	movq	16(%rsp), %rcx
-	movq	24(%rsp), %r8
-	movq	32(%rsp), %r9
-	movq	40(%rsp), %r10
-	movq	48(%rsp), %r11
-	addq	$72, %rsp
-	cfi_adjust_cfa_offset (-72)
-	jmp	.Lret
-	cfi_endproc
-	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+#  define USE_FXSAVE
+#  define STATE_SAVE_ALIGNMENT	16
+#  define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_fxsave
+#  include "dl-tlsdesc-dynamic.h"
+#  undef _dl_tlsdesc_dynamic
+#  undef USE_FXSAVE
+# endif
+
+# define USE_XSAVE
+# define STATE_SAVE_ALIGNMENT	64
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVE
+
+# define USE_XSAVEC
+# define STATE_SAVE_ALIGNMENT	64
+# define _dl_tlsdesc_dynamic	_dl_tlsdesc_dynamic_xsavec
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_XSAVEC
 #endif /* SHARED */
diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h
new file mode 100644
index 0000000000..84eac4a8ac
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline-save.h
@@ -0,0 +1,34 @@
+/* x86-64 PLT trampoline register save macros.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+   __tls_get_addr may be called with 8-byte stack alignment.  Although
+   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+   that stack will be always aligned at 16 bytes.  */
+# define DL_STACK_ALIGNMENT 8
+#endif
+
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+   stack to 16 bytes before calling _dl_fixup.  */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || 16 > DL_STACK_ALIGNMENT)
diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h
new file mode 100644
index 0000000000..575f120797
--- /dev/null
+++ b/sysdeps/x86_64/dl-trampoline-state.h
@@ -0,0 +1,51 @@
+/* x86-64 PLT dl-trampoline state macros.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if (STATE_SAVE_ALIGNMENT % 16) != 0
+# error STATE_SAVE_ALIGNMENT must be multiple of 16
+#endif
+
+#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
+#endif
+
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX.  */
+# define LOCAL_STORAGE_AREA	8
+# define BASE			rbx
+# ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers.  */
+#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
+#  if (REGISTER_SAVE_AREA % 16) != 0
+#   error REGISTER_SAVE_AREA must be multiple of 16
+#  endif
+# endif
+#else
+# ifndef USE_FXSAVE
+#  error USE_FXSAVE must be defined
+# endif
+/* Use fxsave to save XMM registers.  */
+# define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
+/* Local stack area before jumping to function address:  All saved
+   registers.  */
+# define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
+# define BASE			rsp
+# if (REGISTER_SAVE_AREA % 16) != 8
+#  error REGISTER_SAVE_AREA must be odd multiple of 8
+# endif
+#endif
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index b2e7e0f69b..87c5137837 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -22,25 +22,7 @@
 #include <features-offsets.h>
 #include <link-defines.h>
 #include <isa-level.h>
-
-#ifndef DL_STACK_ALIGNMENT
-/* Due to GCC bug:
-
-   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
-
-   __tls_get_addr may be called with 8-byte stack alignment.  Although
-   this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
-   that stack will be always aligned at 16 bytes.  We use unaligned
-   16-byte move to load and store SSE registers, which has no penalty
-   on modern processors if stack is 16-byte aligned.  */
-# define DL_STACK_ALIGNMENT 8
-#endif
-
-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
-   stack to 16 bytes before calling _dl_fixup.  */
-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
-  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
-   || 16 > DL_STACK_ALIGNMENT)
+#include "dl-trampoline-save.h"
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_fixup.  */
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index f55c6ea040..d9ccfb40d4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -27,39 +27,7 @@
 # undef LOCAL_STORAGE_AREA
 # undef BASE
 
-# if (STATE_SAVE_ALIGNMENT % 16) != 0
-#  error STATE_SAVE_ALIGNMENT must be multiple of 16
-# endif
-
-# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
-#  error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT
-# endif
-
-# if DL_RUNTIME_RESOLVE_REALIGN_STACK
-/* Local stack area before jumping to function address: RBX.  */
-#  define LOCAL_STORAGE_AREA	8
-#  define BASE			rbx
-#  ifdef USE_FXSAVE
-/* Use fxsave to save XMM registers.  */
-#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
-#   if (REGISTER_SAVE_AREA % 16) != 0
-#    error REGISTER_SAVE_AREA must be multiple of 16
-#   endif
-#  endif
-# else
-#  ifndef USE_FXSAVE
-#   error USE_FXSAVE must be defined
-#  endif
-/* Use fxsave to save XMM registers.  */
-#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
-/* Local stack area before jumping to function address:  All saved
-   registers.  */
-#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
-#  define BASE			rsp
-#  if (REGISTER_SAVE_AREA % 16) != 8
-#   error REGISTER_SAVE_AREA must be odd multiple of 8
-#  endif
-# endif
+# include "dl-trampoline-state.h"
 
 	.globl _dl_runtime_resolve
 	.hidden _dl_runtime_resolve
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index ea81753b70..6ddd50240c 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,49 +1,4 @@
 ifeq ($(subdir),math)
-libm-sysdep_routines += \
-  s_ceil-c \
-  s_ceilf-c \
-  s_floor-c \
-  s_floorf-c \
-  s_rint-c \
-  s_rintf-c \
-  s_nearbyint-c \
-  s_nearbyintf-c \
-  s_roundeven-c \
-  s_roundevenf-c \
-  s_trunc-c \
-  s_truncf-c \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  s_ceil-sse4_1 \
-  s_ceilf-sse4_1 \
-  s_floor-sse4_1 \
-  s_floorf-sse4_1 \
-  s_nearbyint-sse4_1 \
-  s_nearbyintf-sse4_1 \
-  s_roundeven-sse4_1 \
-  s_roundevenf-sse4_1 \
-  s_rint-sse4_1 \
-  s_rintf-sse4_1 \
-  s_trunc-sse4_1 \
-  s_truncf-sse4_1 \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  e_asin-fma \
-  e_atan2-fma \
-  e_exp-fma \
-  e_log-fma \
-  e_log2-fma \
-  e_pow-fma \
-  s_atan-fma \
-  s_expm1-fma \
-  s_log1p-fma \
-  s_sin-fma \
-  s_sincos-fma \
-  s_tan-fma \
-# libm-sysdep_routines
-
 CFLAGS-e_asin-fma.c = -mfma -mavx2
 CFLAGS-e_atan2-fma.c = -mfma -mavx2
 CFLAGS-e_exp-fma.c = -mfma -mavx2
@@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 CFLAGS-s_sincos-fma.c = -mfma -mavx2
 
-libm-sysdep_routines += \
-  s_cosf-sse2 \
-  s_sincosf-sse2 \
-  s_sinf-sse2 \
-# libm-sysdep_routines
-
-libm-sysdep_routines += \
-  e_exp2f-fma \
-  e_expf-fma \
-  e_log2f-fma \
-  e_logf-fma \
-  e_powf-fma \
-  s_cosf-fma \
-  s_sincosf-fma \
-  s_sinf-fma \
-# libm-sysdep_routines
-
 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
 CFLAGS-e_expf-fma.c = -mfma -mavx2
 CFLAGS-e_log2f-fma.c = -mfma -mavx2
@@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2
 CFLAGS-s_cosf-fma.c = -mfma -mavx2
 CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
+# Check if ISA level is 3 or above.
+ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
+libm-sysdep_routines += \
+  s_ceil-avx \
+  s_ceilf-avx \
+  s_floor-avx \
+  s_floorf-avx \
+  s_nearbyint-avx \
+  s_nearbyintf-avx \
+  s_rint-avx \
+  s_rintf-avx \
+  s_roundeven-avx \
+  s_roundevenf-avx \
+  s_trunc-avx \
+  s_truncf-avx \
+# libm-sysdep_routines
+else
 libm-sysdep_routines += \
+  e_asin-fma \
+  e_asin-fma4 \
+  e_atan2-avx \
+  e_atan2-fma \
+  e_atan2-fma4 \
+  e_exp-avx \
+  e_exp-fma \
   e_exp-fma4 \
+  e_exp2f-fma \
+  e_expf-fma \
+  e_log-avx \
+  e_log-fma \
   e_log-fma4 \
+  e_log2-fma \
+  e_log2f-fma \
+  e_logf-fma \
+  e_pow-fma \
   e_pow-fma4 \
-  e_asin-fma4 \
+  e_powf-fma \
+  s_atan-avx \
+  s_atan-fma \
   s_atan-fma4 \
-  e_atan2-fma4 \
+  s_ceil-sse4_1 \
+  s_ceilf-sse4_1 \
+  s_cosf-fma \
+  s_cosf-sse2 \
+  s_expm1-fma \
+  s_floor-sse4_1 \
+  s_floorf-sse4_1 \
+  s_log1p-fma \
+  s_nearbyint-sse4_1 \
+  s_nearbyintf-sse4_1 \
+  s_rint-sse4_1 \
+  s_rintf-sse4_1 \
+  s_roundeven-sse4_1 \
+  s_roundevenf-sse4_1 \
+  s_sin-avx \
+  s_sin-fma \
   s_sin-fma4 \
+  s_sincos-avx \
+  s_sincos-fma \
   s_sincos-fma4 \
+  s_sincosf-fma \
+  s_sincosf-sse2 \
+  s_sinf-fma \
+  s_sinf-sse2 \
+  s_tan-avx \
+  s_tan-fma \
   s_tan-fma4 \
+  s_trunc-sse4_1 \
+  s_truncf-sse4_1 \
+# libm-sysdep_routines
+ifeq ($(have-x86-isa-level),baseline)
+libm-sysdep_routines += \
+  s_ceil-c \
+  s_ceilf-c \
+  s_floor-c \
+  s_floorf-c \
+  s_nearbyint-c \
+  s_nearbyintf-c \
+  s_rint-c \
+  s_rintf-c \
+  s_roundeven-c \
+  s_roundevenf-c \
+  s_trunc-c \
+  s_truncf-c \
 # libm-sysdep_routines
+endif
+endif
 
 CFLAGS-e_asin-fma4.c = -mfma4
 CFLAGS-e_atan2-fma4.c = -mfma4
@@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 CFLAGS-s_sincos-fma4.c = -mfma4
 
-libm-sysdep_routines += \
-  e_exp-avx \
-  e_log-avx \
-  s_atan-avx \
-  e_atan2-avx \
-  s_sin-avx \
-  s_sincos-avx \
-  s_tan-avx \
-# libm-sysdep_routines
-
 CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
index 2eaa6c2c04..d64fca2586 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -16,26 +16,29 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_asin (double);
 extern double __redirect_ieee754_acos (double);
 
-#define SYMBOL_NAME ieee754_asin
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME ieee754_asin
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_asin, __asin)
 
-#undef SYMBOL_NAME
-#define SYMBOL_NAME ieee754_acos
-#include "ifunc-fma4.h"
+# undef SYMBOL_NAME
+# define SYMBOL_NAME ieee754_acos
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_acos, __acos)
 
-#define __ieee754_acos __ieee754_acos_sse2
-#define __ieee754_asin __ieee754_asin_sse2
+# define __ieee754_acos __ieee754_acos_sse2
+# define __ieee754_asin __ieee754_asin_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 17ee4f3c36..8a86c14ded 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -16,16 +16,19 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_atan2 (double, double);
 
-#define SYMBOL_NAME ieee754_atan2
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_atan2
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_atan2,
 		       __ieee754_atan2, IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_atan2, __atan2)
 
-#define __ieee754_atan2 __ieee754_atan2_sse2
+# define __ieee754_atan2 __ieee754_atan2_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index 406b7ebd44..d56329291a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_exp (double);
 
-#define SYMBOL_NAME ieee754_exp
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_exp
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_exp, __exp)
 
-#define __exp __ieee754_exp_sse2
+# define __exp __ieee754_exp_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
index 804fd6be85..06fe5028d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
@@ -16,25 +16,28 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_exp2f (float);
 
-#define SYMBOL_NAME exp2f
-#include "ifunc-fma.h"
+# define SYMBOL_NAME exp2f
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27);
 libm_alias_float_other (__exp2, exp2)
-#else
+# else
 libm_alias_float (__exp2, exp2)
-#endif
+# endif
 
 strong_alias (__exp2f, __ieee754_exp2f)
 libm_alias_finite (__exp2f, __exp2f)
 
-#define __exp2f __exp2f_sse2
+# define __exp2f __exp2f_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
index 4a7e2a5bce..19d767f636 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
@@ -16,28 +16,31 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_expf (float);
 
-#define SYMBOL_NAME expf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME expf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__expf, __GI___expf, __redirect_expf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27);
 libm_alias_float_other (__exp, exp)
-#else
+# else
 libm_alias_float (__exp, exp)
-#endif
+# endif
 
 strong_alias (__expf, __ieee754_expf)
 libm_alias_finite (__expf, __expf)
 
-#define __expf __expf_sse2
+# define __expf __expf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index 067fbf58c3..d80c1b1463 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_log (double);
 
-#define SYMBOL_NAME ieee754_log
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME ieee754_log
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
 		       IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_log, __log)
 
-#define __log __ieee754_log_sse2
+# define __log __ieee754_log_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c
index 9c57a2f6cc..9686782c09 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c
@@ -16,28 +16,31 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_log2 (double);
 
-#define SYMBOL_NAME log2
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log2
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__log2, __GI___log2, __redirect_log2)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29);
 libm_alias_double_other (__log2, log2)
-#else
+# else
 libm_alias_double (__log2, log2)
-#endif
+# endif
 
 strong_alias (__log2, __ieee754_log2)
 libm_alias_finite (__log2, __log2)
 
-#define __log2 __log2_sse2
+# define __log2 __log2_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_log2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
index 2b45c87f38..8ada46e11e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
@@ -16,28 +16,31 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_log2f (float);
 
-#define SYMBOL_NAME log2f
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log2f
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27);
 libm_alias_float_other (__log2, log2)
-#else
+# else
 libm_alias_float (__log2, log2)
-#endif
+# endif
 
 strong_alias (__log2f, __ieee754_log2f)
 libm_alias_finite (__log2f, __log2f)
 
-#define __log2f __log2f_sse2
+# define __log2f __log2f_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
index 97e23c8fea..a3978d9a8e 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
@@ -16,28 +16,31 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
 extern float __redirect_logf (float);
 
-#define SYMBOL_NAME logf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME logf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__logf, __GI___logf, __redirect_logf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27);
 libm_alias_float_other (__log, log)
-#else
+# else
 libm_alias_float (__log, log)
-#endif
+# endif
 
 strong_alias (__logf, __ieee754_logf)
 libm_alias_finite (__logf, __logf)
 
-#define __logf __logf_sse2
+# define __logf __logf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index 42618e7112..f8f17aff9f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <math.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <math.h>
+# include <libm-alias-finite.h>
 
 extern double __redirect_ieee754_pow (double, double);
 
-#define SYMBOL_NAME ieee754_pow
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME ieee754_pow
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_ieee754_pow,
 		       __ieee754_pow, IFUNC_SELECTOR ());
 libm_alias_finite (__ieee754_pow, __pow)
 
-#define __pow __ieee754_pow_sse2
+# define __pow __ieee754_pow_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
index 8e6ce13cc1..8b1a4c7d04 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
@@ -16,31 +16,34 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
-#include <libm-alias-finite.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# include <libm-alias-finite.h>
 
-#define powf __redirect_powf
-#define __DECL_SIMD___redirect_powf
-#include <math.h>
-#undef powf
+# define powf __redirect_powf
+# define __DECL_SIMD___redirect_powf
+# include <math.h>
+# undef powf
 
-#define SYMBOL_NAME powf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME powf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
 
-#ifdef SHARED
+# ifdef SHARED
 __hidden_ver1 (__powf, __GI___powf, __redirect_powf)
   __attribute__ ((visibility ("hidden")));
 
 versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27);
 libm_alias_float_other (__pow, pow)
-#else
+# else
 libm_alias_float (__pow, pow)
-#endif
+# endif
 
 strong_alias (__powf, __ieee754_powf)
 libm_alias_finite (__powf, __powf)
 
-#define __powf __powf_sse2
+# define __powf __powf_sse2
+#endif
 #include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 71bad096a9..4d2c6ce006 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -16,15 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern double __redirect_atan (double);
 
-#define SYMBOL_NAME atan
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME atan
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
 libm_alias_double (__atan, atan)
 
-#define __atan __atan_sse2
+# define __atan __atan_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
new file mode 100644
index 0000000000..e6c1106753
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of ceil function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__ceil)
+	vroundsd $10, %xmm0, %xmm0, %xmm0
+	ret
+END(__ceil)
+
+libm_alias_double (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
index 64119011ad..dba756c38f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __ceil_sse41 __ceil
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__ceil_sse41)
 	roundsd	$10, %xmm0, %xmm0
 	ret
 END(__ceil_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
index cc028addee..46c8e91e19 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define ceil __redirect_ceil
-#define __ceil __redirect___ceil
-#include <math.h>
-#undef ceil
-#undef __ceil
+# define ceil __redirect_ceil
+# define __ceil __redirect___ceil
+# include <math.h>
+# undef ceil
+# undef __ceil
 
-#define SYMBOL_NAME ceil
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME ceil
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
 libm_alias_double (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
new file mode 100644
index 0000000000..b4d8ac0455
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of ceilf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__ceilf)
+	vroundss $10, %xmm0, %xmm0, %xmm0
+	ret
+END(__ceilf)
+
+libm_alias_float (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
index dd9a9f6b71..9abc87b91a 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __ceilf_sse41 __ceilf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__ceilf_sse41)
 	roundss	$10, %xmm0, %xmm0
 	ret
 END(__ceilf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
index 97a0ca7d19..bb53108f73 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define ceilf __redirect_ceilf
-#define __ceilf __redirect___ceilf
-#include <math.h>
-#undef ceilf
-#undef __ceilf
+# define ceilf __redirect_ceilf
+# define __ceilf __redirect___ceilf
+# include <math.h>
+# undef ceilf
+# undef __ceilf
 
-#define SYMBOL_NAME ceilf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME ceilf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
 libm_alias_float (__ceil, ceil)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
index 2703c576df..8a02e04538 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
@@ -16,13 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
 
 extern float __redirect_cosf (float);
 
-#define SYMBOL_NAME cosf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME cosf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
 
 libm_alias_float (__cos, cos)
+#else
+# include <sysdeps/ieee754/flt-32/s_cosf.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
index 8a2d69f9b2..d58ef3d8f5 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c
@@ -16,21 +16,24 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern double __redirect_expm1 (double);
 
-#define SYMBOL_NAME expm1
-#include "ifunc-fma.h"
+# define SYMBOL_NAME expm1
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ());
 libm_alias_double (__expm1, expm1)
 
-#define __expm1 __expm1_sse2
+# define __expm1 __expm1_sse2
 
 /* NB: __expm1 may be expanded to __expm1_sse2 in the following
    prototypes.  */
 extern long double __expm1l (long double);
 extern long double __expm1f128 (long double);
 
+#endif
 #include <sysdeps/ieee754/dbl-64/s_expm1.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
new file mode 100644
index 0000000000..ff74b5a8bf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of floor function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__floor)
+	vroundsd $9, %xmm0, %xmm0, %xmm0
+	ret
+END(__floor)
+
+libm_alias_double (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
index 2f7521f39f..c9b9b0639b 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __floor_sse41 __floor
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__floor_sse41)
 	roundsd	$9, %xmm0, %xmm0
 	ret
 END(__floor_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
index 8cebd48e10..2c87dd0056 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define floor __redirect_floor
-#define __floor __redirect___floor
-#include <math.h>
-#undef floor
-#undef __floor
+# define floor __redirect_floor
+# define __floor __redirect___floor
+# include <math.h>
+# undef floor
+# undef __floor
 
-#define SYMBOL_NAME floor
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME floor
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
 libm_alias_double (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
new file mode 100644
index 0000000000..c378baae8e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of floorf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__floorf)
+	vroundss $9, %xmm0, %xmm0, %xmm0
+	ret
+END(__floorf)
+
+libm_alias_float (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
index 5f6020d27d..c2216899db 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __floorf_sse41 __floorf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__floorf_sse41)
 	roundss	$9, %xmm0, %xmm0
 	ret
 END(__floorf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
index a14e18b03c..a277802b6d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define floorf __redirect_floorf
-#define __floorf __redirect___floorf
-#include <math.h>
-#undef floorf
-#undef __floorf
+# define floorf __redirect_floorf
+# define __floorf __redirect___floorf
+# include <math.h>
+# undef floorf
+# undef __floorf
 
-#define SYMBOL_NAME floorf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME floorf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
 libm_alias_float (__floor, floor)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
index a8e1a3f21b..3fa1185d81 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c
@@ -16,14 +16,17 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern double __redirect_log1p (double);
 
-#define SYMBOL_NAME log1p
-#include "ifunc-fma.h"
+# define SYMBOL_NAME log1p
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ());
 
-#define __log1p __log1p_sse2
+# define __log1p __log1p_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_log1p.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
new file mode 100644
index 0000000000..5bfdf73c28
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of nearbyint function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__nearbyint)
+	vroundsd $0xc, %xmm0, %xmm0, %xmm0
+	ret
+END(__nearbyint)
+
+libm_alias_double (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
index 674f7eb40a..9d84410a1f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __nearbyint_sse41 __nearbyint
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__nearbyint_sse41)
 	roundsd	$0xc, %xmm0, %xmm0
 	ret
 END(__nearbyint_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
index 693e42dd4e..057a7ca60f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
-#define nearbyint __redirect_nearbyint
-#define __nearbyint __redirect___nearbyint
-#include <math.h>
-#undef nearbyint
-#undef __nearbyint
+# define nearbyint __redirect_nearbyint
+# define __nearbyint __redirect___nearbyint
+# include <math.h>
+# undef nearbyint
+# undef __nearbyint
 
-#define SYMBOL_NAME nearbyint
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME nearbyint
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
 		       IFUNC_SELECTOR ());
 libm_alias_double (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
new file mode 100644
index 0000000000..1dbaed0324
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implmentation of nearbyintf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__nearbyintf)
+	vroundss $0xc, %xmm0, %xmm0, %xmm0
+	ret
+END(__nearbyintf)
+
+libm_alias_float (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
index 5892bd7563..3cf35f92d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __nearbyintf_sse41 __nearbyintf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__nearbyintf_sse41)
 	roundss	$0xc, %xmm0, %xmm0
 	ret
 END(__nearbyintf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
index a0ac009f4b..41f374ba72 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
 
-#define nearbyintf __redirect_nearbyintf
-#define __nearbyintf __redirect___nearbyintf
-#include <math.h>
-#undef nearbyintf
-#undef __nearbyintf
+# define nearbyintf __redirect_nearbyintf
+# define __nearbyintf __redirect___nearbyintf
+# include <math.h>
+# undef nearbyintf
+# undef __nearbyintf
 
-#define SYMBOL_NAME nearbyintf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME nearbyintf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
 		       IFUNC_SELECTOR ());
 libm_alias_float (__nearbyint, nearbyint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
new file mode 100644
index 0000000000..2b403b331f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of rint function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__rint)
+	vroundsd $4, %xmm0, %xmm0, %xmm0
+	ret
+END(__rint)
+
+libm_alias_double (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
index 405372991b..8cd9cf759f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __rint_sse41 __rint
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__rint_sse41)
 	roundsd	$4, %xmm0, %xmm0
 	ret
 END(__rint_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
index 754c87e004..18623b7d99 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define rint __redirect_rint
-#define __rint __redirect___rint
-#include <math.h>
-#undef rint
-#undef __rint
+# define rint __redirect_rint
+# define __rint __redirect___rint
+# include <math.h>
+# undef rint
+# undef __rint
 
-#define SYMBOL_NAME rint
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME rint
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
 libm_alias_double (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
new file mode 100644
index 0000000000..171c2867f4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of rintf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__rintf)
+	vroundss $4, %xmm0, %xmm0, %xmm0
+	ret
+END(__rintf)
+
+libm_alias_float (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
index 8ac67ce767..fc1e70f0c9 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __rintf_sse41 __rintf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__rintf_sse41)
 	roundss	$4, %xmm0, %xmm0
 	ret
 END(__rintf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
index e9d6b7a5f2..e275368dec 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define rintf __redirect_rintf
-#define __rintf __redirect___rintf
-#include <math.h>
-#undef rintf
-#undef __rintf
+# define rintf __redirect_rintf
+# define __rintf __redirect___rintf
+# include <math.h>
+# undef rintf
+# undef __rintf
 
-#define SYMBOL_NAME rintf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME rintf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
 libm_alias_float (__rint, rint)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
new file mode 100644
index 0000000000..576790355c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of roundeven function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__roundeven)
+	vroundsd $8, %xmm0, %xmm0, %xmm0
+	ret
+END(__roundeven)
+
+libm_alias_double (__roundeven, roundeven)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
index 5ef102336b..f00be56c59 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __roundeven_sse41 __roundeven
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__roundeven_sse41)
 	roundsd	$8, %xmm0, %xmm0
 	ret
 END(__roundeven_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
index 8737b32e26..139aad088f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c
@@ -16,16 +16,19 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
-#define roundeven __redirect_roundeven
-#define __roundeven __redirect___roundeven
-#include <math.h>
-#undef roundeven
-#undef __roundeven
+# define roundeven __redirect_roundeven
+# define __roundeven __redirect___roundeven
+# include <math.h>
+# undef roundeven
+# undef __roundeven
 
-#define SYMBOL_NAME roundeven
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME roundeven
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ());
 libm_alias_double (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
new file mode 100644
index 0000000000..42c359f4cd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of roundevenf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__roundevenf)
+	vroundss $8, %xmm0, %xmm0, %xmm0
+	ret
+END(__roundevenf)
+
+libm_alias_float (__roundeven, roundeven)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
index 792c90ba07..6b148e4353 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S
@@ -17,8 +17,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __roundevenf_sse41 __roundevenf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__roundevenf_sse41)
 	roundss	$8, %xmm0, %xmm0
 	ret
 END(__roundevenf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
index e96016a4d5..2fb090075d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c
@@ -16,16 +16,19 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
 
-#define roundevenf __redirect_roundevenf
-#define __roundevenf __redirect___roundevenf
-#include <math.h>
-#undef roundevenf
-#undef __roundevenf
+# define roundevenf __redirect_roundevenf
+# define __roundevenf __redirect___roundevenf
+# include <math.h>
+# undef roundevenf
+# undef __roundevenf
 
-#define SYMBOL_NAME roundevenf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME roundevenf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ());
 libm_alias_float (__roundeven, roundeven)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 355cc0092e..21e77943a3 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -16,24 +16,27 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern double __redirect_sin (double);
 extern double __redirect_cos (double);
 
-#define SYMBOL_NAME sin
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME sin
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
 libm_alias_double (__sin, sin)
 
-#undef SYMBOL_NAME
-#define SYMBOL_NAME cos
-#include "ifunc-avx-fma4.h"
+# undef SYMBOL_NAME
+# define SYMBOL_NAME cos
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
 libm_alias_double (__cos, cos)
 
-#define __cos __cos_sse2
-#define __sin __sin_sse2
+# define __cos __cos_sse2
+# define __sin __sin_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
index 70107e999c..b35757f8de 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c
@@ -16,15 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern void __redirect_sincos (double, double *, double *);
 
-#define SYMBOL_NAME sincos
-#include "ifunc-fma4.h"
+# define SYMBOL_NAME sincos
+# include "ifunc-fma4.h"
 
 libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ());
 libm_alias_double (__sincos, sincos)
 
-#define __sincos __sincos_sse2
+# define __sincos __sincos_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_sincos.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
index 80bc028451..0ea9b40e84 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
@@ -16,13 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
 
 extern void __redirect_sincosf (float, float *, float *);
 
-#define SYMBOL_NAME sincosf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME sincosf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
 
 libm_alias_float (__sincos, sincos)
+#else
+# include <sysdeps/ieee754/flt-32/s_sincosf.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
index a32b9e9550..c61624e3ee 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
@@ -16,13 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
 
 extern float __redirect_sinf (float);
 
-#define SYMBOL_NAME sinf
-#include "ifunc-fma.h"
+# define SYMBOL_NAME sinf
+# include "ifunc-fma.h"
 
 libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
 
 libm_alias_float (__sin, sin)
+#else
+# include <sysdeps/ieee754/flt-32/s_sinf.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index f9a2474a13..125d992ba1 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -16,15 +16,18 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-double.h>
 
 extern double __redirect_tan (double);
 
-#define SYMBOL_NAME tan
-#include "ifunc-avx-fma4.h"
+# define SYMBOL_NAME tan
+# include "ifunc-avx-fma4.h"
 
 libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
 libm_alias_double (__tan, tan)
 
-#define __tan __tan_sse2
+# define __tan __tan_sse2
+#endif
 #include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
new file mode 100644
index 0000000000..b3e87e9606
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of trunc function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-double.h>
+
+	.text
+ENTRY(__trunc)
+	vroundsd $11, %xmm0, %xmm0, %xmm0
+	ret
+END(__trunc)
+
+libm_alias_double (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
index b496a6ef49..2b79174eed 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
@@ -18,8 +18,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-double.h>
+# define __trunc_sse41 __trunc
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__trunc_sse41)
 	roundsd	$11, %xmm0, %xmm0
 	ret
 END(__trunc_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_double (__trunc, trunc)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
index 9bc9df8744..ea89c4f85d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-double.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
 
-#define trunc __redirect_trunc
-#define __trunc __redirect___trunc
-#include <math.h>
-#undef trunc
-#undef __trunc
+# define trunc __redirect_trunc
+# define __trunc __redirect___trunc
+# include <math.h>
+# undef trunc
+# undef __trunc
 
-#define SYMBOL_NAME trunc
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME trunc
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
 libm_alias_double (__trunc, trunc)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
new file mode 100644
index 0000000000..f31ac7d7f7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S
@@ -0,0 +1,28 @@
+/* AVX implementation of truncf function.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libm-alias-float.h>
+
+	.text
+ENTRY(__truncf)
+	vroundss $11, %xmm0, %xmm0, %xmm0
+	ret
+END(__truncf)
+
+libm_alias_float (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
index 22e9a83307..60498b2cb2 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
@@ -18,8 +18,20 @@
 
 #include <sysdep.h>
 
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+# define __truncf_sse41 __truncf
+	.text
+#else
 	.section .text.sse4.1,"ax",@progbits
+#endif
+
 ENTRY(__truncf_sse41)
 	roundss	$11, %xmm0, %xmm0
 	ret
 END(__truncf_sse41)
+
+#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL
+libm_alias_float (__trunc, trunc)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
index dae01d166a..92435ce39d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
@@ -16,17 +16,20 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#define NO_MATH_REDIRECT
-#include <libm-alias-float.h>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
 
-#define truncf __redirect_truncf
-#define __truncf __redirect___truncf
-#include <math.h>
-#undef truncf
-#undef __truncf
+# define truncf __redirect_truncf
+# define __truncf __redirect___truncf
+# include <math.h>
+# undef truncf
+# undef __truncf
 
-#define SYMBOL_NAME truncf
-#include "ifunc-sse4_1.h"
+# define SYMBOL_NAME truncf
+# include "ifunc-sse4_1.h"
 
 libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
 libm_alias_float (__trunc, trunc)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c
index 27eee98a0a..3584187e0e 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c
@@ -1 +1,6 @@
-#include <sysdeps/../math/w_exp.c>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
+# include <sysdeps/ieee754/dbl-64/w_exp.c>
+#else
+# include <sysdeps/../math/w_exp.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c
index 9b2b018711..414ca3ca3d 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_log.c
@@ -1 +1,6 @@
-#include <sysdeps/../math/w_log.c>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
+# include <sysdeps/ieee754/dbl-64/w_log.c>
+#else
+# include <sysdeps/../math/w_log.c>
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c
index b50c1988de..d5fcc4f871 100644
--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c
@@ -1 +1,6 @@
-#include <sysdeps/../math/w_pow.c>
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL
+# include <sysdeps/ieee754/dbl-64/w_pow.c>
+#else
+# include <sysdeps/../math/w_pow.c>
+#endif
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index e1e894c963..d3d2270394 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -4,8 +4,8 @@ sysdep_routines += \
   memchr-avx2 \
   memchr-avx2-rtm \
   memchr-evex \
-  memchr-evex512 \
   memchr-evex-rtm \
+  memchr-evex512 \
   memchr-sse2 \
   memcmp-avx2-movbe \
   memcmp-avx2-movbe-rtm \
@@ -37,8 +37,8 @@ sysdep_routines += \
   rawmemchr-avx2 \
   rawmemchr-avx2-rtm \
   rawmemchr-evex \
-  rawmemchr-evex512 \
   rawmemchr-evex-rtm \
+  rawmemchr-evex512 \
   rawmemchr-sse2 \
   stpcpy-avx2 \
   stpcpy-avx2-rtm \
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9984c3ca0f..97839a2248 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -21,7 +21,9 @@
    2. If size is less than VEC, use integer register stores.
    3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
    4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
-   5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+   5. On machines ERMS feature, if size is greater or equal than
+      __x86_rep_stosb_threshold then REP STOSB will be used.
+   6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
       4 VEC stores and store 4 * VEC at a time until done.  */
 
 #include <sysdep.h>
diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
new file mode 100644
index 0000000000..1d636669ba
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
+   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
+   is 960 bytes and this test didn't fail.  It may be due to the unused
+   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
+   this test might fail without the fix.  */
+#ifndef OFFSET
+# define OFFSET 40
+#endif
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP
+	pushq	%rbx
+	/* Set %ebx to 0xbadbeef.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP
+	movups	%xmm0, 32(%RAX_LP)
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi
+	.comm	saved_rdi,8,8
+	.local	saved_rsi
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits
diff --git a/time/timespec_get.c b/time/timespec_get.c
index b031e42ca2..26a044bca6 100644
--- a/time/timespec_get.c
+++ b/time/timespec_get.c
@@ -4,7 +4,7 @@
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
-   version 2.1 of the License.
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/time/timespec_getres.c b/time/timespec_getres.c
index edb397507c..2e18b8bcac 100644
--- a/time/timespec_getres.c
+++ b/time/timespec_getres.c
@@ -5,7 +5,7 @@
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
-   version 2.1 of the License.
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of