aboutsummaryrefslogtreecommitdiff
path: root/libpsn00b/libc/clz.S
diff options
context:
space:
mode:
authorXavier Del Campo Romero <xavi92@disroot.org>2025-07-05 02:34:11 +0200
committerXavier Del Campo Romero <xavi92@disroot.org>2025-07-05 02:34:11 +0200
commitbeb76e4dd362374b8f42cd971d394bba1074cd8d (patch)
tree3ea4cc342737afb9225c01160c92647ba66c78bd /libpsn00b/libc/clz.S
parent5d9aa2d3dfc7d6e51c2eb942ab4cdbae5571a40a (diff)
downloadpsn00bsdk-fix-include.tar.gz
Replace .include with #include (fix-include)
For some reason, both mipsel-unknown-elf-gcc 8.2.0 and mipsel-none-elf-gcc 15.1.0 were unable to resolve .include assembler directives. As a workaround, it is still possible to use the preprocessor, and therefore the usual #include preprocessor directive. However, this requires the assembly files to use the uppercase .S file extension.
Diffstat (limited to 'libpsn00b/libc/clz.S')
-rw-r--r--libpsn00b/libc/clz.S53
1 files changed, 53 insertions, 0 deletions
diff --git a/libpsn00b/libc/clz.S b/libpsn00b/libc/clz.S
new file mode 100644
index 0000000..1ccff2e
--- /dev/null
+++ b/libpsn00b/libc/clz.S
@@ -0,0 +1,53 @@
+# PSn00bSDK leading zero count intrinsics
+# (C) 2022-2023 spicyjpeg - MPL licensed
+#
+# libgcc provides two functions used internally by GCC to count the number of
+# leading zeroes in a value, __clzsi2() (32-bit) and __clzdi2() (64-bit). This
+# file overrides them with smaller implementations that make use of the GTE's
+# LZCS/LZCR registers.
+
+.set noreorder
+# noreorder: every branch/jump delay slot in this file is filled by hand, not by the assembler.
+.set LZCS, $30 # GTE register written (mtc2) with the word to inspect
+.set LZCR, $31 # GTE register read back (mfc2) as the resulting count
+
+.section .text.__clzsi2, "ax", @progbits
+.global __clzsi2
+.type __clzsi2, @function
+# __clzsi2: In $a0 = 32-bit value. Out $v0 = number of leading zeroes in that value.
+__clzsi2:
+ mtc2 $a0, LZCS # hand the value to the GTE before testing its sign
+ bltz $a0, .Lreturn # if (value & (1 << 31)) return 0
+ li $v0, 0 # (delay slot) result for the taken branch; overwritten below otherwise
+ mfc2 $v0, LZCR # else return GTE_CLZ(value)
+# NOTE(review): negative inputs skip LZCR, presumably because it would count leading ones - confirm.
+.Lreturn:
+ jr $ra
+ nop # (delay slot) $v0 already holds the result
+
+.section .text.__clzdi2, "ax", @progbits
+.global __clzdi2
+.type __clzdi2, @function
+# __clzdi2: In $a1 = upper word (msb), $a0 = lower word (lsb). Out $v0 = leading zeroes of the 64-bit value.
+__clzdi2:
+ mtc2 $a1, LZCS # start with the upper word
+ bltz $a1, .Lreturn2 # if (msb & (1 << 31)) return 0
+ li $v0, 0 # (delay slot) result for the taken branch
+ bnez $a1, .LreturnMSB # else if (msb) return GTE_CLZ(msb)
+ nop # (delay slot) intentionally empty
+# Upper word is zero from here on: the answer is 32 plus the leading zeroes of the lower word.
+.LnoMSB: # reached by fall-through only; nothing in this file branches here
+ mtc2 $a0, LZCS # replace the GTE input with the lower word
+ bltz $a0, .Lreturn2 # else if (lsb & (1 << 31)) return 32
+ li $v0, 32 # (delay slot) result for the taken branch; overwritten below otherwise
+ mfc2 $v0, LZCR # else return 32 + GTE_CLZ(lsb)
+
+ jr $ra
+ addiu $v0, 32 # (delay slot) account for the 32 zero bits of the upper word
+# Upper word is non-zero and non-negative: its LZCR value is the final answer.
+.LreturnMSB:
+ mfc2 $v0, LZCR # $v0 = GTE_CLZ(msb), input loaded at function entry
+
+.Lreturn2:
+ jr $ra
+ nop # (delay slot) $v0 already holds the result