aboutsummaryrefslogtreecommitdiff
path: root/libpsn00b
diff options
context:
space:
mode:
author: spicyjpeg <thatspicyjpeg@gmail.com> 2023-06-20 15:13:11 +0200
committer: spicyjpeg <thatspicyjpeg@gmail.com> 2023-06-20 15:13:11 +0200
commit: d23024ceeb8e7f04b0a3bde7a0db41ae5e4fd06d (patch)
tree: 03342eb22041fb7343c034e376489295e2a78a19 /libpsn00b
parent: 79a966486615e60be3a37d278945dc3dd0fd933b (diff)
download: psn00bsdk-d23024ceeb8e7f04b0a3bde7a0db41ae5e4fd06d.tar.gz
Split off libsmd from n00bdemo into separate library
Diffstat (limited to 'libpsn00b')
-rw-r--r-- libpsn00b/cmake/internal_setup.cmake | 1
-rw-r--r-- libpsn00b/include/psxapi.h | 2
-rw-r--r-- libpsn00b/include/smd/smd.h | 85
-rw-r--r-- libpsn00b/include/smd/smd_s.inc | 339
-rw-r--r-- libpsn00b/smd/smd.s | 928
-rw-r--r-- libpsn00b/smd/smd_cel.s | 1078
-rw-r--r-- libpsn00b/smd/smd_flat.s | 833
-rw-r--r-- libpsn00b/smd/smdparser.s | 130
8 files changed, 3395 insertions, 1 deletions
diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake
index e24595a..c3bb2dc 100644
--- a/libpsn00b/cmake/internal_setup.cmake
+++ b/libpsn00b/cmake/internal_setup.cmake
@@ -51,6 +51,7 @@ set(
psxsio
psxetc
psxapi
+ smd
lzp
c
)
diff --git a/libpsn00b/include/psxapi.h b/libpsn00b/include/psxapi.h
index 5bb2b6f..67fa51a 100644
--- a/libpsn00b/include/psxapi.h
+++ b/libpsn00b/include/psxapi.h
@@ -310,7 +310,7 @@ int LoadExec(const char *path, int argc, const char **argv);
void FlushCache(void);
void ResetEntryInt(void);
-void HookEntryInt(jmp_buf buf);
+void HookEntryInt(const JumpBuffer *buf);
void ReturnFromException(void);
int SetConf(int evcb, int tcb, uint32_t sp);
diff --git a/libpsn00b/include/smd/smd.h b/libpsn00b/include/smd/smd.h
new file mode 100644
index 0000000..658f7fa
--- /dev/null
+++ b/libpsn00b/include/smd/smd.h
@@ -0,0 +1,85 @@
+/*
+ * PSn00bSDK .SMD model parser library
+ * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <psxgte.h>
+
+/* Structure definitions */
+
+/*
+ * Ordering table descriptor passed to smdSortModel() and smdSortModelCel().
+ * The assembly sort routines fetch otlen, zdiv and zoff with a single 32-bit
+ * load from offset 4, so the order and sizes of these fields must not change.
+ */
+typedef struct {
+ uint32_t *ot; // Base address of the ordering table
+ int16_t otlen; // Number of OT entries; primitives sorting at or beyond it are dropped
+ uint8_t zdiv,zoff; // Average Z is shifted right by zdiv, then zoff is subtracted (smdSortModel sign-extends zoff)
+} SC_OT;
+
+/*
+ * Header found at the start of SMD model data (24 bytes on the target).
+ * The three pointers are stored as offsets from the start of the header and
+ * only become usable after smdInitData() has added the base address to them.
+ */
+typedef struct {
+ char id[3]; // File signature (not validated by smdInitData)
+ uint8_t version;
+ uint16_t flags;
+ uint16_t n_verts; // Vertex count
+ uint16_t n_norms; // Normal count
+ uint16_t n_prims; // Primitive count
+ SVECTOR *p_verts; // Vertex array, indexed by the primitives' vertex indices
+ SVECTOR *p_norms; // Normal array, indexed by the primitives' normal indices
+ void *p_prims; // Primitive stream; the sort routines stop at a zero ID word
+} SMD;
+
+/*
+ * 32-bit ID word that starts every primitive record. smdSortModel() reads it
+ * as one little-endian word: type is bits 0-1, l_type bits 2-3, c_type bit 4,
+ * texture bit 5, blend bits 6-7, nocull bit 12 and len bits 24-31.
+ */
+typedef struct {
+ uint8_t type : 2; // 1 = triangle, 2 = quad; other values are skipped by smdSortModel
+ uint8_t l_type : 2; // 0 = unlit, 2 = smooth (one normal per vertex), otherwise flat (one normal)
+ uint8_t c_type : 1; // Unlit only: 1 = one color per vertex (gouraud)
+ uint8_t texture : 1; // 1 = texture coordinates, tpage and CLUT follow the colors
+ uint8_t blend : 2; // Copied into bits 5-6 of the texpage word of untextured primitives
+ uint8_t zoff : 4;
+ uint8_t nocull : 1; // 1 = do not backface-cull this primitive
+ uint8_t mask : 1;
+ uint8_t texwin : 2;
+ uint8_t texoff : 2;
+ uint8_t reserved : 6;
+ uint8_t len; // Size of the whole record in bytes; added to reach the next record
+} SMD_PRI_TYPE;
+
+/*
+ * Fully expanded primitive with every optional field present.
+ * NOTE(review): presumably filled in by ReadSMD(); the parser is not part of
+ * this view, so which fields are valid for a given prim_id should be checked
+ * against smdparser.s.
+ */
+typedef struct {
+ SMD_PRI_TYPE prim_id;
+
+ uint16_t v0,v1,v2,v3; // Vertex indices
+ uint16_t n0,n1,n2,n3; // Normal indices
+ uint8_t r0,g0,b0,code; // RGB0
+ uint8_t r1,g1,b1,p0; // RGB1
+ uint8_t r2,g2,b2,p1; // RGB2
+ uint8_t r3,g3,b3,p2; // RGB3
+ uint8_t tu0,tv0; // Texture coordinates, one U/V pair per vertex
+ uint8_t tu1,tv1;
+ uint8_t tu2,tv2;
+ uint8_t tu3,tv3;
+ uint16_t tpage,clut; // Texture page and CLUT attributes
+} SMD_PRIM;
+
+/* API */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Sequential primitive reader.
+// NOTE(review): implementation is not in view (presumably smdparser.s).
+int OpenSMD(const void *smd);
+SMD_PRIM *ReadSMD(SMD_PRIM *pri);
+
+// Sets the inclusive screen rectangle used by the sort routines to reject
+// primitives that lie entirely off-screen. Each coordinate is stored as a
+// 16-bit value.
+void scSetClipRect(int x0, int y0, int x1, int y1);
+
+// Turns the offsets in the SMD header into absolute pointers and returns the
+// header address. Despite the const qualifier the header is modified in
+// place, so the data must be writable and must be initialized only once.
+SMD *smdInitData(const void *data);
+// Sets the texture page value merged into the texpage word that
+// smdSortModel() emits in front of untextured primitives.
+void smdSetBaseTPage(uint16_t tpage);
+
+// Transforms and sorts a model into the ordering table, writing GPU
+// primitives at pribuff. Returns the first free address after the last
+// primitive written, to be passed as pribuff to the next call.
+uint8_t *smdSortModel(SC_OT *ot, uint8_t *pribuff, SMD *smd);
+// NOTE(review): implementation is not in view (presumably smd_flat.s).
+uint8_t *smdSortModelFlat(uint32_t *ot, uint8_t *pribuff, SMD *smd);
+
+// Stores the texture page and CLUT used by the cel shading renderer.
+void smdSetCelTex(uint16_t tpage, uint16_t clut);
+// Stores the cel shading parameters; only the low 8 bits of udiv and vdiv
+// are kept.
+void smdSetCelParam(int udiv, int vdiv, unsigned int col);
+uint8_t *smdSortModelCel(SC_OT *ot, uint8_t *pribuff, SMD *smd);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/libpsn00b/include/smd/smd_s.inc b/libpsn00b/include/smd/smd_s.inc
new file mode 100644
index 0000000..827e9e3
--- /dev/null
+++ b/libpsn00b/include/smd/smd_s.inc
@@ -0,0 +1,339 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+# Byte offsets into the SC_OT structure declared in smd/smd.h:
+# { uint32_t *ot; int16_t otlen; uint8_t zdiv, zoff; }
+# zdiv and zoff are the two bytes that follow the 16-bit otlen, i.e. bits
+# 16-23 and 24-31 of the word at OT_LEN, which is how the sort routines
+# extract them.
+.set OT_ADDR, 0
+.set OT_LEN, 4
+.set OT_ZDIV, 6
+.set OT_ZOFF, 7
+
+# Byte offsets into the SMD file header (the SMD structure in smd/smd.h)
+.set SMD_HEAD_ID, 0
+.set SMD_HEAD_FLAG, 4
+.set SMD_HEAD_NVERTS, 6
+.set SMD_HEAD_NNORMS, 8
+.set SMD_HEAD_NPRIMS, 10
+.set SMD_HEAD_PVERTS, 12 # Offset on disk, absolute pointer after smdInitData
+.set SMD_HEAD_PNORMS, 16 # Offset on disk, absolute pointer after smdInitData
+.set SMD_HEAD_PPRIMS, 20 # Offset on disk, absolute pointer after smdInitData
+.set SMD_HEAD_SIZE, 24
+
+# Byte offsets of the fields inside each GPU primitive packet written by the
+# sort routines. Every packet starts with a 32-bit tag (length + link to the
+# next packet) and *_len is the total packet size in bytes, used to advance
+# the primitive buffer pointer. The untextured variants (F3, G3, F4, G4)
+# carry an extra texpage word right after the tag.
+
+# Flat shaded triangle with leading texpage word
+.set POLYF3_tag, 0
+.set POLYF3_tpage, 4
+.set POLYF3_rgbc, 8
+.set POLYF3_xy0, 12
+.set POLYF3_xy1, 16
+.set POLYF3_xy2, 20
+.set POLYF3_len, 24
+
+# Flat shaded, textured triangle
+.set POLYFT3_tag, 0
+.set POLYFT3_rgbc, 4
+.set POLYFT3_xy0, 8
+.set POLYFT3_uv0, 12
+.set POLYFT3_clut, 14
+.set POLYFT3_xy1, 16
+.set POLYFT3_uv1, 20
+.set POLYFT3_tpage, 22
+.set POLYFT3_xy2, 24
+.set POLYFT3_uv2, 28
+.set POLYFT3_pad, 30
+.set POLYFT3_len, 32
+
+# Gouraud shaded triangle with leading texpage word
+.set POLYG3_tag, 0
+.set POLYG3_tpage, 4
+.set POLYG3_rgbc0, 8
+.set POLYG3_xy0, 12
+.set POLYG3_rgbc1, 16
+.set POLYG3_xy1, 20
+.set POLYG3_rgbc2, 24
+.set POLYG3_xy2, 28
+.set POLYG3_len, 32
+
+# Gouraud shaded, textured triangle
+.set POLYGT3_tag, 0
+.set POLYGT3_rgbc0, 4
+.set POLYGT3_xy0, 8
+.set POLYGT3_uv0, 12
+.set POLYGT3_clut, 14
+.set POLYGT3_rgbc1, 16
+.set POLYGT3_xy1, 20
+.set POLYGT3_uv1, 24
+.set POLYGT3_tpage, 26
+.set POLYGT3_rgbc2, 28
+.set POLYGT3_xy2, 32
+.set POLYGT3_uv2, 36
+.set POLYGT3_pad, 38
+.set POLYGT3_len, 40
+
+# Flat shaded quad with leading texpage word
+.set POLYF4_tag, 0
+.set POLYF4_tpage, 4
+.set POLYF4_rgbc, 8
+.set POLYF4_xy0, 12
+.set POLYF4_xy1, 16
+.set POLYF4_xy2, 20
+.set POLYF4_xy3, 24
+.set POLYF4_len, 28
+
+# Flat shaded, textured quad
+.set POLYFT4_tag, 0
+.set POLYFT4_rgbc, 4
+.set POLYFT4_xy0, 8
+.set POLYFT4_uv0, 12
+.set POLYFT4_clut, 14
+.set POLYFT4_xy1, 16
+.set POLYFT4_uv1, 20
+.set POLYFT4_tpage, 22
+.set POLYFT4_xy2, 24
+.set POLYFT4_uv2, 28
+.set POLYFT4_pad0, 30
+.set POLYFT4_xy3, 32
+.set POLYFT4_uv3, 36
+.set POLYFT4_pad1, 38
+.set POLYFT4_len, 40
+
+# Gouraud shaded quad with leading texpage word
+.set POLYG4_tag, 0
+.set POLYG4_tpage, 4
+.set POLYG4_rgbc0, 8
+.set POLYG4_xy0, 12
+.set POLYG4_rgbc1, 16
+.set POLYG4_xy1, 20
+.set POLYG4_rgbc2, 24
+.set POLYG4_xy2, 28
+.set POLYG4_rgbc3, 32
+.set POLYG4_xy3, 36
+.set POLYG4_len, 40
+
+# Gouraud shaded, textured quad
+.set POLYGT4_tag, 0
+.set POLYGT4_rgbc0, 4
+.set POLYGT4_xy0, 8
+.set POLYGT4_uv0, 12
+.set POLYGT4_clut, 14
+.set POLYGT4_rgbc1, 16
+.set POLYGT4_xy1, 20
+.set POLYGT4_uv1, 24
+.set POLYGT4_tpage, 26
+.set POLYGT4_rgbc2, 28
+.set POLYGT4_xy2, 32
+.set POLYGT4_uv2, 36
+.set POLYGT4_pad0, 38
+.set POLYGT4_rgbc3, 40
+.set POLYGT4_xy3, 44
+.set POLYGT4_uv3, 48
+.set POLYGT4_pad1, 50
+.set POLYGT4_len, 52
+
+# Outcode bits produced per vertex by the ClipTest macros. A bit is set when
+# the vertex lies beyond that edge of the clip rectangle.
+.set CLIP_LEFT, 1
+.set CLIP_RIGHT, 2
+.set CLIP_TOP, 4
+.set CLIP_BOTTOM, 8
+
+
+# Clip routine macros, based on Cohen-Sutherland line clipping algorithm
+# but only with the off-screen detection logic extended for polygons
+#
+# ClipTestTri computes one outcode per triangle vertex.
+# In: $t8 = clip minimum (X in the low halfword, Y in the high halfword)
+# $t9 = clip maximum (same packing)
+# C2_SXY0..C2_SXY2 = projected vertices
+# Out: $s0, $s1, $s2 = CLIP_* bits for vertices 0, 1 and 2
+# Uses: $v0, $v1, $t7 (bge/ble are assembler pseudo-instructions, so $at is
+# presumably used as well)
+# Both bounds are inclusive. The labels are plain symbols rather than
+# per-expansion ones, so the macro can be expanded only once per source file.
+
+.macro ClipTestTri
+
+ # X0 clip test
+ mfc2 $t7, C2_SXY0
+ sll $v1, $t8, 16 # Sign-extend minimum X
+ sra $v1, 16
+ sll $v0, $t7, 16 # Sign-extend vertex X
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x0
+ move $s0, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s0, CLIP_LEFT
+.no_clip_l_x0:
+ sll $v1, $t9, 16 # Sign-extend maximum X
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x0
+ nop
+ ori $s0, CLIP_RIGHT
+.no_clip_r_x0:
+ # Y0 clip test
+ sra $v0, $t7, 16 # Vertex Y
+ sra $v1, $t8, 16 # Minimum Y
+ bge $v0, $v1, .no_clip_t_y0
+ nop
+ ori $s0, CLIP_TOP
+.no_clip_t_y0:
+ sra $v1, $t9, 16 # Maximum Y
+ ble $v0, $v1, .no_clip_b_y0
+ nop
+ ori $s0, CLIP_BOTTOM
+.no_clip_b_y0:
+
+ # X1 clip test
+ mfc2 $t7, C2_SXY1
+ sll $v1, $t8, 16
+ sra $v1, 16
+ sll $v0, $t7, 16
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x1
+ move $s1, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s1, CLIP_LEFT
+.no_clip_l_x1:
+ sll $v1, $t9, 16
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x1
+ nop
+ ori $s1, CLIP_RIGHT
+.no_clip_r_x1:
+ # Y1 clip test
+ sra $v0, $t7, 16
+ sra $v1, $t8, 16
+ bge $v0, $v1, .no_clip_t_y1
+ nop
+ ori $s1, CLIP_TOP
+.no_clip_t_y1:
+ sra $v1, $t9, 16
+ ble $v0, $v1, .no_clip_b_y1
+ nop
+ ori $s1, CLIP_BOTTOM
+.no_clip_b_y1:
+
+ # X2 clip test
+ mfc2 $t7, C2_SXY2
+ sll $v1, $t8, 16
+ sra $v1, 16
+ sll $v0, $t7, 16
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x2
+ move $s2, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s2, CLIP_LEFT
+.no_clip_l_x2:
+ sll $v1, $t9, 16
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x2
+ nop
+ ori $s2, CLIP_RIGHT
+.no_clip_r_x2:
+ # Y2 clip test
+ sra $v0, $t7, 16
+ sra $v1, $t8, 16
+ bge $v0, $v1, .no_clip_t_y2
+ nop
+ ori $s2, CLIP_TOP
+.no_clip_t_y2:
+ sra $v1, $t9, 16
+ ble $v0, $v1, .no_clip_b_y2
+ nop
+ ori $s2, CLIP_BOTTOM
+.no_clip_b_y2:
+
+.endm
+
+
+# ClipTestQuad computes one outcode per quad vertex.
+# In: $t6 = projected vertex 0 (X in the low halfword, Y in the high one)
+# C2_SXY0..C2_SXY2 = projected vertices 1, 2 and 3
+# $t8 = clip minimum, $t9 = clip maximum (same X/Y packing)
+# Out: $s0..$s3 = CLIP_* bits for vertices 0..3
+# Uses: $v0, $v1, $t7 (bge/ble are assembler pseudo-instructions, so $at is
+# presumably used as well)
+# Both bounds are inclusive. The labels are plain symbols rather than
+# per-expansion ones, so the macro can be expanded only once per source file.
+.macro ClipTestQuad
+
+ # X0 clip test
+ sll $v0, $t6, 16 # Sign-extend vertex X
+ sra $v0, 16
+ sll $v1, $t8, 16 # Sign-extend minimum X
+ sra $v1, 16
+ bge $v0, $v1, .no_clip_l_x0_q
+ move $s0, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s0, CLIP_LEFT
+.no_clip_l_x0_q:
+ sll $v1, $t9, 16 # Sign-extend maximum X
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x0_q
+ nop
+ ori $s0, CLIP_RIGHT
+.no_clip_r_x0_q:
+ # Y0 clip test
+ sra $v0, $t6, 16 # Vertex Y
+ sra $v1, $t8, 16 # Minimum Y
+ bge $v0, $v1, .no_clip_t_y0_q
+ nop
+ ori $s0, CLIP_TOP
+.no_clip_t_y0_q:
+ sra $v1, $t9, 16 # Maximum Y
+ ble $v0, $v1, .no_clip_b_y0_q
+ nop
+ ori $s0, CLIP_BOTTOM
+.no_clip_b_y0_q:
+
+ # X1 clip test
+ mfc2 $t7, C2_SXY0
+ sll $v1, $t8, 16
+ sra $v1, 16
+ sll $v0, $t7, 16
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x1_q
+ move $s1, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s1, CLIP_LEFT
+.no_clip_l_x1_q:
+ sll $v1, $t9, 16
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x1_q
+ nop
+ ori $s1, CLIP_RIGHT
+.no_clip_r_x1_q:
+ # Y1 clip test
+ sra $v0, $t7, 16
+ sra $v1, $t8, 16
+ bge $v0, $v1, .no_clip_t_y1_q
+ nop
+ ori $s1, CLIP_TOP
+.no_clip_t_y1_q:
+ sra $v1, $t9, 16
+ ble $v0, $v1, .no_clip_b_y1_q
+ nop
+ ori $s1, CLIP_BOTTOM
+.no_clip_b_y1_q:
+
+ # X2 clip test
+ mfc2 $t7, C2_SXY1
+ sll $v1, $t8, 16
+ sra $v1, 16
+ sll $v0, $t7, 16
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x2_q
+ move $s2, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s2, CLIP_LEFT
+.no_clip_l_x2_q:
+ sll $v1, $t9, 16
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x2_q
+ nop
+ ori $s2, CLIP_RIGHT
+.no_clip_r_x2_q:
+ # Y2 clip test
+ sra $v0, $t7, 16
+ sra $v1, $t8, 16
+ bge $v0, $v1, .no_clip_t_y2_q
+ nop
+ ori $s2, CLIP_TOP
+.no_clip_t_y2_q:
+ sra $v1, $t9, 16
+ ble $v0, $v1, .no_clip_b_y2_q
+ nop
+ ori $s2, CLIP_BOTTOM
+.no_clip_b_y2_q:
+
+ # X3 clip test
+ mfc2 $t7, C2_SXY2
+ sll $v1, $t8, 16
+ sra $v1, 16
+ sll $v0, $t7, 16
+ sra $v0, 16
+ bge $v0, $v1, .no_clip_l_x3_q
+ move $s3, $0 # (delay slot) clear outcode, runs on both paths
+ ori $s3, CLIP_LEFT
+.no_clip_l_x3_q:
+ sll $v1, $t9, 16
+ sra $v1, 16
+ ble $v0, $v1, .no_clip_r_x3_q
+ nop
+ ori $s3, CLIP_RIGHT
+.no_clip_r_x3_q:
+ # Y3 clip test
+ sra $v0, $t7, 16
+ sra $v1, $t8, 16
+ bge $v0, $v1, .no_clip_t_y3_q
+ nop
+ ori $s3, CLIP_TOP
+.no_clip_t_y3_q:
+ sra $v1, $t9, 16
+ ble $v0, $v1, .no_clip_b_y3_q
+ nop
+ ori $s3, CLIP_BOTTOM
+.no_clip_b_y3_q:
+
+.endm
diff --git a/libpsn00b/smd/smd.s b/libpsn00b/smd/smd.s
new file mode 100644
index 0000000..3c87a5e
--- /dev/null
+++ b/libpsn00b/smd/smd.s
@@ -0,0 +1,928 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+.include "gtereg.inc"
+.include "inline_s.inc"
+.include "smd/smd_s.inc"
+
+# SMD *smdInitData(const void *data)
+#
+# Converts the three offsets stored in the SMD header (vertices, normals and
+# primitives) into absolute pointers by adding the address of the header
+# itself, writes them back in place and returns the header address.
+# The header is modified, so calling this twice on the same data would add
+# the base address twice.
+# Currently does not do header checks
+.section .text.smdInitData, "ax", @progbits
+.global smdInitData
+.type smdInitData, @function
+
+smdInitData:
+ lw $a1, SMD_HEAD_PVERTS($a0) # Initialize header pointers
+ lw $a2, SMD_HEAD_PNORMS($a0)
+ lw $a3, SMD_HEAD_PPRIMS($a0)
+
+ addu $a1, $a0 # offset + base address
+ addu $a2, $a0
+ addu $a3, $a0
+
+ sw $a1, SMD_HEAD_PVERTS($a0)
+ sw $a2, SMD_HEAD_PNORMS($a0)
+ sw $a3, SMD_HEAD_PPRIMS($a0)
+
+ jr $ra
+ move $v0, $a0 # (delay slot) return the header address
+
+# void scSetClipRect(int x0, int y0, int x1, int y1)
+#
+# Stores the clip rectangle used by the sort routines in _sc_clip as two
+# packed words: (x0 | y0 << 16) for the minimum corner and (x1 | y1 << 16)
+# for the maximum corner. The clip test macros sign-extend each halfword, so
+# every coordinate must fit in a signed 16-bit value.
+# The X values are masked to 16 bits before packing; without the mask the
+# sign bits of a negative X would overwrite the Y halfword.
+.section .text.scSetClipRect, "ax", @progbits
+.global scSetClipRect
+.type scSetClipRect, @function
+
+scSetClipRect:
+ andi $a0, 0xffff # Keep only the low halfword of x0
+ sll $a1, 16
+ or $a0, $a1 # a0 = x0 | y0 << 16
+ andi $a2, 0xffff # Keep only the low halfword of x1
+ sll $a3, 16
+ or $a2, $a3 # a2 = x1 | y1 << 16
+ la $a1, _sc_clip
+ sw $a0, 0( $a1 )
+ jr $ra
+ sw $a2, 4( $a1 ) # (delay slot) store the maximum corner
+
+# void smdSetBaseTPage(uint16_t tpage)
+#
+# Stores the base texture page value in _smd_tpage_base. smdSortModel reads
+# its low halfword back and merges it into the texpage word placed in front
+# of untextured primitives.
+.section .text.smdSetBaseTPage, "ax", @progbits
+.global smdSetBaseTPage
+.type smdSetBaseTPage, @function
+
+smdSetBaseTPage:
+ la $v0, _smd_tpage_base
+ jr $ra
+ sw $a0, 0($v0) # (delay slot) store the value
+
+# uint8_t *smdSortModel(SC_OT *ot, uint8_t *pribuff, SMD *smd)
+#
+# Walks the primitive stream of an initialized SMD model, projects each
+# triangle or quad with the GTE, rejects primitives that fail the Z, backface
+# or screen clip tests, builds a GPU packet for each survivor at pribuff and
+# links it into the ordering table.
+#
+# Register use for the whole routine:
+# a0 = OT base address a1 = next free primitive address
+# a3 = current primitive ID word
+# t0 = otlen | zdiv << 16 | zoff << 24
+# t1 = vertex array t2 = normal array
+# t3 = next primitive record t4 = read cursor inside the current record
+# t5 = target OT entry t6 = first projected vertex (quads only)
+# t8/t9 = clip rectangle minimum/maximum s0-s3 = clip outcodes
+.section .text.smdSortModel, "ax", @progbits
+.global smdSortModel
+.type smdSortModel, @function
+
+smdSortModel:
+ # a0 - Pointer SC_OT structure
+ # a1 - Pointer to next primitive
+ # a2 - Pointer to SMD data address
+ # v0 - New pointer of primitive buffer (return)
+
+ addiu $sp, -16 # Save the callee-saved registers used for outcodes
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+
+ la $v0, _sc_clip # Clip rectangle set by scSetClipRect
+ lw $t8, 0($v0)
+ lw $t9, 4($v0)
+
+ lw $t0, OT_LEN($a0) # One load fetches otlen, zdiv and zoff
+ lw $a0, OT_ADDR($a0)
+ lw $t1, SMD_HEAD_PVERTS($a2)
+ lw $t2, SMD_HEAD_PNORMS($a2)
+ lw $t3, SMD_HEAD_PPRIMS($a2)
+
+# Top of the per-primitive loop: fetch the ID word, project the first three
+# vertices, advance $t3 to the next record, then apply the GTE error and
+# backface tests before dispatching on the primitive type.
+.sort_loop:
+
+ nop
+ lw $a3, 0($t3) # Get primitive ID word
+ move $t4, $t3
+
+ beqz $a3, .exit # Check if terminator (just zero)
+ addiu $t4, 4 # (delay slot) t4 = vertex indices after the ID word
+
+ lhu $t5, 0( $t4 ) # Load vertices
+ lhu $t6, 2( $t4 )
+ lhu $t7, 4( $t4 )
+ sll $t5, 3 # Index * 8 = byte offset into the vertex array
+ sll $t6, 3
+ sll $t7, 3
+ addu $t5, $t1
+ addu $t6, $t1
+ addu $t7, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+ lwc2 C2_VXY1, 0( $t6 )
+ lwc2 C2_VZ1 , 4( $t6 )
+ lwc2 C2_VXY2, 0( $t7 )
+ lwc2 C2_VZ2 , 4( $t7 )
+
+ srl $v1, $a3, 24 # Get primitive size
+ addu $t3, $v1 # Step main pointer to next primitive
+
+ RTPT
+
+ cfc2 $v0, C2_FLAG # Get GTE flag value
+ nop
+
+ bltz $v0, .skip_prim # Skip primitive if Z overflow
+ andi $v0, $a3, 0x3 # (delay slot) v0 = primitive type, used at .no_culling
+
+ NCLIP # Backface culling
+
+ srl $v1, $a3, 12 # Bit 12 of the ID word disables culling
+ andi $v1, 1
+
+ bnez $v1, .no_culling
+ nop
+
+ mfc2 $v1, C2_MAC0 # NCLIP result
+ nop
+ bltz $v1, .skip_prim # Negative result: treated as back-facing
+ nop
+
+.no_culling:
+
+ beq $v0, 0x1, .prim_tri # If primitive is a triangle
+ nop
+ beq $v0, 0x2, .prim_quad # If primitive is a quad
+ nop
+
+ b .skip_prim # Any other type is ignored
+ nop
+
+## Triangles
+
+# Computes the OT slot for a triangle, (average Z >> zdiv) - zoff, drops it
+# when the slot is outside 1..otlen-1, then drops it when the clip test finds
+# no pair of vertices without a common outcode bit.
+.prim_tri: # Triangle processing
+
+ addiu $t4, 8 # Advance from indices
+
+ AVSZ3 # Calculate average Z
+
+ srl $v0, $t0, 16 # Get Z divisor from OT_LEN value
+ andi $v0, 0xff
+
+ mfc2 $t5, C2_OTZ # Get AVSZ3 result
+
+ sra $v1, $t0, 24 # Get Z offset from OT_LEN value
+
+ srl $t5, $v0 # Apply divisor and offset
+ sub $t5, $v1
+
+ blez $t5, .skip_prim # Skip primitive if zero or negative
+ andi $v1, $t0, 0xffff # (delay slot) v1 = OT length
+ bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length
+ sll $t5, 2 # (delay slot) entry index * 4 = byte offset
+ addu $t5, $a0 # Append OTZ to OT address
+
+ ClipTestTri
+
+ # Each 'and' below sits in the delay slot of the previous branch
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw
+ and $v0, $s2, $s0 # v2 & v0
+ beqz $v0, .do_draw
+ nop
+ b .skip_prim # Every pair shares an outside region
+ nop
+
+# Selects the triangle variant from the ID word. The checks run in the order
+# lighting, gouraud, texture, so an unlit primitive with both the gouraud and
+# texture bits set is emitted as an untextured gouraud triangle.
+# $t4 points at the data that follows the four vertex index slots.
+.do_draw:
+
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F3_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud shaded
+ bnez $v0, .F3_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_textured
+ nop
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2000 # Command byte 0x20 in the top byte
+ or $v0, $v1
+
+ b .sort_F3_pri
+ sw $v0, POLYF3_rgbc( $a1 ) # (delay slot) store color + command
+
+# Unlit gouraud triangle: copies the three vertex colors from the record and
+# sets command byte 0x30 on the first one.
+.F3_gouraud:
+
+ lw $v0, 0($t4) # Color 0
+ lw $v1, 4($t4) # Color 1
+ .set noat
+ lui $at, 0x3000 # $at used as a scratch register for the command byte
+ or $v0, $at
+ .set at
+ sw $v0, POLYG3_rgbc0($a1)
+ lw $v0, 8($t4) # Color 2
+ sw $v1, POLYG3_rgbc1($a1)
+ b .sort_G3_pri
+ sw $v0, POLYG3_rgbc2($a1) # (delay slot)
+
+# Unlit textured triangle: one color word, then the U/V pairs, then a word
+# holding the tpage in its low half and the CLUT in its high half.
+.F3_textured:
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2400 # Command byte 0x24
+ or $v0, $v1
+ sw $v0, POLYFT3_rgbc( $a1 )
+ addiu $t4, 4 # Step past the color word
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v0, 8( $t4 ) # Tpage + CLUT
+ nop
+ andi $v1, $v0, 0xffff # Low half = tpage
+ sh $v1, POLYFT3_tpage( $a1 )
+ srl $v0, 16 # High half = CLUT
+
+ b .sort_FT3_pri
+ sh $v0, POLYFT3_clut( $a1 ) # (delay slot)
+
+# Lit triangle. Normal 0 is loaded for both lighting modes; lighting type 2
+# continues at .F3_light_smt, any other non-zero type is lit here with that
+# single normal. Record layout for this path: normal index at 0, color at 4,
+# optional texture data from 8.
+.F3_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ srl $v1, $a3, 2 # v1 = lighting type
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3 # Index * 8 = byte offset into the normal array
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F3_light_smt
+ nop
+
+ lw $v0, 4( $t4 ) # Base color
+ lui $v1, 0x2000 # Command byte 0x20
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8 # Step past normal index and color
+ nop
+
+ NCCS
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF3_rgbc( $a1 ) # Lit color straight from the GTE
+
+ b .sort_F3_pri
+ nop
+
+# Flat lit, textured triangle: copies U/V, tpage and CLUT from the record,
+# then stores the lit color with the textured command bits ORed in.
+.F3_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT3_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT3_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2 # Lit color
+ lui $v1, 0x2400 # OR in command byte 0x24
+ or $v0, $v1
+
+ b .sort_FT3_pri
+ sw $v0, POLYFT3_rgbc( $a1 ) # (delay slot)
+
+# Smooth lit triangle: normal 0 is already loaded, normals 1 and 2 are loaded
+# here and all three are lit in one NCCT. Record layout for this path: normal
+# indices at 0..7, color at 8, optional texture data from 12.
+.F3_light_smt:
+
+ lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ lhu $v1, 4( $t4 )
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0( $v0 )
+ lwc2 C2_VZ1 , 4( $v0 )
+ lw $v0, 8( $t4 ) # Base color word
+ lwc2 C2_VXY2, 0( $v1 )
+ lwc2 C2_VZ2 , 4( $v1 )
+ lui $v1, 0x3000 # Load color
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 12 # Step past normal indices and color
+ nop
+
+ NCCT
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_light_tex_smt
+ nop
+
+ swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) # One lit color per vertex
+ swc2 C2_RGB1, POLYG3_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYG3_rgbc2( $a1 )
+
+ b .sort_G3_pri
+ nop
+
+# Smooth lit, textured triangle: copies U/V, tpage and CLUT, then stores the
+# three lit colors with the textured gouraud command on the first one.
+.F3_light_tex_smt:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYGT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYGT3_uv1( $a1 )
+ sh $v0, POLYGT3_uv2( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYGT3_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYGT3_clut( $a1 )
+
+ mfc2 $v0, C2_RGB0 # First lit color
+ lui $v1, 0x3400 # OR in command byte 0x34
+ or $v0, $v1
+
+ swc2 C2_RGB1, POLYGT3_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYGT3_rgbc2( $a1 )
+
+ b .sort_GT3_pri
+ sw $v0, POLYGT3_rgbc0( $a1 ) # (delay slot)
+
+# Finishes a flat triangle: stores the projected vertices and the texpage
+# word, links the packet into OT entry $t5 and advances $a1.
+.sort_F3_pri:
+
+ swc2 C2_SXY0, POLYF3_xy0($a1)
+ swc2 C2_SXY1, POLYF3_xy1($a1)
+ swc2 C2_SXY2, POLYF3_xy2($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5 # Blend mode goes in bits 5-6
+ or $v0, $v1
+ lui $v1, 0xe100 # Texpage command byte
+ or $v0, $v1
+ sw $v0, POLYF3_tpage($a1) # Store TPage
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0500 # 5 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF3_len # (delay slot) advance past this packet
+
+# Finishes a flat textured triangle: stores the projected vertices, links the
+# packet into OT entry $t5 and advances $a1.
+.sort_FT3_pri:
+
+ swc2 C2_SXY0, POLYFT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYFT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYFT3_xy2( $a1 )
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0700 # 7 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT3_len # (delay slot) advance past this packet
+
+# Finishes a gouraud triangle: stores the projected vertices and the texpage
+# word, links the packet into OT entry $t5 and advances $a1.
+.sort_G3_pri:
+
+ swc2 C2_SXY0, POLYG3_xy0( $a1 )
+ swc2 C2_SXY1, POLYG3_xy1( $a1 )
+ swc2 C2_SXY2, POLYG3_xy2( $a1 )
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5 # Blend mode goes in bits 5-6
+ or $v0, $v1
+ lui $v1, 0xe100 # Texpage command byte
+ or $v0, $v1
+ sw $v0, POLYG3_tpage($a1) # Store TPage
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0700 # 7 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG3_len # (delay slot) advance past this packet
+
+# Finishes a gouraud textured triangle: stores the projected vertices, links
+# the packet into OT entry $t5 and advances $a1.
+.sort_GT3_pri:
+
+ swc2 C2_SXY0, POLYGT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYGT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYGT3_xy2( $a1 )
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0900 # 9 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT3_len # (delay slot) advance past this packet
+
+## Quads
+
+# Saves the first projected vertex in $t6, projects the fourth vertex, then
+# computes the OT slot from the four-point average Z and runs the clip test.
+# From here on the quad's vertices are $t6, SXY0, SXY1 and SXY2, which is the
+# order in which the .sort_*4_pri blocks store them.
+# NOTE(review): nRTPS comes from inline_s.inc (not shown); presumably the
+# nop-padded form of RTPS that covers the GTE load delay.
+.prim_quad: # Quad processing
+
+ mfc2 $t6, C2_SXY0 # Retrieve first projected vertex
+
+ lhu $t5, 6( $t4 ) # Project the last vertex
+ addiu $t4, 8 # Advance from indices
+ sll $t5, 3
+ addu $t5, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+
+ nRTPS
+
+ cfc2 $v1, C2_FLAG # Get GTE flag value
+
+ srl $v0, $t0, 16 # Get Z divisor from OT_LEN value
+
+ bltz $v1, .skip_prim # Skip primitive if bit 31 of the GTE flags is set
+ nop
+
+ AVSZ4
+
+ andi $v0, 0xff
+
+ mfc2 $t5, C2_OTZ # Get AVSZ4 result
+
+ sra $v1, $t0, 24 # Get Z offset from OT_LEN value
+
+ srl $t5, $v0 # Apply divisor and offset
+ sub $t5, $v1
+
+ blez $t5, .skip_prim # Skip primitive if zero or negative
+ andi $v1, $t0, 0xffff # (delay slot) v1 = OT length
+ bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length
+ sll $t5, 2 # (delay slot) entry index * 4 = byte offset
+ addu $t5, $a0 # Append OTZ to OT address
+
+ # no touch:
+ # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0)
+
+ ClipTestQuad
+
+ # Each 'and' below sits in the delay slot of the previous branch
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s2, $s3 # v2 & v3
+ beqz $v0, .do_draw_q
+ and $v0, $s3, $s0 # v3 & v0
+ beqz $v0, .do_draw_q
+ and $v0, $s0, $s2 # v0 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s3 # v1 & v3
+ beqz $v0, .do_draw_q
+ nop
+ b .skip_prim # Every pair shares an outside region
+ nop
+
+# Selects the quad variant from the ID word. The checks run in the order
+# lighting, gouraud, texture, so an unlit primitive with both the gouraud and
+# texture bits set is emitted as an untextured gouraud quad.
+.do_draw_q:
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F4_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud quad
+ bnez $v0, .F4_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_textured
+ nop
+
+ lw $v0, 0($t4) # Flat color, no lighting
+ lui $v1, 0x2800 # Command byte 0x28
+ or $v0, $v1
+
+ b .sort_F4_pri
+ sw $v0, POLYF4_rgbc($a1) # (delay slot) store color + command
+
+# Unlit gouraud quad: copies the four vertex colors from the record and sets
+# command byte 0x38 on the first one.
+.F4_gouraud:
+
+ lw $v0, 0($t4) # Color 0
+ lw $v1, 4($t4) # Color 1
+ .set noat
+ lui $at, 0x3800 # $at used as a scratch register for the command byte
+ or $v0, $at
+ .set at
+ sw $v0, POLYG4_rgbc0($a1)
+ lw $v0, 8($t4) # Color 2
+ sw $v1, POLYG4_rgbc1($a1)
+ lw $v1, 12($t4) # Color 3
+ sw $v0, POLYG4_rgbc2($a1)
+ b .sort_G4_pri
+ sw $v1, POLYG4_rgbc3($a1) # (delay slot)
+
+# Unlit textured quad: one color word, then four U/V pairs, then a word
+# holding the tpage in its low half and the CLUT in its high half.
+.F4_textured:
+
+ lw $v0, 0($t4) # Flat color, no lighting
+ lui $v1, 0x2c00 # Command byte 0x2C
+ or $v0, $v1
+ sw $v0, POLYFT4_rgbc( $a1 )
+ addiu $t4, 4 # Step past the color word
+
+ lhu $v0, 0($t4) # Load texture coordinates
+ lhu $v1, 2($t4)
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_FT4_pri
+ sh $v0, POLYFT4_clut($a1) # (delay slot)
+
+# Lit quad. Normal 0 is loaded for both lighting modes; lighting type 2
+# continues at .F4_light_smt, any other non-zero type is lit here with that
+# single normal. Record layout for this path: normal index at 0, color at 4,
+# optional texture data from 8.
+.F4_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ srl $v1, $a3, 2 # v1 = lighting type
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3 # Index * 8 = byte offset into the normal array
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F4_light_smt
+ nop
+
+ lw $v0, 4( $t4 ) # Base color
+ lui $v1, 0x2800 # Command byte 0x28
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8 # Step past normal index and color
+ nop
+
+ NCCS
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF4_rgbc( $a1 ) # Lit color straight from the GTE
+
+ b .sort_F4_pri
+ nop
+
+# Flat lit, textured quad: copies U/V, tpage and CLUT from the record, then
+# stores the lit color with the textured command bits ORed in.
+.F4_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT4_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2 # Lit color
+ lui $v1, 0x2c00 # OR in command byte 0x2C
+ or $v0, $v1
+
+ b .sort_FT4_pri
+ sw $v0, POLYFT4_rgbc( $a1 ) # (delay slot)
+
+# Smooth lit quad: normals 0-2 are lit together with NCCT, then normal 3 is
+# loaded into the V0 slot and lit on its own with NCCS. Record layout for
+# this path: normal indices at 0..7, color at 8, optional texture data
+# from 12.
+# NOTE(review): nNCCT / nNCCS come from inline_s.inc (not shown); presumably
+# the nop-padded forms of NCCT / NCCS.
+.F4_light_smt:
+
+ lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ lhu $v1, 4( $t4 )
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0( $v0 )
+ lwc2 C2_VZ1 , 4( $v0 )
+ lwc2 C2_VXY2, 0( $v1 )
+ lwc2 C2_VZ2 , 4( $v1 )
+
+ lw $v0, 8( $t4 ) # Base color word
+ lui $v1, 0x3800 # Load color
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ nNCCT
+
+ lhu $v0, 6( $t4 ) # Load normal 3
+
+ addiu $t4, 12 # Step past normal indices and color
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_light_tex_smt
+ nop
+
+ # The first three colors are stored before the fourth one is computed
+ swc2 C2_RGB0, POLYG4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYG4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYG4_rgbc2( $a1 )
+
+ nNCCS
+
+ swc2 C2_RGB2, POLYG4_rgbc3( $a1 ) # Color of vertex 3
+
+ b .sort_G4_pri
+ nop
+
+# Smooth lit, textured quad: stores the three colors from the earlier NCCT,
+# starts NCCS for normal 3 and copies the texture data while it runs.
+# NOTE(review): 0x34 is ORed into the first color here although this is a
+# quad. C2_RGB was loaded with command byte 0x38 in .F4_light_smt, so if that
+# byte is still present in C2_RGB0 the result is 0x3C - confirm.
+.F4_light_tex_smt:
+
+ mfc2 $v0, C2_RGB0
+ lui $v1, 0x3400
+ or $v0, $v1
+ sw $v0, POLYGT4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYGT4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYGT4_rgbc2( $a1 )
+
+ NCCS
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYGT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYGT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYGT4_uv2( $a1 )
+ sh $v1, POLYGT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) # Color of vertex 3
+
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYGT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_GT4_pri
+ sh $v0, POLYGT4_clut( $a1 ) # (delay slot)
+
+# Finishes a flat quad: stores the four projected vertices ($t6 first) and
+# the texpage word, links the packet into OT entry $t5 and advances $a1.
+.sort_F4_pri:
+
+ sw $t6, POLYF4_xy0($a1)
+ swc2 C2_SXY0, POLYF4_xy1($a1)
+ swc2 C2_SXY1, POLYF4_xy2($a1)
+ swc2 C2_SXY2, POLYF4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5 # Blend mode goes in bits 5-6
+ or $v0, $v1
+ lui $v1, 0xe100 # Texpage command byte
+ or $v0, $v1
+ sw $v0, POLYF4_tpage($a1) # Store TPage
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0600 # 6 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF4_len # (delay slot) advance past this packet
+
+# Finishes a flat textured quad: stores the four projected vertices ($t6
+# first), links the packet into OT entry $t5 and advances $a1.
+.sort_FT4_pri:
+
+ sw $t6, POLYFT4_xy0($a1)
+ swc2 C2_SXY0, POLYFT4_xy1($a1)
+ swc2 C2_SXY1, POLYFT4_xy2($a1)
+ swc2 C2_SXY2, POLYFT4_xy3($a1)
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0900 # 9 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT4_len # (delay slot) advance past this packet
+
+# Finishes a gouraud quad: stores the four projected vertices ($t6 first) and
+# the texpage word, links the packet into OT entry $t5 and advances $a1.
+.sort_G4_pri:
+
+ sw $t6, POLYG4_xy0($a1)
+ swc2 C2_SXY0, POLYG4_xy1($a1)
+ swc2 C2_SXY1, POLYG4_xy2($a1)
+ swc2 C2_SXY2, POLYG4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5 # Blend mode goes in bits 5-6
+ or $v0, $v1
+ lui $v1, 0xe100 # Texpage command byte
+ or $v0, $v1
+ sw $v0, POLYG4_tpage($a1) # Store TPage
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0900 # 9 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG4_len # (delay slot) advance past this packet
+
+# Finishes a gouraud textured quad: stores the four projected vertices ($t6
+# first), links the packet into OT entry $t5 and advances $a1.
+.sort_GT4_pri:
+
+ sw $t6, POLYGT4_xy0($a1)
+ swc2 C2_SXY0, POLYGT4_xy1($a1)
+ swc2 C2_SXY1, POLYGT4_xy2($a1)
+ swc2 C2_SXY2, POLYGT4_xy3($a1)
+
+ .set noat
+
+ # Tag = packet length in the top byte | old link from the OT entry;
+ # the OT entry then gets the low 24 bits of the packet address.
+ lui $v1, 0x0c00 # 12 words follow the tag
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at # a1 itself is reduced to 24 bits here
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # NOTE(review): the top byte of a1 is rebuilt as 0x80, so the primitive
+ # buffer is presumably expected at 0x80xxxxxx - confirm against callers.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT4_len # (delay slot) advance past this packet
+
+# Rejected primitives land here. $t3 was already advanced to the next record
+# at the top of the loop, so nothing else needs to be done.
+.skip_prim:
+
+ b .sort_loop
+ nop
+
+# Terminator reached: restore the saved registers and return the updated
+# primitive buffer pointer.
+.exit:
+
+ lw $s0, 0( $sp )
+ lw $s1, 4( $sp )
+ lw $s2, 8( $sp )
+ lw $s3, 12( $sp )
+ addiu $sp, 16
+ jr $ra
+ move $v0, $a1 # (delay slot) return value
+
+# Base texture page written by smdSetBaseTPage; the sort routine reads its
+# low halfword.
+.section .bss._smd_tpage_base, "w"
+.comm _smd_tpage_base, 4, 4
+
+# Clip rectangle written by scSetClipRect: word 0 = x0 | y0 << 16,
+# word 1 = x1 | y1 << 16.
+.section .bss._sc_clip, "w"
+.comm _sc_clip, 8, 4
diff --git a/libpsn00b/smd/smd_cel.s b/libpsn00b/smd/smd_cel.s
new file mode 100644
index 0000000..8a39f01
--- /dev/null
+++ b/libpsn00b/smd/smd_cel.s
@@ -0,0 +1,1078 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+.include "gtereg.inc"
+.include "inline_s.inc"
+.include "smd/smd_s.inc"
+
+.section .text.smdSetCelTex, "ax", @progbits
+.global smdSetCelTex
+.type smdSetCelTex, @function
+
+# smdSetCelTex - pack the cel shading texture page and CLUT into a single
+# word and store it in _smd_cel_tpage.
+#   a0 - TPage value (only the low 16 bits are kept)
+#   a1 - CLUT value (moved into the upper 16 bits)
+smdSetCelTex:
+ sll $a1, $a1, 16
+ andi $a0, $a0, 0xffff
+ la $v0, _smd_cel_tpage
+ or $a0, $a0, $a1
+ jr $ra
+ sw $a0, 0($v0) # Delay slot: store (CLUT << 16) | TPage
+
+.section .text.smdSetCelParam, "ax", @progbits
+.global smdSetCelParam
+.type smdSetCelParam, @function
+
+# smdSetCelParam - set the parameters used by smdSortModelCel when it
+# builds the shading overlay primitive.
+#   a0 - U parameter (low 8 bits kept)
+#   a1 - V parameter (low 8 bits kept)
+#   a2 - Shading primitive color
+# The U/V parameters are packed as (V << 8) | U into _smd_cel_param.
+# smdSortModelCel uses them as right-shift counts on the lit R and G
+# values to derive the overlay's texture coordinates.
+smdSetCelParam:
+ andi $a1, 0xff
+ sll $a1, 8
+ andi $a0, 0xff
+ or $a0, $a1
+ la $v0, _smd_cel_param
+ sw $a0, 0($v0)
+ la $v0, _smd_cel_col
+ lui $v1, 0x0200
+ # Set bit 25 in the stored color. This used to be applied to $a3, which
+ # is not an argument and was never read, so it had no effect. The
+ # 0x2600/0x2E00 command bytes that smdSortModelCel ORs in later already
+ # contain this bit.
+ or $a2, $v1
+ jr $ra
+ sw $a2, 0($v0) # Delay slot: store the shading color
+
+.section .text.smdSortModelCel, "ax", @progbits
+.global smdSortModelCel
+.type smdSortModelCel, @function
+
+# smdSortModelCel - walk the primitive list of an SMD model, project each
+# primitive with the GTE and link the resulting GPU primitives into the
+# ordering table. Smooth-lit primitives additionally get a textured
+# shading overlay built from _smd_cel_tpage/_smd_cel_param/_smd_cel_col.
+smdSortModelCel:
+ # a0 - Pointer SC_OT structure
+ # a1 - Pointer to next primitive
+ # a2 - Pointer to SMD data address
+ # v0 - New pointer of primitive buffer (return)
+
+ # s0-s3 are saved because the clip test results are read from them
+ # further down; they are restored at .exit.
+ addiu $sp, -16
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+
+ # t8/t9 = the two words of _sc_clip (presumably consumed by the
+ # ClipTestTri/ClipTestQuad macros - confirm in smd_s.inc).
+ la $v0, _sc_clip
+ lw $t8, 0($v0)
+ lw $t9, 4($v0)
+
+ # Register allocation for the main loop:
+ #   t0 - OT_LEN word (length in bits 0-15, Z shift in bits 16-23,
+ #        Z offset in bits 24-31, as decoded in the loop below)
+ #   a0 - OT base address (replaces the SC_OT pointer)
+ #   t1 - vertex table, t2 - normal table, t3 - current primitive
+ lw $t0, OT_LEN($a0)
+ lw $a0, OT_ADDR($a0)
+ lw $t1, SMD_HEAD_PVERTS($a2)
+ lw $t2, SMD_HEAD_PNORMS($a2)
+ lw $t3, SMD_HEAD_PPRIMS($a2)
+
+.sort_loop:
+
+ nop
+ lw $a3, 0($t3) # Get primitive ID word
+ move $t4, $t3
+
+ beqz $a3, .exit # Check if terminator (just zero)
+ addiu $t4, 4
+
+ lhu $t5, 0( $t4 ) # Load vertices
+ lhu $t6, 2( $t4 )
+ lhu $t7, 4( $t4 )
+ sll $t5, 3
+ sll $t6, 3
+ sll $t7, 3
+ addu $t5, $t1
+ addu $t6, $t1
+ addu $t7, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+ lwc2 C2_VXY1, 0( $t6 )
+ lwc2 C2_VZ1 , 4( $t6 )
+ lwc2 C2_VXY2, 0( $t7 )
+ lwc2 C2_VZ2 , 4( $t7 )
+
+ srl $v1, $a3, 24 # Get primitive size
+ addu $t3, $v1 # Step main pointer to next primitive
+
+ RTPT
+
+ cfc2 $v0, C2_FLAG # Get GTE flag value
+ nop
+
+ bltz $v0, .skip_prim # Skip primitive if Z overflow
+ andi $v0, $a3, 0x3
+
+ NCLIP # Backface culling
+
+ srl $v1, $a3, 12
+ andi $v1, 1
+
+ bnez $v1, .no_culling
+ nop
+
+ mfc2 $v1, C2_MAC0
+ nop
+ bltz $v1, .skip_prim
+ nop
+
+.no_culling:
+
+ beq $v0, 0x1, .prim_tri # If primitive is a triangle
+ nop
+ beq $v0, 0x2, .prim_quad # If primitive is a quad
+ nop
+
+ b .skip_prim
+ nop
+
+## Triangles
+
+.prim_tri: # Triangle processing
+
+ addiu $t4, 8 # Advance from indices
+
+ AVSZ3 # Calculate average Z
+
+ srl $v0, $t0, 16 # Get Z divisor from OT_LEN value
+ andi $v0, 0xff
+
+ mfc2 $t5, C2_OTZ # Get AVSZ3 result
+
+ sra $v1, $t0, 24 # Get Z offset from OT_LEN value
+
+ srl $t5, $v0 # Apply divisor and offset
+ sub $t5, $v1
+
+ blez $t5, .skip_prim # Skip primitive if less than zero
+ andi $v1, $t0, 0xffff
+ bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length
+ sll $t5, 2
+ addu $t5, $a0 # Append OTZ to OT address
+
+ ClipTestTri
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw
+ and $v0, $s2, $s0 # v2 & v0
+ beqz $v0, .do_draw
+ nop
+ b .skip_prim
+ nop
+
+.do_draw:
+
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F3_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud shaded
+ bnez $v0, .F3_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_textured
+ nop
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2000
+ or $v0, $v1
+
+ b .sort_F3_pri
+ sw $v0, POLYF3_rgbc( $a1 )
+
+.F3_gouraud:
+
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3000
+ or $v0, $at
+ .set at
+ sw $v0, POLYG3_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG3_rgbc1($a1)
+ b .sort_G3_pri
+ sw $v0, POLYG3_rgbc2($a1)
+
+.F3_textured:
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2400
+ or $v0, $v1
+ sw $v0, POLYFT3_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v0, 8( $t4 ) # Tpage + CLUT
+ nop
+ andi $v1, $v0, 0xffff
+ sh $v1, POLYFT3_tpage( $a1 )
+ srl $v0, 16
+
+ b .sort_FT3_pri
+ sh $v0, POLYFT3_clut( $a1 )
+
+.F3_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ srl $v1, $a3, 2
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F3_light_smt
+ nop
+
+ lw $v0, 4( $t4 )
+ lui $v1, 0x2000
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8
+ nop
+
+ NCCS
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF3_rgbc( $a1 )
+
+ b .sort_F3_pri
+ nop
+
+.F3_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT3_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT3_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2
+ lui $v1, 0x2400
+ or $v0, $v1
+
+ b .sort_FT3_pri
+ sw $v0, POLYFT3_rgbc( $a1 )
+
+.F3_light_smt:
+
+ lhu $v0, 2($t4) # Load normals 1 and 2
+ lhu $v1, 4($t4)
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0($v0)
+ lwc2 C2_VZ1 , 4($v0)
+ #la $v0, _smd_cel_col
+ #lw $v0, 0($v0)
+ lwc2 C2_VXY2, 0($v1)
+ lwc2 C2_VZ2 , 4($v1)
+ mtc2 $v0, C2_RGB
+
+ swc2 C2_SXY0, POLYFT3_xy0($a1)
+ swc2 C2_SXY1, POLYFT3_xy1($a1)
+ swc2 C2_SXY2, POLYFT3_xy2($a1)
+
+ la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values
+ lw $v0, 0($v0)
+
+ NCT
+
+ andi $v1, $v0, 0xffff
+ sh $v1, POLYFT3_tpage($a1)
+ srl $v1, $v0, 16
+ sh $v1, POLYFT3_clut($a1)
+
+ # Usable regs: v0, v1, at, t6, t7
+
+ .set noat
+
+ la $at, _smd_cel_param # Load cel shader parameters
+ lhu $at, 0($at)
+
+ mfc2 $t7, C2_RGB0 # Get first shaded color
+ andi $v1, $at, 0xff # Get U divisor value
+ andi $t7, 0xffff # Only keep R and G colors
+
+ andi $v0, $t7, 0xff # U0
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv0($a1)
+ srl $v0, $t7, 8 # V0
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv0+1($a1)
+
+ mfc2 $t7, C2_RGB1
+ andi $v1, $at, 0xff
+ andi $t7, 0xffff
+ andi $v0, $t7, 0xff # U1
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv1($a1)
+ srl $v0, $t7, 8 # V1
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv1+1($a1)
+
+ mfc2 $t7, C2_RGB2
+ andi $v1, $at, 0xff
+ andi $t7, 0xffff
+ andi $v0, $t7, 0xff # U2
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv2($a1)
+ srl $v0, $t7, 8 # V2
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT3_uv2+1($a1)
+
+ la $v0, _smd_cel_col
+ lw $v0, 0($v0)
+ lui $v1, 0x2600
+ or $v0, $v1
+ sw $v0, POLYFT3_rgbc($a1)
+
+ lw $t7, 8($t4)
+ addiu $t4, 12
+
+ lui $v1, 0x0700
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $v1, $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $v1
+ sw $v0, 0($t5)
+
+ lui $v0, 0x8000
+ or $a1, $v0
+ addiu $a1, POLYFT3_len
+
+ .set at
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_light_tex_smt
+ nop
+
+ #swc2 C2_RGB0, POLYG3_rgbc0( $a1 )
+ #swc2 C2_RGB1, POLYG3_rgbc1( $a1 )
+ #swc2 C2_RGB2, POLYG3_rgbc2( $a1 )
+
+ lui $v0, 0x2000
+ or $t7, $v0
+ b .sort_F3_pri
+ sw $t7, POLYF3_rgbc($a1)
+
+.F3_light_tex_smt:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT3_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT3_clut( $a1 )
+
+ lui $v1, 0x2400
+ or $t7, $v1
+
+ b .sort_FT3_pri
+ sw $t7, POLYFT3_rgbc( $a1 )
+
+# Finish a POLYF3 primitive at $a1 and link it into the OT entry at $t5.
+.sort_F3_pri:
+
+ # Store the three projected screen coordinates
+ swc2 C2_SXY0, POLYF3_xy0($a1)
+ swc2 C2_SXY1, POLYF3_xy1($a1)
+ swc2 C2_SXY2, POLYF3_xy2($a1)
+
+ # Build the tpage word: 0xE1000000 | _smd_tpage_base | (blend << 5),
+ # where blend is bits 6-7 of the primitive ID word in $a3.
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF3_tpage($a1) # Store TPage
+
+ .set noat
+
+ # Primitive tag = 0x05000000 | (old OT entry & 0x00ffffff);
+ # 0x05 is presumably the packet length in words - confirm.
+ lui $v1, 0x0500
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ # OT entry = (old OT entry & 0xff000000) | (primitive address & 0x00ffffff)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ # $a1 was masked to 24 bits above; only bit 31 is put back, so this
+ # assumes the primitive buffer lives at 0x80xxxxxx - TODO confirm.
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF3_len # Delay slot: advance to the next free primitive
+
+.sort_FT3_pri:
+
+ swc2 C2_SXY0, POLYFT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYFT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYFT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT3_len
+
+.sort_G3_pri:
+
+ swc2 C2_SXY0, POLYG3_xy0( $a1 )
+ swc2 C2_SXY1, POLYG3_xy1( $a1 )
+ swc2 C2_SXY2, POLYG3_xy2( $a1 )
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG3_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG3_len
+
+.sort_GT3_pri:
+
+ swc2 C2_SXY0, POLYGT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYGT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYGT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT3_len
+
+## Quads
+
+.prim_quad: # Quad processing
+
+ mfc2 $t6, C2_SXY0 # Retrieve first projected vertex
+
+ lhu $t5, 6( $t4 ) # Project the last vertex
+ addiu $t4, 8
+ sll $t5, 3
+ addu $t5, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+
+ nRTPS
+
+ cfc2 $v1, C2_FLAG # Get GTE flag value
+
+ srl $v0, $t0, 16 # Get Z divisor from OT_LEN value
+
+ bltz $v1, .skip_prim
+ nop
+
+ AVSZ4
+
+ andi $v0, 0xff
+
+ mfc2 $t5, C2_OTZ
+
+ sra $v1, $t0, 24 # Get Z offset from OT_LEN value
+
+ srl $t5, $v0 # Apply divisor and offset
+ sub $t5, $v1
+
+ blez $t5, .skip_prim # Skip primitive if less than zero
+ andi $v1, $t0, 0xffff
+ bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length
+ sll $t5, 2
+ addu $t5, $a0 # Append OTZ to OT address
+
+ # no touch:
+ # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0)
+
+ ClipTestQuad
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s2, $s3 # v2 & v3
+ beqz $v0, .do_draw_q
+ and $v0, $s3, $s0 # v3 & v0
+ beqz $v0, .do_draw_q
+ and $v0, $s0, $s2 # v0 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s3 # v1 & v3
+ beqz $v0, .do_draw_q
+ nop
+ b .skip_prim
+ nop
+
+.do_draw_q:
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F4_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud quad
+ bnez $v0, .F4_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_textured
+ nop
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2800
+ or $v0, $v1
+
+ b .sort_F4_pri
+ sw $v0, POLYF4_rgbc($a1)
+
+.F4_gouraud:
+
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3800
+ or $v0, $at
+ .set at
+ sw $v0, POLYG4_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG4_rgbc1($a1)
+ lw $v1, 12($t4)
+ sw $v0, POLYG4_rgbc2($a1)
+ b .sort_G4_pri
+ sw $v1, POLYG4_rgbc3($a1)
+
+.F4_textured:
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2c00
+ or $v0, $v1
+ sw $v0, POLYFT4_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0($t4) # Load texture coordinates
+ lhu $v1, 2($t4)
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_FT4_pri
+ sh $v0, POLYFT4_clut($a1)
+
+# Lit quad: load normal 0, then either hand over to the smooth (per-vertex
+# normal) path or light the quad once with a single normal.
+.F4_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ # v1 = lighting mode (bits 2-3 of the primitive ID word)
+ srl $v1, $a3, 2
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F4_light_smt # Mode 2 = smooth shading
+ nop
+
+ # Load the primitive color with the POLYF4 command byte (0x28)
+ lw $v0, 4( $t4 )
+ lui $v1, 0x2800
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8
+ nop
+
+ # NOTE(review): the triangle path (.F3_light) and the same section in
+ # smd_flat.s use NCCS here; NCS looks like it may be unintended since
+ # the color was just loaded into C2_RGB - confirm.
+ NCS
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF4_rgbc( $a1 )
+
+ b .sort_F4_pri
+ nop
+
+# Flat-lit textured quad: copy UVs, tpage and CLUT into the POLYFT4 and
+# store the lit color with the 0x2c command byte.
+.F4_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT4_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2 # Lit color from the GTE
+ lui $v1, 0x2c00
+ or $v0, $v1
+
+ # The store must sit in the branch delay slot. It used to follow a
+ # delay-slot nop, so it was never executed and the color/command word
+ # of the primitive was left unwritten.
+ b .sort_FT4_pri
+ sw $v0, POLYFT4_rgbc( $a1 )
+
+.F4_light_smt:
+
+ lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ lhu $v1, 4( $t4 )
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0( $v0 )
+ lwc2 C2_VZ1 , 4( $v0 )
+ lwc2 C2_VXY2, 0( $v1 )
+ lwc2 C2_VZ2 , 4( $v1 )
+
+ sw $t6, POLYFT4_xy0($a1)
+ swc2 C2_SXY0, POLYFT4_xy1($a1)
+ swc2 C2_SXY1, POLYFT4_xy2($a1)
+ swc2 C2_SXY2, POLYFT4_xy3($a1)
+
+ la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values
+ lw $v0, 0($v0)
+
+ NCT
+
+ andi $v1, $v0, 0xffff
+ sh $v1, POLYFT4_tpage($a1)
+ srl $v1, $v0, 16
+ sh $v1, POLYFT4_clut($a1)
+
+ # Usable regs: v0, v1, at, t7
+
+ .set noat
+
+ la $at, _smd_cel_param # Load cel shader parameters
+ lhu $at, 0($at)
+
+ mfc2 $t7, C2_RGB0
+ andi $v1, $at, 0xff # Get U divisor value
+ andi $t7, 0xffff # Only keep R and G colors
+
+ andi $v0, $t7, 0xff # U0
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv0($a1)
+ srl $v0, $t7, 8 # V0
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv0+1($a1)
+
+ mfc2 $t7, C2_RGB1
+ andi $v1, $at, 0xff
+ andi $t7, 0xffff
+ andi $v0, $t7, 0xff # U1
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv1($a1)
+ srl $v0, $t7, 8 # V1
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv1+1($a1)
+
+ mfc2 $t7, C2_RGB2
+ andi $v1, $at, 0xff
+ andi $t7, 0xffff
+ andi $v0, $t7, 0xff # U2
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv2($a1)
+ srl $v0, $t7, 8 # V2
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv2+1($a1)
+
+ la $v0, _smd_cel_col
+ lw $v0, 0($v0)
+ lui $v1, 0x2E00
+ or $v0, $v1
+ sw $v0, POLYFT4_rgbc($a1)
+
+ lw $t7, 8($t4)
+
+ lhu $v0, 6($t4) # Load normal 3
+ addiu $t4, 12
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ nNCS
+
+ mfc2 $s0, C2_RGB2
+ andi $v1, $at, 0xff
+ andi $s0, 0xffff
+ andi $v0, $s0, 0xff # U3
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv3($a1)
+ srl $v0, $s0, 8 # V3
+ srl $v1, $at, 8
+ srl $v0, $v1
+ sb $v0, POLYFT4_uv3+1($a1)
+
+ lui $v1, 0x0900
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $v1, $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $v1
+ sw $v0, 0($t5)
+
+ lui $v0, 0x8000
+ or $a1, $v0
+ addiu $a1, POLYFT4_len
+
+ .set at
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_light_tex_smt
+ nop
+
+ lui $v0, 0x2800
+ or $t7, $v0
+ b .sort_F4_pri
+ sw $t7, POLYF4_rgbc($a1)
+
+# Base textured quad that follows the cel shading overlay: $a1 already
+# points past the overlay primitive and $t7 holds the primitive color.
+.F4_light_tex_smt:
+
+ lhu $v0, 0($t4) # Load texture coordinates
+ lhu $v1, 2($t4)
+ sh $v0, POLYFT4_uv0($a1)
+ lhu $v0, 4($t4)
+ sh $v1, POLYFT4_uv1($a1)
+ lhu $v1, 6($t4)
+ sh $v0, POLYFT4_uv2($a1)
+ sh $v1, POLYFT4_uv3($a1)
+
+ lw $v1, 8($t4) # Tpage (low half) + CLUT (high half)
+
+ # NOTE(review): 0x2E00 is the same command byte used for the shading
+ # overlay; the other textured quad paths use 0x2c00 and the triangle
+ # equivalent (.F3_light_tex_smt) uses 0x2400 - confirm this is intended.
+ lui $v0, 0x2E00
+ or $t7, $v0
+ sw $t7, POLYFT4_rgbc($a1)
+
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage($a1)
+ srl $v0, $v1, 16
+
+ b .sort_FT4_pri
+ sh $v0, POLYFT4_clut($a1) # Delay slot: store CLUT
+
+.sort_F4_pri:
+
+ sw $t6, POLYF4_xy0($a1)
+ swc2 C2_SXY0, POLYF4_xy1($a1)
+ swc2 C2_SXY1, POLYF4_xy2($a1)
+ swc2 C2_SXY2, POLYF4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0600
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF4_len
+
+.sort_FT4_pri:
+
+ sw $t6, POLYFT4_xy0($a1)
+ swc2 C2_SXY0, POLYFT4_xy1($a1)
+ swc2 C2_SXY1, POLYFT4_xy2($a1)
+ swc2 C2_SXY2, POLYFT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT4_len
+
+.sort_G4_pri:
+
+ sw $t6, POLYG4_xy0($a1)
+ swc2 C2_SXY0, POLYG4_xy1($a1)
+ swc2 C2_SXY1, POLYG4_xy2($a1)
+ swc2 C2_SXY2, POLYG4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG4_len
+
+.sort_GT4_pri:
+
+ sw $t6, POLYGT4_xy0($a1)
+ swc2 C2_SXY0, POLYGT4_xy1($a1)
+ swc2 C2_SXY1, POLYGT4_xy2($a1)
+ swc2 C2_SXY2, POLYGT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0c00
+ lw $v0, 0($t5)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($t5)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($t5)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT4_len
+
+.skip_prim:
+
+ b .sort_loop
+ nop
+
+.exit:
+
+ lw $s0, 0( $sp )
+ lw $s1, 4( $sp )
+ lw $s2, 8( $sp )
+ lw $s3, 12( $sp )
+ addiu $sp, 16
+ jr $ra
+ move $v0, $a1
+
+.section .bss._smd_cel_col, "w"
+.comm _smd_cel_col, 4, 4 # STP shading polygon color
+
+.section .bss._smd_cel_param, "w"
+.comm _smd_cel_param, 4, 4 # U divisor, V divisor, shading clip
+
+.section .bss._smd_cel_tpage, "w"
+.comm _smd_cel_tpage, 4, 4 # CEL shader texture page & CLUT
diff --git a/libpsn00b/smd/smd_flat.s b/libpsn00b/smd/smd_flat.s
new file mode 100644
index 0000000..843b8d3
--- /dev/null
+++ b/libpsn00b/smd/smd_flat.s
@@ -0,0 +1,833 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+.include "gtereg.inc"
+.include "inline_s.inc"
+.include "smd/smd_s.inc"
+
+.section .text.smdSortModelFlat, "ax", @progbits
+.global smdSortModelFlat
+.type smdSortModelFlat, @function
+
+# smdSortModelFlat - project every primitive of an SMD model and link all
+# of them to one ordering table entry (no per-primitive depth sorting;
+# the AVSZ3 call is commented out in this function).
+smdSortModelFlat:
+ # a0 - Pointer SC_OT structure
+ #      NOTE(review): this function never reads OT_LEN/OT_ADDR; the
+ #      .sort_*_pri sections dereference 0($a0) directly as the OT entry
+ #      to link to. Presumably a0 is really a pointer to a single OT
+ #      entry - confirm against the prototype in smd.h.
+ # a1 - Pointer to next primitive
+ # a2 - Pointer to SMD data address
+ # v0 - New pointer of primitive buffer (return)
+
+ # s0-s3 are saved because the clip test results are read from them
+ # further down; they are restored at .exit.
+ addiu $sp, -16
+ sw $s0, 0( $sp )
+ sw $s1, 4( $sp )
+ sw $s2, 8( $sp )
+ sw $s3, 12( $sp )
+
+ # t8/t9 = the two words of _sc_clip (presumably consumed by the
+ # ClipTestTri/ClipTestQuad macros - confirm in smd_s.inc).
+ la $v0, _sc_clip
+ lw $t8, 0($v0)
+ lw $t9, 4($v0)
+
+ # t1 - vertex table, t2 - normal table, t3 - current primitive
+ lw $t1, SMD_HEAD_PVERTS( $a2 )
+ lw $t2, SMD_HEAD_PNORMS( $a2 )
+ lw $t3, SMD_HEAD_PPRIMS( $a2 )
+
+.sort_loop:
+
+ nop
+ lw $a3, 0($t3) # Get primitive ID word
+ move $t4, $t3
+
+ beqz $a3, .exit # Check if terminator (just zero)
+ addiu $t4, 4
+
+ lhu $t5, 0( $t4 ) # Load vertices
+ lhu $t6, 2( $t4 )
+ lhu $t7, 4( $t4 )
+ sll $t5, 3
+ sll $t6, 3
+ sll $t7, 3
+ addu $t5, $t1
+ addu $t6, $t1
+ addu $t7, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+ lwc2 C2_VXY1, 0( $t6 )
+ lwc2 C2_VZ1 , 4( $t6 )
+ lwc2 C2_VXY2, 0( $t7 )
+ lwc2 C2_VZ2 , 4( $t7 )
+
+ srl $v1, $a3, 24 # Get primitive size
+ addu $t3, $v1 # Step main pointer to next primitive
+
+ RTPT
+
+ cfc2 $v0, C2_FLAG # Get GTE flag value
+ nop
+
+ bltz $v0, .skip_prim # Skip primitive if Z overflow
+ nop
+
+ #NCLIP # Backface culling
+
+ #mfc2 $v1, C2_MAC0
+
+ andi $v0, $a3, 0x3
+
+ #bltz $v1, .skip_prim
+ #nop
+
+ beq $v0, 0x1, .prim_tri # If primitive is a triangle
+ nop
+ beq $v0, 0x2, .prim_quad # If primitive is a quad
+ nop
+
+ b .skip_prim
+ nop
+
+## Triangles
+
+.prim_tri: # Triangle processing
+
+ addiu $t4, 8 # Advance from indices
+
+ #AVSZ3 # Calculate average Z
+
+ ClipTestTri
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw
+ and $v0, $s2, $s0 # v2 & v0
+ beqz $v0, .do_draw
+ nop
+ b .skip_prim
+ nop
+
+.do_draw:
+
+ #srl $v0, $a3, 2 # Lighting enabled?
+ #andi $v0, 0x3
+ #bnez $v0, .F3_light
+ #nop
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_textured
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud shaded
+ bnez $v0, .F3_gouraud
+ nop
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2000
+ or $v0, $v1
+
+ b .sort_F3_pri
+ sw $v0, POLYF3_rgbc( $a1 )
+
+.F3_textured:
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2400
+ or $v0, $v1
+ sw $v0, POLYFT3_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v0, 8( $t4 ) # Tpage + CLUT
+ nop
+ andi $v1, $v0, 0xffff
+ sh $v1, POLYFT3_tpage( $a1 )
+ srl $v0, 16
+
+ b .sort_FT3_pri
+ sh $v0, POLYFT3_clut( $a1 )
+
+.F3_gouraud:
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3000
+ or $v0, $at
+ .set at
+ sw $v0, POLYG3_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG3_rgbc1($a1)
+ b .sort_G3_pri
+ sw $v0, POLYG3_rgbc2($a1)
+
+#.F3_light:
+
+ #lhu $v0, 0( $t4 ) # Load normal 0
+
+ #srl $v1, $a3, 2
+ #andi $v1, $v1, 0x3
+
+ #sll $v0, 3
+ #addu $v0, $t2
+ #lwc2 C2_VXY0, 0( $v0 )
+ #lwc2 C2_VZ0 , 4( $v0 )
+
+ #beq $v1, 0x2, .F3_light_smt
+ #nop
+
+ #lw $v0, 4( $t4 )
+ #lui $v1, 0x2000
+ #or $v0, $v1
+ #mtc2 $v0, C2_RGB
+
+ #addiu $t4, 8
+ #nop
+
+ #NCCS
+
+ #andi $v0, $a3, 0x20 # Textured triangle
+ #bnez $v0, .F3_light_tex
+ #nop
+
+ #swc2 C2_RGB2, POLYF3_rgbc( $a1 )
+
+ #b .sort_F3_pri
+ #nop
+
+#.F3_light_tex:
+
+ #lhu $v0, 0( $t4 ) # Load texture coordinates
+ #lhu $v1, 2( $t4 )
+ #sh $v0, POLYFT3_uv0( $a1 )
+ #lhu $v0, 4( $t4 )
+ #sh $v1, POLYFT3_uv1( $a1 )
+ #sh $v0, POLYFT3_uv2( $a1 )
+
+ #lw $v1, 8( $t4 )
+ #nop
+ #andi $v0, $v1, 0xffff
+ #sh $v0, POLYFT3_tpage( $a1 )
+ #srl $v0, $v1, 16
+ #sh $v0, POLYFT3_clut( $a1 )
+
+ #mfc2 $v0, C2_RGB2
+ #lui $v1, 0x2400
+ #or $v0, $v1
+
+ #b .sort_FT3_pri
+ #sw $v0, POLYFT3_rgbc( $a1 )
+
+#.F3_light_smt:
+
+ #lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ #lhu $v1, 4( $t4 )
+ #sll $v0, 3
+ #sll $v1, 3
+ #addu $v0, $t2
+ #addu $v1, $t2
+ #lwc2 C2_VXY1, 0( $v0 )
+ #lwc2 C2_VZ1 , 4( $v0 )
+ #lw $v0, 8( $t4 )
+ #lwc2 C2_VXY2, 0( $v1 )
+ #lwc2 C2_VZ2 , 4( $v1 )
+ #lui $v1, 0x3000 # Load color
+ #or $v0, $v1
+ #mtc2 $v0, C2_RGB
+
+ #addiu $t4, 12
+ #nop
+
+ #NCCT
+
+ #andi $v0, $a3, 0x20 # Textured triangle
+ #bnez $v0, .F3_light_tex_smt
+ #nop
+
+ #swc2 C2_RGB0, POLYG3_rgbc0( $a1 )
+ #swc2 C2_RGB1, POLYG3_rgbc1( $a1 )
+ #swc2 C2_RGB2, POLYG3_rgbc2( $a1 )
+
+ #b .sort_G3_pri
+ #nop
+
+# .F3_light_tex_smt:
+
+ # lhu $v0, 0( $t4 ) # Load texture coordinates
+ # lhu $v1, 2( $t4 )
+ # sh $v0, POLYGT3_uv0( $a1 )
+ # lhu $v0, 4( $t4 )
+ # sh $v1, POLYGT3_uv1( $a1 )
+ # sh $v0, POLYGT3_uv2( $a1 )
+
+ # lw $v1, 8( $t4 )
+ # nop
+ # andi $v0, $v1, 0xffff
+ # sh $v0, POLYGT3_tpage( $a1 )
+ # srl $v0, $v1, 16
+ # sh $v0, POLYGT3_clut( $a1 )
+
+ # mfc2 $v0, C2_RGB0
+ # lui $v1, 0x3400
+ # or $v0, $v1
+
+ # swc2 C2_RGB1, POLYGT3_rgbc1( $a1 )
+ # swc2 C2_RGB2, POLYGT3_rgbc2( $a1 )
+
+ # b .sort_GT3_pri
+ # sw $v0, POLYGT3_rgbc0( $a1 )
+
+.sort_F3_pri:
+
+ swc2 C2_SXY0, POLYF3_xy0($a1)
+ swc2 C2_SXY1, POLYF3_xy1($a1)
+ swc2 C2_SXY2, POLYF3_xy2($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF3_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0500
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF3_len
+
+.sort_FT3_pri:
+
+ swc2 C2_SXY0, POLYFT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYFT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYFT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT3_len
+
+.sort_G3_pri:
+
+ swc2 C2_SXY0, POLYG3_xy0( $a1 )
+ swc2 C2_SXY1, POLYG3_xy1( $a1 )
+ swc2 C2_SXY2, POLYG3_xy2( $a1 )
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG3_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG3_len
+
+.sort_GT3_pri:
+
+ swc2 C2_SXY0, POLYGT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYGT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYGT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT3_len
+
+## Quads
+
+.prim_quad: # Quad processing
+
+ mfc2 $t6, C2_SXY0 # Retrieve first projected vertex
+
+ lhu $t5, 6( $t4 ) # Project the last vertex
+ addiu $t4, 8
+ sll $t5, 3
+ addu $t5, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+
+ nRTPS
+
+ cfc2 $v1, C2_FLAG # Get GTE flag value
+
+ nop
+
+ bltz $v1, .skip_prim
+ nop
+
+ ClipTestQuad
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s2, $s3 # v2 & v3
+ beqz $v0, .do_draw_q
+ and $v0, $s3, $s0 # v3 & v0
+ beqz $v0, .do_draw_q
+ and $v0, $s0, $s2 # v0 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s3 # v1 & v3
+ beqz $v0, .do_draw_q
+ nop
+ b .skip_prim
+ nop
+
+.do_draw_q:
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F4_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud quad
+ bnez $v0, .F4_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_textured
+ nop
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2800
+ or $v0, $v1
+
+ b .sort_F4_pri
+ sw $v0, POLYF4_rgbc($a1)
+
+.F4_textured:
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2c00
+ or $v0, $v1
+ sw $v0, POLYFT4_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0($t4) # Load texture coordinates
+ lhu $v1, 2($t4)
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_FT4_pri
+ sh $v0, POLYFT4_clut($a1)
+
+.F4_gouraud:
+
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3800
+ or $v0, $at
+ .set at
+ sw $v0, POLYG4_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG4_rgbc1($a1)
+ lw $v1, 12($t4)
+ sw $v0, POLYG4_rgbc2($a1)
+ b .sort_G4_pri
+ sw $v1, POLYG4_rgbc3($a1)
+
+
+.F4_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ srl $v1, $a3, 2
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F4_light_smt
+ nop
+
+ lw $v0, 4( $t4 )
+ lui $v1, 0x2800
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8
+ nop
+
+ NCCS
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F4_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF4_rgbc( $a1 )
+
+ b .sort_F4_pri
+ nop
+
+.F4_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT4_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2
+ lui $v1, 0x2c00
+ or $v0, $v1
+
+ b .sort_FT4_pri
+ sw $v0, POLYFT4_rgbc( $a1 )
+
+.F4_light_smt:
+
+ lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ lhu $v1, 4( $t4 )
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0( $v0 )
+ lwc2 C2_VZ1 , 4( $v0 )
+ lwc2 C2_VXY2, 0( $v1 )
+ lwc2 C2_VZ2 , 4( $v1 )
+
+ lw $v0, 8( $t4 )
+ lui $v1, 0x3800 # Load color
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ nNCCT
+
+ lhu $v0, 6( $t4 ) # Load normal 3
+
+ addiu $t4, 12
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F4_light_tex_smt
+ nop
+
+ swc2 C2_RGB0, POLYG4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYG4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYG4_rgbc2( $a1 )
+
+ nNCCS
+
+ swc2 C2_RGB2, POLYG4_rgbc3( $a1 )
+
+ b .sort_G4_pri
+ nop
+
+# Smooth-lit textured quad: store the three colors already computed by
+# .F4_light_smt, light the fourth vertex, then fill in UVs, tpage and CLUT.
+.F4_light_tex_smt:
+
+ # NOTE(review): 0x3400 is ORed into the first color here although the
+ # primitive is a POLYGT4. .F4_light_smt loaded C2_RGB with 0x3800, so
+ # presumably the code byte carried through the GTE makes the result
+ # 0x3c - confirm.
+ mfc2 $v0, C2_RGB0
+ lui $v1, 0x3400
+ or $v0, $v1
+ sw $v0, POLYGT4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYGT4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYGT4_rgbc2( $a1 )
+
+ # Light the fourth vertex (its normal was loaded into V0 by .F4_light_smt)
+ NCCS
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYGT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYGT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYGT4_uv2( $a1 )
+ sh $v1, POLYGT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 ) # Tpage (low half) + CLUT (high half)
+ swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) # Color of the fourth vertex
+
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYGT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_GT4_pri
+ sh $v0, POLYGT4_clut( $a1 ) # Delay slot: store CLUT
+
+.sort_F4_pri:
+
+ sw $t6, POLYF4_xy0($a1)
+ swc2 C2_SXY0, POLYF4_xy1($a1)
+ swc2 C2_SXY1, POLYF4_xy2($a1)
+ swc2 C2_SXY2, POLYF4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0600
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF4_len
+
+.sort_FT4_pri:
+
+ sw $t6, POLYFT4_xy0($a1)
+ swc2 C2_SXY0, POLYFT4_xy1($a1)
+ swc2 C2_SXY1, POLYFT4_xy2($a1)
+ swc2 C2_SXY2, POLYFT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT4_len
+
+.sort_G4_pri:
+
+ sw $t6, POLYG4_xy0($a1)
+ swc2 C2_SXY0, POLYG4_xy1($a1)
+ swc2 C2_SXY1, POLYG4_xy2($a1)
+ swc2 C2_SXY2, POLYG4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG4_len
+
+.sort_GT4_pri:
+
+ sw $t6, POLYGT4_xy0($a1)
+ swc2 C2_SXY0, POLYGT4_xy1($a1)
+ swc2 C2_SXY1, POLYGT4_xy2($a1)
+ swc2 C2_SXY2, POLYGT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0c00
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT4_len
+
+.skip_prim:
+
+ b .sort_loop
+ nop
+
+.exit:
+
+ lw $s0, 0( $sp )
+ lw $s1, 4( $sp )
+ lw $s2, 8( $sp )
+ lw $s3, 12( $sp )
+ addiu $sp, 16
+ jr $ra
+ move $v0, $a1
diff --git a/libpsn00b/smd/smdparser.s b/libpsn00b/smd/smdparser.s
new file mode 100644
index 0000000..656e509
--- /dev/null
+++ b/libpsn00b/smd/smdparser.s
@@ -0,0 +1,130 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+.include "smd/smd_s.inc"
+
+# Byte offsets of the fields in the primitive record that ReadSMD fills in,
+# relative to the record pointer passed in $a0.
+# ReadSMD only ever stores whole 32-bit words, so the 16-bit fields are written
+# in adjacent pairs (v0+v1, v2+v3, n0+n1, n2+n3, tuv0+tuv1, tuv2+tuv3,
+# tpage+clut); the odd-numbered names are therefore never referenced directly.
+# NOTE(review): presumably mirrors a C structure declared in smd/smd.h - keep
+# the two in sync; verify against the header.
+.set SMD_PRI_ID, 0
+.set SMD_PRI_v0, 4
+.set SMD_PRI_v1, 6
+.set SMD_PRI_v2, 8
+.set SMD_PRI_v3, 10
+.set SMD_PRI_n0, 12
+.set SMD_PRI_n1, 14
+.set SMD_PRI_n2, 16
+.set SMD_PRI_n3, 18
+.set SMD_PRI_rgbc0, 20
+.set SMD_PRI_rgbc1, 24
+.set SMD_PRI_rgbc2, 28
+.set SMD_PRI_rgbc3, 32
+.set SMD_PRI_tuv0, 36
+.set SMD_PRI_tuv1, 38
+.set SMD_PRI_tuv2, 40
+.set SMD_PRI_tuv3, 42
+.set SMD_PRI_tpage, 44
+.set SMD_PRI_clut, 46
+
+.section .text.OpenSMD, "ax", @progbits
+.global OpenSMD
+.type OpenSMD, @function
+
+# OpenSMD - validate an SMD header and begin parsing its primitive list.
+#
+# In:  $a0 = address of the SMD header
+# Out: $v0 = halfword at SMD_HEAD_NPRIMS, or 0 if the ID word does not match
+#
+# On success the word at SMD_HEAD_PPRIMS is saved to _smd_parse_addr, which
+# ReadSMD later dereferences as the address of the next primitive.
+# NOTE(review): the PPRIMS word is stored unmodified, so it presumably has to
+# be an absolute address already when this is called - confirm with callers.
+# Clobbers $v1. This file is assembled with .set noreorder, so the instruction
+# following each branch/jump is its delay slot.
+OpenSMD:
+ lw $v0, SMD_HEAD_ID($a0)
+ li $v1, 0x01444d53 # Bytes 'S','M','D',0x01 as a little-endian word
+
+ bne $v0, $v1, .not_smd
+ nop
+
+ lw $v0, SMD_HEAD_PPRIMS($a0)
+ la $v1, _smd_parse_addr
+ sw $v0, 0($v1)
+
+ # The return value must not be loaded in the delay slot of jr: the R3000
+ # has no load interlock, so the caller's first instruction would run in the
+ # load delay slot and still see the old $v0 (the PPRIMS pointer).
+ lhu $v0, SMD_HEAD_NPRIMS($a0)
+ jr $ra
+ nop
+
+.not_smd:
+ jr $ra
+ move $v0, $0
+
+.section .text.ReadSMD, "ax", @progbits
+.global ReadSMD
+.type ReadSMD, @function
+
+# ReadSMD - copy the next primitive of the SMD stream into a fixed-layout
+# record.
+#
+# In:  $a0 = address of the output record (fields at the SMD_PRI_* offsets)
+# Out: $v0 = $a0, or 0 if the primitive's ID word is zero
+#
+# The read position is the address held in _smd_parse_addr. The ID word is
+# always written to the record. For a non-zero ID the read position is advanced
+# by the top byte of the ID (bits 24-31), counted from the start of the
+# primitive; for a zero ID it is left unchanged and nothing else is copied.
+#
+# ID bits tested here:
+#   bits 2-3  0 = no normal words follow the vertex words, 2 = two normal
+#             words follow, any other value = one normal word follows
+#   bit 5     set = three more words follow the colour word
+#             (tuv0/1, tuv2/3, tpage/clut)
+#
+# Only one colour word (rgbc0) is copied; rgbc1-rgbc3 are never written.
+# Clobbers $v1, $a1, $a2. Code is hand-scheduled under .set noreorder: the
+# instruction after each lw is its load delay slot and must not use the loaded
+# register.
+ReadSMD:
+ la $v0, _smd_parse_addr
+ lw $v0, 0($v0)
+ nop
+
+ lw $a2, 0($v0) # Load primitive ID
+ addiu $a1, $v0, 4 # $a1 = read cursor, just past the ID word
+
+ sw $a2, SMD_PRI_ID($a0)
+
+ beqz $a2, $end_prim
+ nop
+
+ srl $v1, $a2, 24 # Get primitive size
+ addu $v0, $v1 # $v0 still points at the ID word here
+ la $v1, _smd_parse_addr
+ sw $v0, 0($v1) # Save address of the following primitive
+
+ lw $v0, 0($a1) # Copy vertex coords
+ lw $v1, 4($a1)
+ sw $v0, SMD_PRI_v0($a0) # Word store covers v0 and v1
+ sw $v1, SMD_PRI_v2($a0) # Word store covers v2 and v3
+ addiu $a1, 8
+
+ srl $v0, $a2, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, $light
+ nop
+
+ b $no_light
+ nop
+
+$light:
+ srl $v1, $a2, 2 # Re-extract the lighting mode (bits 2-3)
+ lw $v0, 0($a1) # Copy first normal word (n0 and n1)
+ andi $v1, 0x3
+ sw $v0, SMD_PRI_n0($a0)
+
+ # bne with an immediate operand is an assembler macro, not a single
+ # instruction (NOTE(review): presumably expanded through $at - confirm).
+ bne $v1, 0x2, $light_flat
+ addiu $a1, 4 # Delay slot: step past the first normal word
+
+ lw $v1, 0($a1) # Mode 2 only: copy second normal word (n2 and n3)
+ addiu $a1, 4
+ sw $v1, SMD_PRI_n2($a0)
+
+$light_flat:
+$no_light:
+
+ lw $v0, 0($a1) # Copy the colour word
+ nop
+ sw $v0, SMD_PRI_rgbc0($a0)
+ addiu $a1, 4
+
+ srl $v0, $a2, 5 # Textured? (ID bit 5)
+ andi $v0, 0x1
+ beqz $v0, $not_textured
+ nop
+
+ lw $v0, 0($a1) # Copy three texture words
+ lw $v1, 4($a1)
+ sw $v0, SMD_PRI_tuv0($a0) # Covers tuv0 and tuv1
+ lw $v0, 8($a1)
+ sw $v1, SMD_PRI_tuv2($a0) # Covers tuv2 and tuv3
+ sw $v0, SMD_PRI_tpage($a0) # Covers tpage and clut
+
+$not_textured:
+
+ jr $ra
+ move $v0, $a0 # Delay slot: return the record pointer
+
+$end_prim:
+
+ jr $ra
+ move $v0, $0 # Delay slot: return 0 for a zero ID word
+
+# Address of the next primitive to read: set by OpenSMD and advanced by
+# ReadSMD. 4 bytes, 4-byte aligned.
+# NOTE(review): .comm declares a common symbol that the linker allocates, so
+# the .section directive above it likely has no effect on where the variable
+# ends up - confirm against the linker map.
+.section .bss._smd_parse_addr, "w"
+.comm _smd_parse_addr, 4, 4