aboutsummaryrefslogtreecommitdiff
path: root/libpsn00b/smd/smd_flat.S
diff options
context:
space:
mode:
authorXavier Del Campo Romero <xavi92@disroot.org>2025-07-05 02:34:11 +0200
committerXavier Del Campo Romero <xavi92@disroot.org>2025-07-05 02:34:11 +0200
commitbeb76e4dd362374b8f42cd971d394bba1074cd8d (patch)
tree3ea4cc342737afb9225c01160c92647ba66c78bd /libpsn00b/smd/smd_flat.S
parent5d9aa2d3dfc7d6e51c2eb942ab4cdbae5571a40a (diff)
downloadpsn00bsdk-fix-include.tar.gz
Replace .include with #includefix-include
For some reason, both mipsel-unknown-elf-gcc 8.2.0 and mipsel-non-elf 15.1.0 were unable to resolve .include assembler directives. As a workaround, it is still possible to use the preprocessor, and therefore the usual #include preprocessor directive. However, this requires the assembly files to use the uppercase .S file extension.
Diffstat (limited to 'libpsn00b/smd/smd_flat.S')
-rw-r--r--libpsn00b/smd/smd_flat.S833
1 files changed, 833 insertions, 0 deletions
diff --git a/libpsn00b/smd/smd_flat.S b/libpsn00b/smd/smd_flat.S
new file mode 100644
index 0000000..b9235e9
--- /dev/null
+++ b/libpsn00b/smd/smd_flat.S
@@ -0,0 +1,833 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+#include "gtereg.inc"
+#include "inline_s.inc"
+#include "smd/smd_s.inc"
+
+.section .text.smdSortModelFlat, "ax", @progbits
+.global smdSortModelFlat
+.type smdSortModelFlat, @function
+
+smdSortModelFlat:
+ # a0 - Pointer SC_OT structure
+ # a1 - Pointer to next primitive
+ # a2 - Pointer to SMD data address
+ # v0 - New pointer of primitive buffer (return)
+
+ addiu $sp, -16
+ sw $s0, 0( $sp )
+ sw $s1, 4( $sp )
+ sw $s2, 8( $sp )
+ sw $s3, 12( $sp )
+
+ la $v0, _sc_clip
+ lw $t8, 0($v0)
+ lw $t9, 4($v0)
+
+ lw $t1, SMD_HEAD_PVERTS( $a2 )
+ lw $t2, SMD_HEAD_PNORMS( $a2 )
+ lw $t3, SMD_HEAD_PPRIMS( $a2 )
+
+.sort_loop:
+
+ nop
+ lw $a3, 0($t3) # Get primitive ID word
+ move $t4, $t3
+
+ beqz $a3, .exit # Check if terminator (just zero)
+ addiu $t4, 4
+
+ lhu $t5, 0( $t4 ) # Load vertices
+ lhu $t6, 2( $t4 )
+ lhu $t7, 4( $t4 )
+ sll $t5, 3
+ sll $t6, 3
+ sll $t7, 3
+ addu $t5, $t1
+ addu $t6, $t1
+ addu $t7, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+ lwc2 C2_VXY1, 0( $t6 )
+ lwc2 C2_VZ1 , 4( $t6 )
+ lwc2 C2_VXY2, 0( $t7 )
+ lwc2 C2_VZ2 , 4( $t7 )
+
+ srl $v1, $a3, 24 # Get primitive size
+ addu $t3, $v1 # Step main pointer to next primitive
+
+ RTPT
+
+ cfc2 $v0, C2_FLAG # Get GTE flag value
+ nop
+
+ bltz $v0, .skip_prim # Skip primitive if Z overflow
+ nop
+
+ #NCLIP # Backface culling
+
+ #mfc2 $v1, C2_MAC0
+
+ andi $v0, $a3, 0x3
+
+ #bltz $v1, .skip_prim
+ #nop
+
+ beq $v0, 0x1, .prim_tri # If primitive is a triangle
+ nop
+ beq $v0, 0x2, .prim_quad # If primitive is a quad
+ nop
+
+ b .skip_prim
+ nop
+
+## Triangles
+
+.prim_tri: # Triangle processing
+
+ addiu $t4, 8 # Advance from indices
+
+ #AVSZ3 # Calculate average Z
+
+ ClipTestTri
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw
+ and $v0, $s2, $s0 # v2 & v0
+ beqz $v0, .do_draw
+ nop
+ b .skip_prim
+ nop
+
+.do_draw:
+
+ #srl $v0, $a3, 2 # Lighting enabled?
+ #andi $v0, 0x3
+ #bnez $v0, .F3_light
+ #nop
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F3_textured
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud shaded
+ bnez $v0, .F3_gouraud
+ nop
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2000
+ or $v0, $v1
+
+ b .sort_F3_pri
+ sw $v0, POLYF3_rgbc( $a1 )
+
+.F3_textured:
+
+ lw $v0, 0( $t4 ) # Flat color, no lighting
+ lui $v1, 0x2400
+ or $v0, $v1
+ sw $v0, POLYFT3_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT3_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT3_uv1( $a1 )
+ sh $v0, POLYFT3_uv2( $a1 )
+
+ lw $v0, 8( $t4 ) # Tpage + CLUT
+ nop
+ andi $v1, $v0, 0xffff
+ sh $v1, POLYFT3_tpage( $a1 )
+ srl $v0, 16
+
+ b .sort_FT3_pri
+ sh $v0, POLYFT3_clut( $a1 )
+
+.F3_gouraud:
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3000
+ or $v0, $at
+ .set at
+ sw $v0, POLYG3_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG3_rgbc1($a1)
+ b .sort_G3_pri
+ sw $v0, POLYG3_rgbc2($a1)
+
+#.F3_light:
+
+ #lhu $v0, 0( $t4 ) # Load normal 0
+
+ #srl $v1, $a3, 2
+ #andi $v1, $v1, 0x3
+
+ #sll $v0, 3
+ #addu $v0, $t2
+ #lwc2 C2_VXY0, 0( $v0 )
+ #lwc2 C2_VZ0 , 4( $v0 )
+
+ #beq $v1, 0x2, .F3_light_smt
+ #nop
+
+ #lw $v0, 4( $t4 )
+ #lui $v1, 0x2000
+ #or $v0, $v1
+ #mtc2 $v0, C2_RGB
+
+ #addiu $t4, 8
+ #nop
+
+ #NCCS
+
+ #andi $v0, $a3, 0x20 # Textured triangle
+ #bnez $v0, .F3_light_tex
+ #nop
+
+ #swc2 C2_RGB2, POLYF3_rgbc( $a1 )
+
+ #b .sort_F3_pri
+ #nop
+
+#.F3_light_tex:
+
+ #lhu $v0, 0( $t4 ) # Load texture coordinates
+ #lhu $v1, 2( $t4 )
+ #sh $v0, POLYFT3_uv0( $a1 )
+ #lhu $v0, 4( $t4 )
+ #sh $v1, POLYFT3_uv1( $a1 )
+ #sh $v0, POLYFT3_uv2( $a1 )
+
+ #lw $v1, 8( $t4 )
+ #nop
+ #andi $v0, $v1, 0xffff
+ #sh $v0, POLYFT3_tpage( $a1 )
+ #srl $v0, $v1, 16
+ #sh $v0, POLYFT3_clut( $a1 )
+
+ #mfc2 $v0, C2_RGB2
+ #lui $v1, 0x2400
+ #or $v0, $v1
+
+ #b .sort_FT3_pri
+ #sw $v0, POLYFT3_rgbc( $a1 )
+
+#.F3_light_smt:
+
+ #lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ #lhu $v1, 4( $t4 )
+ #sll $v0, 3
+ #sll $v1, 3
+ #addu $v0, $t2
+ #addu $v1, $t2
+ #lwc2 C2_VXY1, 0( $v0 )
+ #lwc2 C2_VZ1 , 4( $v0 )
+ #lw $v0, 8( $t4 )
+ #lwc2 C2_VXY2, 0( $v1 )
+ #lwc2 C2_VZ2 , 4( $v1 )
+ #lui $v1, 0x3000 # Load color
+ #or $v0, $v1
+ #mtc2 $v0, C2_RGB
+
+ #addiu $t4, 12
+ #nop
+
+ #NCCT
+
+ #andi $v0, $a3, 0x20 # Textured triangle
+ #bnez $v0, .F3_light_tex_smt
+ #nop
+
+ #swc2 C2_RGB0, POLYG3_rgbc0( $a1 )
+ #swc2 C2_RGB1, POLYG3_rgbc1( $a1 )
+ #swc2 C2_RGB2, POLYG3_rgbc2( $a1 )
+
+ #b .sort_G3_pri
+ #nop
+
+# .F3_light_tex_smt:
+
+ # lhu $v0, 0( $t4 ) # Load texture coordinates
+ # lhu $v1, 2( $t4 )
+ # sh $v0, POLYGT3_uv0( $a1 )
+ # lhu $v0, 4( $t4 )
+ # sh $v1, POLYGT3_uv1( $a1 )
+ # sh $v0, POLYGT3_uv2( $a1 )
+
+ # lw $v1, 8( $t4 )
+ # nop
+ # andi $v0, $v1, 0xffff
+ # sh $v0, POLYGT3_tpage( $a1 )
+ # srl $v0, $v1, 16
+ # sh $v0, POLYGT3_clut( $a1 )
+
+ # mfc2 $v0, C2_RGB0
+ # lui $v1, 0x3400
+ # or $v0, $v1
+
+ # swc2 C2_RGB1, POLYGT3_rgbc1( $a1 )
+ # swc2 C2_RGB2, POLYGT3_rgbc2( $a1 )
+
+ # b .sort_GT3_pri
+ # sw $v0, POLYGT3_rgbc0( $a1 )
+
+.sort_F3_pri:
+
+ swc2 C2_SXY0, POLYF3_xy0($a1)
+ swc2 C2_SXY1, POLYF3_xy1($a1)
+ swc2 C2_SXY2, POLYF3_xy2($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF3_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0500
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF3_len
+
+.sort_FT3_pri:
+
+ swc2 C2_SXY0, POLYFT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYFT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYFT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT3_len
+
+.sort_G3_pri:
+
+ swc2 C2_SXY0, POLYG3_xy0( $a1 )
+ swc2 C2_SXY1, POLYG3_xy1( $a1 )
+ swc2 C2_SXY2, POLYG3_xy2( $a1 )
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG3_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0700
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG3_len
+
+.sort_GT3_pri:
+
+ swc2 C2_SXY0, POLYGT3_xy0( $a1 )
+ swc2 C2_SXY1, POLYGT3_xy1( $a1 )
+ swc2 C2_SXY2, POLYGT3_xy2( $a1 )
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT3_len
+
+## Quads
+
+.prim_quad: # Quad processing
+
+ mfc2 $t6, C2_SXY0 # Retrieve first projected vertex
+
+ lhu $t5, 6( $t4 ) # Project the last vertex
+ addiu $t4, 8
+ sll $t5, 3
+ addu $t5, $t1
+ lwc2 C2_VXY0, 0( $t5 )
+ lwc2 C2_VZ0 , 4( $t5 )
+
+ nRTPS
+
+ cfc2 $v1, C2_FLAG # Get GTE flag value
+
+ nop
+
+ bltz $v1, .skip_prim
+ nop
+
+ ClipTestQuad
+
+ and $v0, $s0, $s1 # v0 & v1
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s2 # v1 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s2, $s3 # v2 & v3
+ beqz $v0, .do_draw_q
+ and $v0, $s3, $s0 # v3 & v0
+ beqz $v0, .do_draw_q
+ and $v0, $s0, $s2 # v0 & v2
+ beqz $v0, .do_draw_q
+ and $v0, $s1, $s3 # v1 & v3
+ beqz $v0, .do_draw_q
+ nop
+ b .skip_prim
+ nop
+
+.do_draw_q:
+
+ srl $v0, $a3, 2 # Lighting enabled?
+ andi $v0, 0x3
+ bnez $v0, .F4_light
+ nop
+
+ andi $v0, $a3, 0x10 # Gouraud quad
+ bnez $v0, .F4_gouraud
+ nop
+
+ andi $v0, $a3, 0x20 # Textured quad
+ bnez $v0, .F4_textured
+ nop
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2800
+ or $v0, $v1
+
+ b .sort_F4_pri
+ sw $v0, POLYF4_rgbc($a1)
+
+.F4_textured:
+
+ lw $v0, 0($t4)
+ lui $v1, 0x2c00
+ or $v0, $v1
+ sw $v0, POLYFT4_rgbc( $a1 )
+ addiu $t4, 4
+
+ lhu $v0, 0($t4) # Load texture coordinates
+ lhu $v1, 2($t4)
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_FT4_pri
+ sh $v0, POLYFT4_clut($a1)
+
+.F4_gouraud:
+
+ lw $v0, 0($t4)
+ lw $v1, 4($t4)
+ .set noat
+ lui $at, 0x3800
+ or $v0, $at
+ .set at
+ sw $v0, POLYG4_rgbc0($a1)
+ lw $v0, 8($t4)
+ sw $v1, POLYG4_rgbc1($a1)
+ lw $v1, 12($t4)
+ sw $v0, POLYG4_rgbc2($a1)
+ b .sort_G4_pri
+ sw $v1, POLYG4_rgbc3($a1)
+
+
+.F4_light:
+
+ lhu $v0, 0( $t4 ) # Load normal 0
+
+ srl $v1, $a3, 2
+ andi $v1, $v1, 0x3
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ beq $v1, 0x2, .F4_light_smt
+ nop
+
+ lw $v0, 4( $t4 )
+ lui $v1, 0x2800
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ addiu $t4, 8
+ nop
+
+ NCCS
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F4_light_tex
+ nop
+
+ swc2 C2_RGB2, POLYF4_rgbc( $a1 )
+
+ b .sort_F4_pri
+ nop
+
+.F4_light_tex:
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYFT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYFT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYFT4_uv2( $a1 )
+ sh $v1, POLYFT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ nop
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYFT4_tpage( $a1 )
+ srl $v0, $v1, 16
+ sh $v0, POLYFT4_clut( $a1 )
+
+ mfc2 $v0, C2_RGB2
+ lui $v1, 0x2c00
+ or $v0, $v1
+
+ b .sort_FT4_pri
+ sw $v0, POLYFT4_rgbc( $a1 )
+
+.F4_light_smt:
+
+ lhu $v0, 2( $t4 ) # Load normals 1 and 2
+ lhu $v1, 4( $t4 )
+ sll $v0, 3
+ sll $v1, 3
+ addu $v0, $t2
+ addu $v1, $t2
+ lwc2 C2_VXY1, 0( $v0 )
+ lwc2 C2_VZ1 , 4( $v0 )
+ lwc2 C2_VXY2, 0( $v1 )
+ lwc2 C2_VZ2 , 4( $v1 )
+
+ lw $v0, 8( $t4 )
+ lui $v1, 0x3800 # Load color
+ or $v0, $v1
+ mtc2 $v0, C2_RGB
+
+ nNCCT
+
+ lhu $v0, 6( $t4 ) # Load normal 3
+
+ addiu $t4, 12
+
+ sll $v0, 3
+ addu $v0, $t2
+ lwc2 C2_VXY0, 0( $v0 )
+ lwc2 C2_VZ0 , 4( $v0 )
+
+ andi $v0, $a3, 0x20 # Textured triangle
+ bnez $v0, .F4_light_tex_smt
+ nop
+
+ swc2 C2_RGB0, POLYG4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYG4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYG4_rgbc2( $a1 )
+
+ nNCCS
+
+ swc2 C2_RGB2, POLYG4_rgbc3( $a1 )
+
+ b .sort_G4_pri
+ nop
+
+.F4_light_tex_smt:
+
+ mfc2 $v0, C2_RGB0
+ lui $v1, 0x3400
+ or $v0, $v1
+ sw $v0, POLYGT4_rgbc0( $a1 )
+ swc2 C2_RGB1, POLYGT4_rgbc1( $a1 )
+ swc2 C2_RGB2, POLYGT4_rgbc2( $a1 )
+
+ NCCS
+
+ lhu $v0, 0( $t4 ) # Load texture coordinates
+ lhu $v1, 2( $t4 )
+ sh $v0, POLYGT4_uv0( $a1 )
+ lhu $v0, 4( $t4 )
+ sh $v1, POLYGT4_uv1( $a1 )
+ lhu $v1, 6( $t4 )
+ sh $v0, POLYGT4_uv2( $a1 )
+ sh $v1, POLYGT4_uv3( $a1 )
+
+ lw $v1, 8( $t4 )
+ swc2 C2_RGB2, POLYGT4_rgbc3( $a1 )
+
+ andi $v0, $v1, 0xffff
+ sh $v0, POLYGT4_tpage( $a1 )
+ srl $v0, $v1, 16
+
+ b .sort_GT4_pri
+ sh $v0, POLYGT4_clut( $a1 )
+
+.sort_F4_pri:
+
+ sw $t6, POLYF4_xy0($a1)
+ swc2 C2_SXY0, POLYF4_xy1($a1)
+ swc2 C2_SXY1, POLYF4_xy2($a1)
+ swc2 C2_SXY2, POLYF4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYF4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0600
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYF4_len
+
+.sort_FT4_pri:
+
+ sw $t6, POLYFT4_xy0($a1)
+ swc2 C2_SXY0, POLYFT4_xy1($a1)
+ swc2 C2_SXY1, POLYFT4_xy2($a1)
+ swc2 C2_SXY2, POLYFT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYFT4_len
+
+.sort_G4_pri:
+
+ sw $t6, POLYG4_xy0($a1)
+ swc2 C2_SXY0, POLYG4_xy1($a1)
+ swc2 C2_SXY1, POLYG4_xy2($a1)
+ swc2 C2_SXY2, POLYG4_xy3($a1)
+
+ la $v0, _smd_tpage_base
+ lhu $v0, 0($v0)
+ srl $v1, $a3, 6 # Get blend mode
+ andi $v1, 0x3
+ sll $v1, 5
+ or $v0, $v1
+ lui $v1, 0xe100
+ or $v0, $v1
+ sw $v0, POLYG4_tpage($a1) # Store TPage
+
+ .set noat
+
+ lui $v1, 0x0900
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYG4_len
+
+.sort_GT4_pri:
+
+ sw $t6, POLYGT4_xy0($a1)
+ swc2 C2_SXY0, POLYGT4_xy1($a1)
+ swc2 C2_SXY1, POLYGT4_xy2($a1)
+ swc2 C2_SXY2, POLYGT4_xy3($a1)
+
+ .set noat
+
+ lui $v1, 0x0c00
+ lw $v0, 0($a0)
+ lui $at, 0xff00
+ and $v1, $at
+ lui $at, 0x00ff
+ or $at, 0xffff
+ and $v0, $at
+ or $v1, $v0
+ sw $v1, 0($a1)
+ lw $v0, 0($a0)
+ and $a1, $at
+ lui $at, 0xff00
+ and $v0, $at
+ or $v0, $a1
+ sw $v0, 0($a0)
+
+ .set at
+
+ lui $v0, 0x8000
+ or $a1, $v0
+
+ b .sort_loop
+ addiu $a1, POLYGT4_len
+
+.skip_prim:
+
+ b .sort_loop
+ nop
+
+.exit:
+
+ lw $s0, 0( $sp )
+ lw $s1, 4( $sp )
+ lw $s2, 8( $sp )
+ lw $s3, 12( $sp )
+ addiu $sp, 16
+ jr $ra
+ move $v0, $a1