From beb76e4dd362374b8f42cd971d394bba1074cd8d Mon Sep 17 00:00:00 2001 From: Xavier Del Campo Romero Date: Sat, 5 Jul 2025 02:34:11 +0200 Subject: Replace .include with #include For some reason, both mipsel-unknown-elf-gcc 8.2.0 and mipsel-none-elf-gcc 15.1.0 were unable to resolve .include assembler directives. As a workaround, it is still possible to use the preprocessor, and therefore the usual #include preprocessor directive. However, this requires the assembly files to use the uppercase .S file extension. --- examples/demos/n00bdemo/plasmagen.S | 181 +++++ examples/demos/n00bdemo/plasmagen.s | 181 ----- examples/graphics/tilesasm/drawtiles.S | 272 ++++++++ examples/graphics/tilesasm/drawtiles.s | 272 -------- examples/lowlevel/cartrom/CMakeLists.txt | 2 +- examples/lowlevel/cartrom/rom.S | 402 +++++++++++ examples/lowlevel/cartrom/rom.s | 402 ----------- indev/psn00bdbg-mk2/monitor/sdkinst.S | 65 ++ indev/psn00bdbg-mk2/monitor/sdkinst.s | 65 -- indev/psn00bdbg-mk2/monitor/stubinst.S | 185 +++++ indev/psn00bdbg-mk2/monitor/stubinst.s | 185 ----- indev/psxpad/card.S | 377 +++++++++++ indev/psxpad/card.s | 377 ----------- indev/psxpad/pad.S | 264 ++++++++ indev/psxpad/pad.s | 264 -------- libpsn00b/CMakeLists.txt | 2 +- libpsn00b/include/psxgpu.h | 2 +- libpsn00b/libc/_start.S | 18 + libpsn00b/libc/_start.s | 18 - libpsn00b/libc/clz.S | 53 ++ libpsn00b/libc/clz.s | 53 -- libpsn00b/libc/memset.S | 119 ++++ libpsn00b/libc/memset.s | 119 ---- libpsn00b/libc/setjmp.S | 50 ++ libpsn00b/libc/setjmp.s | 50 -- libpsn00b/psxapi/_syscalls.S | 161 +++++ libpsn00b/psxapi/_syscalls.s | 161 ----- libpsn00b/psxapi/drivers.S | 164 +++++ libpsn00b/psxapi/drivers.s | 164 ----- libpsn00b/psxapi/fs.S | 58 ++ libpsn00b/psxapi/fs.s | 58 -- libpsn00b/psxapi/stdio.S | 140 ++++ libpsn00b/psxapi/stdio.s | 140 ---- libpsn00b/psxapi/sys.S | 358 ++++++++++ libpsn00b/psxapi/sys.s | 358 ---------- libpsn00b/psxetc/_dl_resolve_wrapper.S | 48 ++ libpsn00b/psxetc/_dl_resolve_wrapper.s | 48 -- 
libpsn00b/psxgpu/common.c | 2 +- libpsn00b/psxgte/initgeom.S | 44 ++ libpsn00b/psxgte/initgeom.s | 44 -- libpsn00b/psxgte/matrix.S | 439 ++++++++++++ libpsn00b/psxgte/matrix.s | 439 ------------ libpsn00b/psxgte/squareroot.S | 118 ++++ libpsn00b/psxgte/squareroot.s | 118 ---- libpsn00b/psxgte/vector.S | 123 ++++ libpsn00b/psxgte/vector.s | 123 ---- libpsn00b/psxpress/vlc.S | 577 ++++++++++++++++ libpsn00b/psxpress/vlc.s | 577 ---------------- libpsn00b/smd/smd.S | 928 +++++++++++++++++++++++++ libpsn00b/smd/smd.s | 928 ------------------------- libpsn00b/smd/smd_cel.S | 1078 ++++++++++++++++++++++++++++++ libpsn00b/smd/smd_cel.s | 1078 ------------------------------ libpsn00b/smd/smd_flat.S | 833 +++++++++++++++++++++++ libpsn00b/smd/smd_flat.s | 833 ----------------------- libpsn00b/smd/smdparser.S | 130 ++++ libpsn00b/smd/smdparser.s | 130 ---- 56 files changed, 7189 insertions(+), 7189 deletions(-) create mode 100644 examples/demos/n00bdemo/plasmagen.S delete mode 100644 examples/demos/n00bdemo/plasmagen.s create mode 100644 examples/graphics/tilesasm/drawtiles.S delete mode 100644 examples/graphics/tilesasm/drawtiles.s create mode 100644 examples/lowlevel/cartrom/rom.S delete mode 100644 examples/lowlevel/cartrom/rom.s create mode 100644 indev/psn00bdbg-mk2/monitor/sdkinst.S delete mode 100644 indev/psn00bdbg-mk2/monitor/sdkinst.s create mode 100644 indev/psn00bdbg-mk2/monitor/stubinst.S delete mode 100644 indev/psn00bdbg-mk2/monitor/stubinst.s create mode 100644 indev/psxpad/card.S delete mode 100644 indev/psxpad/card.s create mode 100644 indev/psxpad/pad.S delete mode 100644 indev/psxpad/pad.s create mode 100644 libpsn00b/libc/_start.S delete mode 100644 libpsn00b/libc/_start.s create mode 100644 libpsn00b/libc/clz.S delete mode 100644 libpsn00b/libc/clz.s create mode 100644 libpsn00b/libc/memset.S delete mode 100644 libpsn00b/libc/memset.s create mode 100644 libpsn00b/libc/setjmp.S delete mode 100644 libpsn00b/libc/setjmp.s create mode 100644 
libpsn00b/psxapi/_syscalls.S delete mode 100644 libpsn00b/psxapi/_syscalls.s create mode 100644 libpsn00b/psxapi/drivers.S delete mode 100644 libpsn00b/psxapi/drivers.s create mode 100644 libpsn00b/psxapi/fs.S delete mode 100644 libpsn00b/psxapi/fs.s create mode 100644 libpsn00b/psxapi/stdio.S delete mode 100644 libpsn00b/psxapi/stdio.s create mode 100644 libpsn00b/psxapi/sys.S delete mode 100644 libpsn00b/psxapi/sys.s create mode 100644 libpsn00b/psxetc/_dl_resolve_wrapper.S delete mode 100644 libpsn00b/psxetc/_dl_resolve_wrapper.s create mode 100644 libpsn00b/psxgte/initgeom.S delete mode 100644 libpsn00b/psxgte/initgeom.s create mode 100644 libpsn00b/psxgte/matrix.S delete mode 100644 libpsn00b/psxgte/matrix.s create mode 100644 libpsn00b/psxgte/squareroot.S delete mode 100644 libpsn00b/psxgte/squareroot.s create mode 100644 libpsn00b/psxgte/vector.S delete mode 100644 libpsn00b/psxgte/vector.s create mode 100644 libpsn00b/psxpress/vlc.S delete mode 100644 libpsn00b/psxpress/vlc.s create mode 100644 libpsn00b/smd/smd.S delete mode 100644 libpsn00b/smd/smd.s create mode 100644 libpsn00b/smd/smd_cel.S delete mode 100644 libpsn00b/smd/smd_cel.s create mode 100644 libpsn00b/smd/smd_flat.S delete mode 100644 libpsn00b/smd/smd_flat.s create mode 100644 libpsn00b/smd/smdparser.S delete mode 100644 libpsn00b/smd/smdparser.s diff --git a/examples/demos/n00bdemo/plasmagen.S b/examples/demos/n00bdemo/plasmagen.S new file mode 100644 index 0000000..07b3f48 --- /dev/null +++ b/examples/demos/n00bdemo/plasmagen.S @@ -0,0 +1,181 @@ +.set noreorder + +.set POLYG4_tag, 0 +.set POLYG4_rgb0, 4 +.set POLYG4_xy0, 8 +.set POLYG4_rgb1, 12 +.set POLYG4_xy1, 16 +.set POLYG4_rgb2, 20 +.set POLYG4_xy2, 24 +.set POLYG4_rgb3, 28 +.set POLYG4_xy3, 32 +.set POLYG4_len, 36 + +# a0 - Plasma output +# a1 - Counter +.global genPlasma +.type genPlasma, @function +genPlasma: + move $t1, $0 + +.gn_y_loop: move $t0, $0 + +.gn_x_loop: la $v0, plasma_sin1 + sll $v1, $t0, 1 + addu $v1, $v0 + lh $v0, 
0($v1) + + la $a2, plasma_sin2 + sll $v1, $t1, 1 + addu $v1, $a2 + lh $v1, 0($v1) + nop + add $v0, $v1 + + add $v1, $t0, $t1 + add $v1, $a1 + divu $v1, 90 + la $a2, plasma_sin3 + mfhi $v1 + + sll $v1, 1 + addu $v1, $a2 + lh $v1, 0($v1) + nop + add $v0, $v1 + + andi $v0, 0xff + + sb $v0, 0($a0) + addu $a0, 1 + + addiu $t0, 1 + blt $t0, 41, .gn_x_loop + nop + + addiu $t1, 1 + blt $t1, 31, .gn_y_loop + nop + + jr $ra + nop + + +# a0 - OT entry +# a1 - Primitive address +# a2 - Plasma map source +.global sortPlasma +.type sortPlasma, @function +sortPlasma: + move $t1, $0 + +.y_loop: + move $t0, $0 + +.x_loop: + lbu $v0, 0($a2) + addiu $a2, 1 + la $a3, plasma_colors + sll $v0, 2 + addu $v0, $a3 + lw $v0, 0($v0) + + lui $v1, 0x3800 + or $v0, $v1 + sw $v0, POLYG4_rgb0($a1) + + + lbu $v0, 0($a2) + nop + sll $v0, 2 + addu $v0, $a3 + lw $v0, 0($v0) + nop + sw $v0, POLYG4_rgb1($a1) + + + lbu $v0, 40($a2) + nop + sll $v0, 2 + addu $v0, $a3 + lw $v0, 0($v0) + nop + sw $v0, POLYG4_rgb2($a1) + + + lbu $v0, 41($a2) + nop + sll $v0, 2 + addu $v0, $a3 + lw $v0, 0($v0) + nop + sw $v0, POLYG4_rgb3($a1) + + + sll $v0, $t0, 4 + andi $v0, 0xffff + sll $v1, $t1, 20 + or $v0, $v1 + sw $v0, POLYG4_xy0($a1) + + sll $v0, $t0, 4 + andi $v0, 0xffff + addi $v0, 16 + sll $v1, $t1, 20 + or $v0, $v1 + sw $v0, POLYG4_xy1($a1) + + sll $v0, $t0, 4 + andi $v0, 0xffff + sll $v1, $t1, 4 + addi $v1, 16 + sll $v1, 16 + or $v0, $v1 + sw $v0, POLYG4_xy2($a1) + + sll $v0, $t0, 4 + andi $v0, 0xffff + addi $v0, 16 + sll $v1, $t1, 4 + addi $v1, 16 + sll $v1, 16 + or $v0, $v1 + sw $v0, POLYG4_xy3($a1) + + .set noat + + lui $v1, 0x0800 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + addiu $a1, POLYG4_len + + + addiu $t0, 1 + blt $t0, 40, .x_loop + nop + + addiu $a2, 1 + addiu $t1, 1 + blt 
$t1, 30, .y_loop + nop + + + jr $ra + move $v0, $a1 \ No newline at end of file diff --git a/examples/demos/n00bdemo/plasmagen.s b/examples/demos/n00bdemo/plasmagen.s deleted file mode 100644 index 07b3f48..0000000 --- a/examples/demos/n00bdemo/plasmagen.s +++ /dev/null @@ -1,181 +0,0 @@ -.set noreorder - -.set POLYG4_tag, 0 -.set POLYG4_rgb0, 4 -.set POLYG4_xy0, 8 -.set POLYG4_rgb1, 12 -.set POLYG4_xy1, 16 -.set POLYG4_rgb2, 20 -.set POLYG4_xy2, 24 -.set POLYG4_rgb3, 28 -.set POLYG4_xy3, 32 -.set POLYG4_len, 36 - -# a0 - Plasma output -# a1 - Counter -.global genPlasma -.type genPlasma, @function -genPlasma: - move $t1, $0 - -.gn_y_loop: move $t0, $0 - -.gn_x_loop: la $v0, plasma_sin1 - sll $v1, $t0, 1 - addu $v1, $v0 - lh $v0, 0($v1) - - la $a2, plasma_sin2 - sll $v1, $t1, 1 - addu $v1, $a2 - lh $v1, 0($v1) - nop - add $v0, $v1 - - add $v1, $t0, $t1 - add $v1, $a1 - divu $v1, 90 - la $a2, plasma_sin3 - mfhi $v1 - - sll $v1, 1 - addu $v1, $a2 - lh $v1, 0($v1) - nop - add $v0, $v1 - - andi $v0, 0xff - - sb $v0, 0($a0) - addu $a0, 1 - - addiu $t0, 1 - blt $t0, 41, .gn_x_loop - nop - - addiu $t1, 1 - blt $t1, 31, .gn_y_loop - nop - - jr $ra - nop - - -# a0 - OT entry -# a1 - Primitive address -# a2 - Plasma map source -.global sortPlasma -.type sortPlasma, @function -sortPlasma: - move $t1, $0 - -.y_loop: - move $t0, $0 - -.x_loop: - lbu $v0, 0($a2) - addiu $a2, 1 - la $a3, plasma_colors - sll $v0, 2 - addu $v0, $a3 - lw $v0, 0($v0) - - lui $v1, 0x3800 - or $v0, $v1 - sw $v0, POLYG4_rgb0($a1) - - - lbu $v0, 0($a2) - nop - sll $v0, 2 - addu $v0, $a3 - lw $v0, 0($v0) - nop - sw $v0, POLYG4_rgb1($a1) - - - lbu $v0, 40($a2) - nop - sll $v0, 2 - addu $v0, $a3 - lw $v0, 0($v0) - nop - sw $v0, POLYG4_rgb2($a1) - - - lbu $v0, 41($a2) - nop - sll $v0, 2 - addu $v0, $a3 - lw $v0, 0($v0) - nop - sw $v0, POLYG4_rgb3($a1) - - - sll $v0, $t0, 4 - andi $v0, 0xffff - sll $v1, $t1, 20 - or $v0, $v1 - sw $v0, POLYG4_xy0($a1) - - sll $v0, $t0, 4 - andi $v0, 0xffff - addi $v0, 16 - sll 
$v1, $t1, 20 - or $v0, $v1 - sw $v0, POLYG4_xy1($a1) - - sll $v0, $t0, 4 - andi $v0, 0xffff - sll $v1, $t1, 4 - addi $v1, 16 - sll $v1, 16 - or $v0, $v1 - sw $v0, POLYG4_xy2($a1) - - sll $v0, $t0, 4 - andi $v0, 0xffff - addi $v0, 16 - sll $v1, $t1, 4 - addi $v1, 16 - sll $v1, 16 - or $v0, $v1 - sw $v0, POLYG4_xy3($a1) - - .set noat - - lui $v1, 0x0800 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - addiu $a1, POLYG4_len - - - addiu $t0, 1 - blt $t0, 40, .x_loop - nop - - addiu $a2, 1 - addiu $t1, 1 - blt $t1, 30, .y_loop - nop - - - jr $ra - move $v0, $a1 \ No newline at end of file diff --git a/examples/graphics/tilesasm/drawtiles.S b/examples/graphics/tilesasm/drawtiles.S new file mode 100644 index 0000000..15a0707 --- /dev/null +++ b/examples/graphics/tilesasm/drawtiles.S @@ -0,0 +1,272 @@ +# +# LibPSn00b Example Programs +# +# Drawing Tile-maps with Assembler Routines +# 2022 Meido-Tek Productions / PSn00bSDK Project +# +# Example by John "Lameguy" Wilbert Villamor (Lameguy64) +# +# This file contains the assembler routine DrawTiles which can be called from +# a C or C++ compiled module. The routine makes use of constants and assembler +# macros written in the GNU GAS syntax. +# +# Assembler routines called from C-language modules can freely use registers +# $v0-$v1, $at, $a0-$a3, $t0-$t9 without preserving through stack. Registers +# $s0-$s7, $gp and $fp must be preserved through stack before returning and +# registers $k0-$k1 should not be used for obvious reasons (kernel registers). +# $sp and $ra are used as stack pointer and return address respectively. +# $0 or $zero is constantly zero. 
+# +# A C caller always passes arguments as 32-bit values on registers $a0 to $a3 +# regardless of the data type specified in the function's C declaration. +# Additional arguments are stored in the stack 16 bytes relative to the stack +# pointer ($sp). To get around the stack being modified in different parts of +# a larger routine the stack pointer would be copied to the frame pointer ($fp) +# and the previous value of the frame pointer would be pushed into stack. This +# way additional arguments may be read anywhere starting from $fp+20 instead. +# +.set noreorder # Disable GAS' annoying nop insertion + +.equ db, 1 # Constants for "emulating" SNASM style structs +.equ dh, 2 +.equ dw, 4 + +# +# TILEDEF struct +# + rs=0 # rs is used to emulate SNASM style structs +.equ TILEDEF_uv , rs # Tile texture coordinate + rs=rs+dh +.equ TILEDEF_clut , rs # Tile CLUT + rs=rs+dh +.equ TILEDEF_pad , rs # Padding + rs=rs+dh +.equ TILEDEF_tpage , rs # Tile tpage + rs=rs+dh +.equ TILEDEF_len , rs # Entry length + +# +# TILEINFO struct (to use as offsets) +# + rs=0 +.equ TILEINFO_window_x , rs # \ + rs=rs+dh # - Window coordinates +.equ TILEINFO_window_y , rs # / + rs=rs+dh +.equ TILEINFO_window_w , rs # \ + rs=rs+dh # - Window size +.equ TILEINFO_window_h , rs # / + rs=rs+dh +.equ TILEINFO_tiles , rs # Pointer to TILEDEF entries + rs=rs+dw +.equ TILEINFO_mapdata , rs # Pointer to map data + rs=rs+dw +.equ TILEINFO_map_w , rs # Map width in tile units + rs=rs+dh +.equ TILEINFO_map_h , rs # Map height in tile units + rs=rs+dh + +# +# TILEPKT struct +# + rs=0 +.equ TILEPKT_tag , rs # Primitive tag + rs=rs+dw +.equ TILEPKT_tpage , rs # tpage packet + rs=rs+dw +.equ TILEPKT_rgbc , rs # Tile color + rs=rs+dw +.equ TILEPKT_x , rs # Tile screen coordinates + rs=rs+dh +.equ TILEPKT_y , rs + rs=rs+dh +.equ TILEPKT_uv , rs # Tile texture coordinates + rs=rs+dh +.equ TILEPKT_clut , rs # Tile CLUT + rs=rs+dh +.equ TILEPKT_len , rs # Packet length + +# addprim Macro +# +# Registers a primitive 
to a ordering table entry. +# +# Arguments: +# ot - Register name of pointer to ordering table entry +# pri - Pointer to a primitive packet +# len - Size of packet in long words (specify as 0xnn00, ie. 0x2000) +# +# Destroys: +# at, v0, v1 +# +.macro addprim ot,pri,len + .set noat + lw $v0, 0(\ot) # Get OT entry + lui $at, 0x00ff # Mask out the packet length field + or $at, 0xffff + and $v0, $at + lui $v1, \len # Merge packet length + or $v1, $v0 + sw $v1, 0(\pri) # Store updated OT entry to packet + lw $v0, 0(\ot) # Get OT entry + and \pri, $at # Mask out last 8-bits of packet address + lui $at, 0xff00 # Mask out OT entry's address + and $v0, $at + or $v0, \pri # Merge packet address to OT entry + sw $v0, 0(\ot) # Store updated OT entry + .set at +.endm + +# +# Start of text section +# +.section .text + +# DrawTiles Function +# +# Renders a tilemap by generating TILEPKT primitives (combined SPRT_16 and +# DR_TPAGE primitives) and registering it to the specified ordering table. +# The drawing region, tile definitions and the tilemap are specified through +# a TILEINFO struct. +# +# C Declaration: +# extern u_char *DrawTiles(int scroll_x, int scroll_y, +# TILEINFO *info, long *ot, u_char *pri); +# +# Arguments: +# scroll_x - X scrolling offset of tile-map +# scroll_y - Y scrolling offset of tile-map +# info - Pointer to a TILEINFO struct +# ot - Pointer to a ordering table entry +# pri - Pointer to next primitive (placed in stack) +# +# Returns: +# New next primitive pointer value. 
+# +.global DrawTiles # Declare symbol as global +.type DrawTiles, @function # Declare it as a function +DrawTiles: # Symbol label of function + + addiu $sp, -4 # Push frame pointer (fp) to stack + sw $fp, 0($sp) + move $fp, $sp # Copy stack pointer (sp) to fp + + # Register reference: + # + # t0 - Packet address + # t1 - Map data address + # t2 - Tile X offset + # t3 - Tile Y offset + # t4 - Tile X coordinate backup + # t5 - Tile X loop counter + # t6 - Number of tiles to sort per row + # t7 - Number of tile rows to sort + + lhu $t6, TILEINFO_window_w($a2) # Calculate size of window in tiles + lhu $t7, TILEINFO_window_h($a2) + addi $t6, 15 # So the result will be rounded-up + addi $t7, 15 + srl $t6, 4 # Effectively divide by 16 + srl $t7, 4 + + lw $t0, 20($sp) # Obtain next primitive pointer + + srl $t2, $a0, 4 # Compute map offset in tile units + srl $t3, $a1, 4 + + bgez $a0, .Lno_neg_clip_X # Negative X clip test + sub $v0, $0 , $a0 + move $t2, $0 # Force tile offset to zero + add $v0, 15 # Reduce number of tile columns + srl $v0, 4 + sub $t6, $v0 +.Lno_neg_clip_X: + lhu $v1, TILEINFO_map_w($a2) # Positive X clip test + add $v0, $t2, $t6 + addi $v0, 1 + blt $v0, $v1, .Lno_pos_clip_X + nop + sub $v0, $v1 # Compute how many tiles to clip + sub $t6, $v0 # Reduce number of tile columns +.Lno_pos_clip_X: + bgez $a1, .Lno_neg_clip_Y # Negative Y clip test + sub $v0, $0 , $a1 + move $t3, $0 + add $v0, 15 + srl $v0, 4 + sub $t7, $v0 +.Lno_neg_clip_Y: + lhu $v1, TILEINFO_map_h($a2) # Positive Y clip test + add $v0, $t3, $t7 + addi $v0, 1 + blt $v0, $v1, .Lno_pos_clip_Y + nop + sub $v0, $v1 + sub $t7, $v0 +.Lno_pos_clip_Y: + bltz $t6, .Lno_draw # Exit when no tiles to draw + nop + bltz $t7, .Lno_draw + nop + + lh $v0, TILEINFO_window_x($a2) # Compute pixel coordinates for + sub $a0, $v0, $a0 # tiles based on the scroll offset + sll $v0, $t2, 4 + add $a0, $v0 + lh $v0, TILEINFO_window_y($a2) + sub $a1, $v0, $a1 + sll $v0, $t3, 4 + add $a1, $v0 + move $t4, $a0 + sll $t2, 1 
+ +.Lloop_y: # Begin of tile row loop + lhu $v0, TILEINFO_map_w($a2) # Get width of tilemap + move $t5, $t6 # n tiles to draw row + mult $t3, $v0 # Multiply Y offset by map width + lw $t1, TILEINFO_mapdata($a2) # Get tilemap address + nop + addu $t1, $t2 # Add X offset to address + mflo $v0 # Get Y offset result + sll $v0, 1 # Multiply by two + addu $t1, $v0 # Add to tile address + +.Lloop_x: # Begin of tile column loop + lhu $v1, 0($t1) # Load tile index + addiu $t1, 2 # Advance to next tile + beq $v1, 0xFFFF, .Lskip_tile # Skip tile if index is 0xFFFF + nop + lw $v0, TILEINFO_tiles($a2) # Get pointer to TILEDEF entries + sll $v1, 3 # Multiply by 8 (size of TILEDEFs) + addu $v1, $v0 # Adjust to tiledefs pointer + lw $v0, TILEDEF_uv($v1) # Obtain UV+CLUT + lhu $v1, TILEDEF_tpage($v1) # Obtain tpage + sw $v0, TILEPKT_uv($t0) # Start constructing packet + lui $v0, 0xE100 # tpage packet code + or $v0, $v1 # Merge tpage bits + sw $v0, TILEPKT_tpage($t0) + sh $a0, TILEPKT_x($t0) # Set tile screen coords + sh $a1, TILEPKT_y($t0) + li $v0, 0x7C7F7F7F # Packet code and color + sw $v0, TILEPKT_rgbc($t0) + addprim $a3, $t0, 0x0400 # Register to OT + addiu $t0, TILEPKT_len # Advance packet pointer +.Lskip_tile: + addi $t5, -1 # Decrement and continue iterating + bgez $t5, .Lloop_x # if non-zero + addiu $a0, 16 # Advance X tile coordinate + + move $a0, $t4 # Restore tile X coordinate + addi $t3, 1 # Increment Y offset + addi $t7, -1 # Decrement and continue iterating + bgez $t7, .Lloop_y # if non-zero + addiu $a1, 16 # Advance Y tile coordinate + +.Lno_draw: + + move $v0, $t0 # Set packet pointer as return value + + lw $fp, 0($sp) # Restore frame pointer and return + jr $ra + addiu $sp, 4 + # DrawTiles + \ No newline at end of file diff --git a/examples/graphics/tilesasm/drawtiles.s b/examples/graphics/tilesasm/drawtiles.s deleted file mode 100644 index 15a0707..0000000 --- a/examples/graphics/tilesasm/drawtiles.s +++ /dev/null @@ -1,272 +0,0 @@ -# -# LibPSn00b Example 
Programs -# -# Drawing Tile-maps with Assembler Routines -# 2022 Meido-Tek Productions / PSn00bSDK Project -# -# Example by John "Lameguy" Wilbert Villamor (Lameguy64) -# -# This file contains the assembler routine DrawTiles which can be called from -# a C or C++ compiled module. The routine makes use of constants and assembler -# macros written in the GNU GAS syntax. -# -# Assembler routines called from C-language modules can freely use registers -# $v0-$v1, $at, $a0-$a3, $t0-$t9 without preserving through stack. Registers -# $s0-$s9, $gp and $fp must be preserved through stack before returning and -# registers $k0-$k1 should not be used for obvious reasons (kernel registers). -# $sp and $ra is used as stack pointer and return address respectively. -# $0 or $zero is constantly zero. -# -# A C caller always passes arguments as 32-bit values on registers $a0 to $a3 -# regardless of the data type specified in the function's C declaration. -# Additional arguments are stored in the stack 16 bytes relative to the stack -# pointer ($sp). To get around the stack being modified in different parts of -# a larger routine the stack pointer would be copied to the frame pointer ($fp) -# and the previous value of the frame pointer would be pushed into stack. This -# way additional arguments may be read anywhere starting from $fp+20 instead. 
-# -.set noreorder # Disable GAS' annoying nop insertion - -.equ db, 1 # Constants for "emulating" SNASM style structs -.equ dh, 2 -.equ dw, 4 - -# -# TILEDEF struct -# - rs=0 # rs is used to emulate SNASM style structs -.equ TILEDEF_uv , rs # Tile texture coordinate - rs=rs+dh -.equ TILEDEF_clut , rs # Tile CLUT - rs=rs+dh -.equ TILEDEF_pad , rs # Padding - rs=rs+dh -.equ TILEDEF_tpage , rs # Tile tpage - rs=rs+dh -.equ TILEDEF_len , rs # Entry length - -# -# TILEINFO struct (to use as offsets) -# - rs=0 -.equ TILEINFO_window_x , rs # \ - rs=rs+dh # - Window coordinates -.equ TILEINFO_window_y , rs # / - rs=rs+dh -.equ TILEINFO_window_w , rs # \ - rs=rs+dh # - Window size -.equ TILEINFO_window_h , rs # / - rs=rs+dh -.equ TILEINFO_tiles , rs # Pointer to TILEDEF entries - rs=rs+dw -.equ TILEINFO_mapdata , rs # Pointer to map data - rs=rs+dw -.equ TILEINFO_map_w , rs # Map width in tile units - rs=rs+dh -.equ TILEINFO_map_h , rs # Map height in tile units - rs=rs+dh - -# -# TILEPKT struct -# - rs=0 -.equ TILEPKT_tag , rs # Primitive tag - rs=rs+dw -.equ TILEPKT_tpage , rs # tpage packet - rs=rs+dw -.equ TILEPKT_rgbc , rs # Tile color - rs=rs+dw -.equ TILEPKT_x , rs # Tile screen coordinates - rs=rs+dh -.equ TILEPKT_y , rs - rs=rs+dh -.equ TILEPKT_uv , rs # Tile texture coordinates - rs=rs+dh -.equ TILEPKT_clut , rs # Tile CLUT - rs=rs+dh -.equ TILEPKT_len , rs # Packet length - -# addprim Macro -# -# Registers a primitive to a ordering table entry. -# -# Arguments: -# ot - Register name of pointer to ordering table entry -# pri - Pointer to a primitive packet -# len - Size of packet in long words (specify as 0xnn00, ie. 
0x2000) -# -# Destroys: -# at, v0, v1 -# -.macro addprim ot,pri,len - .set noat - lw $v0, 0(\ot) # Get OT entry - lui $at, 0x00ff # Mask out the packet length field - or $at, 0xffff - and $v0, $at - lui $v1, \len # Merge packet length - or $v1, $v0 - sw $v1, 0(\pri) # Store updated OT entry to packet - lw $v0, 0(\ot) # Get OT entry - and \pri, $at # Mask out last 8-bits of packet address - lui $at, 0xff00 # Mask out OT entry's address - and $v0, $at - or $v0, \pri # Merge packet address to OT entry - sw $v0, 0(\ot) # Store updated OT entry - .set at -.endm - -# -# Start of text section -# -.section .text - -# DrawTiles Function -# -# Renders a tilemap by generating TILEPKT primitives (combined SPRT_16 and -# DR_TPAGE primitives) and registering it to the specified ordering table. -# The drawing region, tile definitions and the tilemap are specified through -# a TILEINFO struct. -# -# C Declaration: -# extern u_char *DrawTiles(int scroll_x, int scroll_y, -# TILEINFO *info, long *ot, u_char *pri); -# -# Arguments: -# scroll_x - X scrolling offset of tile-map -# scroll_y - Y scrolling offset of tile-map -# info - Pointer to a TILEINFO struct -# ot - Pointer to a ordering table entry -# pri - Pointer to next primitive (placed in stack) -# -# Returns: -# New next primitive pointer value. 
-# -.global DrawTiles # Declare symbol as global -.type DrawTiles, @function # Declare it as a function -DrawTiles: # Symbol label of function - - addiu $sp, -4 # Push frame pointer (fp) to stack - sw $fp, 0($sp) - move $fp, $sp # Copy stack pointer (sp) to fp - - # Register reference: - # - # t0 - Packet address - # t1 - Map data address - # t2 - Tile X offset - # t3 - Tile Y offset - # t4 - Tile X coordinate backup - # t5 - Tile X loop counter - # t6 - Number of tiles to sort per row - # t7 - Number of tile rows to sort - - lhu $t6, TILEINFO_window_w($a2) # Calculate size of window in tiles - lhu $t7, TILEINFO_window_h($a2) - addi $t6, 15 # So the result will be rounded-up - addi $t7, 15 - srl $t6, 4 # Effectively divide by 16 - srl $t7, 4 - - lw $t0, 20($sp) # Obtain next primitive pointer - - srl $t2, $a0, 4 # Compute map offset in tile units - srl $t3, $a1, 4 - - bgez $a0, .Lno_neg_clip_X # Negative X clip test - sub $v0, $0 , $a0 - move $t2, $0 # Force tile offset to zero - add $v0, 15 # Reduce number of tile columns - srl $v0, 4 - sub $t6, $v0 -.Lno_neg_clip_X: - lhu $v1, TILEINFO_map_w($a2) # Positive X clip test - add $v0, $t2, $t6 - addi $v0, 1 - blt $v0, $v1, .Lno_pos_clip_X - nop - sub $v0, $v1 # Compute how many tiles to clip - sub $t6, $v0 # Reduce number of tile columns -.Lno_pos_clip_X: - bgez $a1, .Lno_neg_clip_Y # Negative Y clip test - sub $v0, $0 , $a1 - move $t3, $0 - add $v0, 15 - srl $v0, 4 - sub $t7, $v0 -.Lno_neg_clip_Y: - lhu $v1, TILEINFO_map_h($a2) # Positive Y clip test - add $v0, $t3, $t7 - addi $v0, 1 - blt $v0, $v1, .Lno_pos_clip_Y - nop - sub $v0, $v1 - sub $t7, $v0 -.Lno_pos_clip_Y: - bltz $t6, .Lno_draw # Exit when no tiles to draw - nop - bltz $t7, .Lno_draw - nop - - lh $v0, TILEINFO_window_x($a2) # Compute pixel coordinates for - sub $a0, $v0, $a0 # tiles based on the scroll offset - sll $v0, $t2, 4 - add $a0, $v0 - lh $v0, TILEINFO_window_y($a2) - sub $a1, $v0, $a1 - sll $v0, $t3, 4 - add $a1, $v0 - move $t4, $a0 - sll $t2, 1 
- -.Lloop_y: # Begin of tile row loop - lhu $v0, TILEINFO_map_w($a2) # Get width of tilemap - move $t5, $t6 # n tiles to draw row - mult $t3, $v0 # Multiply Y offset by map width - lw $t1, TILEINFO_mapdata($a2) # Get tilemap address - nop - addu $t1, $t2 # Add X offset to address - mflo $v0 # Get Y offset result - sll $v0, 1 # Multiply by two - addu $t1, $v0 # Add to tile address - -.Lloop_x: # Begin of tile column loop - lhu $v1, 0($t1) # Load tile index - addiu $t1, 2 # Advance to next tile - beq $v1, 0xFFFF, .Lskip_tile # Skip tile if index is 0xFFFF - nop - lw $v0, TILEINFO_tiles($a2) # Get pointer to TILEDEF entries - sll $v1, 3 # Multiply by 8 (size of TILEDEFs) - addu $v1, $v0 # Adjust to tiledefs pointer - lw $v0, TILEDEF_uv($v1) # Obtain UV+CLUT - lhu $v1, TILEDEF_tpage($v1) # Obtain tpage - sw $v0, TILEPKT_uv($t0) # Start constructing packet - lui $v0, 0xE100 # tpage packet code - or $v0, $v1 # Merge tpage bits - sw $v0, TILEPKT_tpage($t0) - sh $a0, TILEPKT_x($t0) # Set tile screen coords - sh $a1, TILEPKT_y($t0) - li $v0, 0x7C7F7F7F # Packet code and color - sw $v0, TILEPKT_rgbc($t0) - addprim $a3, $t0, 0x0400 # Register to OT - addiu $t0, TILEPKT_len # Advance packet pointer -.Lskip_tile: - addi $t5, -1 # Decrement and continue iterating - bgez $t5, .Lloop_x # if non-zero - addiu $a0, 16 # Advance X tile coordinate - - move $a0, $t4 # Restore tile X coordinate - addi $t3, 1 # Increment Y offset - addi $t7, -1 # Decrement and continue iterating - bgez $t7, .Lloop_y # if non-zero - addiu $a1, 16 # Advance Y tile coordinate - -.Lno_draw: - - move $v0, $t0 # Set packet pointer as return value - - lw $fp, 0($sp) # Restore frame pointer and return - jr $ra - addiu $sp, 4 - # DrawTiles - \ No newline at end of file diff --git a/examples/lowlevel/cartrom/CMakeLists.txt b/examples/lowlevel/cartrom/CMakeLists.txt index 6a94d9f..b674002 100644 --- a/examples/lowlevel/cartrom/CMakeLists.txt +++ b/examples/lowlevel/cartrom/CMakeLists.txt @@ -11,7 +11,7 @@ project( 
HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk" ) -file(GLOB _sources *.c *.s) +file(GLOB _sources *.c *.S) # This example only uses the toolchain (without the rest of the SDK), so the # executable has to be created manually and converted into raw binary format diff --git a/examples/lowlevel/cartrom/rom.S b/examples/lowlevel/cartrom/rom.S new file mode 100644 index 0000000..587ba6f --- /dev/null +++ b/examples/lowlevel/cartrom/rom.S @@ -0,0 +1,402 @@ +# LibPSn00b Example Programs +# Part of the PSn00bSDk project +# +# TurboBoot Example by Lameguy64 +# +# Note: This example is being obsoleted as GAS is not ideal for making +# ROM firmwares. Use ARMIPS instead, but it cannot build this example +# as it is not GAS syntax compatible. + + +# Uncomment either PAR or XPLORER depending on the cartridge +# you're going to use (makes disabling turbo boot via switch to work) + +#.set PAR, 0 +#.set XPLORER, 1 + + +.set noreorder + +.include "cop0.inc" # Contains definitions for cop0 registers + +.set SP_base, 0x801ffff0 +.set BREAK_ADDR, 0xa0000040 # cop0 breakpoint vector address + + +.set RAM_buff, 2048 +.set RAM_handle, 2052 +.set RAM_tcb, 2056 +.set RAM_evcb, 2060 +.set RAM_stack, 2064 +.set RAM_psexe, 2068 + + +.set EXE_pc0, 0 # PS-EXE header offsets +.set EXE_gp0, 4 +.set EXE_taddr, 8 +.set EXE_tsize, 12 +.set EXE_daddr, 16 +.set EXE_dsize, 20 +.set EXE_baddr, 24 +.set EXE_bsize, 28 +.set EXE_spaddr, 32 +.set EXE_sp_size, 36 +.set EXE_sp, 40 +.set EXE_fp, 44 +.set EXE_gp, 48 +.set EXE_ret, 52 +.set EXE_base, 56 +.set EXE_datapos, 60 + + +.section .text + + +# ROM header +# +# The Licensed by... strings are essential otherwise the BIOS will not +# execute the boot vectors. Always make sure the tty message fields (string +# after Licensed by) must be no more than 80 bytes long and must have a null +# terminating byte. +# +# Postboot vector isn't particularly practical as its only executed in between +# the PS boot logo and the point where game execution occurs. 
+header: + # Postboot vector + .word 0 + .ascii "Licensed by Sony Computer Entertainment Inc." + .ascii "Not officially licensed or endorsed by Sony Computer Entertainment Inc." + + .balign 0x80 # This keeps things in the header aligned + + # Preboot vector + .word preboot + .ascii "Licensed by Sony Computer Entertainment Inc." + .ascii "Cart ROM example for PSn00bSDK https://github.com/lameguy64/psn00bsdk" + + .balign 0x80 # This keeps things in the header aligned + + +# Preboot vector +# +# All it does is it initializes a breakpoint vector at 0x40 +# and sets a cop0 breakpoint at 0x80030000 to perform a midboot +# exploit as preboot doesn't have the kernel area initialized. +# +preboot: + + li $v0, 1 + +.ifdef XPLORER + lui $a0, 0x1f06 # Read switch status for Xplorer + lbu $v0, 0($a0) +.endif + +.ifdef PAR + lui $a0, 0x1f02 # Read switch status for PAR/GS devices + lbu $v0, 0x18($a0) +.endif + + nop + andi $v0, 0x1 + beqz $v0, .no_rom # If switch is off don't install hook + nop # and effectively disables the cartridge + + li $v0, BREAK_ADDR # Patch a jump at cop0 breakpoint vector + + li $a0, 0x3c1a1f00 # lui $k0, $1f00 + sw $a0, 0($v0) + la $a1, entry # ori $k0, < address to code entrypoint > + andi $a1, 0xffff + lui $a0, 0x375a + or $a0, $a1 + sw $a0, 4($v0) + li $a0, 0x03400008 # jr $k0 + sw $a0, 8($v0) + sw $0 , 12($v0) # nop + + lui $v0, 0xffff # Set BPCM and BDAM masks + ori $v0, 0xffff + mtc0 $v0, BDAM + mtc0 $v0, BPCM + + + li $v0, 0x80030000 # Set break on PC and data-write address + + mtc0 $v0, BDA # BPC break is for compatibility with no$psx + mtc0 $v0, BPC # as it does not emulate break on BDA + + lui $v0, 0xeb80 # Enable break on data-write and break + mtc0 $v0, DCIC # on PC to DCIC control register + +.no_rom: + + jr $ra # Return to BIOS + nop + + +# Actual ROM entrypoint +.global entry +entry: + + mtc0 $0 , DCIC # Clear DCIC register + + la $sp, SP_base # Set stack base + la $gp, 0x8000c000 # Set GP address as RAM base addr in this case + + 
jal SetDefaultExitFromException # Set default exit handler just in case + nop + jal ExitCriticalSection # Exit out of critical section (brings back interrupts) + nop + + # Beyond this point, the PS1 is in full control to the ROM + + la $a0, m_banner # Print out program banner + jal printf + addiu $sp, -4 + addiu $sp, 4 + + la $a0, m_cdinit + jal printf + addiu $sp, -4 + addiu $sp, 4 + + jal _96_init # Initialize the CD + nop + + la $a0, m_ok # Print OK message if init didn't crash + jal printf + addiu $sp, -4 + addiu $sp, 4 + + la $a0, m_readfile + la $a1, s_systemcnf + jal printf + addiu $sp, -8 + addiu $sp, 8 + + la $a0, s_systemcnf # Attempt to open the SYSTEM.CNF file on CD + li $a1, 1 + jal open + addiu $sp, -8 + addiu $sp, 8 + + bltz $v0, .no_systemcnf # Fallback to loading PSX.EXE if not found + nop + + sw $v0, RAM_handle($gp) # Save file handle + + move $a0, $v0 # Read file contents of SYSTEM.CNF + move $a1, $gp + li $a2, 0x0800 + jal read + addiu $sp, -12 + addiu $sp, 12 + + lw $a0, RAM_handle($gp) # Close file + jal close + addiu $sp, -4 + addiu $sp, 4 + + la $a0, m_ok # Output ok message + jal printf + addiu $sp, -4 + addiu $sp, 4 + + # Parse CNF file + + la $a0, m_parsecnf + jal printf + nop + + la $a1, s_tcb # Get TCB number + jal strcasestr + move $a0, $gp + jal skipspace # Skip spaces + addiu $a0, $v0, 3 + addiu $a0, $v0, -2 # Step two characters back and inject '0x' + li $v0, '0' + sb $v0, 0($a0) + li $v0, 'x' + sb $v0, 1($a0) + jal atoi + addiu $sp, -4 + addiu $sp, 4 + move $s1, $v0 + + la $a1, s_evcb # Get EVCB number + jal strcasestr + move $a0, $gp + jal skipspace + addiu $a0, $v0, 5 + addiu $a0, $v0, -2 + li $v0, '0' + sb $v0, 0($a0) + li $v0, 'x' + sb $v0, 1($a0) + jal atoi + addiu $sp, -4 + addiu $sp, 4 + move $s0, $v0 + + la $a1, s_stack # Get STACK address + jal strcasestr + move $a0, $gp + jal skipspace + addiu $a0, $v0, 5 + addiu $a0, $v0, -2 + li $v0, '0' + sb $v0, 0($a0) + li $v0, 'x' + sb $v0, 1($a0) + jal atoi + addiu $sp, -4 + addiu 
$sp, 4 + move $s2, $v0 + + la $a1, s_boot # Get the PS-EXE file name + jal strcasestr + move $a0, $gp + + jal skipspace # Skip spaces + addiu $a0, $v0, 4 + + addiu $a0, $gp, RAM_psexe # Extract the line + jal getline + move $a1, $v0 + + la $a0, m_ok # Print successful parsing + jal printf + addiu $sp, -4 + addiu $sp, 4 + + la $a0, m_readfile + addiu $a1, $gp, RAM_psexe + jal printf + addiu $sp, -8 + addiu $sp, 8 + + b .do_load # Proceed loading PS-EXE + addiu $a0, $gp, RAM_psexe + +.no_systemcnf: # Load fallback + + la $a0, m_notfound + jal printf + addiu $sp, -4 + addiu $sp, 4 + + la $a0, m_fallback + jal printf + addiu $sp, -4 + addiu $sp, 4 + + li $s0, 0x10 # Default EvCBs and TCBs + li $s1, 0x04 + li $s2, SP_base # Default stack + la $a0, s_psxexe # Attempt loading PSX.EXE + +.do_load: + + jal LoadExe # Load PS-EXE + move $a1, $gp + + beqz $v0, load_fail + nop + + la $a0, m_ok + jal printf + addiu $sp, -4 + addiu $sp, 4 + + sw $s2, EXE_sp($gp) # Patch the header + sw $s2, EXE_spaddr($gp) + + la $a0, m_boot + jal printf + addiu $sp, -4 + addiu $sp, 4 + + jal EnterCriticalSection # Disable interrupt handling + nop + + move $a0, $s0 # Set configuration (EvCBs and TCBs) + move $a1, $s1 + move $a2, $s2 + jal SetConf + addiu $sp, -12 + addiu $sp, 12 + + move $a0, $gp # Transfer execution + move $a1, $0 + move $a2, $0 + jal DoExec + addiu $sp, -12 + addiu $sp, 12 + + +load_fail: # Fail state + la $a0, m_loadfail + jal printf + nop +.fail_loop: + b .fail_loop + nop + + +.include "parse.inc" +.include "bios.inc" + + +# Strings + +s_boot: + .asciz "BOOT" + .balign 4 +s_tcb: + .asciz "TCB" + .balign 4 +s_evcb: + .asciz "EVENT" + .balign 4 +s_stack: + .asciz "STACK" + .balign 4 +s_systemcnf: + .asciz "cdrom:SYSTEM.CNF;1" + .balign 4 +s_psxexe: + .asciz "cdrom:PSX.EXE;1" + .balign 4 + + +# Message strings + +m_banner: + .asciz "\nCARTROM Bootstrap Example by Lameguy64\nPart of the PSn00bSDK Project\n\n" + .balign 4 +m_cdinit: + .asciz "Initializing CD-ROM (BIOS)... 
" + .balign 4 +m_readfile: + .asciz "Attempting to read %s... " + .balign 4 +m_parsecnf: + .asciz "Parsing CNF file... " + .balign 4 +m_fallback: + .asciz "Falling back to loading PSX.EXE... " + .balign 4 +m_notfound: + .asciiz "Not found.\n" + .balign 4 +m_ok: + .asciz "Ok.\n" + .balign 4 +m_boot: + .asciz "Boot!\n" + .balign 4 +m_loadfail: + .asciz "Failed to load PS-EXE file.\n" + .balign 4 diff --git a/examples/lowlevel/cartrom/rom.s b/examples/lowlevel/cartrom/rom.s deleted file mode 100644 index 587ba6f..0000000 --- a/examples/lowlevel/cartrom/rom.s +++ /dev/null @@ -1,402 +0,0 @@ -# LibPSn00b Example Programs -# Part of the PSn00bSDk project -# -# TurboBoot Example by Lameguy64 -# -# Note: This example is being obsoleted as GAS is not ideal for making -# ROM firmwares. Use ARMIPS instead, but it cannot build this example -# as it is not GAS syntax compatible. - - -# Uncomment either PAR or XPLORER depending on the cartridge -# you're going to use (makes disabling turbo boot via switch to work) - -#.set PAR, 0 -#.set XPLORER, 1 - - -.set noreorder - -.include "cop0.inc" # Contains definitions for cop0 registers - -.set SP_base, 0x801ffff0 -.set BREAK_ADDR, 0xa0000040 # cop0 breakpoint vector address - - -.set RAM_buff, 2048 -.set RAM_handle, 2052 -.set RAM_tcb, 2056 -.set RAM_evcb, 2060 -.set RAM_stack, 2064 -.set RAM_psexe, 2068 - - -.set EXE_pc0, 0 # PS-EXE header offsets -.set EXE_gp0, 4 -.set EXE_taddr, 8 -.set EXE_tsize, 12 -.set EXE_daddr, 16 -.set EXE_dsize, 20 -.set EXE_baddr, 24 -.set EXE_bsize, 28 -.set EXE_spaddr, 32 -.set EXE_sp_size, 36 -.set EXE_sp, 40 -.set EXE_fp, 44 -.set EXE_gp, 48 -.set EXE_ret, 52 -.set EXE_base, 56 -.set EXE_datapos, 60 - - -.section .text - - -# ROM header -# -# The Licensed by... strings are essential otherwise the BIOS will not -# execute the boot vectors. Always make sure the tty message fields (string -# after Licensed by) must be no more than 80 bytes long and must have a null -# terminating byte. 
-# -# Postboot vector isn't particularly practical as its only executed in between -# the PS boot logo and the point where game execution occurs. -# -header: - # Postboot vector - .word 0 - .ascii "Licensed by Sony Computer Entertainment Inc." - .ascii "Not officially licensed or endorsed by Sony Computer Entertainment Inc." - - .balign 0x80 # This keeps things in the header aligned - - # Preboot vector - .word preboot - .ascii "Licensed by Sony Computer Entertainment Inc." - .ascii "Cart ROM example for PSn00bSDK https://github.com/lameguy64/psn00bsdk" - - .balign 0x80 # This keeps things in the header aligned - - -# Preboot vector -# -# All it does is it initializes a breakpoint vector at 0x40 -# and sets a cop0 breakpoint at 0x80030000 to perform a midboot -# exploit as preboot doesn't have the kernel area initialized. -# -preboot: - - li $v0, 1 - -.ifdef XPLORER - lui $a0, 0x1f06 # Read switch status for Xplorer - lbu $v0, 0($a0) -.endif - -.ifdef PAR - lui $a0, 0x1f02 # Read switch status for PAR/GS devices - lbu $v0, 0x18($a0) -.endif - - nop - andi $v0, 0x1 - beqz $v0, .no_rom # If switch is off don't install hook - nop # and effectively disables the cartridge - - li $v0, BREAK_ADDR # Patch a jump at cop0 breakpoint vector - - li $a0, 0x3c1a1f00 # lui $k0, $1f00 - sw $a0, 0($v0) - la $a1, entry # ori $k0, < address to code entrypoint > - andi $a1, 0xffff - lui $a0, 0x375a - or $a0, $a1 - sw $a0, 4($v0) - li $a0, 0x03400008 # jr $k0 - sw $a0, 8($v0) - sw $0 , 12($v0) # nop - - lui $v0, 0xffff # Set BPCM and BDAM masks - ori $v0, 0xffff - mtc0 $v0, BDAM - mtc0 $v0, BPCM - - - li $v0, 0x80030000 # Set break on PC and data-write address - - mtc0 $v0, BDA # BPC break is for compatibility with no$psx - mtc0 $v0, BPC # as it does not emulate break on BDA - - lui $v0, 0xeb80 # Enable break on data-write and and break - mtc0 $v0, DCIC # on PC to DCIC control register - -.no_rom: - - jr $ra # Return to BIOS - nop - - -# Actual ROM entrypoint -.global entry -entry: - - 
mtc0 $0 , DCIC # Clear DCIC register - - la $sp, SP_base # Set stack base - la $gp, 0x8000c000 # Set GP address as RAM base addr in this case - - jal SetDefaultExitFromException # Set default exit handler just in case - nop - jal ExitCriticalSection # Exit out of critical section (brings back interrupts) - nop - - # Beyond this point, the PS1 is in full control to the ROM - - la $a0, m_banner # Print out program banner - jal printf - addiu $sp, -4 - addiu $sp, 4 - - la $a0, m_cdinit - jal printf - addiu $sp, -4 - addiu $sp, 4 - - jal _96_init # Initialize the CD - nop - - la $a0, m_ok # Print OK message if init didn't crash - jal printf - addiu $sp, -4 - addiu $sp, 4 - - la $a0, m_readfile - la $a1, s_systemcnf - jal printf - addiu $sp, -8 - addiu $sp, 8 - - la $a0, s_systemcnf # Attempt to open the SYSTEM.CNF file on CD - li $a1, 1 - jal open - addiu $sp, -8 - addiu $sp, 8 - - bltz $v0, .no_systemcnf # Fallback to loading PSX.EXE if not found - nop - - sw $v0, RAM_handle($gp) # Save file handle - - move $a0, $v0 # Read file contents of SYSTEM.CNF - move $a1, $gp - li $a2, 0x0800 - jal read - addiu $sp, -12 - addiu $sp, 12 - - lw $a0, RAM_handle($gp) # Close file - jal close - addiu $sp, -4 - addiu $sp, 4 - - la $a0, m_ok # Output ok message - jal printf - addiu $sp, -4 - addiu $sp, 4 - - # Parse CNF file - - la $a0, m_parsecnf - jal printf - nop - - la $a1, s_tcb # Get TCB number - jal strcasestr - move $a0, $gp - jal skipspace # Skip spaces - addiu $a0, $v0, 3 - addiu $a0, $v0, -2 # Step two charactters back and inject '0x' - li $v0, '0' - sb $v0, 0($a0) - li $v0, 'x' - sb $v0, 1($a0) - jal atoi - addiu $sp, -4 - addiu $sp, 4 - move $s1, $v0 - - la $a1, s_evcb # Get EVCB number - jal strcasestr - move $a0, $gp - jal skipspace - addiu $a0, $v0, 5 - addiu $a0, $v0, -2 - li $v0, '0' - sb $v0, 0($a0) - li $v0, 'x' - sb $v0, 1($a0) - jal atoi - addiu $sp, -4 - addiu $sp, 4 - move $s0, $v0 - - la $a1, s_stack # Get STACK address - jal strcasestr - move $a0, $gp - jal 
skipspace - addiu $a0, $v0, 5 - addiu $a0, $v0, -2 - li $v0, '0' - sb $v0, 0($a0) - li $v0, 'x' - sb $v0, 1($a0) - jal atoi - addiu $sp, -4 - addiu $sp, 4 - move $s2, $v0 - - la $a1, s_boot # Get the PS-EXE file name - jal strcasestr - move $a0, $gp - - jal skipspace # Skip spaces - addiu $a0, $v0, 4 - - addiu $a0, $gp, RAM_psexe # Extract the line - jal getline - move $a1, $v0 - - la $a0, m_ok # Print successful parsing - jal printf - addiu $sp, -4 - addiu $sp, 4 - - la $a0, m_readfile - addiu $a1, $gp, RAM_psexe - jal printf - addiu $sp, -8 - addiu $sp, 8 - - b .do_load # Proceed loading PS-EXE - addiu $a0, $gp, RAM_psexe - -.no_systemcnf: # Load fallback - - la $a0, m_notfound - jal printf - addiu $sp, -4 - addiu $sp, 4 - - la $a0, m_fallback - jal printf - addiu $sp, -4 - addiu $sp, 4 - - li $s0, 0x10 # Default EvCBs and TCBs - li $s1, 0x04 - li $s2, SP_base # Default stack - la $a0, s_psxexe # Attempt loading PSX.EXE - -.do_load: - - jal LoadExe # Load PS-EXE - move $a1, $gp - - beqz $v0, load_fail - nop - - la $a0, m_ok - jal printf - addiu $sp, -4 - addiu $sp, 4 - - sw $s2, EXE_sp($gp) # Patch the header - sw $s2, EXE_spaddr($gp) - - la $a0, m_boot - jal printf - addiu $sp, -4 - addiu $sp, 4 - - jal EnterCriticalSection # Disable interrupt handling - nop - - move $a0, $s0 # Set configuration (EvCBs and TCBs) - move $a1, $s1 - move $a2, $s2 - jal SetConf - addiu $sp, -12 - addiu $sp, 12 - - move $a0, $gp # Transfer execution - move $a1, $0 - move $a2, $0 - jal DoExec - addiu $sp, -12 - addiu $sp, 12 - - -load_fail: # Fail state - la $a0, m_loadfail - jal printf - nop -.fail_loop: - b .fail_loop - nop - - -.include "parse.inc" -.include "bios.inc" - - -# Strings - -s_boot: - .asciz "BOOT" - .balign 4 -s_tcb: - .asciz "TCB" - .balign 4 -s_evcb: - .asciz "EVENT" - .balign 4 -s_stack: - .asciz "STACK" - .balign 4 -s_systemcnf: - .asciz "cdrom:SYSTEM.CNF;1" - .balign 4 -s_psxexe: - .asciz "cdrom:PSX.EXE;1" - .balign 4 - - -# Message strings - -m_banner: - .asciz 
"\nCARTROM Bootstrap Example by Lameguy64\nPart of the PSn00bSDK Project\n\n" - .balign 4 -m_cdinit: - .asciz "Initializing CD-ROM (BIOS)... " - .balign 4 -m_readfile: - .asciz "Attempting to read %s... " - .balign 4 -m_parsecnf: - .asciz "Parsing CNF file... " - .balign 4 -m_fallback: - .asciz "Falling back to loading PSX.EXE... " - .balign 4 -m_notfound: - .asciiz "Not found.\n" - .balign 4 -m_ok: - .asciz "Ok.\n" - .balign 4 -m_boot: - .asciz "Boot!\n" - .balign 4 -m_loadfail: - .asciz "Failed to load PS-EXE file.\n" - .balign 4 diff --git a/indev/psn00bdbg-mk2/monitor/sdkinst.S b/indev/psn00bdbg-mk2/monitor/sdkinst.S new file mode 100644 index 0000000..6d20624 --- /dev/null +++ b/indev/psn00bdbg-mk2/monitor/sdkinst.S @@ -0,0 +1,65 @@ +# +# To use this installer properly, the program text must be compiled to +# load at a higher address such as 0x80012000. +# +# Then call the function early in your program: +# +# void install_monitor(void); +# +# ... +# +# int main(int argc, const char *argv[]) +# { +# ResetGraph(0); +# +# EnterCriticalSection(); +# install_monitor(); +# ExitCriticalSection(); +# ... 
+# +.set noreorder + +.section .text + +.global _install_monitor # NOTE(review): the usage text above declares install_monitor() without the leading underscore; confirm the C-visible name +.type _install_monitor, @function +_install_monitor: # = Patch installer routine: copies debug_payload to 0xA0010000, then calls 0x80010000 + + addiu $sp, -4 + sw $ra, 0($sp) # Keep $ra; the jal/jalr below overwrite it + + la $a0, debug_payload # a0 = copy source + la $a1, debug_payload_end + la $a2, debug_payload + subu $a1, $a2 # a1 = payload size in bytes (end minus start) + jal .Lcopymem + lui $a2, 0xA001 # (delay slot) a2 = 0xA0010000, copy destination + + lui $a0, 0x8001 # a0 = 0x80010000 + jalr $a0 # Call it; presumably the cached alias of the copy target, confirm + nop + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + + .Lcopymem: # installer's copy routine: a0 = source, a1 = byte count, a2 = destination; clobbers v0 + + addiu $a1, -4 # One word consumed per pass + lw $v0, 0($a0) + addiu $a0, 4 + sw $v0, 0($a2) + bgtz $a1, .Lcopymem # Loop while bytes remain; always copies at least one word + addiu $a2, 4 # (delay slot) advance destination + jr $ra + nop + + +.section .data + +debug_payload: + + .incbin "patch.bin" + +debug_payload_end: diff --git a/indev/psn00bdbg-mk2/monitor/sdkinst.s b/indev/psn00bdbg-mk2/monitor/sdkinst.s deleted file mode 100644 index 6d20624..0000000 --- a/indev/psn00bdbg-mk2/monitor/sdkinst.s +++ /dev/null @@ -1,65 +0,0 @@ -# -# To use this installer properly, the program text must be compiled to -# load at a higher address such as 0x80012000. -# -# Then call the function early in your program: -# -# void install_monitor(void); -# -# ... -# -# int main(int argc, const char *argv[]) -# { -# ResetGraph(0); -# -# EnterCriticalSection(); -# install_monitor(); -# ExitCriticalSection(); -# ...
-# -.set noreorder - -.section .text - -.global _install_monitor -.type _install_monitor, @function -_install_monitor: # = Patch installer routine - - addiu $sp, -4 - sw $ra, 0($sp) - - la $a0, debug_payload - la $a1, debug_payload_end - la $a2, debug_payload - subu $a1, $a2 - jal .Lcopymem - lui $a2, 0xA001 - - lui $a0, 0x8001 - jalr $a0 - nop - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - .Lcopymem: # installer's copy routine - - addiu $a1, -4 - lw $v0, 0($a0) - addiu $a0, 4 - sw $v0, 0($a2) - bgtz $a1, .Lcopymem - addiu $a2, 4 - jr $ra - nop - - -.section .data - -debug_payload: - - .incbin "patch.bin" - -debug_payload_end: diff --git a/indev/psn00bdbg-mk2/monitor/stubinst.S b/indev/psn00bdbg-mk2/monitor/stubinst.S new file mode 100644 index 0000000..54bc8b9 --- /dev/null +++ b/indev/psn00bdbg-mk2/monitor/stubinst.S @@ -0,0 +1,185 @@ +# +# C-callable and linkable version of 'patchinst' portion of monitor.asm +# +# Assemble with: +# mipsel-none-elf-gcc -march=r3000 -c stubinst.S -o stubinst.o +# +# Then call the function early in your program: +# +# void mk2_InstallMonitor(void); +# +# ... +# +# int main(int argc, const char *argv[]) +# { +# ResetGraph(0); +# +# EnterCriticalSection(); +# mk2_InstallMonitor(); +# ExitCriticalSection(); +# ...
+# +.set noreorder + +# +# These constants must reflect those in monitor.asm +# +.set MONADDR, 0xC000 +.set MAX_BREAK, 32 # Number of entries cleared by .Linit_breakpoints + +.set SAVE_mode, 0x30 +.set SAVE_tmode, 0x31 +.set SAVE_k0, 0x34 +.set SAVE_k1, 0x38 +.set SAVE_dcic, 0x3C + +.set DB_BRK_FLAG, 0 +.set DB_BRK_ADDR, 4 +.set DB_BRK_INST, 8 +.set DB_BRK_LCNT, 12 +.set DB_BRK_HCNT, 14 +.set DB_BRK_LEN, 16 # Stride between breakpoint entries + +.set DCIC, $7 # cop0 register accessed by the mfc0/mtc0 hooks below + +.section .text + +.global _mk2_InstallMonitor +.type _mk2_InstallMonitor, @function +_mk2_InstallMonitor: + + addiu $sp, -4 + sw $ra, 0($sp) # Keep $ra; the jal/jalr calls below overwrite it + li $a0, .Lbreakhook # Install breakpoint vector hook + li $a1, (.Lbreakhook_end-.Lbreakhook)+4 + li $a2, 0xA0000040 + jal .Lcopymem + nop + li $v0, 0x200 # Hook monitor entrypoint to + li $v1, 0x40 # SysErrUnresolvedException() slot + sll $v1, 2 + addu $v0, $v1 # v0 = 0x200 plus 0x40*4 = 0x300 + la $v1, .Lpayload # Get entrypoint address + lw $v1, 0($v1) + nop + sw $v1, 0($v0) # Write the first payload word into that slot + addiu $t2, $0, 0xB0 # GetB0Table() + jalr $t2 + addiu $t1, $0, 0x57 # (delay slot) t1 = 0x57 for the call above + li $a0, 0x17 # Get pointer of ReturnFromException() + sll $a0, 2 + addu $a0, $v0 # a0 = v0 (left by the call) plus 0x17*4 + lw $v0, 0($a0) # Save original address for later + la $v1, .Lpayload + lw $v1, 0($v1) + nop + beq $v0, $v1, .Lnoinstall # Don't install if hooked already + nop + addiu $sp, -4 + sw $a0, 0($sp) # Keep the slot pointer; a0 is reused as the .Lcopymem source + li $a0, .Lpayload # Load monitor code into target + li $a1, (.Lpayload_end-.Lpayload)+4 # address + lui $a2, 0xA000 # write via uncached segment + jal .Lcopymem + ori $a2, MONADDR # (delay slot) a2 = 0xA0000000 | MONADDR + lw $a0, 0($sp) + addiu $sp, 4 + lw $v0, 0($a0) + la $v1, .Lpayload + lw $v1, 4($v1) # Second payload word: address that receives the old slot value + nop + sw $v0, 0($v1) + la $v0, .Lpayload # Set new address to table + lw $v0, 0($v0) + nop + sw $v0, 0($a0) + li $a0, 0x80 # Move existing exception vector + li $a2, 0x90 # jump to prepend patch + jal .Lcopymem + li $a1, 16 # (delay slot) 16 bytes = 4 words + la $a0, .Lexceptpatch # Patch the exception vector for + li $a2, 0x80 # trace to work properly + jal .Lcopymem + li $a1, 16 # (delay slot) 16 bytes = 4 words + addiu $t2, $0, 0xA0 # FlushCache() just to make sure + jalr $t2 + addiu $t1, $0, 0x44 # (delay slot) t1 = 0x44 for the call above + jal .Linit_breakpoints + nop + .Lnoinstall: + lw $ra, 0($sp) # Return
to caller, debugger + addiu $sp, 4 # already installed + jr $ra + nop + + # patchinst + +# +# = installer's copy routine (a0 = source, a1 = byte count, a2 = destination; clobbers v0) +# +.Lcopymem: + addiu $a1, -4 # One word consumed per pass + lw $v0, 0($a0) + addiu $a0, 4 + sw $v0, 0($a2) + bgtz $a1, .Lcopymem # Loop while bytes remain; always copies at least one word + addiu $a2, 4 # (delay slot) advance destination + jr $ra + nop + + # copymem + +# +# = Initializes breakpoint list +# +.Linit_breakpoints: + la $a2, .Lpayload # Get address of breakpoint table + lw $a2, 8($a2) # Third payload word holds that address + li $a3, MAX_BREAK + addiu $v0, $0, -1 # v0 = 0xFFFFFFFF fill value + .Lclear_loop: + sw $v0, DB_BRK_INST($a2) + sw $v0, DB_BRK_ADDR($a2) + sw $0 , DB_BRK_FLAG($a2) + addiu $a3, -1 + bnez $a3, .Lclear_loop + addiu $a2, DB_BRK_LEN # (delay slot) step to the next entry + jr $ra + nop + # init_breakpoints + +# +# = Break vector hook code +# +# This is copied to the breakpoint exception vector at address 40h +# +.Lbreakhook: + sw $k0, SAVE_k0($0) # Save K0 and K1 registers + mfc0 $k0, DCIC # Save DCIC + sw $k1, SAVE_k1($0) + mtc0 $0, DCIC # Clear DCIC in case trace is still + sw $k0, SAVE_dcic($0) # effective + la $k0, breakhandler # Jump to break vector handler; NOTE(review): breakhandler is not defined in this file, confirm it resolves at link time + jr $k0 + nop +.Lbreakhook_end: + +# +# = Exception vector patch +# +# This is prepended to the exception vector jump at address 80h +# +.Lexceptpatch: + mfc0 $k0, DCIC # backup DCIC value + nop + mtc0 $0 , DCIC # clear DCIC + sw $k0, SAVE_dcic($0) # Fourth and last word of the 16 bytes copied by the installer + +# +# = Payload data +# +.section .data + +.Lpayload: + .incbin "patchcode.bin" +.Lpayload_end: diff --git a/indev/psn00bdbg-mk2/monitor/stubinst.s b/indev/psn00bdbg-mk2/monitor/stubinst.s deleted file mode 100644 index 54bc8b9..0000000 --- a/indev/psn00bdbg-mk2/monitor/stubinst.s +++ /dev/null @@ -1,185 +0,0 @@ -# -# C-callable and linkable version of 'patchinst' portion of monitor.asm -# -# Assemble with: -# mipsel-none-elf-gcc -march=r3000 -c stubinst.s -o stubinst.o -# -# Then call the function early in your program: -# -# void mk2_InstallMonitor(void); -# -# ... -# -# int main(int argc, const char *argv[]) -# { -# ResetGraph(0); -# -# EnterCriticalSection(); -# mk2_InstallMonitor(); -# ExitCriticalSection(); -# ...
-# -.set noreorder - -# -# These constants must reflect those in monitor.asm -# -.set MONADDR, 0xC000 -.set MAX_BREAK, 32 - -.set SAVE_mode, 0x30 -.set SAVE_tmode, 0x31 -.set SAVE_k0, 0x34 -.set SAVE_k1, 0x38 -.set SAVE_dcic, 0x3C - -.set DB_BRK_FLAG, 0 -.set DB_BRK_ADDR, 4 -.set DB_BRK_INST, 8 -.set DB_BRK_LCNT, 12 -.set DB_BRK_HCNT, 14 -.set DB_BRK_LEN, 16 - -.set DCIC, $7 - -.section .text - -.global _mk2_InstallMonitor -.type _mk2_InstallMonitor, @function -_mk2_InstallMonitor: - - addiu $sp, -4 - sw $ra, 0($sp) - li $a0, .Lbreakhook # Install breakpoint vector hook - li $a1, (.Lbreakhook_end-.Lbreakhook)+4 - li $a2, 0xA0000040 - jal .Lcopymem - nop - li $v0, 0x200 # Hook monitor entrypoint to - li $v1, 0x40 # SysErrUnresolvedException() slot - sll $v1, 2 - addu $v0, $v1 - la $v1, .Lpayload # Get entrypoint address - lw $v1, 0($v1) - nop - sw $v1, 0($v0) - addiu $t2, $0, 0xB0 # GetB0Table() - jalr $t2 - addiu $t1, $0, 0x57 - li $a0, 0x17 # Get pointer of ReturnFromException() - sll $a0, 2 - addu $a0, $v0 - lw $v0, 0($a0) # Save original address for later - la $v1, .Lpayload - lw $v1, 0($v1) - nop - beq $v0, $v1, .Lnoinstall # Don't install if hooked already - nop - addiu $sp, -4 - sw $a0, 0($sp) - li $a0, .Lpayload # Load monitor code into target - li $a1, (.Lpayload_end-.Lpayload)+4 # address - lui $a2, 0xA000 # write via uncached segment - jal .Lcopymem - ori $a2, MONADDR - lw $a0, 0($sp) - addiu $sp, 4 - lw $v0, 0($a0) - la $v1, .Lpayload - lw $v1, 4($v1) - nop - sw $v0, 0($v1) - la $v0, .Lpayload # Set new address to table - lw $v0, 0($v0) - nop - sw $v0, 0($a0) - li $a0, 0x80 # Move existing exception vector - li $a2, 0x90 # jump to prepend patch - jal .Lcopymem - li $a1, 16 - la $a0, .Lexceptpatch # Patch the exception vector for - li $a2, 0x80 # trace to work properly - jal .Lcopymem - li $a1, 16 - addiu $t2, $0, 0xA0 # FlushCache() just to make sure - jalr $t2 - addiu $t1, $0, 0x44 - jal .Linit_breakpoints - nop - .Lnoinstall: - lw $ra, 0($sp) # Return 
to caller, debugger - addiu $sp, 4 # already installed - jr $ra - nop - - # patchinst - -# -# = installer's copy routine -# -.Lcopymem: - addiu $a1, -4 - lw $v0, 0($a0) - addiu $a0, 4 - sw $v0, 0($a2) - bgtz $a1, .Lcopymem - addiu $a2, 4 - jr $ra - nop - - # copymem - -# -# = Initializes breakpoint list -# -.Linit_breakpoints: - la $a2, .Lpayload # Get address of breakpoint table - lw $a2, 8($a2) - li $a3, MAX_BREAK - addiu $v0, $0, -1 - .Lclear_loop: - sw $v0, DB_BRK_INST($a2) - sw $v0, DB_BRK_ADDR($a2) - sw $0 , DB_BRK_FLAG($a2) - addiu $a3, -1 - bnez $a3, .Lclear_loop - addiu $a2, DB_BRK_LEN - jr $ra - nop - # init_breakpoints - -# -# = Break vector hook code -# -# This is copied to the breakpoint exception vector at address 40h -# -.Lbreakhook: - sw $k0, SAVE_k0($0) # Save K0 and K1 registers - mfc0 $k0, DCIC # Save DCIC - sw $k1, SAVE_k1($0) - mtc0 $0, DCIC # Clear DCIC in case trace is still - sw $k0, SAVE_dcic($0) # effective - la $k0, breakhandler # Jump to break vector handler - jr $k0 - nop -.Lbreakhook_end: - -# -# = Exception vector patch -# -# This is prepended to the exception vector jump at address 80h -# -.Lexceptpatch: - mfc0 $k0, DCIC # backup DCIC value - nop - mtc0 $0 , DCIC # clear DCIC - sw $k0, SAVE_dcic($0) - -# -# = Payload data -# -.section .data - -.Lpayload: - .incbin "patchcode.bin" -.Lpayload_end: diff --git a/indev/psxpad/card.S b/indev/psxpad/card.S new file mode 100644 index 0000000..2732194 --- /dev/null +++ b/indev/psxpad/card.S @@ -0,0 +1,377 @@ +.set noreorder + +.include "hwregs_a.h" + +.section .text + +_CardCSum: + + # a0 - base csum + # a1 - data pointer + # a2 - length + + lbu $v0, 0($a1) + addi $a2, -1 + xor $a0, $v0 + bgtz $a2, _CardCSum + addiu $a1, 1 + + jr $ra + move $v0, $a0 + + +.global _CardWrite +.type _CardWrite, @function +_CardWrite: + + # a0 - port number + # a1 - pointer to 128 byte buffer + # a2 - sector number + + # return values: + # 0 - ok + # 1 - no device + # 2 - timeout + # 3 - bad checksum + # 4 - bad 
sector + + # note: you must wait at least two vsyncs between each sector write + + addiu $sp, -4 + sw $ra, 0($sp) + + lui $t0, IOBASE + + li $v0, 0x1003 # TX Enable, Joypad port select + andi $a0, 1 + sll $a0, 13 + or $v0, $a0 # Select port 2 if a0 is 1 + + sh $v0, JOY_CTRL($t0) # Set to Joypad control interface + + jal _wait # Delay for analog pads + li $v0, 310 # (needs optimization testing) + +# May cause issues with third party adapters such as Brook wireless +#.Lread_empty_fifo_write: # Flush the RX FIFO just in case +# lbu $v1, JOY_TXRX($t0) +# lhu $v0, JOY_STAT($t0) +# nop +# andi $v0, 0x2 +# bnez $v0, .Lread_empty_fifo_write +# nop + + lhu $v1, JOY_CTRL($t0) + nop + or $v1, 0x10 + sh $v1, JOY_CTRL($t0) + + jal _CardExchng # Send device check byte + li $a0, 0x81 + andi $v1, $v0, 0x100 # No card if exchange timed out + bnez $v1, .Lno_device_write + addiu $v0, $0 , 1 + + jal _wait # 1st exchange needs 27microsec after ACK + li $v0, 190 # (e.g. 7 as usual + an extra 20) + + jal _CardExchng # Send write command + li $a0, 0x57 + andi $v1, $v0, 0x100 # No card if exchange timed out + bnez $v1, .Lno_device_write + addiu $v0, $0 , 1 + + jal _CardExchng # Receive card ID bytes + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + jal _CardExchng + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + + jal _CardExchng # Send address bytes + srl $a0, $a2, 8 + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + jal _CardExchng + andi $a0, $a2, 0xFF + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + + srl $t1, $a2, 8 # Checksum address by MSB xor LSB + andi $v0, $a2, 0xFF + xor $t1, $v0 + + move $a2, $0 # Send data and compute checksum +.Lwrite_loop: + lbu $a0, 0($a1) + addiu $a1, 1 + jal _CardExchng + xor $t1, $a0 + addiu $a2, 1 + blt $a2, 128, .Lwrite_loop + nop + + jal _CardExchng # Send checksum byte + move $a0, $t1 + andi $v1, $v0, 0x100 + bnez $v1, 
.Lwrite_timeout + addiu $v0, $0 , 2 + + jal _CardExchng # Receive card acknowledge bytes + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + jal _CardExchng + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lwrite_timeout + addiu $v0, $0 , 2 + + sb $0 , JOY_TXRX($t0) # Gets end byte + nop +.Lsend_wait_end_write: + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x6 + bne $v0, 0x6, .Lsend_wait_end_write + nop + lbu $v1, JOY_TXRX($t0) # End byte kept in v1: the delay slots below overwrite v0 + nop + + beq $v1, 0x4e, .Lwrite_timeout # Bad checksum + addiu $v0, $0 , 3 # (delay slot) return code 3, executed whether or not the branch is taken + beq $v1, 0xff, .Lwrite_timeout # Bad sector + addiu $v0, $0 , 4 # (delay slot) return code 4, executed whether or not the branch is taken + + move $v0, $0 # Neither status matched: return 0 (ok) + +.Lwrite_timeout: +.Lno_device_write: + + sh $0 , JOY_CTRL($t0) # Apparently required + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + + +.global _CardRead +.type _CardRead, @function +_CardRead: + + # a0 - port number + # a1 - pointer to 128 byte buffer + # a2 - sector number + + addiu $sp, -12 + sw $ra, 0($sp) + sw $a1, 4($sp) + sw $a2, 8($sp) + + lui $t0, IOBASE + + li $v0, 0x1003 # TX Enable, Joypad port select + andi $a0, 1 + sll $a0, 13 + or $v0, $a0 # Select port 2 if a0 is 1 + + sh $v0, JOY_CTRL($t0) # Set to Joypad control interface + + jal _wait # Delay for analog pads (needs testing) + li $v0, 310 + +# May cause issues with third party adapters such as Brook wireless +#.Lread_empty_fifo: # Flush the RX FIFO just in case +# lbu $v1, JOY_TXRX($t0) +# lhu $v0, JOY_STAT($t0) +# nop +# andi $v0, 0x2 +# bnez $v0, .Lread_empty_fifo +# nop + + lhu $v1, JOY_CTRL($t0) + nop + or $v1, 0x10 + sh $v1, JOY_CTRL($t0) + + jal _CardExchng # Send device check byte + li $a0, 0x81 + andi $v1, $v0, 0x100 # No card if exchange timed out + bnez $v1, .Lno_device + addiu $v0, $0 , 1 + + jal _wait # 1st exchange needs 27microsec after ACK + li $v0, 190 # (e.g.
7 as usual + an extra 20) + + jal _CardExchng # Send read command + li $a0, 0x52 + andi $v1, $v0, 0x100 # No card if exchange timed out + bnez $v1, .Lno_device + addiu $v0, $0 , 2 + + jal _CardExchng # Receive card ID bytes + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 3 + jal _CardExchng + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 4 + + jal _CardExchng # Send address + srl $a0, $a2, 8 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 5 + jal _CardExchng + andi $a0, $a2, 0xFF + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 6 + + sb $0 , JOY_TXRX($t0) # Receive command acknowledge 1 + nop +.Lsend_wait: + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x4 + beqz $v0, .Lsend_wait + nop + move $v1, $0 + lui $a0, 0xBFC0 +.Lwait_ack: + bgt $v1, 30000, .Lread_timeout + addiu $v0, $0, 10 + lhu $v0, JOY_STAT($t0) + lw $0 , 4($a0) + lw $0 , 0($a0) + andi $v0, 0x202 + bne $v0, 0x202, .Lwait_ack + addiu $v1, 1 + lhu $v1, JOY_CTRL($t0) + lbu $v0, JOY_TXRX($t0) + or $v1, 0x10 + sh $v1, JOY_CTRL($t0) + + jal _CardExchng # Receive command acknowledge 2 + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 7 + + jal _CardExchng # Receive confirmed address MSB + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 8 + + jal _CardExchng # Receive confirmed address LSB + move $a0, $0 + andi $v1, $v0, 0x100 + bnez $v1, .Lno_device + addiu $v0, $0 , 9 + + move $a2, $0 +.Ltransfer_loop: + jal _CardExchng + move $a0, $0 + sb $v0, 0($a1) + addiu $a2, 1 + blt $a2, 128, .Ltransfer_loop + addiu $a1, 1 + + jal _CardExchng # Gets checksum byte + move $a0, $0 + move $a3, $v0 + + sb $0 , JOY_TXRX($t0) # Gets end byte + nop +.Lsend_wait_end: + lhu $v1, JOY_STAT($t0) + nop + andi $v1, 0x6 + bne $v1, 0x6, .Lsend_wait_end + nop + lbu $v1, JOY_TXRX($t0) + nop + + lw $a2, 8($sp) + lw $a1, 4($sp) + andi $v1, $a2, 0xff + srl $a2, 8 + xor $a0, $a2, $v1 + jal 
_CardCSum + li $a2, 128 + + bne $v0, $a3, .Lno_device + addiu $v0, $0 , -1 + + move $v0, $0 +.Lread_timeout: +.Lno_device: + + sh $0 , JOY_CTRL($t0) # Apparently required + + lw $ra, 0($sp) + addiu $sp, 12 + jr $ra + nop + + +.global _CardExchng +.type _CardExchng, @function +_CardExchng: + + lui $t0, IOBASE + + sb $a0, JOY_TXRX($t0) + nop +.Lsend_wait_exchg: + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x4 + beqz $v0, .Lsend_wait_exchg + nop + + lui $a0, 0xBFC0 + move $v1, $0 +.Lwait_ack_exchg: + bgt $v1, 500, .Ltimeout + lhu $v0, JOY_STAT($t0) + lw $0 , 4($a0) + lw $0 , 0($a0) + andi $v0, 0x202 + bne $v0, 0x202, .Lwait_ack_exchg + addiu $v1, 1 + + b .Ldone + nop + +.Ltimeout: + + lbu $v0, JOY_TXRX($t0) + nop + b .Lexit_exchg + ori $v0, 0x100 + +.Ldone: + + lhu $v1, JOY_CTRL($t0) + lbu $v0, JOY_TXRX($t0) + or $v1, 0x10 + sh $v1, JOY_CTRL($t0) + +.Lexit_exchg: + + jr $ra + nop diff --git a/indev/psxpad/card.s b/indev/psxpad/card.s deleted file mode 100644 index 2732194..0000000 --- a/indev/psxpad/card.s +++ /dev/null @@ -1,377 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -_CardCSum: - - # a0 - base csum - # a1 - data pointer - # a2 - length - - lbu $v0, 0($a1) - addi $a2, -1 - xor $a0, $v0 - bgtz $a2, _CardCSum - addiu $a1, 1 - - jr $ra - move $v0, $a0 - - -.global _CardWrite -.type _CardWrite, @function -_CardWrite: - - # a0 - port number - # a1 - pointer to 128 byte buffer - # a2 - sector number - - # return values: - # 0 - ok - # 1 - no device - # 2 - timeout - # 3 - bad checksum - # 4 - bad sector - - # note: you must wait at least two vsyncs between each sector write - - addiu $sp, -4 - sw $ra, 0($sp) - - lui $t0, IOBASE - - li $v0, 0x1003 # TX Enable, Joypad port select - andi $a0, 1 - sll $a0, 13 - or $v0, $a0 # Select port 2 if a0 is 1 - - sh $v0, JOY_CTRL($t0) # Set to Joypad control interface - - jal _wait # Delay for analog pads - li $v0, 310 # (needs optimization testing) - -# May cause issues with third party adapters such as 
Brook wireless -#.Lread_empty_fifo_write: # Flush the RX FIFO just in case -# lbu $v1, JOY_TXRX($t0) -# lhu $v0, JOY_STAT($t0) -# nop -# andi $v0, 0x2 -# bnez $v0, .Lread_empty_fifo_write -# nop - - lhu $v1, JOY_CTRL($t0) - nop - or $v1, 0x10 - sh $v1, JOY_CTRL($t0) - - jal _CardExchng # Send device check byte - li $a0, 0x81 - andi $v1, $v0, 0x100 # No card if exchange timed out - bnez $v1, .Lno_device_write - addiu $v0, $0 , 1 - - jal _wait # 1st exchange needs 27microsec after ACK - li $v0, 190 # (e.g. 7 as usual + an extra 20) - - jal _CardExchng # Send write command - li $a0, 0x57 - andi $v1, $v0, 0x100 # No card if exchange timed out - bnez $v1, .Lno_device_write - addiu $v0, $0 , 1 - - jal _CardExchng # Receive card ID bytes - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - jal _CardExchng - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - - jal _CardExchng # Send address bytes - srl $a0, $a2, 8 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - jal _CardExchng - andi $a0, $a2, 0xFF - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - - srl $t1, $a2, 8 # Checksum address by MSB xor LSB - andi $v0, $a2, 0xFF - xor $t1, $v0 - - move $a2, $0 # Send data and compute checksum -.Lwrite_loop: - lbu $a0, 0($a1) - addiu $a1, 1 - jal _CardExchng - xor $t1, $a0 - addiu $a2, 1 - blt $a2, 128, .Lwrite_loop - nop - - jal _CardExchng # Send checksum byte - move $a0, $t1 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - - jal _CardExchng # Receive card acknowledge bytes - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - jal _CardExchng - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lwrite_timeout - addiu $v0, $0 , 2 - - sb $0 , JOY_TXRX($t0) # Gets end byte - nop -.Lsend_wait_end_write: - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x6 - bne $v0, 0x6, .Lsend_wait_end_write - nop - lbu $v0, 
JOY_TXRX($t0) - nop - - beq $v0, 0x4e, .Lwrite_timeout # Bad checksum - addiu $v0, $0 , 3 - beq $v0, 0xff, .Lwrite_timeout # Bad sector - addiu $v0, $0 , 4 - - move $v0, $0 - -.Lwrite_timeout: -.Lno_device_write: - - sh $0 , JOY_CTRL($t0) # Apparently required - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - -.global _CardRead -.type _CardRead, @function -_CardRead: - - # a0 - port number - # a1 - pointer to 128 byte buffer - # a2 - sector number - - addiu $sp, -12 - sw $ra, 0($sp) - sw $a1, 4($sp) - sw $a2, 8($sp) - - lui $t0, IOBASE - - li $v0, 0x1003 # TX Enable, Joypad port select - andi $a0, 1 - sll $a0, 13 - or $v0, $a0 # Select port 2 if a0 is 1 - - sh $v0, JOY_CTRL($t0) # Set to Joypad control interface - - jal _wait # Delay for analog pads (needs testing) - li $v0, 310 - -# May cause issues with third party adapters such as Brook wireless -#.Lread_empty_fifo: # Flush the RX FIFO just in case -# lbu $v1, JOY_TXRX($t0) -# lhu $v0, JOY_STAT($t0) -# nop -# andi $v0, 0x2 -# bnez $v0, .Lread_empty_fifo -# nop - - lhu $v1, JOY_CTRL($t0) - nop - or $v1, 0x10 - sh $v1, JOY_CTRL($t0) - - jal _CardExchng # Send device check byte - li $a0, 0x81 - andi $v1, $v0, 0x100 # No card if exchange timed out - bnez $v1, .Lno_device - addiu $v0, $0 , 1 - - jal _wait # 1st exchange needs 27microsec after ACK - li $v0, 190 # (e.g. 
7 as usual + an extra 20) - - jal _CardExchng # Send read command - li $a0, 0x52 - andi $v1, $v0, 0x100 # No card if exchange timed out - bnez $v1, .Lno_device - addiu $v0, $0 , 2 - - jal _CardExchng # Receive card ID bytes - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 3 - jal _CardExchng - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 4 - - jal _CardExchng # Send address - srl $a0, $a2, 8 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 5 - jal _CardExchng - andi $a0, $a2, 0xFF - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 6 - - sb $0 , JOY_TXRX($t0) # Receive command acknowledge 1 - nop -.Lsend_wait: - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x4 - beqz $v0, .Lsend_wait - nop - move $v1, $0 - lui $a0, 0xBFC0 -.Lwait_ack: - bgt $v1, 30000, .Lread_timeout - addiu $v0, $0, 10 - lhu $v0, JOY_STAT($t0) - lw $0 , 4($a0) - lw $0 , 0($a0) - andi $v0, 0x202 - bne $v0, 0x202, .Lwait_ack - addiu $v1, 1 - lhu $v1, JOY_CTRL($t0) - lbu $v0, JOY_TXRX($t0) - or $v1, 0x10 - sh $v1, JOY_CTRL($t0) - - jal _CardExchng # Receive command acknowledge 2 - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 7 - - jal _CardExchng # Receive confirmed address MSB - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 8 - - jal _CardExchng # Receive confirmed address LSB - move $a0, $0 - andi $v1, $v0, 0x100 - bnez $v1, .Lno_device - addiu $v0, $0 , 9 - - move $a2, $0 -.Ltransfer_loop: - jal _CardExchng - move $a0, $0 - sb $v0, 0($a1) - addiu $a2, 1 - blt $a2, 128, .Ltransfer_loop - addiu $a1, 1 - - jal _CardExchng # Gets checksum byte - move $a0, $0 - move $a3, $v0 - - sb $0 , JOY_TXRX($t0) # Gets end byte - nop -.Lsend_wait_end: - lhu $v1, JOY_STAT($t0) - nop - andi $v1, 0x6 - bne $v1, 0x6, .Lsend_wait_end - nop - lbu $v1, JOY_TXRX($t0) - nop - - lw $a2, 8($sp) - lw $a1, 4($sp) - andi $v1, $a2, 0xff - srl $a2, 8 - xor $a0, $a2, $v1 - jal 
_CardCSum - li $a2, 128 - - bne $v0, $a3, .Lno_device - addiu $v0, $0 , -1 - - move $v0, $0 -.Lread_timeout: -.Lno_device: - - sh $0 , JOY_CTRL($t0) # Apparently required - - lw $ra, 0($sp) - addiu $sp, 12 - jr $ra - nop - - -.global _CardExchng -.type _CardExchng, @function -_CardExchng: - - lui $t0, IOBASE - - sb $a0, JOY_TXRX($t0) - nop -.Lsend_wait_exchg: - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x4 - beqz $v0, .Lsend_wait_exchg - nop - - lui $a0, 0xBFC0 - move $v1, $0 -.Lwait_ack_exchg: - bgt $v1, 500, .Ltimeout - lhu $v0, JOY_STAT($t0) - lw $0 , 4($a0) - lw $0 , 0($a0) - andi $v0, 0x202 - bne $v0, 0x202, .Lwait_ack_exchg - addiu $v1, 1 - - b .Ldone - nop - -.Ltimeout: - - lbu $v0, JOY_TXRX($t0) - nop - b .Lexit_exchg - ori $v0, 0x100 - -.Ldone: - - lhu $v1, JOY_CTRL($t0) - lbu $v0, JOY_TXRX($t0) - or $v1, 0x10 - sh $v1, JOY_CTRL($t0) - -.Lexit_exchg: - - jr $ra - nop diff --git a/indev/psxpad/pad.S b/indev/psxpad/pad.S new file mode 100644 index 0000000..0fa66aa --- /dev/null +++ b/indev/psxpad/pad.S @@ -0,0 +1,264 @@ +.set noreorder + +.include "hwregs_a.h" + +.section .text + +.global _InitPadDirect +.type _InitPadDirect, @function +_InitPadDirect: + + addiu $sp, -4 + sw $ra, 0($sp) + + lui $t0, IOBASE + + # Interface setup + li $v0, 0x40 # Interface reset + sh $v0, JOY_CTRL($t0) + li $v0, 0x88 # 250kHz clock rate + sh $v0, JOY_BAUD($t0) + li $v0, 0x0d # 8-bit, no parity, x1 multiplier + sh $v0, JOY_MODE($t0) + li $v0, 0x1003 # JOY1, TX enabled + sh $v0, JOY_CTRL($t0) + + jal _wait + li $v0, 1000 + + # Empty RX fifo +.Lempty_fifo: + lbu $v1, JOY_TXRX($t0) + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x2 + bnez $v0, .Lempty_fifo + nop + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + + +.global _PadSetPort +.type _PadSetPort, @function +_PadSetPort: + addiu $sp, -4 + sw $ra, 0($sp) + + lui $t0, IOBASE + + beq $a0, 2, .Lstop_comms + nop + + li $v0, 0x1003 # TX Enable, Joypad port select + andi $a0, 1 + sll $a0, 13 + or $v0, $a0 # Select port 2 if a0 is 1 + + 
sh $v0, JOY_CTRL($t0) # Set to Joypad control interface + + jal _wait # Delay for analog pads (needs testing) + li $v0, 500 + +.Lread_empty_fifo_set: # Flush the RX FIFO just in case + lbu $v1, JOY_TXRX($t0) + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x2 + bnez $v0, .Lread_empty_fifo_set + nop + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + +.Lstop_comms: + + sh $0 , JOY_CTRL($t0) + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + + +.global _PadReadDirect +.type _PadReadDirect, @function +_PadReadDirect: + + # a0 - port number + # a1 - device data buffer + # a2 - data max length + + addiu $sp, -4 + sw $ra, 0($sp) + + lui $t0, IOBASE + + li $v0, 0x1003 # TX Enable, Joypad port select + andi $a0, 1 + sll $a0, 13 + or $v0, $a0 # Select port 2 if a0 is 1 + + sh $v0, JOY_CTRL($t0) # Set to Joypad control interface + + jal _wait # Delay for analog pads (needs testing) + li $v0, 310 + +# May cause issues with third party adapters such as Brook wireless +#.Lread_empty_fifo: # Flush the RX FIFO just in case +# lbu $v1, JOY_TXRX($t0) +# lhu $v0, JOY_STAT($t0) +# nop +# andi $v0, 0x2 +# bnez $v0, .Lread_empty_fifo +# nop + + jal _PadExchng # Send device check byte + li $a0, 0x01 + + andi $v1, $v0, 0x100 # No pad if exchange timed out + bnez $v1, .Lno_device + addiu $v0, $0 , 1 + + sb $v0, 0($a1) + addiu $a1, 1 + + jal _wait # 1st exchange needs 27microsec after ACK + li $v0, 190 # (e.g. 
7 as usual + an extra 20) + + jal _PadExchng # Send command byte + li $a0, 0x42 + + sb $v0, 0($a1) + addiu $a1, 1 + addiu $a2, -2 + + jal _PadExchng # Send 0 for pads, 1 for multitap + move $a0, $0 # Read is usually 0x5A + + addi $a3, $0 , 1 + + la $t1, _pad_mot_values + +.Lread_loop: # Read until buffer full, or no more data + + lbu $a0, 0($t1) + nop + beqz $a0, .Lskip_mot + nop + + jal _PadExchng + nop + + b .Ldone_exchg + addiu $t1, 1 + +.Lskip_mot: + + jal _PadExchng # when ACK is no longer triggered + move $a0, $0 + +.Ldone_exchg: + + sb $v0, 0($a1) + + andi $v0, 0x100 + bnez $v0, .Lread_end + addi $a3, 1 + + addiu $a2, -1 + bgtz $a2, .Lread_loop + addiu $a1, 1 + +.Lread_end: + + b .Lexit + move $v0, $a3 + +.Lno_device: + + addiu $v0, $0 , -1 + sb $v0, 0($a1) + +.Lexit: + + sh $0 , JOY_CTRL($t0) + + lw $ra, 0($sp) + addiu $sp, 4 + jr $ra + nop + + +.global _PadExchng +.type _PadExchng, @function +_PadExchng: + + lui $t0, IOBASE + + sb $a0, JOY_TXRX($t0) + nop +.Lsend_wait: + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x4 + beqz $v0, .Lsend_wait + nop + + move $v1, $0 +.Lwait_ack: + bgt $v1, 100, .Ltimeout + lhu $v0, JOY_STAT($t0) + nop + andi $v0, 0x202 + bne $v0, 0x202, .Lwait_ack + addiu $v1, 1 + + b .Ldone + nop + +.Ltimeout: + + lbu $v0, JOY_TXRX($t0) + nop + b .Lexit_exchg + ori $v0, 0x100 + +.Ldone: + + lhu $v1, JOY_CTRL($t0) + lbu $v0, JOY_TXRX($t0) + or $v1, 0x10 + sh $v1, JOY_CTRL($t0) + +.Lexit_exchg: + + jr $ra + nop + + +.global _wait +.type _wait, @function +_wait: + addiu $v0, -1 + bgtz $v0, _wait + nop + jr $ra + nop + + +.section .data + +.global _pad_mot_values +.type _pad_mot_values, @object +_pad_mot_values: + .byte 0 # Small motor + .byte 0 # Big motor + .byte 0 + .byte 0 + + diff --git a/indev/psxpad/pad.s b/indev/psxpad/pad.s deleted file mode 100644 index 0fa66aa..0000000 --- a/indev/psxpad/pad.s +++ /dev/null @@ -1,264 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -.global _InitPadDirect -.type _InitPadDirect, 
@function -_InitPadDirect: - - addiu $sp, -4 - sw $ra, 0($sp) - - lui $t0, IOBASE - - # Interface setup - li $v0, 0x40 # Interface reset - sh $v0, JOY_CTRL($t0) - li $v0, 0x88 # 250kHz clock rate - sh $v0, JOY_BAUD($t0) - li $v0, 0x0d # 8-bit, no parity, x1 multiplier - sh $v0, JOY_MODE($t0) - li $v0, 0x1003 # JOY1, TX enabled - sh $v0, JOY_CTRL($t0) - - jal _wait - li $v0, 1000 - - # Empty RX fifo -.Lempty_fifo: - lbu $v1, JOY_TXRX($t0) - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x2 - bnez $v0, .Lempty_fifo - nop - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - -.global _PadSetPort -.type _PadSetPort, @function -_PadSetPort: - addiu $sp, -4 - sw $ra, 0($sp) - - lui $t0, IOBASE - - beq $a0, 2, .Lstop_comms - nop - - li $v0, 0x1003 # TX Enable, Joypad port select - andi $a0, 1 - sll $a0, 13 - or $v0, $a0 # Select port 2 if a0 is 1 - - sh $v0, JOY_CTRL($t0) # Set to Joypad control interface - - jal _wait # Delay for analog pads (needs testing) - li $v0, 500 - -.Lread_empty_fifo_set: # Flush the RX FIFO just in case - lbu $v1, JOY_TXRX($t0) - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x2 - bnez $v0, .Lread_empty_fifo_set - nop - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - -.Lstop_comms: - - sh $0 , JOY_CTRL($t0) - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - -.global _PadReadDirect -.type _PadReadDirect, @function -_PadReadDirect: - - # a0 - port number - # a1 - device data buffer - # a2 - data max length - - addiu $sp, -4 - sw $ra, 0($sp) - - lui $t0, IOBASE - - li $v0, 0x1003 # TX Enable, Joypad port select - andi $a0, 1 - sll $a0, 13 - or $v0, $a0 # Select port 2 if a0 is 1 - - sh $v0, JOY_CTRL($t0) # Set to Joypad control interface - - jal _wait # Delay for analog pads (needs testing) - li $v0, 310 - -# May cause issues with third party adapters such as Brook wireless -#.Lread_empty_fifo: # Flush the RX FIFO just in case -# lbu $v1, JOY_TXRX($t0) -# lhu $v0, JOY_STAT($t0) -# nop -# andi $v0, 0x2 -# bnez $v0, .Lread_empty_fifo -# nop - - jal 
_PadExchng # Send device check byte - li $a0, 0x01 - - andi $v1, $v0, 0x100 # No pad if exchange timed out - bnez $v1, .Lno_device - addiu $v0, $0 , 1 - - sb $v0, 0($a1) - addiu $a1, 1 - - jal _wait # 1st exchange needs 27microsec after ACK - li $v0, 190 # (e.g. 7 as usual + an extra 20) - - jal _PadExchng # Send command byte - li $a0, 0x42 - - sb $v0, 0($a1) - addiu $a1, 1 - addiu $a2, -2 - - jal _PadExchng # Send 0 for pads, 1 for multitap - move $a0, $0 # Read is usually 0x5A - - addi $a3, $0 , 1 - - la $t1, _pad_mot_values - -.Lread_loop: # Read until buffer full, or no more data - - lbu $a0, 0($t1) - nop - beqz $a0, .Lskip_mot - nop - - jal _PadExchng - nop - - b .Ldone_exchg - addiu $t1, 1 - -.Lskip_mot: - - jal _PadExchng # when ACK is no longer triggered - move $a0, $0 - -.Ldone_exchg: - - sb $v0, 0($a1) - - andi $v0, 0x100 - bnez $v0, .Lread_end - addi $a3, 1 - - addiu $a2, -1 - bgtz $a2, .Lread_loop - addiu $a1, 1 - -.Lread_end: - - b .Lexit - move $v0, $a3 - -.Lno_device: - - addiu $v0, $0 , -1 - sb $v0, 0($a1) - -.Lexit: - - sh $0 , JOY_CTRL($t0) - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - -.global _PadExchng -.type _PadExchng, @function -_PadExchng: - - lui $t0, IOBASE - - sb $a0, JOY_TXRX($t0) - nop -.Lsend_wait: - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x4 - beqz $v0, .Lsend_wait - nop - - move $v1, $0 -.Lwait_ack: - bgt $v1, 100, .Ltimeout - lhu $v0, JOY_STAT($t0) - nop - andi $v0, 0x202 - bne $v0, 0x202, .Lwait_ack - addiu $v1, 1 - - b .Ldone - nop - -.Ltimeout: - - lbu $v0, JOY_TXRX($t0) - nop - b .Lexit_exchg - ori $v0, 0x100 - -.Ldone: - - lhu $v1, JOY_CTRL($t0) - lbu $v0, JOY_TXRX($t0) - or $v1, 0x10 - sh $v1, JOY_CTRL($t0) - -.Lexit_exchg: - - jr $ra - nop - - -.global _wait -.type _wait, @function -_wait: - addiu $v0, -1 - bgtz $v0, _wait - nop - jr $ra - nop - - -.section .data - -.global _pad_mot_values -.type _pad_mot_values, @object -_pad_mot_values: - .byte 0 # Small motor - .byte 0 # Big motor - .byte 0 - .byte 0 - - diff 
--git a/libpsn00b/CMakeLists.txt b/libpsn00b/CMakeLists.txt index a6b6df3..77784de 100644 --- a/libpsn00b/CMakeLists.txt +++ b/libpsn00b/CMakeLists.txt @@ -30,7 +30,7 @@ foreach(_library IN LISTS PSN00BSDK_LIBRARIES) file( GLOB_RECURSE _sources - ${_path}/*.s ${_path}/*.c ${_path}/*.cpp + ${_path}/*.S ${_path}/*.c ${_path}/*.cpp ) # Build a separate version of the library for each supported target type diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 52ddba8..887f4ba 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -592,7 +592,7 @@ void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); void SetDrawOpType(GPU_DrawOpType type); -int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3); +int EnqueueDrawOp(void (*func)(uint32_t, uint32_t, uint32_t), uint32_t arg1, uint32_t arg2, uint32_t arg3); int DrawSync(int mode); void *DrawSyncCallback(void (*func)(void)); diff --git a/libpsn00b/libc/_start.S b/libpsn00b/libc/_start.S new file mode 100644 index 0000000..35a4eaa --- /dev/null +++ b/libpsn00b/libc/_start.S @@ -0,0 +1,18 @@ +# PSn00bSDK _start() trampoline +# (C) 2022 spicyjpeg - MPL licensed +# +# This file provides a weak function that can be easily overridden to e.g. set +# $sp or perform additional initialization before the "real" _start() function +# (_start_inner()) is called. + +.set noreorder + +.section .text._start, "ax", @progbits +.global _start +.type _start, @function +.weak _start + +_start: + la $gp, _gp + j _start_inner + nop diff --git a/libpsn00b/libc/_start.s b/libpsn00b/libc/_start.s deleted file mode 100644 index 35a4eaa..0000000 --- a/libpsn00b/libc/_start.s +++ /dev/null @@ -1,18 +0,0 @@ -# PSn00bSDK _start() trampoline -# (C) 2022 spicyjpeg - MPL licensed -# -# This file provides a weak function that can be easily overridden to e.g. 
set -# $sp or perform additional initialization before the "real" _start() function -# (_start_inner()) is called. - -.set noreorder - -.section .text._start, "ax", @progbits -.global _start -.type _start, @function -.weak _start - -_start: - la $gp, _gp - j _start_inner - nop diff --git a/libpsn00b/libc/clz.S b/libpsn00b/libc/clz.S new file mode 100644 index 0000000..1ccff2e --- /dev/null +++ b/libpsn00b/libc/clz.S @@ -0,0 +1,53 @@ +# PSn00bSDK leading zero count intrinsics +# (C) 2022-2023 spicyjpeg - MPL licensed +# +# libgcc provides two functions used internally by GCC to count the number of +# leading zeroes in a value, __clzsi2() (32-bit) and __clzdi2() (64-bit). This +# file overrides them with smaller implementations that make use of the GTE's +# LZCS/LZCR registers. + +.set noreorder + +.set LZCS, $30 +.set LZCR, $31 + +.section .text.__clzsi2, "ax", @progbits +.global __clzsi2 +.type __clzsi2, @function + +__clzsi2: + mtc2 $a0, LZCS + bltz $a0, .Lreturn # if (value & (1 << 31)) return 0 + li $v0, 0 + mfc2 $v0, LZCR # else return GTE_CLZ(value) + +.Lreturn: + jr $ra + nop + +.section .text.__clzdi2, "ax", @progbits +.global __clzdi2 +.type __clzdi2, @function + +__clzdi2: + mtc2 $a1, LZCS + bltz $a1, .Lreturn2 # if (msb & (1 << 31)) return 0 + li $v0, 0 + bnez $a1, .LreturnMSB # else if (msb) return GTE_CLZ(msb) + nop + +.LnoMSB: + mtc2 $a0, LZCS + bltz $a0, .Lreturn2 # else if (lsb & (1 << 31)) return 32 + li $v0, 32 + mfc2 $v0, LZCR # else return 32 + GTE_CLZ(lsb) + + jr $ra + addiu $v0, 32 + +.LreturnMSB: + mfc2 $v0, LZCR + +.Lreturn2: + jr $ra + nop diff --git a/libpsn00b/libc/clz.s b/libpsn00b/libc/clz.s deleted file mode 100644 index 1ccff2e..0000000 --- a/libpsn00b/libc/clz.s +++ /dev/null @@ -1,53 +0,0 @@ -# PSn00bSDK leading zero count intrinsics -# (C) 2022-2023 spicyjpeg - MPL licensed -# -# libgcc provides two functions used internally by GCC to count the number of -# leading zeroes in a value, __clzsi2() (32-bit) and __clzdi2() (64-bit). 
This -# file overrides them with smaller implementations that make use of the GTE's -# LZCS/LZCR registers. - -.set noreorder - -.set LZCS, $30 -.set LZCR, $31 - -.section .text.__clzsi2, "ax", @progbits -.global __clzsi2 -.type __clzsi2, @function - -__clzsi2: - mtc2 $a0, LZCS - bltz $a0, .Lreturn # if (value & (1 << 31)) return 0 - li $v0, 0 - mfc2 $v0, LZCR # else return GTE_CLZ(value) - -.Lreturn: - jr $ra - nop - -.section .text.__clzdi2, "ax", @progbits -.global __clzdi2 -.type __clzdi2, @function - -__clzdi2: - mtc2 $a1, LZCS - bltz $a1, .Lreturn2 # if (msb & (1 << 31)) return 0 - li $v0, 0 - bnez $a1, .LreturnMSB # else if (msb) return GTE_CLZ(msb) - nop - -.LnoMSB: - mtc2 $a0, LZCS - bltz $a0, .Lreturn2 # else if (lsb & (1 << 31)) return 32 - li $v0, 32 - mfc2 $v0, LZCR # else return 32 + GTE_CLZ(lsb) - - jr $ra - addiu $v0, 32 - -.LreturnMSB: - mfc2 $v0, LZCR - -.Lreturn2: - jr $ra - nop diff --git a/libpsn00b/libc/memset.S b/libpsn00b/libc/memset.S new file mode 100644 index 0000000..59cb10b --- /dev/null +++ b/libpsn00b/libc/memset.S @@ -0,0 +1,119 @@ +# PSn00bSDK optimized memset +# (C) 2022 spicyjpeg - MPL licensed + +.set noreorder + +.section .text.memset, "ax", @progbits +.global memset +.type memset, @function + +memset: + # If more than 16 bytes have to be written then take the "large" path, + # otherwise use the code below. + addiu $t0, $a2, -16 + bgtz $t0, .Llarge_fill + move $v0, $a0 # return_value = dest + + # Jump to one of the sb opcodes below. This is basically a cut-down Duff's + # device implementation with no looping. 
+ la $t0, .Lsmall_duff + 0x40 # jump_addr = &small_duff[(16 - count) * 4] + sll $t1, $a2, 2 + subu $t0, $t1 + addu $a0, $a2 # dest -= 16 - count + jr $t0 + addiu $a0, -16 + +.Lsmall_duff: + sb $a1, 0x0($a0) + sb $a1, 0x1($a0) + sb $a1, 0x2($a0) + sb $a1, 0x3($a0) + sb $a1, 0x4($a0) + sb $a1, 0x5($a0) + sb $a1, 0x6($a0) + sb $a1, 0x7($a0) + sb $a1, 0x8($a0) + sb $a1, 0x9($a0) + sb $a1, 0xa($a0) + sb $a1, 0xb($a0) + sb $a1, 0xc($a0) + sb $a1, 0xd($a0) + sb $a1, 0xe($a0) + sb $a1, 0xf($a0) + jr $ra + nop + +.Llarge_fill: + # Initialize fast filling by repeating the fill byte 4 times, so it can be + # written 32 bits at a time. + andi $a1, 0xff # ch &= 0xff + sll $t0, $a1, 8 # ch |= (ch << 8) | (ch << 16) | (ch << 24) + or $a1, $t0 + sll $t0, $a1, 16 + or $a1, $t0 + + # Fill the first 1-4 bytes (here the swr instruction does all the magic) + # and update dest and count accordingly. + swr $a1, 0($a0) + andi $t0, $a0, 3 # align = 4 - (dest % 4) + addiu $t0, -4 + addu $a2, $t0 # count -= align + subu $a0, $t0 # dest += align + + la $t1, .Llarge_duff + andi $t2, $a2, 3 # remainder = count % 4 + subu $a2, $t2 # count -= remainder + +.Llarge_fill_loop: + # If 128 bytes or more still have to be written, skip calculating the jump + # offset and execute the whole block of sw opcodes. + addiu $a2, -0x80 # count -= 0x80 + bgez $a2, .Llarge_duff + #nop + + # Jump to one of the sw opcodes below. This is the "full" Duff's device. 
+ subu $t0, $t1, $a2 # jump_addr = &large_duff[0x80 - (count + 0x80)] + jr $t0 + addu $a0, $a2 # dest -= 0x80 - (count + 0x80) + +.Llarge_duff: + sw $a1, 0x00($a0) + sw $a1, 0x04($a0) + sw $a1, 0x08($a0) + sw $a1, 0x0c($a0) + sw $a1, 0x10($a0) + sw $a1, 0x14($a0) + sw $a1, 0x18($a0) + sw $a1, 0x1c($a0) + sw $a1, 0x20($a0) + sw $a1, 0x24($a0) + sw $a1, 0x28($a0) + sw $a1, 0x2c($a0) + sw $a1, 0x30($a0) + sw $a1, 0x34($a0) + sw $a1, 0x38($a0) + sw $a1, 0x3c($a0) + sw $a1, 0x40($a0) + sw $a1, 0x44($a0) + sw $a1, 0x48($a0) + sw $a1, 0x4c($a0) + sw $a1, 0x50($a0) + sw $a1, 0x54($a0) + sw $a1, 0x58($a0) + sw $a1, 0x5c($a0) + sw $a1, 0x60($a0) + sw $a1, 0x64($a0) + sw $a1, 0x68($a0) + sw $a1, 0x6c($a0) + sw $a1, 0x70($a0) + sw $a1, 0x74($a0) + sw $a1, 0x78($a0) + sw $a1, 0x7c($a0) + + bgtz $a2, .Llarge_fill_loop + addiu $a0, 0x80 # dest += 0x80 + + # Fill the remaining 1-4 bytes, using (again) an unaligned store. + addu $a0, $t2 # last_byte = dest + remainder - 1 + jr $ra + swl $a1, -1($a0) diff --git a/libpsn00b/libc/memset.s b/libpsn00b/libc/memset.s deleted file mode 100644 index 59cb10b..0000000 --- a/libpsn00b/libc/memset.s +++ /dev/null @@ -1,119 +0,0 @@ -# PSn00bSDK optimized memset -# (C) 2022 spicyjpeg - MPL licensed - -.set noreorder - -.section .text.memset, "ax", @progbits -.global memset -.type memset, @function - -memset: - # If more than 16 bytes have to be written then take the "large" path, - # otherwise use the code below. - addiu $t0, $a2, -16 - bgtz $t0, .Llarge_fill - move $v0, $a0 # return_value = dest - - # Jump to one of the sb opcodes below. This is basically a cut-down Duff's - # device implementation with no looping. 
- la $t0, .Lsmall_duff + 0x40 # jump_addr = &small_duff[(16 - count) * 4] - sll $t1, $a2, 2 - subu $t0, $t1 - addu $a0, $a2 # dest -= 16 - count - jr $t0 - addiu $a0, -16 - -.Lsmall_duff: - sb $a1, 0x0($a0) - sb $a1, 0x1($a0) - sb $a1, 0x2($a0) - sb $a1, 0x3($a0) - sb $a1, 0x4($a0) - sb $a1, 0x5($a0) - sb $a1, 0x6($a0) - sb $a1, 0x7($a0) - sb $a1, 0x8($a0) - sb $a1, 0x9($a0) - sb $a1, 0xa($a0) - sb $a1, 0xb($a0) - sb $a1, 0xc($a0) - sb $a1, 0xd($a0) - sb $a1, 0xe($a0) - sb $a1, 0xf($a0) - jr $ra - nop - -.Llarge_fill: - # Initialize fast filling by repeating the fill byte 4 times, so it can be - # written 32 bits at a time. - andi $a1, 0xff # ch &= 0xff - sll $t0, $a1, 8 # ch |= (ch << 8) | (ch << 16) | (ch << 24) - or $a1, $t0 - sll $t0, $a1, 16 - or $a1, $t0 - - # Fill the first 1-4 bytes (here the swr instruction does all the magic) - # and update dest and count accordingly. - swr $a1, 0($a0) - andi $t0, $a0, 3 # align = 4 - (dest % 4) - addiu $t0, -4 - addu $a2, $t0 # count -= align - subu $a0, $t0 # dest += align - - la $t1, .Llarge_duff - andi $t2, $a2, 3 # remainder = count % 4 - subu $a2, $t2 # count -= remainder - -.Llarge_fill_loop: - # If 128 bytes or more still have to be written, skip calculating the jump - # offset and execute the whole block of sw opcodes. - addiu $a2, -0x80 # count -= 0x80 - bgez $a2, .Llarge_duff - #nop - - # Jump to one of the sw opcodes below. This is the "full" Duff's device. 
- subu $t0, $t1, $a2 # jump_addr = &large_duff[0x80 - (count + 0x80)] - jr $t0 - addu $a0, $a2 # dest -= 0x80 - (count + 0x80) - -.Llarge_duff: - sw $a1, 0x00($a0) - sw $a1, 0x04($a0) - sw $a1, 0x08($a0) - sw $a1, 0x0c($a0) - sw $a1, 0x10($a0) - sw $a1, 0x14($a0) - sw $a1, 0x18($a0) - sw $a1, 0x1c($a0) - sw $a1, 0x20($a0) - sw $a1, 0x24($a0) - sw $a1, 0x28($a0) - sw $a1, 0x2c($a0) - sw $a1, 0x30($a0) - sw $a1, 0x34($a0) - sw $a1, 0x38($a0) - sw $a1, 0x3c($a0) - sw $a1, 0x40($a0) - sw $a1, 0x44($a0) - sw $a1, 0x48($a0) - sw $a1, 0x4c($a0) - sw $a1, 0x50($a0) - sw $a1, 0x54($a0) - sw $a1, 0x58($a0) - sw $a1, 0x5c($a0) - sw $a1, 0x60($a0) - sw $a1, 0x64($a0) - sw $a1, 0x68($a0) - sw $a1, 0x6c($a0) - sw $a1, 0x70($a0) - sw $a1, 0x74($a0) - sw $a1, 0x78($a0) - sw $a1, 0x7c($a0) - - bgtz $a2, .Llarge_fill_loop - addiu $a0, 0x80 # dest += 0x80 - - # Fill the remaining 1-4 bytes, using (again) an unaligned store. - addu $a0, $t2 # last_byte = dest + remainder - 1 - jr $ra - swl $a1, -1($a0) diff --git a/libpsn00b/libc/setjmp.S b/libpsn00b/libc/setjmp.S new file mode 100644 index 0000000..fb0dc6b --- /dev/null +++ b/libpsn00b/libc/setjmp.S @@ -0,0 +1,50 @@ +# PSn00bSDK setjmp/longjmp +# (C) 2023 spicyjpeg - MPL licensed +# +# This is not a "proper" implementation of setjmp/longjmp as it does not save +# COP0 and GTE registers, but it is fully compatible with the version found in +# the BIOS. 
+ +.set noreorder + +.section .text.setjmp, "ax", @progbits +.global setjmp +.type setjmp, @function + +setjmp: + sw $ra, 0x00($a0) + sw $sp, 0x04($a0) + sw $fp, 0x08($a0) + sw $s0, 0x0c($a0) + sw $s1, 0x10($a0) + sw $s2, 0x14($a0) + sw $s3, 0x18($a0) + sw $s4, 0x1c($a0) + sw $s5, 0x20($a0) + sw $s6, 0x24($a0) + sw $s7, 0x28($a0) + sw $gp, 0x2c($a0) + + jr $ra + li $v0, 0 + +.section .text.longjmp, "ax", @progbits +.global longjmp +.type longjmp, @function + +longjmp: + lw $ra, 0x00($a0) + lw $sp, 0x04($a0) + lw $fp, 0x08($a0) + lw $s0, 0x0c($a0) + lw $s1, 0x10($a0) + lw $s2, 0x14($a0) + lw $s3, 0x18($a0) + lw $s4, 0x1c($a0) + lw $s5, 0x20($a0) + lw $s6, 0x24($a0) + lw $s7, 0x28($a0) + lw $gp, 0x2c($a0) + + jr $ra + move $v0, $a1 diff --git a/libpsn00b/libc/setjmp.s b/libpsn00b/libc/setjmp.s deleted file mode 100644 index fb0dc6b..0000000 --- a/libpsn00b/libc/setjmp.s +++ /dev/null @@ -1,50 +0,0 @@ -# PSn00bSDK setjmp/longjmp -# (C) 2023 spicyjpeg - MPL licensed -# -# This is not a "proper" implementation of setjmp/longjmp as it does not save -# COP0 and GTE registers, but it is fully compatible with the version found in -# the BIOS. 
- -.set noreorder - -.section .text.setjmp, "ax", @progbits -.global setjmp -.type setjmp, @function - -setjmp: - sw $ra, 0x00($a0) - sw $sp, 0x04($a0) - sw $fp, 0x08($a0) - sw $s0, 0x0c($a0) - sw $s1, 0x10($a0) - sw $s2, 0x14($a0) - sw $s3, 0x18($a0) - sw $s4, 0x1c($a0) - sw $s5, 0x20($a0) - sw $s6, 0x24($a0) - sw $s7, 0x28($a0) - sw $gp, 0x2c($a0) - - jr $ra - li $v0, 0 - -.section .text.longjmp, "ax", @progbits -.global longjmp -.type longjmp, @function - -longjmp: - lw $ra, 0x00($a0) - lw $sp, 0x04($a0) - lw $fp, 0x08($a0) - lw $s0, 0x0c($a0) - lw $s1, 0x10($a0) - lw $s2, 0x14($a0) - lw $s3, 0x18($a0) - lw $s4, 0x1c($a0) - lw $s5, 0x20($a0) - lw $s6, 0x24($a0) - lw $s7, 0x28($a0) - lw $gp, 0x2c($a0) - - jr $ra - move $v0, $a1 diff --git a/libpsn00b/psxapi/_syscalls.S b/libpsn00b/psxapi/_syscalls.S new file mode 100644 index 0000000..cef349d --- /dev/null +++ b/libpsn00b/psxapi/_syscalls.S @@ -0,0 +1,161 @@ +# PSn00bSDK syscall wrappers +# (C) 2022-2023 spicyjpeg - MPL licensed + +.set noreorder + +## Interrupt enable/disable + +.section .text.EnterCriticalSection, "ax", @progbits +.global EnterCriticalSection +.type EnterCriticalSection, @function + +EnterCriticalSection: + li $a0, 0x01 + syscall 0 + + jr $ra + nop + +.section .text.ExitCriticalSection, "ax", @progbits +.global ExitCriticalSection +.type ExitCriticalSection, @function + +ExitCriticalSection: + li $a0, 0x02 + syscall 0 + + jr $ra + nop + +.section .text.SwEnterCriticalSection, "ax", @progbits +.global SwEnterCriticalSection +.type SwEnterCriticalSection, @function + +SwEnterCriticalSection: + mfc0 $a0, $12 # cop0r12 &= ~0x401 + li $a1, -1026 + and $a1, $a0 + mtc0 $a1, $12 + andi $a0, 0x0401 # return !((cop0r12_prev & 0x401) < 0x401) + sltiu $v0, $a0, 0x0401 + + jr $ra + xori $v0, 1 + +.section .text.SwExitCriticalSection, "ax", @progbits +.global SwExitCriticalSection +.type SwExitCriticalSection, @function + +SwExitCriticalSection: + mfc0 $a0, $12 # cop0r12 |= 0x401 + nop + ori $a0, 0x0401 + 
mtc0 $a0, $12 + nop + + jr $ra + nop + +## PCDRV (host file access) API + +.section .text.PCinit, "ax", @progbits +.global PCinit +.type PCinit, @function + +PCinit: + break 0, 0x101 # () -> error + + jr $ra + nop + +.section .text.PCcreat, "ax", @progbits +.global PCcreat +.type PCcreat, @function + +PCcreat: + li $a2, 0 + move $a1, $a0 + break 0, 0x102 # (path, path, 0) -> error, fd + + bgez $v0, .Lcreate_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lcreate_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCopen, "ax", @progbits +.global PCopen +.type PCopen, @function + +PCopen: + move $a2, $a1 + move $a1, $a0 + break 0, 0x103 # (path, path, mode) -> error, fd + + bgez $v0, .Lopen_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lopen_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCclose, "ax", @progbits +.global PCclose +.type PCclose, @function + +PCclose: + move $a1, $a0 + break 0, 0x104 # (fd, fd) -> error + + jr $ra + nop + +.section .text.PCread, "ax", @progbits +.global PCread +.type PCread, @function + +PCread: + move $a3, $a1 + move $a1, $a0 + break 0, 0x105 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lread_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lread_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PCwrite, "ax", @progbits +.global PCwrite +.type PCwrite, @function + +PCwrite: + move $a3, $a1 + move $a1, $a0 + break 0, 0x106 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lwrite_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lwrite_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PClseek, "ax", @progbits +.global PClseek +.type PClseek, @function + +PClseek: + move $a3, $a2 + move $a2, $a1 + move $a1, $a0 + break 0, 0x107 # (fd, fd, offset, mode) -> error, offset + + bgez $v0, .Lseek_ok # if (error < 0) offset = error + nop + move $v1, $v0 +.Lseek_ok: + jr $ra # return offset + move $v0, $v1 diff --git a/libpsn00b/psxapi/_syscalls.s 
b/libpsn00b/psxapi/_syscalls.s deleted file mode 100644 index cef349d..0000000 --- a/libpsn00b/psxapi/_syscalls.s +++ /dev/null @@ -1,161 +0,0 @@ -# PSn00bSDK syscall wrappers -# (C) 2022-2023 spicyjpeg - MPL licensed - -.set noreorder - -## Interrupt enable/disable - -.section .text.EnterCriticalSection, "ax", @progbits -.global EnterCriticalSection -.type EnterCriticalSection, @function - -EnterCriticalSection: - li $a0, 0x01 - syscall 0 - - jr $ra - nop - -.section .text.ExitCriticalSection, "ax", @progbits -.global ExitCriticalSection -.type ExitCriticalSection, @function - -ExitCriticalSection: - li $a0, 0x02 - syscall 0 - - jr $ra - nop - -.section .text.SwEnterCriticalSection, "ax", @progbits -.global SwEnterCriticalSection -.type SwEnterCriticalSection, @function - -SwEnterCriticalSection: - mfc0 $a0, $12 # cop0r12 &= ~0x401 - li $a1, -1026 - and $a1, $a0 - mtc0 $a1, $12 - andi $a0, 0x0401 # return !((cop0r12_prev & 0x401) < 0x401) - sltiu $v0, $a0, 0x0401 - - jr $ra - xori $v0, 1 - -.section .text.SwExitCriticalSection, "ax", @progbits -.global SwExitCriticalSection -.type SwExitCriticalSection, @function - -SwExitCriticalSection: - mfc0 $a0, $12 # cop0r12 |= 0x401 - nop - ori $a0, 0x0401 - mtc0 $a0, $12 - nop - - jr $ra - nop - -## PCDRV (host file access) API - -.section .text.PCinit, "ax", @progbits -.global PCinit -.type PCinit, @function - -PCinit: - break 0, 0x101 # () -> error - - jr $ra - nop - -.section .text.PCcreat, "ax", @progbits -.global PCcreat -.type PCcreat, @function - -PCcreat: - li $a2, 0 - move $a1, $a0 - break 0, 0x102 # (path, path, 0) -> error, fd - - bgez $v0, .Lcreate_ok # if (error < 0) fd = error - nop - move $v1, $v0 -.Lcreate_ok: - jr $ra # return fd - move $v0, $v1 - -.section .text.PCopen, "ax", @progbits -.global PCopen -.type PCopen, @function - -PCopen: - move $a2, $a1 - move $a1, $a0 - break 0, 0x103 # (path, path, mode) -> error, fd - - bgez $v0, .Lopen_ok # if (error < 0) fd = error - nop - move $v1, $v0 -.Lopen_ok: - 
jr $ra # return fd - move $v0, $v1 - -.section .text.PCclose, "ax", @progbits -.global PCclose -.type PCclose, @function - -PCclose: - move $a1, $a0 - break 0, 0x104 # (fd, fd) -> error - - jr $ra - nop - -.section .text.PCread, "ax", @progbits -.global PCread -.type PCread, @function - -PCread: - move $a3, $a1 - move $a1, $a0 - break 0, 0x105 # (fd, fd, length, data) -> error, length - - bgez $v0, .Lread_ok # if (error < 0) length = error - nop - move $v1, $v0 -.Lread_ok: - jr $ra # return length - move $v0, $v1 - -.section .text.PCwrite, "ax", @progbits -.global PCwrite -.type PCwrite, @function - -PCwrite: - move $a3, $a1 - move $a1, $a0 - break 0, 0x106 # (fd, fd, length, data) -> error, length - - bgez $v0, .Lwrite_ok # if (error < 0) length = error - nop - move $v1, $v0 -.Lwrite_ok: - jr $ra # return length - move $v0, $v1 - -.section .text.PClseek, "ax", @progbits -.global PClseek -.type PClseek, @function - -PClseek: - move $a3, $a2 - move $a2, $a1 - move $a1, $a0 - break 0, 0x107 # (fd, fd, offset, mode) -> error, offset - - bgez $v0, .Lseek_ok # if (error < 0) offset = error - nop - move $v1, $v0 -.Lseek_ok: - jr $ra # return offset - move $v0, $v1 diff --git a/libpsn00b/psxapi/drivers.S b/libpsn00b/psxapi/drivers.S new file mode 100644 index 0000000..eb6dcd5 --- /dev/null +++ b/libpsn00b/psxapi/drivers.S @@ -0,0 +1,164 @@ +# PSn00bSDK BIOS API stubs +# (C) 2022 spicyjpeg - MPL licensed + +# This file has been generated automatically. Each function is placed in its +# own section to allow the linker to strip unused functions. 
+ +.set noreorder + +## A0 table functions (7) + +.section .text._bu_init +.global _bu_init +.type _bu_init, @function +_bu_init: + li $t2, 0xa0 + jr $t2 + li $t1, 0x70 + +.section .text._96_init +.global _96_init +.type _96_init, @function +_96_init: + li $t2, 0xa0 + jr $t2 + li $t1, 0x71 + +.section .text._96_remove +.global _96_remove +.type _96_remove, @function +_96_remove: + li $t2, 0xa0 + jr $t2 + li $t1, 0x72 + +.section .text.add_nullcon_driver +.global add_nullcon_driver +.type add_nullcon_driver, @function +add_nullcon_driver: + li $t2, 0xa0 + jr $t2 + li $t1, 0x99 + +.section .text._card_info +.global _card_info +.type _card_info, @function +_card_info: + li $t2, 0xa0 + jr $t2 + li $t1, 0xab + +.section .text._card_load +.global _card_load +.type _card_load, @function +_card_load: + li $t2, 0xa0 + jr $t2 + li $t1, 0xac + +.section .text._card_clear +.global _card_clear +.type _card_clear, @function +_card_clear: + li $t2, 0xa0 + jr $t2 + li $t1, 0xaf + +## B0 table functions (12) + +.section .text.AddDrv +.global AddDrv +.type AddDrv, @function +AddDrv: + li $t2, 0xb0 + jr $t2 + li $t1, 0x47 + +.section .text.DelDrv +.global DelDrv +.type DelDrv, @function +DelDrv: + li $t2, 0xb0 + jr $t2 + li $t1, 0x48 + +.section .text.ListDrv +.global ListDrv +.type ListDrv, @function +ListDrv: + li $t2, 0xb0 + jr $t2 + li $t1, 0x49 + +.section .text.InitCARD +.global InitCARD +.type InitCARD, @function +InitCARD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x4a + +.section .text.StartCARD +.global StartCARD +.type StartCARD, @function +StartCARD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x4b + +.section .text.StopCARD +.global StopCARD +.type StopCARD, @function +StopCARD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x4c + +.section .text._card_write +.global _card_write +.type _card_write, @function +_card_write: + li $t2, 0xb0 + jr $t2 + li $t1, 0x4e + +.section .text._card_read +.global _card_read +.type _card_read, @function +_card_read: + li $t2, 0xb0 + jr $t2 + li $t1, 0x4f + +.section 
.text._new_card +.global _new_card +.type _new_card, @function +_new_card: + li $t2, 0xb0 + jr $t2 + li $t1, 0x50 + +.section .text._card_chan +.global _card_chan +.type _card_chan, @function +_card_chan: + li $t2, 0xb0 + jr $t2 + li $t1, 0x58 + +.section .text._card_status +.global _card_status +.type _card_status, @function +_card_status: + li $t2, 0xb0 + jr $t2 + li $t1, 0x5c + +.section .text._card_wait +.global _card_wait +.type _card_wait, @function +_card_wait: + li $t2, 0xb0 + jr $t2 + li $t1, 0x5d + diff --git a/libpsn00b/psxapi/drivers.s b/libpsn00b/psxapi/drivers.s deleted file mode 100644 index eb6dcd5..0000000 --- a/libpsn00b/psxapi/drivers.s +++ /dev/null @@ -1,164 +0,0 @@ -# PSn00bSDK BIOS API stubs -# (C) 2022 spicyjpeg - MPL licensed - -# This file has been generated automatically. Each function is placed in its -# own section to allow the linker to strip unused functions. - -.set noreorder - -## A0 table functions (7) - -.section .text._bu_init -.global _bu_init -.type _bu_init, @function -_bu_init: - li $t2, 0xa0 - jr $t2 - li $t1, 0x70 - -.section .text._96_init -.global _96_init -.type _96_init, @function -_96_init: - li $t2, 0xa0 - jr $t2 - li $t1, 0x71 - -.section .text._96_remove -.global _96_remove -.type _96_remove, @function -_96_remove: - li $t2, 0xa0 - jr $t2 - li $t1, 0x72 - -.section .text.add_nullcon_driver -.global add_nullcon_driver -.type add_nullcon_driver, @function -add_nullcon_driver: - li $t2, 0xa0 - jr $t2 - li $t1, 0x99 - -.section .text._card_info -.global _card_info -.type _card_info, @function -_card_info: - li $t2, 0xa0 - jr $t2 - li $t1, 0xab - -.section .text._card_load -.global _card_load -.type _card_load, @function -_card_load: - li $t2, 0xa0 - jr $t2 - li $t1, 0xac - -.section .text._card_clear -.global _card_clear -.type _card_clear, @function -_card_clear: - li $t2, 0xa0 - jr $t2 - li $t1, 0xaf - -## B0 table functions (12) - -.section .text.AddDrv -.global AddDrv -.type AddDrv, @function -AddDrv: - li $t2, 0xb0 
- jr $t2 - li $t1, 0x47 - -.section .text.DelDrv -.global DelDrv -.type DelDrv, @function -DelDrv: - li $t2, 0xb0 - jr $t2 - li $t1, 0x48 - -.section .text.ListDrv -.global ListDrv -.type ListDrv, @function -ListDrv: - li $t2, 0xb0 - jr $t2 - li $t1, 0x49 - -.section .text.InitCARD -.global InitCARD -.type InitCARD, @function -InitCARD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x4a - -.section .text.StartCARD -.global StartCARD -.type StartCARD, @function -StartCARD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x4b - -.section .text.StopCARD -.global StopCARD -.type StopCARD, @function -StopCARD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x4c - -.section .text._card_write -.global _card_write -.type _card_write, @function -_card_write: - li $t2, 0xb0 - jr $t2 - li $t1, 0x4e - -.section .text._card_read -.global _card_read -.type _card_read, @function -_card_read: - li $t2, 0xb0 - jr $t2 - li $t1, 0x4f - -.section .text._new_card -.global _new_card -.type _new_card, @function -_new_card: - li $t2, 0xb0 - jr $t2 - li $t1, 0x50 - -.section .text._card_chan -.global _card_chan -.type _card_chan, @function -_card_chan: - li $t2, 0xb0 - jr $t2 - li $t1, 0x58 - -.section .text._card_status -.global _card_status -.type _card_status, @function -_card_status: - li $t2, 0xb0 - jr $t2 - li $t1, 0x5c - -.section .text._card_wait -.global _card_wait -.type _card_wait, @function -_card_wait: - li $t2, 0xb0 - jr $t2 - li $t1, 0x5d - diff --git a/libpsn00b/psxapi/fs.S b/libpsn00b/psxapi/fs.S new file mode 100644 index 0000000..8b6d57a --- /dev/null +++ b/libpsn00b/psxapi/fs.S @@ -0,0 +1,58 @@ +# PSn00bSDK BIOS API stubs +# (C) 2022 spicyjpeg - MPL licensed + +# This file has been generated automatically. Each function is placed in its +# own section to allow the linker to strip unused functions. 
+ +.set noreorder + +## B0 table functions (6) + +.section .text.cd +.global cd +.type cd, @function +cd: + li $t2, 0xb0 + jr $t2 + li $t1, 0x40 + +.section .text.firstfile +.global firstfile +.type firstfile, @function +firstfile: + li $t2, 0xb0 + jr $t2 + li $t1, 0x42 + +.section .text.nextfile +.global nextfile +.type nextfile, @function +nextfile: + li $t2, 0xb0 + jr $t2 + li $t1, 0x43 + +.section .text.rename +.global rename +.type rename, @function +rename: + li $t2, 0xb0 + jr $t2 + li $t1, 0x44 + +.section .text.erase +.global erase +.type erase, @function +erase: + li $t2, 0xb0 + jr $t2 + li $t1, 0x45 + +.section .text.undelete +.global undelete +.type undelete, @function +undelete: + li $t2, 0xb0 + jr $t2 + li $t1, 0x46 + diff --git a/libpsn00b/psxapi/fs.s b/libpsn00b/psxapi/fs.s deleted file mode 100644 index 8b6d57a..0000000 --- a/libpsn00b/psxapi/fs.s +++ /dev/null @@ -1,58 +0,0 @@ -# PSn00bSDK BIOS API stubs -# (C) 2022 spicyjpeg - MPL licensed - -# This file has been generated automatically. Each function is placed in its -# own section to allow the linker to strip unused functions. 
- -.set noreorder - -## B0 table functions (6) - -.section .text.cd -.global cd -.type cd, @function -cd: - li $t2, 0xb0 - jr $t2 - li $t1, 0x40 - -.section .text.firstfile -.global firstfile -.type firstfile, @function -firstfile: - li $t2, 0xb0 - jr $t2 - li $t1, 0x42 - -.section .text.nextfile -.global nextfile -.type nextfile, @function -nextfile: - li $t2, 0xb0 - jr $t2 - li $t1, 0x43 - -.section .text.rename -.global rename -.type rename, @function -rename: - li $t2, 0xb0 - jr $t2 - li $t1, 0x44 - -.section .text.erase -.global erase -.type erase, @function -erase: - li $t2, 0xb0 - jr $t2 - li $t1, 0x45 - -.section .text.undelete -.global undelete -.type undelete, @function -undelete: - li $t2, 0xb0 - jr $t2 - li $t1, 0x46 - diff --git a/libpsn00b/psxapi/stdio.S b/libpsn00b/psxapi/stdio.S new file mode 100644 index 0000000..14c6d03 --- /dev/null +++ b/libpsn00b/psxapi/stdio.S @@ -0,0 +1,140 @@ +# PSn00bSDK BIOS API stubs +# (C) 2022 spicyjpeg - MPL licensed + +# This file has been generated automatically. Each function is placed in its +# own section to allow the linker to strip unused functions. 
+ +.set noreorder + +## A0 table functions (14) + +.section .text.open +.global open +.type open, @function +open: + li $t2, 0xa0 + jr $t2 + li $t1, 0x00 + +.section .text.lseek +.global lseek +.type lseek, @function +lseek: + li $t2, 0xa0 + jr $t2 + li $t1, 0x01 + +.section .text.read +.global read +.type read, @function +read: + li $t2, 0xa0 + jr $t2 + li $t1, 0x02 + +.section .text.write +.global write +.type write, @function +write: + li $t2, 0xa0 + jr $t2 + li $t1, 0x03 + +.section .text.close +.global close +.type close, @function +close: + li $t2, 0xa0 + jr $t2 + li $t1, 0x04 + +.section .text.ioctl +.global ioctl +.type ioctl, @function +ioctl: + li $t2, 0xa0 + jr $t2 + li $t1, 0x05 + +.section .text.isatty +.global isatty +.type isatty, @function +isatty: + li $t2, 0xa0 + jr $t2 + li $t1, 0x07 + +.section .text.getc +.global getc +.type getc, @function +getc: + li $t2, 0xa0 + jr $t2 + li $t1, 0x08 + +.section .text.putc +.global putc +.type putc, @function +putc: + li $t2, 0xa0 + jr $t2 + li $t1, 0x09 + +.section .text.getchar +.global getchar +.type getchar, @function +getchar: + li $t2, 0xa0 + jr $t2 + li $t1, 0x3b + +.section .text.putchar +.global putchar +.type putchar, @function +putchar: + li $t2, 0xa0 + jr $t2 + li $t1, 0x3c + +.section .text.gets +.global gets +.type gets, @function +gets: + li $t2, 0xa0 + jr $t2 + li $t1, 0x3d + +.section .text.puts +.global puts +.type puts, @function +puts: + li $t2, 0xa0 + jr $t2 + li $t1, 0x3e + +.section .text.printf +.global printf +.type printf, @function +printf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x3f + +## B0 table functions (2) + +.section .text._get_errno +.global _get_errno +.type _get_errno, @function +_get_errno: + li $t2, 0xb0 + jr $t2 + li $t1, 0x54 + +.section .text._get_error +.global _get_error +.type _get_error, @function +_get_error: + li $t2, 0xb0 + jr $t2 + li $t1, 0x55 + diff --git a/libpsn00b/psxapi/stdio.s b/libpsn00b/psxapi/stdio.s deleted file mode 100644 index 14c6d03..0000000 --- 
a/libpsn00b/psxapi/stdio.s +++ /dev/null @@ -1,140 +0,0 @@ -# PSn00bSDK BIOS API stubs -# (C) 2022 spicyjpeg - MPL licensed - -# This file has been generated automatically. Each function is placed in its -# own section to allow the linker to strip unused functions. - -.set noreorder - -## A0 table functions (14) - -.section .text.open -.global open -.type open, @function -open: - li $t2, 0xa0 - jr $t2 - li $t1, 0x00 - -.section .text.lseek -.global lseek -.type lseek, @function -lseek: - li $t2, 0xa0 - jr $t2 - li $t1, 0x01 - -.section .text.read -.global read -.type read, @function -read: - li $t2, 0xa0 - jr $t2 - li $t1, 0x02 - -.section .text.write -.global write -.type write, @function -write: - li $t2, 0xa0 - jr $t2 - li $t1, 0x03 - -.section .text.close -.global close -.type close, @function -close: - li $t2, 0xa0 - jr $t2 - li $t1, 0x04 - -.section .text.ioctl -.global ioctl -.type ioctl, @function -ioctl: - li $t2, 0xa0 - jr $t2 - li $t1, 0x05 - -.section .text.isatty -.global isatty -.type isatty, @function -isatty: - li $t2, 0xa0 - jr $t2 - li $t1, 0x07 - -.section .text.getc -.global getc -.type getc, @function -getc: - li $t2, 0xa0 - jr $t2 - li $t1, 0x08 - -.section .text.putc -.global putc -.type putc, @function -putc: - li $t2, 0xa0 - jr $t2 - li $t1, 0x09 - -.section .text.getchar -.global getchar -.type getchar, @function -getchar: - li $t2, 0xa0 - jr $t2 - li $t1, 0x3b - -.section .text.putchar -.global putchar -.type putchar, @function -putchar: - li $t2, 0xa0 - jr $t2 - li $t1, 0x3c - -.section .text.gets -.global gets -.type gets, @function -gets: - li $t2, 0xa0 - jr $t2 - li $t1, 0x3d - -.section .text.puts -.global puts -.type puts, @function -puts: - li $t2, 0xa0 - jr $t2 - li $t1, 0x3e - -.section .text.printf -.global printf -.type printf, @function -printf: - li $t2, 0xa0 - jr $t2 - li $t1, 0x3f - -## B0 table functions (2) - -.section .text._get_errno -.global _get_errno -.type _get_errno, @function -_get_errno: - li $t2, 0xb0 - jr $t2 - 
li $t1, 0x54 - -.section .text._get_error -.global _get_error -.type _get_error, @function -_get_error: - li $t2, 0xb0 - jr $t2 - li $t1, 0x55 - diff --git a/libpsn00b/psxapi/sys.S b/libpsn00b/psxapi/sys.S new file mode 100644 index 0000000..40dcdff --- /dev/null +++ b/libpsn00b/psxapi/sys.S @@ -0,0 +1,358 @@ +# PSn00bSDK BIOS API stubs +# (C) 2022 spicyjpeg - MPL licensed + +# This file has been generated automatically. Each function is placed in its +# own section to allow the linker to strip unused functions. + +.set noreorder + +## A0 table functions (11) + +.section .text.b_setjmp +.global b_setjmp +.type b_setjmp, @function +b_setjmp: + li $t2, 0xa0 + jr $t2 + li $t1, 0x13 + +.section .text.b_longjmp +.global b_longjmp +.type b_longjmp, @function +b_longjmp: + li $t2, 0xa0 + jr $t2 + li $t1, 0x14 + +.section .text.b_InitHeap +.global b_InitHeap +.type b_InitHeap, @function +b_InitHeap: + li $t2, 0xa0 + jr $t2 + li $t1, 0x39 + +.section .text.Exec +.global Exec +.type Exec, @function +Exec: + li $t2, 0xa0 + jr $t2 + li $t1, 0x43 + +.section .text.FlushCache +.global FlushCache +.type FlushCache, @function +FlushCache: + li $t2, 0xa0 + jr $t2 + li $t1, 0x44 + +.section .text.LoadExec +.global LoadExec +.type LoadExec, @function +LoadExec: + li $t2, 0xa0 + jr $t2 + li $t1, 0x51 + +.section .text.SetConf +.global SetConf +.type SetConf, @function +SetConf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9c + +.section .text.GetConf +.global GetConf +.type GetConf, @function +GetConf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9d + +.section .text.SetMem +.global SetMem +.type SetMem, @function +SetMem: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9f + +.section .text._boot +.global _boot +.type _boot, @function +_boot: + li $t2, 0xa0 + jr $t2 + li $t1, 0xa0 + +.section .text.GetSystemInfo +.global GetSystemInfo +.type GetSystemInfo, @function +GetSystemInfo: + li $t2, 0xa0 + jr $t2 + li $t1, 0xb4 + +## B0 table functions (27) + +.section .text.alloc_kernel_memory +.global 
alloc_kernel_memory +.type alloc_kernel_memory, @function +alloc_kernel_memory: + li $t2, 0xb0 + jr $t2 + li $t1, 0x00 + +.section .text.free_kernel_memory +.global free_kernel_memory +.type free_kernel_memory, @function +free_kernel_memory: + li $t2, 0xb0 + jr $t2 + li $t1, 0x01 + +.section .text.SetRCnt +.global SetRCnt +.type SetRCnt, @function +SetRCnt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x02 + +.section .text.GetRCnt +.global GetRCnt +.type GetRCnt, @function +GetRCnt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x03 + +.section .text.StartRCnt +.global StartRCnt +.type StartRCnt, @function +StartRCnt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x04 + +.section .text.StopRCnt +.global StopRCnt +.type StopRCnt, @function +StopRCnt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x05 + +.section .text.ResetRCnt +.global ResetRCnt +.type ResetRCnt, @function +ResetRCnt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x06 + +.section .text.DeliverEvent +.global DeliverEvent +.type DeliverEvent, @function +DeliverEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x07 + +.section .text.OpenEvent +.global OpenEvent +.type OpenEvent, @function +OpenEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x08 + +.section .text.CloseEvent +.global CloseEvent +.type CloseEvent, @function +CloseEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x09 + +.section .text.WaitEvent +.global WaitEvent +.type WaitEvent, @function +WaitEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0a + +.section .text.TestEvent +.global TestEvent +.type TestEvent, @function +TestEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0b + +.section .text.EnableEvent +.global EnableEvent +.type EnableEvent, @function +EnableEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0c + +.section .text.DisableEvent +.global DisableEvent +.type DisableEvent, @function +DisableEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0d + +.section .text.OpenTh +.global OpenTh +.type OpenTh, @function +OpenTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0e + +.section .text.CloseTh +.global CloseTh +.type CloseTh, @function 
+CloseTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0f + +.section .text.ChangeTh +.global ChangeTh +.type ChangeTh, @function +ChangeTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x10 + +.section .text.InitPAD +.global InitPAD +.type InitPAD, @function +InitPAD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x12 + +.section .text.StartPAD +.global StartPAD +.type StartPAD, @function +StartPAD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x13 + +.section .text.StopPAD +.global StopPAD +.type StopPAD, @function +StopPAD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x14 + +.section .text.ReturnFromException +.global ReturnFromException +.type ReturnFromException, @function +ReturnFromException: + li $t2, 0xb0 + jr $t2 + li $t1, 0x17 + +.section .text.ResetEntryInt +.global ResetEntryInt +.type ResetEntryInt, @function +ResetEntryInt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x18 + +.section .text.HookEntryInt +.global HookEntryInt +.type HookEntryInt, @function +HookEntryInt: + li $t2, 0xb0 + jr $t2 + li $t1, 0x19 + +.section .text.UnDeliverEvent +.global UnDeliverEvent +.type UnDeliverEvent, @function +UnDeliverEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x20 + +.section .text.GetC0Table +.global GetC0Table +.type GetC0Table, @function +GetC0Table: + li $t2, 0xb0 + jr $t2 + li $t1, 0x56 + +.section .text.GetB0Table +.global GetB0Table +.type GetB0Table, @function +GetB0Table: + li $t2, 0xb0 + jr $t2 + li $t1, 0x57 + +.section .text.ChangeClearPAD +.global ChangeClearPAD +.type ChangeClearPAD, @function +ChangeClearPAD: + li $t2, 0xb0 + jr $t2 + li $t1, 0x5b + +## C0 table functions (5) + +.section .text.SysEnqIntRP +.global SysEnqIntRP +.type SysEnqIntRP, @function +SysEnqIntRP: + li $t2, 0xc0 + jr $t2 + li $t1, 0x02 + +.section .text.SysDeqIntRP +.global SysDeqIntRP +.type SysDeqIntRP, @function +SysDeqIntRP: + li $t2, 0xc0 + jr $t2 + li $t1, 0x03 + +.section .text.InstallExceptionHandlers +.global InstallExceptionHandlers +.type InstallExceptionHandlers, @function +InstallExceptionHandlers: + li $t2, 0xc0 + jr $t2 + li 
$t1, 0x07 + +.section .text.SysInitMemory +.global SysInitMemory +.type SysInitMemory, @function +SysInitMemory: + li $t2, 0xc0 + jr $t2 + li $t1, 0x08 + +.section .text.ChangeClearRCnt +.global ChangeClearRCnt +.type ChangeClearRCnt, @function +ChangeClearRCnt: + li $t2, 0xc0 + jr $t2 + li $t1, 0x0a + diff --git a/libpsn00b/psxapi/sys.s b/libpsn00b/psxapi/sys.s deleted file mode 100644 index 40dcdff..0000000 --- a/libpsn00b/psxapi/sys.s +++ /dev/null @@ -1,358 +0,0 @@ -# PSn00bSDK BIOS API stubs -# (C) 2022 spicyjpeg - MPL licensed - -# This file has been generated automatically. Each function is placed in its -# own section to allow the linker to strip unused functions. - -.set noreorder - -## A0 table functions (11) - -.section .text.b_setjmp -.global b_setjmp -.type b_setjmp, @function -b_setjmp: - li $t2, 0xa0 - jr $t2 - li $t1, 0x13 - -.section .text.b_longjmp -.global b_longjmp -.type b_longjmp, @function -b_longjmp: - li $t2, 0xa0 - jr $t2 - li $t1, 0x14 - -.section .text.b_InitHeap -.global b_InitHeap -.type b_InitHeap, @function -b_InitHeap: - li $t2, 0xa0 - jr $t2 - li $t1, 0x39 - -.section .text.Exec -.global Exec -.type Exec, @function -Exec: - li $t2, 0xa0 - jr $t2 - li $t1, 0x43 - -.section .text.FlushCache -.global FlushCache -.type FlushCache, @function -FlushCache: - li $t2, 0xa0 - jr $t2 - li $t1, 0x44 - -.section .text.LoadExec -.global LoadExec -.type LoadExec, @function -LoadExec: - li $t2, 0xa0 - jr $t2 - li $t1, 0x51 - -.section .text.SetConf -.global SetConf -.type SetConf, @function -SetConf: - li $t2, 0xa0 - jr $t2 - li $t1, 0x9c - -.section .text.GetConf -.global GetConf -.type GetConf, @function -GetConf: - li $t2, 0xa0 - jr $t2 - li $t1, 0x9d - -.section .text.SetMem -.global SetMem -.type SetMem, @function -SetMem: - li $t2, 0xa0 - jr $t2 - li $t1, 0x9f - -.section .text._boot -.global _boot -.type _boot, @function -_boot: - li $t2, 0xa0 - jr $t2 - li $t1, 0xa0 - -.section .text.GetSystemInfo -.global GetSystemInfo -.type 
GetSystemInfo, @function -GetSystemInfo: - li $t2, 0xa0 - jr $t2 - li $t1, 0xb4 - -## B0 table functions (27) - -.section .text.alloc_kernel_memory -.global alloc_kernel_memory -.type alloc_kernel_memory, @function -alloc_kernel_memory: - li $t2, 0xb0 - jr $t2 - li $t1, 0x00 - -.section .text.free_kernel_memory -.global free_kernel_memory -.type free_kernel_memory, @function -free_kernel_memory: - li $t2, 0xb0 - jr $t2 - li $t1, 0x01 - -.section .text.SetRCnt -.global SetRCnt -.type SetRCnt, @function -SetRCnt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x02 - -.section .text.GetRCnt -.global GetRCnt -.type GetRCnt, @function -GetRCnt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x03 - -.section .text.StartRCnt -.global StartRCnt -.type StartRCnt, @function -StartRCnt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x04 - -.section .text.StopRCnt -.global StopRCnt -.type StopRCnt, @function -StopRCnt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x05 - -.section .text.ResetRCnt -.global ResetRCnt -.type ResetRCnt, @function -ResetRCnt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x06 - -.section .text.DeliverEvent -.global DeliverEvent -.type DeliverEvent, @function -DeliverEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x07 - -.section .text.OpenEvent -.global OpenEvent -.type OpenEvent, @function -OpenEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x08 - -.section .text.CloseEvent -.global CloseEvent -.type CloseEvent, @function -CloseEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x09 - -.section .text.WaitEvent -.global WaitEvent -.type WaitEvent, @function -WaitEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0a - -.section .text.TestEvent -.global TestEvent -.type TestEvent, @function -TestEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0b - -.section .text.EnableEvent -.global EnableEvent -.type EnableEvent, @function -EnableEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0c - -.section .text.DisableEvent -.global DisableEvent -.type DisableEvent, @function -DisableEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0d - -.section .text.OpenTh -.global 
OpenTh -.type OpenTh, @function -OpenTh: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0e - -.section .text.CloseTh -.global CloseTh -.type CloseTh, @function -CloseTh: - li $t2, 0xb0 - jr $t2 - li $t1, 0x0f - -.section .text.ChangeTh -.global ChangeTh -.type ChangeTh, @function -ChangeTh: - li $t2, 0xb0 - jr $t2 - li $t1, 0x10 - -.section .text.InitPAD -.global InitPAD -.type InitPAD, @function -InitPAD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x12 - -.section .text.StartPAD -.global StartPAD -.type StartPAD, @function -StartPAD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x13 - -.section .text.StopPAD -.global StopPAD -.type StopPAD, @function -StopPAD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x14 - -.section .text.ReturnFromException -.global ReturnFromException -.type ReturnFromException, @function -ReturnFromException: - li $t2, 0xb0 - jr $t2 - li $t1, 0x17 - -.section .text.ResetEntryInt -.global ResetEntryInt -.type ResetEntryInt, @function -ResetEntryInt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x18 - -.section .text.HookEntryInt -.global HookEntryInt -.type HookEntryInt, @function -HookEntryInt: - li $t2, 0xb0 - jr $t2 - li $t1, 0x19 - -.section .text.UnDeliverEvent -.global UnDeliverEvent -.type UnDeliverEvent, @function -UnDeliverEvent: - li $t2, 0xb0 - jr $t2 - li $t1, 0x20 - -.section .text.GetC0Table -.global GetC0Table -.type GetC0Table, @function -GetC0Table: - li $t2, 0xb0 - jr $t2 - li $t1, 0x56 - -.section .text.GetB0Table -.global GetB0Table -.type GetB0Table, @function -GetB0Table: - li $t2, 0xb0 - jr $t2 - li $t1, 0x57 - -.section .text.ChangeClearPAD -.global ChangeClearPAD -.type ChangeClearPAD, @function -ChangeClearPAD: - li $t2, 0xb0 - jr $t2 - li $t1, 0x5b - -## C0 table functions (5) - -.section .text.SysEnqIntRP -.global SysEnqIntRP -.type SysEnqIntRP, @function -SysEnqIntRP: - li $t2, 0xc0 - jr $t2 - li $t1, 0x02 - -.section .text.SysDeqIntRP -.global SysDeqIntRP -.type SysDeqIntRP, @function -SysDeqIntRP: - li $t2, 0xc0 - jr $t2 - li $t1, 0x03 - -.section 
.text.InstallExceptionHandlers -.global InstallExceptionHandlers -.type InstallExceptionHandlers, @function -InstallExceptionHandlers: - li $t2, 0xc0 - jr $t2 - li $t1, 0x07 - -.section .text.SysInitMemory -.global SysInitMemory -.type SysInitMemory, @function -SysInitMemory: - li $t2, 0xc0 - jr $t2 - li $t1, 0x08 - -.section .text.ChangeClearRCnt -.global ChangeClearRCnt -.type ChangeClearRCnt, @function -ChangeClearRCnt: - li $t2, 0xc0 - jr $t2 - li $t1, 0x0a - diff --git a/libpsn00b/psxetc/_dl_resolve_wrapper.S b/libpsn00b/psxetc/_dl_resolve_wrapper.S new file mode 100644 index 0000000..b715720 --- /dev/null +++ b/libpsn00b/psxetc/_dl_resolve_wrapper.S @@ -0,0 +1,48 @@ +# PSn00bSDK dynamic linker +# (C) 2021-2022 spicyjpeg - MPL licensed +# +# This function is called by the lazy loader stubs generated by GCC in the +# .plt/.MIPS.stubs section when attempting to call a GOT entry whose address +# hasn't yet been resolved. The generated stubs conform to the MIPS ABI and +# use the following registers: +# - $t7 = address the resolved function should return to (i.e. $ra of the +# caller that triggered the stub) +# - $t8 = index of the function in the .dynsym symbol table +# - $t9 = _dl_resolve_wrapper itself's address + +.set noreorder + +.section .text._dl_resolve_wrapper, "ax", @progbits +.global _dl_resolve_wrapper +.type _dl_resolve_wrapper, @function + +_dl_resolve_wrapper: + # Save the arguments being passed to the function to be resolved. + addiu $sp, -20 + sw $a0, 0($sp) + sw $a1, 4($sp) + sw $a2, 8($sp) + sw $a3, 12($sp) + sw $t7, 16($sp) # (will be restored directly to $ra) + + # Figure out where the DLL's struct is. dlinit() places a pointer to the + # struct in the second GOT entry, so it's just a matter of indexing the GOT + # using $gp. Then call _dl_resolve_helper with the struct and $t8 as + # arguments; the return value (the resolved address) is left in $v0.
+ lw $a0, -0x7fec($gp) # dll = ((uint32_t *) (gp - 0x7ff0))[1] + move $a1, $t8 + + jal _dl_resolve_helper + addiu $sp, -8 # (delay slot) reserve 8 more bytes before the call + addiu $sp, 8 # release them once the helper returns + + # Restore the arguments from the stack and tail-call the function at the + # address returned by the resolver. + lw $a0, 0($sp) + lw $a1, 4($sp) + lw $a2, 8($sp) + lw $a3, 12($sp) + lw $ra, 16($sp) + + jr $v0 + addiu $sp, 20 diff --git a/libpsn00b/psxetc/_dl_resolve_wrapper.s b/libpsn00b/psxetc/_dl_resolve_wrapper.s deleted file mode 100644 index b715720..0000000 --- a/libpsn00b/psxetc/_dl_resolve_wrapper.s +++ /dev/null @@ -1,48 +0,0 @@ -# PSn00bSDK dynamic linker -# (C) 2021-2022 spicyjpeg - MPL licensed -# -# This function is called by the lazy loader stubs generated by GCC in the -# .plt/.MIPS.stubs section when attempting to call a GOT entry whose address -# hasn't yet been resolved. The generated stubs conform to the MIPS ABI and -# uses the following registers: -# - $t7 = address the resolved function should return to (i.e. $ra of the -# caller that triggered the stub) -# - $t8 = index of the function in the .dynsym symbol table -# - $t9 = _dl_resolve_wrapper itself's address - -.set noreorder - -.section .text._dl_resolve_wrapper, "ax", @progbits -.global _dl_resolve_wrapper -.type _dl_resolve_wrapper, @function - -_dl_resolve_wrapper: - # Save the arguments being passed to the function to be resolved. - addiu $sp, -20 - sw $a0, 0($sp) - sw $a1, 4($sp) - sw $a2, 8($sp) - sw $a3, 12($sp) - sw $t7, 16($sp) # (will be restored directly to $ra) - - # Figure out where the DLL's struct is. dlinit() places a pointer to the - # struct in the second GOT entry, so it's just a matter of indexing the GOT - # using $gp. Then call _dl_resolve_helper with the struct and $t8 as - # arguments, and store the return value into $t0.
- lw $a0, -0x7fec($gp) # dll = &((uint32_t *) (gp - 0x7ff0))[1] - move $a1, $t8 - - jal _dl_resolve_helper - addiu $sp, -8 - addiu $sp, 8 - - # Restore the arguments from the stack and tail-call the function at the - # address returned by the resolver. - lw $a0, 0($sp) - lw $a1, 4($sp) - lw $a2, 8($sp) - lw $a3, 12($sp) - lw $ra, 16($sp) - - jr $v0 - addiu $sp, 20 diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index 5678f35..6a47f1d 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -196,7 +196,7 @@ void SetDrawOpType(GPU_DrawOpType type) { _drawop_type = type; } -int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3) { +int EnqueueDrawOp(void (*func)(uint32_t, uint32_t, uint32_t), uint32_t arg1, uint32_t arg2, uint32_t arg3) { _sdk_validate_args(func, -1); // If GPU DMA is currently busy, append the command to the queue instead of diff --git a/libpsn00b/psxgte/initgeom.S b/libpsn00b/psxgte/initgeom.S new file mode 100644 index 0000000..d2f7ef4 --- /dev/null +++ b/libpsn00b/psxgte/initgeom.S @@ -0,0 +1,44 @@ +.set noreorder + +#include "hwregs_a.inc" +#include "gtereg.inc" + +.section .text.InitGeom +.global InitGeom +.type InitGeom, @function +InitGeom: + # Disable interrupts and make sure the GTE is enabled in COP0. + lui $v0, IOBASE + lhu $v1, IRQ_MASK($v0) + nop + sh $0, IRQ_MASK($v0) + + mfc0 $a0, $12 + lui $a1, 0x4000 + or $a1, $a0 + mtc0 $a1, $12 + nop + #nop + + # Re-enable interrupts, then load default values into some GTE registers. 
+ sh $v1, IRQ_MASK($v0) + + ctc2 $0, C2_OFX + nop + ctc2 $0, C2_OFY + + li $a0, 320 + ctc2 $a0, C2_H + + li $a0, 0x155 + ctc2 $a0, C2_ZSF3 + li $a0, 0x100 + ctc2 $a0, C2_ZSF4 + + li $a0, 0xef9e + ctc2 $a0, C2_DQA + lui $a0, 0x0140 + ctc2 $a0, C2_DQB + + jr $ra + nop diff --git a/libpsn00b/psxgte/initgeom.s b/libpsn00b/psxgte/initgeom.s deleted file mode 100644 index ccda7f1..0000000 --- a/libpsn00b/psxgte/initgeom.s +++ /dev/null @@ -1,44 +0,0 @@ -.set noreorder - -.include "hwregs_a.inc" -.include "gtereg.inc" - -.section .text.InitGeom -.global InitGeom -.type InitGeom, @function -InitGeom: - # Disable interrupts and make sure the GTE is enabled in COP0. - lui $v0, IOBASE - lhu $v1, IRQ_MASK($v0) - nop - sh $0, IRQ_MASK($v0) - - mfc0 $a0, $12 - lui $a1, 0x4000 - or $a1, $a0 - mtc0 $a1, $12 - nop - #nop - - # Re-enable interrupts, then load default values into some GTE registers. - sh $v1, IRQ_MASK($v0) - - ctc2 $0, C2_OFX - nop - ctc2 $0, C2_OFY - - li $a0, 320 - ctc2 $a0, C2_H - - li $a0, 0x155 - ctc2 $a0, C2_ZSF3 - li $a0, 0x100 - ctc2 $a0, C2_ZSF4 - - li $a0, 0xef9e - ctc2 $a0, C2_DQA - lui $a0, 0x0140 - ctc2 $a0, C2_DQB - - jr $ra - nop diff --git a/libpsn00b/psxgte/matrix.S b/libpsn00b/psxgte/matrix.S new file mode 100644 index 0000000..57790bc --- /dev/null +++ b/libpsn00b/psxgte/matrix.S @@ -0,0 +1,439 @@ +.set noreorder + +#include "gtereg.inc" +#include "inline_s.inc" + +.set MATRIX_r11r12, 0 +.set MATRIX_r13r21, 4 +.set MATRIX_r22r23, 8 +.set MATRIX_r31r32, 12 +.set MATRIX_r33, 16 +.set MATRIX_trx, 20 +.set MATRIX_try, 24 +.set MATRIX_trz, 28 + +.section .text.ApplyMatrixLV +.global ApplyMatrixLV +.type ApplyMatrixLV, @function +ApplyMatrixLV: + # Load matrix to GTE + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lw $t0, 0($a1) + lw $t1, 4($a1) + mtc2 $t0, C2_IR1 + lw $t0, 8($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 
3, 0) + + swc2 C2_IR1, 0($a2) + swc2 C2_IR2, 4($a2) + swc2 C2_IR3, 8($a2) + + jr $ra + move $v0, $a2 + +.section .text.CompMatrixLV +.global CompMatrixLV +.type CompMatrixLV, @function +CompMatrixLV: + # Load matrix v0 to GTE + lw $t0, MATRIX_r11r12($a0) + lw $t1, MATRIX_r13r21($a0) + ctc2 $t0, C2_R11R12 + ctc2 $t1, C2_R13R21 + lw $t0, MATRIX_r22r23($a0) + lw $t1, MATRIX_r31r32($a0) + lhu $t2, MATRIX_r33($a0) + ctc2 $t0, C2_R22R23 + lw $t0, MATRIX_trx($a0) + ctc2 $t1, C2_R31R32 + lw $t1, MATRIX_try($a0) + ctc2 $t2, C2_R33 + lw $t2, MATRIX_trz($a0) + ctc2 $t0, C2_TRX + ctc2 $t1, C2_TRY + ctc2 $t2, C2_TRZ + + lw $t0, MATRIX_trx($a1) + lw $t1, MATRIX_try($a1) + mtc2 $t0, C2_IR1 + lw $t0, MATRIX_trz($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 0, 0) + + swc2 C2_IR1, MATRIX_trx($a2) + swc2 C2_IR2, MATRIX_try($a2) + swc2 C2_IR3, MATRIX_trz($a2) + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, 
C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 # (delay slot) return the pointer passed in $a2 + +.section .text.MulMatrix +.global MulMatrix +.type MulMatrix, @function +MulMatrix: + # Load m1 to GTE + lw $t0, 0($a1) + lw $t1, 4($a1) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a1) + lw $t1, 12($a1) + lhu $t2, 16($a1) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a0) # Load values for first + lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a0) # Load values for second + lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a0) + sh $t0, 2*(0+(3*2))($a0) + + lhu $t1, 2*(2+(3*1))($a0) # Load values for third + lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a0) + sh $t0, 2*(1+(3*2))($a0) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a0) + sh $t0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 # (delay slot) return the pointer passed in $a0 + +.section .text.MulMatrix0 +.global MulMatrix0 +.type MulMatrix0, @function +MulMatrix0: + # Load the matrix at $a0 (first argument) to GTE + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1)
+ mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 + +.section .text.ScaleMatrix +.global ScaleMatrix +.type ScaleMatrix, @function +ScaleMatrix: + lwc2 C2_IR0, 0($a1) # X + + lh $v0, 2*(0+(3*0))($a0) + lh $v1, 2*(0+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(0+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(0+(3*1))($a0) + sh $v0, 2*(0+(3*2))($a0) + + lwc2 C2_IR0, 4($a1) # Y + + lh $v0, 2*(1+(3*0))($a0) + lh $v1, 2*(1+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(1+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(1+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*1))($a0) + sh $v0, 2*(1+(3*2))($a0) + + lwc2 C2_IR0, 8($a1) # Z + + lh $v0, 2*(2+(3*0))($a0) + lh $v1, 2*(2+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 
+ mfc2 $v1, C2_IR2 + sh $v0, 2*(2+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(2+(3*1))($a0) + sh $v0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 + +.section .text.ScaleMatrixL +.global ScaleMatrixL +.type ScaleMatrixL, @function +ScaleMatrixL: + lwc2 C2_IR0, 0($a1) # X + + lh $v0, 2*(0+(3*0))($a0) + lh $v1, 2*(1+(3*0))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*0))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*0))($a0) + sh $v0, 2*(2+(3*0))($a0) + + lwc2 C2_IR0, 4($a1) # Y + + lh $v0, 2*(0+(3*1))($a0) + lh $v1, 2*(1+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*1))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*1))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*1))($a0) + sh $v0, 2*(2+(3*1))($a0) + + lwc2 C2_IR0, 8($a1) # Z + + lh $v0, 2*(0+(3*2))($a0) + lh $v1, 2*(1+(3*2))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*2))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*2))($a0) + sh $v0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 + +.section .text.PushMatrix +.global PushMatrix +.type PushMatrix, @function +PushMatrix: + la $a0, _matrix_stack + cfc2 $v0, C2_R11R12 + cfc2 $v1, C2_R13R21 + sw $v0, 0($a0) + cfc2 $v0, C2_R22R23 + sw $v1, 4($a0) + sw $v0, 8($a0) + cfc2 $v0, C2_R31R32 + cfc2 $v1, C2_R33 + sw $v0, 12($a0) + sw $v1, 16($a0) + cfc2 $v0, C2_TRX + cfc2 $v1, C2_TRY + sw $v0, 20($a0) + cfc2 $v0, C2_TRZ + sw $v1, 24($a0) + jr $ra + sw $v0, 28($a0) + +.section .text.PopMatrix +.global PopMatrix +.type PopMatrix, @function +PopMatrix: + la $a0, _matrix_stack + lw $v0, 0($a0) + lw $v1, 4($a0) + ctc2 $v0, C2_R11R12 + ctc2 $v1, C2_R13R21 + lw $v0, 8($a0) + lw $v1, 12($a0) + ctc2 $v0, C2_R22R23 + lw $v0, 16($a0) + ctc2 $v1, C2_R31R32 + ctc2 $v0, C2_R33 + lw $v0, 20($a0) + lw $v1, 24($a0) 
+ ctc2 $v0, C2_TRX + lw $v0, 28($a0) + ctc2 $v1, C2_TRY + ctc2 $v0, C2_TRZ + jr $ra + nop + +.section .data._matrix_stack +.type _matrix_stack, @object +_matrix_stack: + .word 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/libpsn00b/psxgte/matrix.s b/libpsn00b/psxgte/matrix.s deleted file mode 100644 index 9de0ccd..0000000 --- a/libpsn00b/psxgte/matrix.s +++ /dev/null @@ -1,439 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.set MATRIX_r11r12, 0 -.set MATRIX_r13r21, 4 -.set MATRIX_r22r23, 8 -.set MATRIX_r31r32, 12 -.set MATRIX_r33, 16 -.set MATRIX_trx, 20 -.set MATRIX_try, 24 -.set MATRIX_trz, 28 - -.section .text.ApplyMatrixLV -.global ApplyMatrixLV -.type ApplyMatrixLV, @function -ApplyMatrixLV: - # Load matrix to GTE - lw $t0, 0($a0) - lw $t1, 4($a0) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a0) - lw $t1, 12($a0) - lhu $t2, 16($a0) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lw $t0, 0($a1) - lw $t1, 4($a1) - mtc2 $t0, C2_IR1 - lw $t0, 8($a1) - mtc2 $t1, C2_IR2 - mtc2 $t0, C2_IR3 - - nMVMVA(1, 0, 3, 3, 0) - - swc2 C2_IR1, 0($a2) - swc2 C2_IR2, 4($a2) - swc2 C2_IR3, 8($a2) - - jr $ra - move $v0, $a2 - -.section .text.CompMatrixLV -.global CompMatrixLV -.type CompMatrixLV, @function -CompMatrixLV: - # Load matrix v0 to GTE - lw $t0, MATRIX_r11r12($a0) - lw $t1, MATRIX_r13r21($a0) - ctc2 $t0, C2_R11R12 - ctc2 $t1, C2_R13R21 - lw $t0, MATRIX_r22r23($a0) - lw $t1, MATRIX_r31r32($a0) - lhu $t2, MATRIX_r33($a0) - ctc2 $t0, C2_R22R23 - lw $t0, MATRIX_trx($a0) - ctc2 $t1, C2_R31R32 - lw $t1, MATRIX_try($a0) - ctc2 $t2, C2_R33 - lw $t2, MATRIX_trz($a0) - ctc2 $t0, C2_TRX - ctc2 $t1, C2_TRY - ctc2 $t2, C2_TRZ - - lw $t0, MATRIX_trx($a1) - lw $t1, MATRIX_try($a1) - mtc2 $t0, C2_IR1 - lw $t0, MATRIX_trz($a1) - mtc2 $t1, C2_IR2 - mtc2 $t0, C2_IR3 - - nMVMVA(1, 0, 3, 0, 0) - - swc2 C2_IR1, MATRIX_trx($a2) - swc2 C2_IR2, MATRIX_try($a2) - swc2 C2_IR3, MATRIX_trz($a2) - - lhu $t1, 2*(0+(3*1))($a1) # Load values for first - lhu $t0, 2*(0+(3*0))($a1) # R11 
R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a1) # Load values for second - lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a2) - sh $t0, 2*(0+(3*2))($a2) - - lhu $t1, 2*(2+(3*1))($a1) # Load values for third - lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 2*(1+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a2) - sh $t0, 2*(1+(3*2))($a2) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 2*(2+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a2) - sh $t0, 2*(2+(3*2))($a2) - - jr $ra - move $v0, $a2 - -.section .text.MulMatrix -.global MulMatrix -.type MulMatrix, @function -MulMatrix: - # Load m1 to GTE - lw $t0, 0($a1) - lw $t1, 4($a1) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a1) - lw $t1, 12($a1) - lhu $t2, 16($a1) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lhu $t1, 2*(0+(3*1))($a0) # Load values for first - lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a0) # Load values for second - lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a0) - sh $t0, 2*(0+(3*2))($a0) - - lhu $t1, 
2*(2+(3*1))($a0) # Load values for third - lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 2*(1+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a0) - sh $t0, 2*(1+(3*2))($a0) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 2*(2+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a0) - sh $t0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 - -.section .text.MulMatrix0 -.global MulMatrix0 -.type MulMatrix0, @function -MulMatrix0: - # Load m1 to GTE - lw $t0, 0($a0) - lw $t1, 4($a0) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a0) - lw $t1, 12($a0) - lhu $t2, 16($a0) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lhu $t1, 2*(0+(3*1))($a1) # Load values for first - lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a1) # Load values for second - lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a2) - sh $t0, 2*(0+(3*2))($a2) - - lhu $t1, 2*(2+(3*1))($a1) # Load values for third - lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 2*(1+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a2) - sh $t0, 2*(1+(3*2))($a2) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 
2*(2+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a2) - sh $t0, 2*(2+(3*2))($a2) - - jr $ra - move $v0, $a2 - -.section .text.ScaleMatrix -.global ScaleMatrix -.type ScaleMatrix, @function -ScaleMatrix: - lwc2 C2_IR0, 0($a1) # X - - lh $v0, 2*(0+(3*0))($a0) - lh $v1, 2*(0+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(0+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(0+(3*1))($a0) - sh $v0, 2*(0+(3*2))($a0) - - lwc2 C2_IR0, 4($a1) # Y - - lh $v0, 2*(1+(3*0))($a0) - lh $v1, 2*(1+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(1+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(1+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*1))($a0) - sh $v0, 2*(1+(3*2))($a0) - - lwc2 C2_IR0, 8($a1) # Z - - lh $v0, 2*(2+(3*0))($a0) - lh $v1, 2*(2+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(2+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(2+(3*1))($a0) - sh $v0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 - -.section .text.ScaleMatrixL -.global ScaleMatrixL -.type ScaleMatrixL, @function -ScaleMatrixL: - lwc2 C2_IR0, 0($a1) # X - - lh $v0, 2*(0+(3*0))($a0) - lh $v1, 2*(1+(3*0))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*0))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*0))($a0) - sh $v0, 2*(2+(3*0))($a0) - - lwc2 C2_IR0, 4($a1) # Y - - lh $v0, 2*(0+(3*1))($a0) - lh $v1, 2*(1+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*1))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*1))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*1))($a0) - sh $v0, 2*(2+(3*1))($a0) - - lwc2 C2_IR0, 8($a1) # Z - - lh $v0, 2*(0+(3*2))($a0) - lh 
$v1, 2*(1+(3*2))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*2))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*2))($a0) - sh $v0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 - -.section .text.PushMatrix -.global PushMatrix -.type PushMatrix, @function -PushMatrix: - la $a0, _matrix_stack - cfc2 $v0, C2_R11R12 - cfc2 $v1, C2_R13R21 - sw $v0, 0($a0) - cfc2 $v0, C2_R22R23 - sw $v1, 4($a0) - sw $v0, 8($a0) - cfc2 $v0, C2_R31R32 - cfc2 $v1, C2_R33 - sw $v0, 12($a0) - sw $v1, 16($a0) - cfc2 $v0, C2_TRX - cfc2 $v1, C2_TRY - sw $v0, 20($a0) - cfc2 $v0, C2_TRZ - sw $v1, 24($a0) - jr $ra - sw $v0, 28($a0) - -.section .text.PopMatrix -.global PopMatrix -.type PopMatrix, @function -PopMatrix: - la $a0, _matrix_stack - lw $v0, 0($a0) - lw $v1, 4($a0) - ctc2 $v0, C2_R11R12 - ctc2 $v1, C2_R13R21 - lw $v0, 8($a0) - lw $v1, 12($a0) - ctc2 $v0, C2_R22R23 - lw $v0, 16($a0) - ctc2 $v1, C2_R31R32 - ctc2 $v0, C2_R33 - lw $v0, 20($a0) - lw $v1, 24($a0) - ctc2 $v0, C2_TRX - lw $v0, 28($a0) - ctc2 $v1, C2_TRY - ctc2 $v0, C2_TRZ - jr $ra - nop - -.section .data._matrix_stack -.type _matrix_stack, @object -_matrix_stack: - .word 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/libpsn00b/psxgte/squareroot.S b/libpsn00b/psxgte/squareroot.S new file mode 100644 index 0000000..519b0bd --- /dev/null +++ b/libpsn00b/psxgte/squareroot.S @@ -0,0 +1,118 @@ +.set noreorder + +#include "gtereg.inc" +#include "inline_s.inc" + +.section .text.SquareRoot12 +.global SquareRoot12 +.type SquareRoot12, @function +SquareRoot12: + mtc2 $a0, C2_LZCS + nop + nop + mfc2 $v0, C2_LZCR + beq $v0, 32, .Lbad_sqr12 + nop + andi $t0, $v0, 0x1 + addiu $v1, $0 , -2 + and $t2, $v0, $v1 + li $t1, 19 + sub $t1, $t2 + sra $t1, 1 + addi $t3, $t2, -24 + bltz $t3, .Lvalue_less12 + nop + sllv $t4, $a0, $t3 + b .Lvalue_greater12 +.Lvalue_less12: + addiu $t3, $0 , 24 + sub $t3, $t2 + srav $t4, $a0, $t3 +.Lvalue_greater12: + addi $t4, 
-64 + sll $t4, 1 + la $t5, _sqrt_table + addu $t5, $t4 + lh $t5, 0($t5) + nop + + bltz $t1, .L1594c + nop + jr $ra + sllv $v0, $t5, $t1 + +.L1594c: + sub $t1, $0 , $t1 + jr $ra + srl $v0, $t5, $t1 + +.Lbad_sqr12: + jr $ra + move $v0, $0 + +.section .text.SquareRoot0 +.global SquareRoot0 +.type SquareRoot0, @function +SquareRoot0: + mtc2 $a0, C2_LZCS + nop + nop + mfc2 $v0, C2_LZCR + beq $v0, 32, .Lbad_sqr + nop + andi $t0, $v0, 0x1 + addiu $v1, $0 , -2 + and $t2, $v0, $v1 + li $t1, 31 + sub $t1, $t2 + sra $t1, 1 + addi $t3, $t2, -24 + bltz $t3, .Lvalue_less + nop + sllv $t4, $a0, $t3 + b .Lvalue_greater +.Lvalue_less: + addiu $t3, $0 , 24 + sub $t3, $t2 + srav $t4, $a0, $t3 +.Lvalue_greater: + addi $t4, -64 + sll $t4, 1 + la $t5, _sqrt_table + addu $t5, $t4 + lh $t5, 0($t5) + nop + sllv $t5, $t5, $t1 + jr $ra + srl $v0, $t5, 12 +.Lbad_sqr: + jr $ra + move $v0, $0 + +.section .data._sqrt_table +.type _sqrt_table, @object +_sqrt_table: + .hword 0x1000, 0x101f, 0x103f, 0x105e, 0x107e, 0x109c, 0x10bb, 0x10da + .hword 0x10f8, 0x1116, 0x1134, 0x1152, 0x116f, 0x118c, 0x11a9, 0x11c6 + .hword 0x11e3, 0x1200, 0x121c, 0x1238, 0x1254, 0x1270, 0x128c, 0x12a7 + .hword 0x12c2, 0x12de, 0x12f9, 0x1314, 0x132e, 0x1349, 0x1364, 0x137e + .hword 0x1398, 0x13b2, 0x13cc, 0x13e6, 0x1400, 0x1419, 0x1432, 0x144c + .hword 0x1465, 0x147e, 0x1497, 0x14b0, 0x14c8, 0x14e1, 0x14f9, 0x1512 + .hword 0x152a, 0x1542, 0x155a, 0x1572, 0x158a, 0x15a2, 0x15b9, 0x15d1 + .hword 0x15e8, 0x1600, 0x1617, 0x162e, 0x1645, 0x165c, 0x1673, 0x1689 + .hword 0x16a0, 0x16b7, 0x16cd, 0x16e4, 0x16fa, 0x1710, 0x1726, 0x173c + .hword 0x1752, 0x1768, 0x177e, 0x1794, 0x17aa, 0x17bf, 0x17d5, 0x17ea + .hword 0x1800, 0x1815, 0x182a, 0x183f, 0x1854, 0x1869, 0x187e, 0x1893 + .hword 0x18a8, 0x18bd, 0x18d1, 0x18e6, 0x18fa, 0x190f, 0x1923, 0x1938 + .hword 0x194c, 0x1960, 0x1974, 0x1988, 0x199c, 0x19b0, 0x19c4, 0x19d8 + .hword 0x19ec, 0x1a00, 0x1a13, 0x1a27, 0x1a3a, 0x1a4e, 0x1a61, 0x1a75 + .hword 0x1a88, 0x1a9b, 0x1aae, 0x1ac2, 
0x1ad5, 0x1ae8, 0x1afb, 0x1b0e + .hword 0x1b21, 0x1b33, 0x1b46, 0x1b59, 0x1b6c, 0x1b7e, 0x1b91, 0x1ba3 + .hword 0x1bb6, 0x1bc8, 0x1bdb, 0x1bed, 0x1c00, 0x1c12, 0x1c24, 0x1c36 + .hword 0x1c48, 0x1c5a, 0x1c6c, 0x1c7e, 0x1c90, 0x1ca2, 0x1cb4, 0x1cc6 + .hword 0x1cd8, 0x1ce9, 0x1cfb, 0x1d0d, 0x1d1e, 0x1d30, 0x1d41, 0x1d53 + .hword 0x1d64, 0x1d76, 0x1d87, 0x1d98, 0x1daa, 0x1dbb, 0x1dcc, 0x1ddd + .hword 0x1dee, 0x1e00, 0x1e11, 0x1e22, 0x1e33, 0x1e43, 0x1e54, 0x1e65 + .hword 0x1e76, 0x1e87, 0x1e98, 0x1ea8, 0x1eb9, 0x1eca, 0x1eda, 0x1eeb + .hword 0x1efb, 0x1f0c, 0x1f1c, 0x1f2d, 0x1f3d, 0x1f4e, 0x1f5e, 0x1f6e + .hword 0x1f7e, 0x1f8f, 0x1f9f, 0x1faf, 0x1fbf, 0x1fcf, 0x1fdf, 0x1fef diff --git a/libpsn00b/psxgte/squareroot.s b/libpsn00b/psxgte/squareroot.s deleted file mode 100644 index 4377ee4..0000000 --- a/libpsn00b/psxgte/squareroot.s +++ /dev/null @@ -1,118 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text.SquareRoot12 -.global SquareRoot12 -.type SquareRoot12, @function -SquareRoot12: - mtc2 $a0, C2_LZCS - nop - nop - mfc2 $v0, C2_LZCR - beq $v0, 32, .Lbad_sqr12 - nop - andi $t0, $v0, 0x1 - addiu $v1, $0 , -2 - and $t2, $v0, $v1 - li $t1, 19 - sub $t1, $t2 - sra $t1, 1 - addi $t3, $t2, -24 - bltz $t3, .Lvalue_less12 - nop - sllv $t4, $a0, $t3 - b .Lvalue_greater12 -.Lvalue_less12: - addiu $t3, $0 , 24 - sub $t3, $t2 - srav $t4, $a0, $t3 -.Lvalue_greater12: - addi $t4, -64 - sll $t4, 1 - la $t5, _sqrt_table - addu $t5, $t4 - lh $t5, 0($t5) - nop - - bltz $t1, .L1594c - nop - jr $ra - sllv $v0, $t5, $t1 - -.L1594c: - sub $t1, $0 , $t1 - jr $ra - srl $v0, $t5, $t1 - -.Lbad_sqr12: - jr $ra - move $v0, $0 - -.section .text.SquareRoot0 -.global SquareRoot0 -.type SquareRoot0, @function -SquareRoot0: - mtc2 $a0, C2_LZCS - nop - nop - mfc2 $v0, C2_LZCR - beq $v0, 32, .Lbad_sqr - nop - andi $t0, $v0, 0x1 - addiu $v1, $0 , -2 - and $t2, $v0, $v1 - li $t1, 31 - sub $t1, $t2 - sra $t1, 1 - addi $t3, $t2, -24 - bltz $t3, .Lvalue_less - nop - 
sllv $t4, $a0, $t3 - b .Lvalue_greater -.Lvalue_less: - addiu $t3, $0 , 24 - sub $t3, $t2 - srav $t4, $a0, $t3 -.Lvalue_greater: - addi $t4, -64 - sll $t4, 1 - la $t5, _sqrt_table - addu $t5, $t4 - lh $t5, 0($t5) - nop - sllv $t5, $t5, $t1 - jr $ra - srl $v0, $t5, 12 -.Lbad_sqr: - jr $ra - move $v0, $0 - -.section .data._sqrt_table -.type _sqrt_table, @object -_sqrt_table: - .hword 0x1000, 0x101f, 0x103f, 0x105e, 0x107e, 0x109c, 0x10bb, 0x10da - .hword 0x10f8, 0x1116, 0x1134, 0x1152, 0x116f, 0x118c, 0x11a9, 0x11c6 - .hword 0x11e3, 0x1200, 0x121c, 0x1238, 0x1254, 0x1270, 0x128c, 0x12a7 - .hword 0x12c2, 0x12de, 0x12f9, 0x1314, 0x132e, 0x1349, 0x1364, 0x137e - .hword 0x1398, 0x13b2, 0x13cc, 0x13e6, 0x1400, 0x1419, 0x1432, 0x144c - .hword 0x1465, 0x147e, 0x1497, 0x14b0, 0x14c8, 0x14e1, 0x14f9, 0x1512 - .hword 0x152a, 0x1542, 0x155a, 0x1572, 0x158a, 0x15a2, 0x15b9, 0x15d1 - .hword 0x15e8, 0x1600, 0x1617, 0x162e, 0x1645, 0x165c, 0x1673, 0x1689 - .hword 0x16a0, 0x16b7, 0x16cd, 0x16e4, 0x16fa, 0x1710, 0x1726, 0x173c - .hword 0x1752, 0x1768, 0x177e, 0x1794, 0x17aa, 0x17bf, 0x17d5, 0x17ea - .hword 0x1800, 0x1815, 0x182a, 0x183f, 0x1854, 0x1869, 0x187e, 0x1893 - .hword 0x18a8, 0x18bd, 0x18d1, 0x18e6, 0x18fa, 0x190f, 0x1923, 0x1938 - .hword 0x194c, 0x1960, 0x1974, 0x1988, 0x199c, 0x19b0, 0x19c4, 0x19d8 - .hword 0x19ec, 0x1a00, 0x1a13, 0x1a27, 0x1a3a, 0x1a4e, 0x1a61, 0x1a75 - .hword 0x1a88, 0x1a9b, 0x1aae, 0x1ac2, 0x1ad5, 0x1ae8, 0x1afb, 0x1b0e - .hword 0x1b21, 0x1b33, 0x1b46, 0x1b59, 0x1b6c, 0x1b7e, 0x1b91, 0x1ba3 - .hword 0x1bb6, 0x1bc8, 0x1bdb, 0x1bed, 0x1c00, 0x1c12, 0x1c24, 0x1c36 - .hword 0x1c48, 0x1c5a, 0x1c6c, 0x1c7e, 0x1c90, 0x1ca2, 0x1cb4, 0x1cc6 - .hword 0x1cd8, 0x1ce9, 0x1cfb, 0x1d0d, 0x1d1e, 0x1d30, 0x1d41, 0x1d53 - .hword 0x1d64, 0x1d76, 0x1d87, 0x1d98, 0x1daa, 0x1dbb, 0x1dcc, 0x1ddd - .hword 0x1dee, 0x1e00, 0x1e11, 0x1e22, 0x1e33, 0x1e43, 0x1e54, 0x1e65 - .hword 0x1e76, 0x1e87, 0x1e98, 0x1ea8, 0x1eb9, 0x1eca, 0x1eda, 0x1eeb - .hword 0x1efb, 0x1f0c, 0x1f1c, 
0x1f2d, 0x1f3d, 0x1f4e, 0x1f5e, 0x1f6e - .hword 0x1f7e, 0x1f8f, 0x1f9f, 0x1faf, 0x1fbf, 0x1fcf, 0x1fdf, 0x1fef diff --git a/libpsn00b/psxgte/vector.S b/libpsn00b/psxgte/vector.S new file mode 100644 index 0000000..5fd139c --- /dev/null +++ b/libpsn00b/psxgte/vector.S @@ -0,0 +1,123 @@ +.set noreorder +.set noat + +#include "gtereg.inc" +#include "inline_s.inc" + +.section .text.Square0 +.global Square0 +.type Square0, @function +Square0: + # a0 - Pointer to input vector (v0) + # a1 - Pointer to output vector (v1) + + lwc2 C2_IR1, 0($a0) + lwc2 C2_IR2, 4($a0) + lwc2 C2_IR3, 8($a0) + + nSQR(0) + + swc2 C2_IR1, 0($a1) + swc2 C2_IR2, 4($a1) + swc2 C2_IR3, 8($a1) + + jr $ra + nop + +.section .text.VectorNormalS +.global VectorNormalS +.type VectorNormalS, @function +VectorNormalS: + + lw $t0, 0($a0) + lw $t1, 4($a0) + lw $t2, 8($a0) + + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + + nSQR(0) + + mfc2 $t3, C2_MAC1 + mfc2 $t4, C2_MAC2 + mfc2 $t5, C2_MAC3 + + add $t3, $t4 + add $v0, $t3, $t5 + mtc2 $v0, C2_LZCS + nop + nop + mfc2 $v1, C2_LZCR + + addiu $at, $0 , -2 + and $v1, $at + + addiu $t6, $0 , 0x1f + sub $t6, $v1 + sra $t6, 1 + addiu $t3, $v1, -24 + + bltz $t3, .Lvalue_neg + nop + b .Lvalue_pos + sllv $t4, $v0, $t3 +.Lvalue_neg: + addiu $t3, $0 , 24 + sub $t3, $v1 + srav $t4, $v0, $t3 +.Lvalue_pos: + addi $t4, -64 + sll $t4, 1 + + la $t5, _norm_table + addu $t5, $t4 + lh $t5, 0($t5) + + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + mtc2 $t5, C2_IR0 + + nGPF(0) + + mfc2 $t0, C2_MAC1 + mfc2 $t1, C2_MAC2 + mfc2 $t2, C2_MAC3 + + sra $t0, $t6 + sra $t1, $t6 + sra $t2, $t6 + + sh $t0, 0($a1) + sh $t1, 2($a1) + jr $ra + sh $t2, 4($a1) + +.section .data._norm_table +.type _norm_table, @object +_norm_table: + .hword 0x1000, 0x0fe0, 0x0fc1, 0x0fa3, 0x0f85, 0x0f68, 0x0f4c, 0x0f30 + .hword 0x0f15, 0x0efb, 0x0ee1, 0x0ec7, 0x0eae, 0x0e96, 0x0e7e, 0x0e66 + .hword 0x0e4f, 0x0e38, 0x0e22, 0x0e0c, 0x0df7, 0x0de2, 0x0dcd, 0x0db9 + .hword 0x0da5, 0x0d91, 0x0d7e, 
0x0d6b, 0x0d58, 0x0d45, 0x0d33, 0x0d21 + .hword 0x0d10, 0x0cff, 0x0cee, 0x0cdd, 0x0ccc, 0x0cbc, 0x0cac, 0x0c9c + .hword 0x0c8d, 0x0c7d, 0x0c6e, 0x0c5f, 0x0c51, 0x0c42, 0x0c34, 0x0c26 + .hword 0x0c18, 0x0c0a, 0x0bfd, 0x0bef, 0x0be2, 0x0bd5, 0x0bc8, 0x0bbb + .hword 0x0baf, 0x0ba2, 0x0b96, 0x0b8a, 0x0b7e, 0x0b72, 0x0b67, 0x0b5b + .hword 0x0b50, 0x0b45, 0x0b39, 0x0b2e, 0x0b24, 0x0b19, 0x0b0e, 0x0b04 + .hword 0x0af9, 0x0aef, 0x0ae5, 0x0adb, 0x0ad1, 0x0ac7, 0x0abd, 0x0ab4 + .hword 0x0aaa, 0x0aa1, 0x0a97, 0x0a8e, 0x0a85, 0x0a7c, 0x0a73, 0x0a6a + .hword 0x0a61, 0x0a59, 0x0a50, 0x0a47, 0x0a3f, 0x0a37, 0x0a2e, 0x0a26 + .hword 0x0a1e, 0x0a16, 0x0a0e, 0x0a06, 0x09fe, 0x09f6, 0x09ef, 0x09e7 + .hword 0x09e0, 0x09d8, 0x09d1, 0x09c9, 0x09c2, 0x09bb, 0x09b4, 0x09ad + .hword 0x09a5, 0x099e, 0x0998, 0x0991, 0x098a, 0x0983, 0x097c, 0x0976 + .hword 0x096f, 0x0969, 0x0962, 0x095c, 0x0955, 0x094f, 0x0949, 0x0943 + .hword 0x093c, 0x0936, 0x0930, 0x092a, 0x0924, 0x091e, 0x0918, 0x0912 + .hword 0x090d, 0x0907, 0x0901, 0x08fb, 0x08f6, 0x08f0, 0x08eb, 0x08e5 + .hword 0x08e0, 0x08da, 0x08d5, 0x08cf, 0x08ca, 0x08c5, 0x08bf, 0x08ba + .hword 0x08b5, 0x08b0, 0x08ab, 0x08a6, 0x08a1, 0x089c, 0x0897, 0x0892 + .hword 0x088d, 0x0888, 0x0883, 0x087e, 0x087a, 0x0875, 0x0870, 0x086b + .hword 0x0867, 0x0862, 0x085e, 0x0859, 0x0855, 0x0850, 0x084c, 0x0847 + .hword 0x0843, 0x083e, 0x083a, 0x0836, 0x0831, 0x082d, 0x0829, 0x0824 + .hword 0x0820, 0x081c, 0x0818, 0x0814, 0x0810, 0x080c, 0x0808, 0x0804 diff --git a/libpsn00b/psxgte/vector.s b/libpsn00b/psxgte/vector.s deleted file mode 100644 index 2f2f8d6..0000000 --- a/libpsn00b/psxgte/vector.s +++ /dev/null @@ -1,123 +0,0 @@ -.set noreorder -.set noat - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text.Square0 -.global Square0 -.type Square0, @function -Square0: - # a0 - Pointer to input vector (v0) - # a1 - Pointer to output vector (v1) - - lwc2 C2_IR1, 0($a0) - lwc2 C2_IR2, 4($a0) - lwc2 C2_IR3, 8($a0) - - nSQR(0) - - swc2 C2_IR1, 0($a1) - 
swc2 C2_IR2, 4($a1) - swc2 C2_IR3, 8($a1) - - jr $ra - nop - -.section .text.VectorNormalS -.global VectorNormalS -.type VectorNormalS, @function -VectorNormalS: - - lw $t0, 0($a0) - lw $t1, 4($a0) - lw $t2, 8($a0) - - mtc2 $t0, C2_IR1 - mtc2 $t1, C2_IR2 - mtc2 $t2, C2_IR3 - - nSQR(0) - - mfc2 $t3, C2_MAC1 - mfc2 $t4, C2_MAC2 - mfc2 $t5, C2_MAC3 - - add $t3, $t4 - add $v0, $t3, $t5 - mtc2 $v0, C2_LZCS - nop - nop - mfc2 $v1, C2_LZCR - - addiu $at, $0 , -2 - and $v1, $at - - addiu $t6, $0 , 0x1f - sub $t6, $v1 - sra $t6, 1 - addiu $t3, $v1, -24 - - bltz $t3, .Lvalue_neg - nop - b .Lvalue_pos - sllv $t4, $v0, $t3 -.Lvalue_neg: - addiu $t3, $0 , 24 - sub $t3, $v1 - srav $t4, $v0, $t3 -.Lvalue_pos: - addi $t4, -64 - sll $t4, 1 - - la $t5, _norm_table - addu $t5, $t4 - lh $t5, 0($t5) - - mtc2 $t0, C2_IR1 - mtc2 $t1, C2_IR2 - mtc2 $t2, C2_IR3 - mtc2 $t5, C2_IR0 - - nGPF(0) - - mfc2 $t0, C2_MAC1 - mfc2 $t1, C2_MAC2 - mfc2 $t2, C2_MAC3 - - sra $t0, $t6 - sra $t1, $t6 - sra $t2, $t6 - - sh $t0, 0($a1) - sh $t1, 2($a1) - jr $ra - sh $t2, 4($a1) - -.section .data._norm_table -.type _norm_table, @object -_norm_table: - .hword 0x1000, 0x0fe0, 0x0fc1, 0x0fa3, 0x0f85, 0x0f68, 0x0f4c, 0x0f30 - .hword 0x0f15, 0x0efb, 0x0ee1, 0x0ec7, 0x0eae, 0x0e96, 0x0e7e, 0x0e66 - .hword 0x0e4f, 0x0e38, 0x0e22, 0x0e0c, 0x0df7, 0x0de2, 0x0dcd, 0x0db9 - .hword 0x0da5, 0x0d91, 0x0d7e, 0x0d6b, 0x0d58, 0x0d45, 0x0d33, 0x0d21 - .hword 0x0d10, 0x0cff, 0x0cee, 0x0cdd, 0x0ccc, 0x0cbc, 0x0cac, 0x0c9c - .hword 0x0c8d, 0x0c7d, 0x0c6e, 0x0c5f, 0x0c51, 0x0c42, 0x0c34, 0x0c26 - .hword 0x0c18, 0x0c0a, 0x0bfd, 0x0bef, 0x0be2, 0x0bd5, 0x0bc8, 0x0bbb - .hword 0x0baf, 0x0ba2, 0x0b96, 0x0b8a, 0x0b7e, 0x0b72, 0x0b67, 0x0b5b - .hword 0x0b50, 0x0b45, 0x0b39, 0x0b2e, 0x0b24, 0x0b19, 0x0b0e, 0x0b04 - .hword 0x0af9, 0x0aef, 0x0ae5, 0x0adb, 0x0ad1, 0x0ac7, 0x0abd, 0x0ab4 - .hword 0x0aaa, 0x0aa1, 0x0a97, 0x0a8e, 0x0a85, 0x0a7c, 0x0a73, 0x0a6a - .hword 0x0a61, 0x0a59, 0x0a50, 0x0a47, 0x0a3f, 0x0a37, 0x0a2e, 0x0a26 - .hword 
0x0a1e, 0x0a16, 0x0a0e, 0x0a06, 0x09fe, 0x09f6, 0x09ef, 0x09e7 - .hword 0x09e0, 0x09d8, 0x09d1, 0x09c9, 0x09c2, 0x09bb, 0x09b4, 0x09ad - .hword 0x09a5, 0x099e, 0x0998, 0x0991, 0x098a, 0x0983, 0x097c, 0x0976 - .hword 0x096f, 0x0969, 0x0962, 0x095c, 0x0955, 0x094f, 0x0949, 0x0943 - .hword 0x093c, 0x0936, 0x0930, 0x092a, 0x0924, 0x091e, 0x0918, 0x0912 - .hword 0x090d, 0x0907, 0x0901, 0x08fb, 0x08f6, 0x08f0, 0x08eb, 0x08e5 - .hword 0x08e0, 0x08da, 0x08d5, 0x08cf, 0x08ca, 0x08c5, 0x08bf, 0x08ba - .hword 0x08b5, 0x08b0, 0x08ab, 0x08a6, 0x08a1, 0x089c, 0x0897, 0x0892 - .hword 0x088d, 0x0888, 0x0883, 0x087e, 0x087a, 0x0875, 0x0870, 0x086b - .hword 0x0867, 0x0862, 0x085e, 0x0859, 0x0855, 0x0850, 0x084c, 0x0847 - .hword 0x0843, 0x083e, 0x083a, 0x0836, 0x0831, 0x082d, 0x0829, 0x0824 - .hword 0x0820, 0x081c, 0x0818, 0x0814, 0x0810, 0x080c, 0x0808, 0x0804 diff --git a/libpsn00b/psxpress/vlc.S b/libpsn00b/psxpress/vlc.S new file mode 100644 index 0000000..e2efba4 --- /dev/null +++ b/libpsn00b/psxpress/vlc.S @@ -0,0 +1,577 @@ +# PSn00bSDK MDEC library (GTE-accelerated VLC decompressor) +# (C) 2022-2023 spicyjpeg - MPL licensed +# +# TODO: reduce the size of the v3 DC coefficient decoder; currently the code is +# duplicated for each block type, but it can probably be shortened with no +# performance impact... 
+ +#include "gtereg.inc" + +.set noreorder +.set noat + +.set value, $v0 +.set length, $v1 +.set ctx, $a0 +.set output, $a1 +.set max_size, $a2 +.set input, $a3 +.set temp, $t0 +.set window, $t1 +.set next_window, $t2 +.set remaining, $t3 +.set is_v3, $t4 +.set bit_offset, $t5 +.set block_index, $t6 +.set coeff_index, $t7 +.set quant_scale, $s0 +.set last_y, $s1 +.set last_cr, $s2 +.set last_cb, $s3 +.set huffman_table, $t8 +.set ac_jump_area, $t9 + +.set VLC_Context_input, 0x0 +.set VLC_Context_window, 0x4 +.set VLC_Context_next_window, 0x8 +.set VLC_Context_remaining, 0xc +.set VLC_Context_is_v3, 0x10 +.set VLC_Context_bit_offset, 0x11 +.set VLC_Context_block_index, 0x12 +.set VLC_Context_coeff_index, 0x13 +.set VLC_Context_quant_scale, 0x14 +.set VLC_Context_last_y, 0x16 +.set VLC_Context_last_cr, 0x18 +.set VLC_Context_last_cb, 0x1a + +.set VLC_Table_ac0, 0x0 +.set VLC_Table_ac2, 0x4 +.set VLC_Table_ac3, 0x24 +.set VLC_Table_ac4, 0x124 +.set VLC_Table_ac5, 0x134 +.set VLC_Table_ac7, 0x144 +.set VLC_Table_ac8, 0x164 +.set VLC_Table_ac9, 0x1a4 +.set VLC_Table_ac10, 0x1e4 +.set VLC_Table_ac11, 0x224 +.set VLC_Table_ac12, 0x264 +.set VLC_Table_dc, 0x2a4 +.set VLC_Table_dc_len, 0x324 + +.section .text.DecDCTvlcStart, "ax", @progbits +.global DecDCTvlcStart +.type DecDCTvlcStart, @function + +DecDCTvlcStart: + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + + # Create a new context on-the-fly without writing it to memory then jump + # into DecDCTvlcContinue(), skipping context loading. 
+ lw window, 8(input) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) + li last_y, 0 + srl temp, window, 16 + sll window, 16 + or window, temp + + # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) + lw next_window, 12(input) + li last_cr, 0 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + + lhu remaining, 0(input) # remaining = bs->uncomp_length * 2 + li last_cb, 0 + sll remaining, 1 + + lw temp, 4(input) # quant_scale = (bs->quant_scale & 63) << 10 + li bit_offset, 32 + andi quant_scale, temp, 63 + sll quant_scale, 10 + + srl temp, 16 # is_v3 = !(bs->version < 3) + sltiu is_v3, temp, 3 + xori is_v3, 1 + + li block_index, 5 + li coeff_index, 0 + j _vlc_skip_context_load + addiu input, 16 # input = &(bs->data[2]) + +.section .text.DecDCTvlcContinue, "ax", @progbits +.global DecDCTvlcContinue +.type DecDCTvlcContinue, @function + +DecDCTvlcContinue: + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + + lw input, VLC_Context_input(ctx) + lw window, VLC_Context_window(ctx) + lw next_window, VLC_Context_next_window(ctx) + lw remaining, VLC_Context_remaining(ctx) + lb is_v3, VLC_Context_is_v3(ctx) + lb bit_offset, VLC_Context_bit_offset(ctx) + lb block_index, VLC_Context_block_index(ctx) + lb coeff_index, VLC_Context_coeff_index(ctx) + lhu quant_scale, VLC_Context_quant_scale(ctx) + lh last_y, VLC_Context_last_y(ctx) + lh last_cr, VLC_Context_last_cr(ctx) + lh last_cb, VLC_Context_last_cb(ctx) + +_vlc_skip_context_load: + # Determine how many bytes to output. 
+ ## if (max_size <= 0) max_size = 0x3fff0000 + ## max_size = min((max_size - 1) * 2, remaining) + ## remaining -= max_size + bgtz max_size, .Lmax_size_valid + addiu max_size, -1 + lui max_size, 0x3fff +.Lmax_size_valid: + sll max_size, 1 + + subu remaining, max_size + bgez remaining, .Lmax_size_ok + lui temp, 0x3800 + + addu max_size, remaining + li remaining, 0 + +.Lmax_size_ok: + # Write the length of the data that will be decoded to first 4 bytes of the + # output buffer, which will be then parsed by DecDCTin(). + srl value, max_size, 1 # output[0] = 0x38000000 | (max_size / 2) + or value, temp + sw value, 0(output) + + # Obtain the addresses of the lookup table and jump area in advance so that + # they don't have to be retrieved for each coefficient decoded. + lw huffman_table, _vlc_huffman_table + la ac_jump_area, .Lac_prefix_01 - 32 + + beqz max_size, .Lstop_processing + addiu output, 4 + +.Lprocess_next_code_loop: # while (max_size) + # This is the "hot" part of the decoder, executed for each code in the + # bitstream. The first step is to determine if the next code is a DC or AC + # coefficient; at the same time the GTE is given the task of counting the + # number of leading zeroes/ones in the code (which takes 2 more cycles). + mtc2 window, C2_LZCS + + bnez coeff_index, .Lprocess_ac_coefficient + addiu coeff_index, 1 + bnez is_v3, .Lprocess_dc_v3_coefficient + li temp, 0x1ff + +.Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) + # The DC coefficient in version 2 frames is not compressed. Value 0x1ff is + # used to signal the end of the bitstream. 
+ ## prefix = window >> (32 - 10) + ## if (prefix == 0x1ff) break + ## *output = prefix | quant_scale + srl value, window, 22 + beq value, temp, .Lstop_processing + or value, quant_scale + sll window, 10 + addiu bit_offset, -10 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) + # Version 3 DC coefficients are variable-length deltas, prefixed with a + # Huffman code indicating their length. Since the prefix code is up to 7 + # bits long, it makes sense to decode it with a simple 128-byte lookup + # table rather than using the GTE. The codes are different for luma and + # chroma blocks, so each table entry contains the decoded length for both + # block types (packed as two nibbles). Prefix 111111111 is used to signal + # the end of the bitstream. + # prefix = window >> (32 - 9) + ## if (prefix == 0x1ff) break + # lengths = huffman_table->dc[prefix >> 2] + srl length, window, 23 + beq length, temp, .Lstop_processing + srl length, 2 + addu length, huffman_table + + addiu $at, block_index, -4 + bltz $at, .Ldc_block_y + lbu length, VLC_Table_dc(length) + beqz $at, .Ldc_block_cb + andi length, 15 # if (block_index >= Cb) dc_length = lengths & 15 + +.Ldc_block_cr: # if (block_index > Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cr_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. 
+ ## if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cr_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cr_positive: + sll value, 2 # last_cr = (last_cr + (value << 2)) & 0x3ff + addu last_cr, value + andi last_cr, 0x3ff + +.Ldc_cr_zero: + or temp, last_cr, quant_scale # *output = last_cr | quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) + +.Ldc_block_cb: # if (block_index == Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cb_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. + ## if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cb_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cb_positive: + sll value, 2 # last_cb = (last_cb + (value << 2)) & 0x3ff + addu last_cb, value + andi last_cb, 0x3ff + +.Ldc_cb_zero: + or temp, last_cb, quant_scale # *output = last_cb | quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) + +.Ldc_block_y: # if (block_index < Cb) + nop + srl length, 4 # dc_length = lengths >> 4 + + # prefix_length = huffman_table->dc_len[dc_length] >> 4 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + srl temp, 4 + + sllv window, window, temp + beqz length, .Ldc_y_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. 
+ ## if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_y_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_y_positive: + sll value, 2 # last_y = (last_y + (value << 2)) & 0x3ff + addu last_y, value + andi last_y, 0x3ff + +.Ldc_y_zero: + or temp, last_y, quant_scale # *output = last_y | quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) + +.Lprocess_ac_coefficient: # if (coeff_index) + # Check whether the prefix code is 10 or 11 (i.e. if it starts with 1). If + # not, retrieve the number of leading zeroes from the GTE and use it as an + # index into the jump area. Each block in the area is 8 instructions long + # and handles decoding a specific prefix. + mfc2 temp, C2_LZCR + + bltz window, .Lac_prefix_1 # if (!(window >> 31)) + addiu $at, temp, -11 # if (prefix > 11) return -1 + bgtz $at, .Lreturn_error + sll temp, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(uint32_t)] + addu temp, ac_jump_area + jr temp + nop + +.Lreturn_error: + b .Lreturn + li $v0, -1 + +.Lac_prefix_1: # if (window >> 31) + sll window, 1 + bltz window, .Lac_prefix_11 + li temp, 0xfe00 + +.Lac_prefix_10: + # Prefix 10 marks the end of a block. + # *output = 0xfe00 + # coeff_index = 0 + ## if (--block_index < Y3) block_index = Cr + sll window, 1 + addiu bit_offset, -2 + sh temp, 0(output) + + addiu block_index, -1 + bgez block_index, .Lfeed_bitstream + li coeff_index, 0 + b .Lfeed_bitstream + li block_index, 5 + +.Lac_prefix_11: + # Prefix 11 is followed by a single bit. Note that the 10/11 prefix check + # already shifts the window by one bit (without updating the bit offset). 
+ # index = ((window >> (32 - 1 - 1)) & 1) * sizeof(uint16_t) + # *output = huffman_table->ac0[index] + srl value, window, 29 + andi value, 2 + addu value, huffman_table + lhu value, VLC_Table_ac0(value) + sll window, 2 + addiu bit_offset, -3 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_01: + # Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a + # 3-bit lookup index starting with 0. A 32-bit lookup table is used, + # containing both MDEC codes and lengths. + # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(uint32_t) + # *output = huffman_table->ac2[index] & 0xffff + # length = huffman_table->ac2[index] >> 16 + srl value, window, 25 + andi value, 28 + addu value, huffman_table + lw value, VLC_Table_ac2(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) + .word 0, 0 + +.Lac_prefix_001: + # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a + # 3-bit lookup index starting with 01/10/11. + # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(uint32_t) + # *output = huffman_table->ac3[index] & 0xffff + # length = huffman_table->ac3[index] >> 16 + srl value, window, 21 + andi value, 252 + addu value, huffman_table + lw value, VLC_Table_ac3(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) + .word 0, 0 + +.Lac_prefix_0001: + # Prefix 0001 is followed by a 3-bit lookup index. + # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac4[index] + srl value, window, 24 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac4(value) + sll window, 7 + addiu bit_offset, -7 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_00001: + # Prefix 00001 is followed by a 3-bit lookup index. 
+ # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac5[index] + srl value, window, 23 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac5(value) + sll window, 8 + addiu bit_offset, -8 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_000001: + # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. + # *output = window >> (32 - 6 - 16) + srl value, window, 10 + sll window, 22 + addiu bit_offset, -22 + + b .Lfeed_bitstream + sh value, 0(output) + .word 0, 0, 0 + +.Lac_prefix_0000001: + # Prefix 0000001 is followed by a 4-bit lookup index. + # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(uint16_t) + # *output = huffman_table->ac7[index] + srl value, window, 20 + andi value, 30 + addu value, huffman_table + lhu value, VLC_Table_ac7(value) + sll window, 11 + addiu bit_offset, -11 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_00000001: + # Prefix 00000001 is followed by a 5-bit lookup index. + # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac8[index] + srl value, window, 18 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac8(value) + sll window, 13 + addiu bit_offset, -13 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_000000001: + # Prefix 000000001 is followed by a 5-bit lookup index. + # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac9[index] + srl value, window, 17 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac9(value) + sll window, 14 + addiu bit_offset, -14 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_0000000001: + # Prefix 0000000001 is followed by a 5-bit lookup index. 
+ # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac10[index] + srl value, window, 16 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac10(value) + sll window, 15 + addiu bit_offset, -15 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_00000000001: + # Prefix 00000000001 is followed by a 5-bit lookup index. + # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac11[index] + srl value, window, 15 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac11(value) + sll window, 16 + addiu bit_offset, -16 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lac_prefix_000000000001: + # Prefix 000000000001 is followed by a 5-bit lookup index. + # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac12[index] + srl value, window, 14 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac12(value) + sll window, 17 + addiu bit_offset, -17 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lupdate_window_ac: + srl length, value, 16 +.Lupdate_window_dc: + sllv window, window, length + subu bit_offset, length + +.Lfeed_bitstream: + # Update the window. This makes sure the next iteration of the loop will be + # able to read up to 32 bits from the bitstream. 
+ bgez bit_offset, .Lskip_feeding # if (bit_offset < 0) + addiu max_size, -1 + + subu temp, $0, bit_offset # window = next_window << (-bit_offset) + sllv window, next_window, temp + lw next_window, 0(input) # next_window = (*input << 16) | (*input >> 16) + addiu bit_offset, 32 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + addiu input, 4 + +.Lskip_feeding: + srlv temp, next_window, bit_offset # window |= next_window >> bit_offset + or window, temp + + bnez max_size, .Lprocess_next_code_loop + addiu output, 2 + +.Lstop_processing: + ## If remaining = 0, skip flushing the context, pad the output buffer with + # end-of-block codes if necessary and return 0. Otherwise flush the context + # and return 1. + beqz remaining, .Lpad_output_buffer + li temp, 0xfe00 + + sw input, VLC_Context_input(ctx) + sw window, VLC_Context_window(ctx) + sw next_window, VLC_Context_next_window(ctx) + sw remaining, VLC_Context_remaining(ctx) + sb bit_offset, VLC_Context_bit_offset(ctx) + sb block_index, VLC_Context_block_index(ctx) + sb coeff_index, VLC_Context_coeff_index(ctx) + sh last_y, VLC_Context_last_y(ctx) + sh last_cr, VLC_Context_last_cr(ctx) + sh last_cb, VLC_Context_last_cb(ctx) + + b .Lreturn + li $v0, 1 + +.Lpad_output_buffer: + beqz max_size, .Lreturn + li $v0, 0 + +.Lpad_output_buffer_loop: # while (max_size) + sh temp, 0(output) + addiu max_size, -1 + bnez max_size, .Lpad_output_buffer_loop + addiu output, 2 + +.Lreturn: + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + jr $ra + addiu $sp, 16 diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s deleted file mode 100644 index 5707a21..0000000 --- a/libpsn00b/psxpress/vlc.s +++ /dev/null @@ -1,577 +0,0 @@ -# PSn00bSDK MDEC library (GTE-accelerated VLC decompressor) -# (C) 2022-2023 spicyjpeg - MPL licensed -# -# TODO: reduce the size of the v3 DC coefficient decoder; currently the code is -# duplicated for each block type, but it can probably be shortened with no -# 
performance impact... - -.include "gtereg.inc" - -.set noreorder -.set noat - -.set value, $v0 -.set length, $v1 -.set ctx, $a0 -.set output, $a1 -.set max_size, $a2 -.set input, $a3 -.set temp, $t0 -.set window, $t1 -.set next_window, $t2 -.set remaining, $t3 -.set is_v3, $t4 -.set bit_offset, $t5 -.set block_index, $t6 -.set coeff_index, $t7 -.set quant_scale, $s0 -.set last_y, $s1 -.set last_cr, $s2 -.set last_cb, $s3 -.set huffman_table, $t8 -.set ac_jump_area, $t9 - -.set VLC_Context_input, 0x0 -.set VLC_Context_window, 0x4 -.set VLC_Context_next_window, 0x8 -.set VLC_Context_remaining, 0xc -.set VLC_Context_is_v3, 0x10 -.set VLC_Context_bit_offset, 0x11 -.set VLC_Context_block_index, 0x12 -.set VLC_Context_coeff_index, 0x13 -.set VLC_Context_quant_scale, 0x14 -.set VLC_Context_last_y, 0x16 -.set VLC_Context_last_cr, 0x18 -.set VLC_Context_last_cb, 0x1a - -.set VLC_Table_ac0, 0x0 -.set VLC_Table_ac2, 0x4 -.set VLC_Table_ac3, 0x24 -.set VLC_Table_ac4, 0x124 -.set VLC_Table_ac5, 0x134 -.set VLC_Table_ac7, 0x144 -.set VLC_Table_ac8, 0x164 -.set VLC_Table_ac9, 0x1a4 -.set VLC_Table_ac10, 0x1e4 -.set VLC_Table_ac11, 0x224 -.set VLC_Table_ac12, 0x264 -.set VLC_Table_dc, 0x2a4 -.set VLC_Table_dc_len, 0x324 - -.section .text.DecDCTvlcStart, "ax", @progbits -.global DecDCTvlcStart -.type DecDCTvlcStart, @function - -DecDCTvlcStart: - addiu $sp, -16 - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - - # Create a new context on-the-fly without writing it to memory then jump - # into DecDCTvlcContinue(), skipping context loading. 
- lw window, 8(input) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) - li last_y, 0 - srl temp, window, 16 - sll window, 16 - or window, temp - - # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) - lw next_window, 12(input) - li last_cr, 0 - srl temp, next_window, 16 - sll next_window, 16 - or next_window, temp - - lhu remaining, 0(input) # remaining = bs->uncomp_length * 2 - li last_cb, 0 - sll remaining, 1 - - lw temp, 4(input) # quant_scale = (bs->quant_scale & 63) << 10 - li bit_offset, 32 - andi quant_scale, temp, 63 - sll quant_scale, 10 - - srl temp, 16 # is_v3 = !(bs->version < 3) - sltiu is_v3, temp, 3 - xori is_v3, 1 - - li block_index, 5 - li coeff_index, 0 - j _vlc_skip_context_load - addiu input, 16 # input = &(bs->data[2]) - -.section .text.DecDCTvlcContinue, "ax", @progbits -.global DecDCTvlcContinue -.type DecDCTvlcContinue, @function - -DecDCTvlcContinue: - addiu $sp, -16 - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - - lw input, VLC_Context_input(ctx) - lw window, VLC_Context_window(ctx) - lw next_window, VLC_Context_next_window(ctx) - lw remaining, VLC_Context_remaining(ctx) - lb is_v3, VLC_Context_is_v3(ctx) - lb bit_offset, VLC_Context_bit_offset(ctx) - lb block_index, VLC_Context_block_index(ctx) - lb coeff_index, VLC_Context_coeff_index(ctx) - lhu quant_scale, VLC_Context_quant_scale(ctx) - lh last_y, VLC_Context_last_y(ctx) - lh last_cr, VLC_Context_last_cr(ctx) - lh last_cb, VLC_Context_last_cb(ctx) - -_vlc_skip_context_load: - # Determine how many bytes to output. 
- # if (max_size <= 0) max_size = 0x3fff0000 - # max_size = min((max_size - 1) * 2, remaining) - # remaining -= max_size - bgtz max_size, .Lmax_size_valid - addiu max_size, -1 - lui max_size, 0x3fff -.Lmax_size_valid: - sll max_size, 1 - - subu remaining, max_size - bgez remaining, .Lmax_size_ok - lui temp, 0x3800 - - addu max_size, remaining - li remaining, 0 - -.Lmax_size_ok: - # Write the length of the data that will be decoded to first 4 bytes of the - # output buffer, which will be then parsed by DecDCTin(). - srl value, max_size, 1 # output[0] = 0x38000000 | (max_size / 2) - or value, temp - sw value, 0(output) - - # Obtain the addresses of the lookup table and jump area in advance so that - # they don't have to be retrieved for each coefficient decoded. - lw huffman_table, _vlc_huffman_table - la ac_jump_area, .Lac_prefix_01 - 32 - - beqz max_size, .Lstop_processing - addiu output, 4 - -.Lprocess_next_code_loop: # while (max_size) - # This is the "hot" part of the decoder, executed for each code in the - # bitstream. The first step is to determine if the next code is a DC or AC - # coefficient; at the same time the GTE is given the task of counting the - # number of leading zeroes/ones in the code (which takes 2 more cycles). - mtc2 window, C2_LZCS - - bnez coeff_index, .Lprocess_ac_coefficient - addiu coeff_index, 1 - bnez is_v3, .Lprocess_dc_v3_coefficient - li temp, 0x1ff - -.Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) - # The DC coefficient in version 2 frames is not compressed. Value 0x1ff is - # used to signal the end of the bitstream. 
- # prefix = window >> (32 - 10) - # if (prefix == 0x1ff) break - # *output = prefix | quant_scale - srl value, window, 22 - beq value, temp, .Lstop_processing - or value, quant_scale - sll window, 10 - addiu bit_offset, -10 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) - # Version 3 DC coefficients are variable-length deltas, prefixed with a - # Huffman code indicating their length. Since the prefix code is up to 7 - # bits long, it makes sense to decode it with a simple 128-byte lookup - # table rather than using the GTE. The codes are different for luma and - # chroma blocks, so each table entry contains the decoded length for both - # block types (packed as two nibbles). Prefix 111111111 is used to signal - # the end of the bitstream. - # prefix = window >> (32 - 9) - # if (prefix == 0x1ff) break - # lengths = huffman_table->dc[prefix >> 2] - srl length, window, 23 - beq length, temp, .Lstop_processing - srl length, 2 - addu length, huffman_table - - addiu $at, block_index, -4 - bltz $at, .Ldc_block_y - lbu length, VLC_Table_dc(length) - beqz $at, .Ldc_block_cb - andi length, 15 # if (block_index >= Cb) dc_length = lengths & 15 - -.Ldc_block_cr: # if (block_index > Cb) - # prefix_length = huffman_table->dc_len[dc_length] & 15 - addu temp, length, huffman_table - lbu temp, VLC_Table_dc_len(temp) - li $at, 32 - andi temp, 15 - - sllv window, window, temp - beqz length, .Ldc_cr_zero # if (dc_length) - subu bit_offset, temp - - subu $at, length # value = window >> (32 - dc_length) - srlv value, window, $at - - # Decode the sign bit, then add the decoded delta to the current value. 
- # if (!(window >> 31)) value -= (1 << dc_length) - 1 - bltz window, .Ldc_cr_positive - li temp, -1 - srlv temp, temp, $at - subu value, temp -.Ldc_cr_positive: - sll value, 2 # last_cr = (last_cr + (value << 2)) & 0x3ff - addu last_cr, value - andi last_cr, 0x3ff - -.Ldc_cr_zero: - or temp, last_cr, quant_scale # *output = last_cr | quant_scale - b .Lupdate_window_dc # update_window(dc_length) - sh temp, 0(output) - -.Ldc_block_cb: # if (block_index == Cb) - # prefix_length = huffman_table->dc_len[dc_length] & 15 - addu temp, length, huffman_table - lbu temp, VLC_Table_dc_len(temp) - li $at, 32 - andi temp, 15 - - sllv window, window, temp - beqz length, .Ldc_cb_zero # if (dc_length) - subu bit_offset, temp - - subu $at, length # value = window >> (32 - dc_length) - srlv value, window, $at - - # Decode the sign bit, then add the decoded delta to the current value. - # if (!(window >> 31)) value -= (1 << dc_length) - 1 - bltz window, .Ldc_cb_positive - li temp, -1 - srlv temp, temp, $at - subu value, temp -.Ldc_cb_positive: - sll value, 2 # last_cb = (last_cb + (value << 2)) & 0x3ff - addu last_cb, value - andi last_cb, 0x3ff - -.Ldc_cb_zero: - or temp, last_cb, quant_scale # *output = last_cb | quant_scale - b .Lupdate_window_dc # update_window(dc_length) - sh temp, 0(output) - -.Ldc_block_y: # if (block_index < Cb) - nop - srl length, 4 # dc_length = lengths >> 4 - - # prefix_length = huffman_table->dc_len[dc_length] >> 4 - addu temp, length, huffman_table - lbu temp, VLC_Table_dc_len(temp) - li $at, 32 - srl temp, 4 - - sllv window, window, temp - beqz length, .Ldc_y_zero # if (dc_length) - subu bit_offset, temp - - subu $at, length # value = window >> (32 - dc_length) - srlv value, window, $at - - # Decode the sign bit, then add the decoded delta to the current value. 
- # if (!(window >> 31)) value -= (1 << dc_length) - 1 - bltz window, .Ldc_y_positive - li temp, -1 - srlv temp, temp, $at - subu value, temp -.Ldc_y_positive: - sll value, 2 # last_y = (last_y + (value << 2)) & 0x3ff - addu last_y, value - andi last_y, 0x3ff - -.Ldc_y_zero: - or temp, last_y, quant_scale # *output = last_y | quant_scale - b .Lupdate_window_dc # update_window(dc_length) - sh temp, 0(output) - -.Lprocess_ac_coefficient: # if (coeff_index) - # Check whether the prefix code is 10 or 11 (i.e. if it starts with 1). If - # not, retrieve the number of leading zeroes from the GTE and use it as an - # index into the jump area. Each block in the area is 8 instructions long - # and handles decoding a specific prefix. - mfc2 temp, C2_LZCR - - bltz window, .Lac_prefix_1 # if (!(window >> 31)) - addiu $at, temp, -11 # if (prefix > 11) return -1 - bgtz $at, .Lreturn_error - sll temp, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(uint32_t)] - addu temp, ac_jump_area - jr temp - nop - -.Lreturn_error: - b .Lreturn - li $v0, -1 - -.Lac_prefix_1: # if (window >> 31) - sll window, 1 - bltz window, .Lac_prefix_11 - li temp, 0xfe00 - -.Lac_prefix_10: - # Prefix 10 marks the end of a block. - # *output = 0xfe00 - # coeff_index = 0 - # if (--block_index < Y3) block_index = Cr - sll window, 1 - addiu bit_offset, -2 - sh temp, 0(output) - - addiu block_index, -1 - bgez block_index, .Lfeed_bitstream - li coeff_index, 0 - b .Lfeed_bitstream - li block_index, 5 - -.Lac_prefix_11: - # Prefix 11 is followed by a single bit. Note that the 10/11 prefix check - # already shifts the window by one bit (without updating the bit offset). 
- # index = ((window >> (32 - 1 - 1)) & 1) * sizeof(uint16_t) - # *output = huffman_table->ac0[index] - srl value, window, 29 - andi value, 2 - addu value, huffman_table - lhu value, VLC_Table_ac0(value) - sll window, 2 - addiu bit_offset, -3 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_01: - # Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a - # 3-bit lookup index starting with 0. A 32-bit lookup table is used, - # containing both MDEC codes and lengths. - # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(uint32_t) - # *output = huffman_table->ac2[index] & 0xffff - # length = huffman_table->ac2[index] >> 16 - srl value, window, 25 - andi value, 28 - addu value, huffman_table - lw value, VLC_Table_ac2(value) - - b .Lupdate_window_ac # update_window(value >> 16) - sh value, 0(output) - .word 0, 0 - -.Lac_prefix_001: - # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a - # 3-bit lookup index starting with 01/10/11. - # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(uint32_t) - # *output = huffman_table->ac3[index] & 0xffff - # length = huffman_table->ac3[index] >> 16 - srl value, window, 21 - andi value, 252 - addu value, huffman_table - lw value, VLC_Table_ac3(value) - - b .Lupdate_window_ac # update_window(value >> 16) - sh value, 0(output) - .word 0, 0 - -.Lac_prefix_0001: - # Prefix 0001 is followed by a 3-bit lookup index. - # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(uint16_t) - # *output = huffman_table->ac4[index] - srl value, window, 24 - andi value, 14 - addu value, huffman_table - lhu value, VLC_Table_ac4(value) - sll window, 7 - addiu bit_offset, -7 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_00001: - # Prefix 00001 is followed by a 3-bit lookup index. 
- # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(uint16_t) - # *output = huffman_table->ac5[index] - srl value, window, 23 - andi value, 14 - addu value, huffman_table - lhu value, VLC_Table_ac5(value) - sll window, 8 - addiu bit_offset, -8 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_000001: - # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. - # *output = window >> (32 - 6 - 16) - srl value, window, 10 - sll window, 22 - addiu bit_offset, -22 - - b .Lfeed_bitstream - sh value, 0(output) - .word 0, 0, 0 - -.Lac_prefix_0000001: - # Prefix 0000001 is followed by a 4-bit lookup index. - # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(uint16_t) - # *output = huffman_table->ac7[index] - srl value, window, 20 - andi value, 30 - addu value, huffman_table - lhu value, VLC_Table_ac7(value) - sll window, 11 - addiu bit_offset, -11 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_00000001: - # Prefix 00000001 is followed by a 5-bit lookup index. - # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(uint16_t) - # *output = huffman_table->ac8[index] - srl value, window, 18 - andi value, 62 - addu value, huffman_table - lhu value, VLC_Table_ac8(value) - sll window, 13 - addiu bit_offset, -13 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_000000001: - # Prefix 000000001 is followed by a 5-bit lookup index. - # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(uint16_t) - # *output = huffman_table->ac9[index] - srl value, window, 17 - andi value, 62 - addu value, huffman_table - lhu value, VLC_Table_ac9(value) - sll window, 14 - addiu bit_offset, -14 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_0000000001: - # Prefix 0000000001 is followed by a 5-bit lookup index. 
- # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(uint16_t) - # *output = huffman_table->ac10[index] - srl value, window, 16 - andi value, 62 - addu value, huffman_table - lhu value, VLC_Table_ac10(value) - sll window, 15 - addiu bit_offset, -15 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_00000000001: - # Prefix 00000000001 is followed by a 5-bit lookup index. - # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(uint16_t) - # *output = huffman_table->ac11[index] - srl value, window, 15 - andi value, 62 - addu value, huffman_table - lhu value, VLC_Table_ac11(value) - sll window, 16 - addiu bit_offset, -16 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lac_prefix_000000000001: - # Prefix 000000000001 is followed by a 5-bit lookup index. - # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(uint16_t) - # *output = huffman_table->ac12[index] - srl value, window, 14 - andi value, 62 - addu value, huffman_table - lhu value, VLC_Table_ac12(value) - sll window, 17 - addiu bit_offset, -17 - - b .Lfeed_bitstream - sh value, 0(output) - -.Lupdate_window_ac: - srl length, value, 16 -.Lupdate_window_dc: - sllv window, window, length - subu bit_offset, length - -.Lfeed_bitstream: - # Update the window. This makes sure the next iteration of the loop will be - # able to read up to 32 bits from the bitstream. 
- bgez bit_offset, .Lskip_feeding # if (bit_offset < 0) - addiu max_size, -1 - - subu temp, $0, bit_offset # window = next_window << (-bit_offset) - sllv window, next_window, temp - lw next_window, 0(input) # next_window = (*input << 16) | (*input >> 16) - addiu bit_offset, 32 - srl temp, next_window, 16 - sll next_window, 16 - or next_window, temp - addiu input, 4 - -.Lskip_feeding: - srlv temp, next_window, bit_offset # window |= next_window >> bit_offset - or window, temp - - bnez max_size, .Lprocess_next_code_loop - addiu output, 2 - -.Lstop_processing: - # If remaining = 0, skip flushing the context, pad the output buffer with - # end-of-block codes if necessary and return 0. Otherwise flush the context - # and return 1. - beqz remaining, .Lpad_output_buffer - li temp, 0xfe00 - - sw input, VLC_Context_input(ctx) - sw window, VLC_Context_window(ctx) - sw next_window, VLC_Context_next_window(ctx) - sw remaining, VLC_Context_remaining(ctx) - sb bit_offset, VLC_Context_bit_offset(ctx) - sb block_index, VLC_Context_block_index(ctx) - sb coeff_index, VLC_Context_coeff_index(ctx) - sh last_y, VLC_Context_last_y(ctx) - sh last_cr, VLC_Context_last_cr(ctx) - sh last_cb, VLC_Context_last_cb(ctx) - - b .Lreturn - li $v0, 1 - -.Lpad_output_buffer: - beqz max_size, .Lreturn - li $v0, 0 - -.Lpad_output_buffer_loop: # while (max_size) - sh temp, 0(output) - addiu max_size, -1 - bnez max_size, .Lpad_output_buffer_loop - addiu output, 2 - -.Lreturn: - lw $s0, 0($sp) - lw $s1, 4($sp) - lw $s2, 8($sp) - lw $s3, 12($sp) - jr $ra - addiu $sp, 16 diff --git a/libpsn00b/smd/smd.S b/libpsn00b/smd/smd.S new file mode 100644 index 0000000..693f2cd --- /dev/null +++ b/libpsn00b/smd/smd.S @@ -0,0 +1,928 @@ +# PSn00bSDK .SMD model parser library +# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + +.set noreorder + +#include "gtereg.inc" +#include "inline_s.inc" +#include "smd/smd_s.inc" + +# Currently does not do header checks +.section .text.smdInitData, "ax", @progbits +.global 
smdInitData +.type smdInitData, @function + +smdInitData: + lw $a1, SMD_HEAD_PVERTS($a0) # Initialize header pointers + lw $a2, SMD_HEAD_PNORMS($a0) + lw $a3, SMD_HEAD_PPRIMS($a0) + + addu $a1, $a0 + addu $a2, $a0 + addu $a3, $a0 + + sw $a1, SMD_HEAD_PVERTS($a0) + sw $a2, SMD_HEAD_PNORMS($a0) + sw $a3, SMD_HEAD_PPRIMS($a0) + + jr $ra + move $v0, $a0 + +.section .text.scSetClipRect, "ax", @progbits +.global scSetClipRect +.type scSetClipRect, @function + +scSetClipRect: + sll $a1, 16 + or $a0, $a1 + sll $a3, 16 + or $a2, $a3 + la $a1, _sc_clip + sw $a0, 0( $a1 ) + jr $ra + sw $a2, 4( $a1 ) + +.section .text.smdSetBaseTPage, "ax", @progbits +.global smdSetBaseTPage +.type smdSetBaseTPage, @function + +smdSetBaseTPage: + la $v0, _smd_tpage_base + jr $ra + sw $a0, 0($v0) + +.section .text.smdSortModel, "ax", @progbits +.global smdSortModel +.type smdSortModel, @function + +smdSortModel: + # a0 - Pointer SC_OT structure + # a1 - Pointer to next primitive + # a2 - Pointer to SMD data address + # v0 - New pointer of primitive buffer (return) + + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + + la $v0, _sc_clip + lw $t8, 0($v0) + lw $t9, 4($v0) + + lw $t0, OT_LEN($a0) + lw $a0, OT_ADDR($a0) + lw $t1, SMD_HEAD_PVERTS($a2) + lw $t2, SMD_HEAD_PNORMS($a2) + lw $t3, SMD_HEAD_PPRIMS($a2) + +.sort_loop: + + nop + lw $a3, 0($t3) # Get primitive ID word + move $t4, $t3 + + beqz $a3, .exit # Check if terminator (just zero) + addiu $t4, 4 + + lhu $t5, 0( $t4 ) # Load vertices + lhu $t6, 2( $t4 ) + lhu $t7, 4( $t4 ) + sll $t5, 3 + sll $t6, 3 + sll $t7, 3 + addu $t5, $t1 + addu $t6, $t1 + addu $t7, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + lwc2 C2_VXY1, 0( $t6 ) + lwc2 C2_VZ1 , 4( $t6 ) + lwc2 C2_VXY2, 0( $t7 ) + lwc2 C2_VZ2 , 4( $t7 ) + + srl $v1, $a3, 24 # Get primitive size + addu $t3, $v1 # Step main pointer to next primitive + + RTPT + + cfc2 $v0, C2_FLAG # Get GTE flag value + nop + + bltz $v0, .skip_prim # Skip primitive if Z 
overflow + andi $v0, $a3, 0x3 + + NCLIP # Backface culling + + srl $v1, $a3, 12 + andi $v1, 1 + + bnez $v1, .no_culling + nop + + mfc2 $v1, C2_MAC0 + nop + bltz $v1, .skip_prim + nop + +.no_culling: + + beq $v0, 0x1, .prim_tri # If primitive is a triangle + nop + beq $v0, 0x2, .prim_quad # If primitive is a quad + nop + + b .skip_prim + nop + +## Triangles + +.prim_tri: # Triangle processing + + addiu $t4, 8 # Advance from indices + + AVSZ3 # Calculate average Z + + srl $v0, $t0, 16 # Get Z divisor from OT_LEN value + andi $v0, 0xff + + mfc2 $t5, C2_OTZ # Get AVSZ3 result + + sra $v1, $t0, 24 # Get Z offset from OT_LEN value + + srl $t5, $v0 # Apply divisor and offset + sub $t5, $v1 + + blez $t5, .skip_prim # Skip primitive if less than zero + andi $v1, $t0, 0xffff + bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length + sll $t5, 2 + addu $t5, $a0 # Append OTZ to OT address + + ClipTestTri + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw + and $v0, $s2, $s0 # v2 & v0 + beqz $v0, .do_draw + nop + b .skip_prim + nop + +.do_draw: + + + srl $v0, $a3, 2 # Lighting enabled? 
+ andi $v0, 0x3 + bnez $v0, .F3_light + nop + + andi $v0, $a3, 0x10 # Gouraud shaded + bnez $v0, .F3_gouraud + nop + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_textured + nop + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2000 + or $v0, $v1 + + b .sort_F3_pri + sw $v0, POLYF3_rgbc( $a1 ) + +.F3_gouraud: + + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3000 + or $v0, $at + .set at + sw $v0, POLYG3_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG3_rgbc1($a1) + b .sort_G3_pri + sw $v0, POLYG3_rgbc2($a1) + +.F3_textured: + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2400 + or $v0, $v1 + sw $v0, POLYFT3_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v0, 8( $t4 ) # Tpage + CLUT + nop + andi $v1, $v0, 0xffff + sh $v1, POLYFT3_tpage( $a1 ) + srl $v0, 16 + + b .sort_FT3_pri + sh $v0, POLYFT3_clut( $a1 ) + +.F3_light: + + lhu $v0, 0( $t4 ) # Load normal 0 + + srl $v1, $a3, 2 + andi $v1, $v1, 0x3 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + beq $v1, 0x2, .F3_light_smt + nop + + lw $v0, 4( $t4 ) + lui $v1, 0x2000 + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 8 + nop + + NCCS + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_light_tex + nop + + swc2 C2_RGB2, POLYF3_rgbc( $a1 ) + + b .sort_F3_pri + nop + +.F3_light_tex: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT3_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT3_clut( $a1 ) + + mfc2 $v0, C2_RGB2 + lui $v1, 0x2400 + or $v0, $v1 + + b .sort_FT3_pri + sw $v0, POLYFT3_rgbc( $a1 ) + +.F3_light_smt: + + lhu $v0, 2( $t4 ) # Load normals 1 and 2 + lhu $v1, 4( $t4 ) + sll $v0, 3 + 
sll $v1, 3 + addu $v0, $t2 + addu $v1, $t2 + lwc2 C2_VXY1, 0( $v0 ) + lwc2 C2_VZ1 , 4( $v0 ) + lw $v0, 8( $t4 ) + lwc2 C2_VXY2, 0( $v1 ) + lwc2 C2_VZ2 , 4( $v1 ) + lui $v1, 0x3000 # Load color + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 12 + nop + + NCCT + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_light_tex_smt + nop + + swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) + swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) + swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) + + b .sort_G3_pri + nop + +.F3_light_tex_smt: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYGT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYGT3_uv1( $a1 ) + sh $v0, POLYGT3_uv2( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYGT3_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYGT3_clut( $a1 ) + + mfc2 $v0, C2_RGB0 + lui $v1, 0x3400 + or $v0, $v1 + + swc2 C2_RGB1, POLYGT3_rgbc1( $a1 ) + swc2 C2_RGB2, POLYGT3_rgbc2( $a1 ) + + b .sort_GT3_pri + sw $v0, POLYGT3_rgbc0( $a1 ) + +.sort_F3_pri: + + swc2 C2_SXY0, POLYF3_xy0($a1) + swc2 C2_SXY1, POLYF3_xy1($a1) + swc2 C2_SXY2, POLYF3_xy2($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0500 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF3_len + +.sort_FT3_pri: + + swc2 C2_SXY0, POLYFT3_xy0( $a1 ) + swc2 C2_SXY1, POLYFT3_xy1( $a1 ) + swc2 C2_SXY2, POLYFT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at 
+ or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT3_len + +.sort_G3_pri: + + swc2 C2_SXY0, POLYG3_xy0( $a1 ) + swc2 C2_SXY1, POLYG3_xy1( $a1 ) + swc2 C2_SXY2, POLYG3_xy2( $a1 ) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG3_len + +.sort_GT3_pri: + + swc2 C2_SXY0, POLYGT3_xy0( $a1 ) + swc2 C2_SXY1, POLYGT3_xy1( $a1 ) + swc2 C2_SXY2, POLYGT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT3_len + +## Quads + +.prim_quad: # Quad processing + + mfc2 $t6, C2_SXY0 # Retrieve first projected vertex + + lhu $t5, 6( $t4 ) # Project the last vertex + addiu $t4, 8 + sll $t5, 3 + addu $t5, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + + nRTPS + + cfc2 $v1, C2_FLAG # Get GTE flag value + + srl $v0, $t0, 16 # Get Z divisor from OT_LEN value + + bltz $v1, .skip_prim + nop + + AVSZ4 + + andi $v0, 0xff + + mfc2 $t5, C2_OTZ + + sra $v1, $t0, 24 # Get Z offset from OT_LEN value + + srl $t5, $v0 # Apply divisor and offset + sub $t5, $v1 + + blez $t5, .skip_prim # Skip primitive if less than zero + andi $v1, $t0, 0xffff + bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length + sll $t5, 2 + addu $t5, $a0 # Append 
OTZ to OT address + + # no touch: + # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0) + + ClipTestQuad + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw_q + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw_q + and $v0, $s2, $s3 # v2 & v3 + beqz $v0, .do_draw_q + and $v0, $s3, $s0 # v3 & v0 + beqz $v0, .do_draw_q + and $v0, $s0, $s2 # v0 & v2 + beqz $v0, .do_draw_q + and $v0, $s1, $s3 # v1 & v3 + beqz $v0, .do_draw_q + nop + b .skip_prim + nop + +.do_draw_q: + + srl $v0, $a3, 2 # Lighting enabled? + andi $v0, 0x3 + bnez $v0, .F4_light + nop + + andi $v0, $a3, 0x10 # Gouraud quad + bnez $v0, .F4_gouraud + nop + + andi $v0, $a3, 0x20 # Textured quad + bnez $v0, .F4_textured + nop + + lw $v0, 0($t4) + lui $v1, 0x2800 + or $v0, $v1 + + b .sort_F4_pri + sw $v0, POLYF4_rgbc($a1) + +.F4_gouraud: + + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3800 + or $v0, $at + .set at + sw $v0, POLYG4_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG4_rgbc1($a1) + lw $v1, 12($t4) + sw $v0, POLYG4_rgbc2($a1) + b .sort_G4_pri + sw $v1, POLYG4_rgbc3($a1) + +.F4_textured: + + lw $v0, 0($t4) + lui $v1, 0x2c00 + or $v0, $v1 + sw $v0, POLYFT4_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0($t4) # Load texture coordinates + lhu $v1, 2($t4) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + + b .sort_FT4_pri + sh $v0, POLYFT4_clut($a1) + +.F4_light: + + lhu $v0, 0( $t4 ) # Load normal 0 + + srl $v1, $a3, 2 + andi $v1, $v1, 0x3 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + beq $v1, 0x2, .F4_light_smt + nop + + lw $v0, 4( $t4 ) + lui $v1, 0x2800 + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 8 + nop + + NCCS + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F4_light_tex + nop + + swc2 C2_RGB2, POLYF4_rgbc( $a1 ) + + b .sort_F4_pri + nop + 
+.F4_light_tex: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT4_clut( $a1 ) + + mfc2 $v0, C2_RGB2 + lui $v1, 0x2c00 + or $v0, $v1 + + b .sort_FT4_pri + sw $v0, POLYFT4_rgbc( $a1 ) + +.F4_light_smt: + + lhu $v0, 2( $t4 ) # Load normals 1 and 2 + lhu $v1, 4( $t4 ) + sll $v0, 3 + sll $v1, 3 + addu $v0, $t2 + addu $v1, $t2 + lwc2 C2_VXY1, 0( $v0 ) + lwc2 C2_VZ1 , 4( $v0 ) + lwc2 C2_VXY2, 0( $v1 ) + lwc2 C2_VZ2 , 4( $v1 ) + + lw $v0, 8( $t4 ) + lui $v1, 0x3800 # Load color + or $v0, $v1 + mtc2 $v0, C2_RGB + + nNCCT + + lhu $v0, 6( $t4 ) # Load normal 3 + + addiu $t4, 12 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F4_light_tex_smt + nop + + swc2 C2_RGB0, POLYG4_rgbc0( $a1 ) + swc2 C2_RGB1, POLYG4_rgbc1( $a1 ) + swc2 C2_RGB2, POLYG4_rgbc2( $a1 ) + + nNCCS + + swc2 C2_RGB2, POLYG4_rgbc3( $a1 ) + + b .sort_G4_pri + nop + +.F4_light_tex_smt: + + mfc2 $v0, C2_RGB0 + lui $v1, 0x3400 + or $v0, $v1 + sw $v0, POLYGT4_rgbc0( $a1 ) + swc2 C2_RGB1, POLYGT4_rgbc1( $a1 ) + swc2 C2_RGB2, POLYGT4_rgbc2( $a1 ) + + NCCS + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYGT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYGT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYGT4_uv2( $a1 ) + sh $v1, POLYGT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) + + andi $v0, $v1, 0xffff + sh $v0, POLYGT4_tpage( $a1 ) + srl $v0, $v1, 16 + + b .sort_GT4_pri + sh $v0, POLYGT4_clut( $a1 ) + +.sort_F4_pri: + + sw $t6, POLYF4_xy0($a1) + swc2 C2_SXY0, POLYF4_xy1($a1) + swc2 C2_SXY1, POLYF4_xy2($a1) + swc2 C2_SXY2, POLYF4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + 
srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0600 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF4_len + +.sort_FT4_pri: + + sw $t6, POLYFT4_xy0($a1) + swc2 C2_SXY0, POLYFT4_xy1($a1) + swc2 C2_SXY1, POLYFT4_xy2($a1) + swc2 C2_SXY2, POLYFT4_xy3($a1) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT4_len + +.sort_G4_pri: + + sw $t6, POLYG4_xy0($a1) + swc2 C2_SXY0, POLYG4_xy1($a1) + swc2 C2_SXY1, POLYG4_xy2($a1) + swc2 C2_SXY2, POLYG4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG4_len + +.sort_GT4_pri: + + sw $t6, POLYGT4_xy0($a1) + swc2 C2_SXY0, POLYGT4_xy1($a1) + swc2 C2_SXY1, POLYGT4_xy2($a1) + swc2 C2_SXY2, POLYGT4_xy3($a1) + + .set noat + + lui $v1, 0x0c00 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw 
$v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT4_len + +.skip_prim: + + b .sort_loop + nop + +.exit: + + lw $s0, 0( $sp ) + lw $s1, 4( $sp ) + lw $s2, 8( $sp ) + lw $s3, 12( $sp ) + addiu $sp, 16 + jr $ra + move $v0, $a1 + +.section .bss._smd_tpage_base, "w" +.comm _smd_tpage_base, 4, 4 + +.section .bss._sc_clip, "w" +.comm _sc_clip, 8, 4 diff --git a/libpsn00b/smd/smd.s b/libpsn00b/smd/smd.s deleted file mode 100644 index 3c87a5e..0000000 --- a/libpsn00b/smd/smd.s +++ /dev/null @@ -1,928 +0,0 @@ -# PSn00bSDK .SMD model parser library -# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed - -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" -.include "smd/smd_s.inc" - -# Currently does not do header checks -.section .text.smdInitData, "ax", @progbits -.global smdInitData -.type smdInitData, @function - -smdInitData: - lw $a1, SMD_HEAD_PVERTS($a0) # Initialize header pointers - lw $a2, SMD_HEAD_PNORMS($a0) - lw $a3, SMD_HEAD_PPRIMS($a0) - - addu $a1, $a0 - addu $a2, $a0 - addu $a3, $a0 - - sw $a1, SMD_HEAD_PVERTS($a0) - sw $a2, SMD_HEAD_PNORMS($a0) - sw $a3, SMD_HEAD_PPRIMS($a0) - - jr $ra - move $v0, $a0 - -.section .text.scSetClipRect, "ax", @progbits -.global scSetClipRect -.type scSetClipRect, @function - -scSetClipRect: - sll $a1, 16 - or $a0, $a1 - sll $a3, 16 - or $a2, $a3 - la $a1, _sc_clip - sw $a0, 0( $a1 ) - jr $ra - sw $a2, 4( $a1 ) - -.section .text.smdSetBaseTPage, "ax", @progbits -.global smdSetBaseTPage -.type smdSetBaseTPage, @function - -smdSetBaseTPage: - la $v0, _smd_tpage_base - jr $ra - sw $a0, 0($v0) - -.section .text.smdSortModel, "ax", @progbits -.global smdSortModel -.type smdSortModel, @function - -smdSortModel: - # a0 - Pointer SC_OT structure - # a1 - Pointer to next primitive - # a2 - Pointer to SMD data address - # v0 - New pointer of primitive buffer (return) - - addiu $sp, -16 - sw $s0, 0($sp) - 
sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - - la $v0, _sc_clip - lw $t8, 0($v0) - lw $t9, 4($v0) - - lw $t0, OT_LEN($a0) - lw $a0, OT_ADDR($a0) - lw $t1, SMD_HEAD_PVERTS($a2) - lw $t2, SMD_HEAD_PNORMS($a2) - lw $t3, SMD_HEAD_PPRIMS($a2) - -.sort_loop: - - nop - lw $a3, 0($t3) # Get primitive ID word - move $t4, $t3 - - beqz $a3, .exit # Check if terminator (just zero) - addiu $t4, 4 - - lhu $t5, 0( $t4 ) # Load vertices - lhu $t6, 2( $t4 ) - lhu $t7, 4( $t4 ) - sll $t5, 3 - sll $t6, 3 - sll $t7, 3 - addu $t5, $t1 - addu $t6, $t1 - addu $t7, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - lwc2 C2_VXY1, 0( $t6 ) - lwc2 C2_VZ1 , 4( $t6 ) - lwc2 C2_VXY2, 0( $t7 ) - lwc2 C2_VZ2 , 4( $t7 ) - - srl $v1, $a3, 24 # Get primitive size - addu $t3, $v1 # Step main pointer to next primitive - - RTPT - - cfc2 $v0, C2_FLAG # Get GTE flag value - nop - - bltz $v0, .skip_prim # Skip primitive if Z overflow - andi $v0, $a3, 0x3 - - NCLIP # Backface culling - - srl $v1, $a3, 12 - andi $v1, 1 - - bnez $v1, .no_culling - nop - - mfc2 $v1, C2_MAC0 - nop - bltz $v1, .skip_prim - nop - -.no_culling: - - beq $v0, 0x1, .prim_tri # If primitive is a triangle - nop - beq $v0, 0x2, .prim_quad # If primitive is a quad - nop - - b .skip_prim - nop - -## Triangles - -.prim_tri: # Triangle processing - - addiu $t4, 8 # Advance from indices - - AVSZ3 # Calculate average Z - - srl $v0, $t0, 16 # Get Z divisor from OT_LEN value - andi $v0, 0xff - - mfc2 $t5, C2_OTZ # Get AVSZ3 result - - sra $v1, $t0, 24 # Get Z offset from OT_LEN value - - srl $t5, $v0 # Apply divisor and offset - sub $t5, $v1 - - blez $t5, .skip_prim # Skip primitive if less than zero - andi $v1, $t0, 0xffff - bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length - sll $t5, 2 - addu $t5, $a0 # Append OTZ to OT address - - ClipTestTri - - and $v0, $s0, $s1 # v0 & v1 - beqz $v0, .do_draw - and $v0, $s1, $s2 # v1 & v2 - beqz $v0, .do_draw - and $v0, $s2, $s0 # v2 & v0 - beqz $v0, .do_draw - nop - b 
.skip_prim - nop - -.do_draw: - - - srl $v0, $a3, 2 # Lighting enabled? - andi $v0, 0x3 - bnez $v0, .F3_light - nop - - andi $v0, $a3, 0x10 # Gouraud shaded - bnez $v0, .F3_gouraud - nop - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_textured - nop - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2000 - or $v0, $v1 - - b .sort_F3_pri - sw $v0, POLYF3_rgbc( $a1 ) - -.F3_gouraud: - - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3000 - or $v0, $at - .set at - sw $v0, POLYG3_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG3_rgbc1($a1) - b .sort_G3_pri - sw $v0, POLYG3_rgbc2($a1) - -.F3_textured: - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2400 - or $v0, $v1 - sw $v0, POLYFT3_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v0, 8( $t4 ) # Tpage + CLUT - nop - andi $v1, $v0, 0xffff - sh $v1, POLYFT3_tpage( $a1 ) - srl $v0, 16 - - b .sort_FT3_pri - sh $v0, POLYFT3_clut( $a1 ) - -.F3_light: - - lhu $v0, 0( $t4 ) # Load normal 0 - - srl $v1, $a3, 2 - andi $v1, $v1, 0x3 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - beq $v1, 0x2, .F3_light_smt - nop - - lw $v0, 4( $t4 ) - lui $v1, 0x2000 - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 8 - nop - - NCCS - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_light_tex - nop - - swc2 C2_RGB2, POLYF3_rgbc( $a1 ) - - b .sort_F3_pri - nop - -.F3_light_tex: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT3_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT3_clut( $a1 ) - - mfc2 $v0, C2_RGB2 - lui $v1, 0x2400 - or $v0, $v1 - - b .sort_FT3_pri - sw $v0, POLYFT3_rgbc( $a1 ) - -.F3_light_smt: - - lhu 
$v0, 2( $t4 ) # Load normals 1 and 2 - lhu $v1, 4( $t4 ) - sll $v0, 3 - sll $v1, 3 - addu $v0, $t2 - addu $v1, $t2 - lwc2 C2_VXY1, 0( $v0 ) - lwc2 C2_VZ1 , 4( $v0 ) - lw $v0, 8( $t4 ) - lwc2 C2_VXY2, 0( $v1 ) - lwc2 C2_VZ2 , 4( $v1 ) - lui $v1, 0x3000 # Load color - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 12 - nop - - NCCT - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_light_tex_smt - nop - - swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) - swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) - swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) - - b .sort_G3_pri - nop - -.F3_light_tex_smt: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYGT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYGT3_uv1( $a1 ) - sh $v0, POLYGT3_uv2( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYGT3_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYGT3_clut( $a1 ) - - mfc2 $v0, C2_RGB0 - lui $v1, 0x3400 - or $v0, $v1 - - swc2 C2_RGB1, POLYGT3_rgbc1( $a1 ) - swc2 C2_RGB2, POLYGT3_rgbc2( $a1 ) - - b .sort_GT3_pri - sw $v0, POLYGT3_rgbc0( $a1 ) - -.sort_F3_pri: - - swc2 C2_SXY0, POLYF3_xy0($a1) - swc2 C2_SXY1, POLYF3_xy1($a1) - swc2 C2_SXY2, POLYF3_xy2($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0500 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF3_len - -.sort_FT3_pri: - - swc2 C2_SXY0, POLYFT3_xy0( $a1 ) - swc2 C2_SXY1, POLYFT3_xy1( $a1 ) - swc2 C2_SXY2, POLYFT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 
0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT3_len - -.sort_G3_pri: - - swc2 C2_SXY0, POLYG3_xy0( $a1 ) - swc2 C2_SXY1, POLYG3_xy1( $a1 ) - swc2 C2_SXY2, POLYG3_xy2( $a1 ) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG3_len - -.sort_GT3_pri: - - swc2 C2_SXY0, POLYGT3_xy0( $a1 ) - swc2 C2_SXY1, POLYGT3_xy1( $a1 ) - swc2 C2_SXY2, POLYGT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYGT3_len - -## Quads - -.prim_quad: # Quad processing - - mfc2 $t6, C2_SXY0 # Retrieve first projected vertex - - lhu $t5, 6( $t4 ) # Project the last vertex - addiu $t4, 8 - sll $t5, 3 - addu $t5, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - - nRTPS - - cfc2 $v1, C2_FLAG # Get GTE flag value - - srl $v0, $t0, 16 # Get Z divisor from OT_LEN value - - bltz $v1, .skip_prim - nop - - AVSZ4 - - andi $v0, 0xff - - mfc2 $t5, C2_OTZ - - sra $v1, $t0, 24 # Get Z offset from OT_LEN value - - srl $t5, $v0 # Apply divisor and offset - sub $t5, $v1 - - blez $t5, .skip_prim # Skip primitive if less than zero - andi $v1, $t0, 0xffff - bge $t5, $v1, .skip_prim # Skip 
primitive if greater than OT length - sll $t5, 2 - addu $t5, $a0 # Append OTZ to OT address - - # no touch: - # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0) - - ClipTestQuad - - and $v0, $s0, $s1 # v0 & v1 - beqz $v0, .do_draw_q - and $v0, $s1, $s2 # v1 & v2 - beqz $v0, .do_draw_q - and $v0, $s2, $s3 # v2 & v3 - beqz $v0, .do_draw_q - and $v0, $s3, $s0 # v3 & v0 - beqz $v0, .do_draw_q - and $v0, $s0, $s2 # v0 & v2 - beqz $v0, .do_draw_q - and $v0, $s1, $s3 # v1 & v3 - beqz $v0, .do_draw_q - nop - b .skip_prim - nop - -.do_draw_q: - - srl $v0, $a3, 2 # Lighting enabled? - andi $v0, 0x3 - bnez $v0, .F4_light - nop - - andi $v0, $a3, 0x10 # Gouraud quad - bnez $v0, .F4_gouraud - nop - - andi $v0, $a3, 0x20 # Textured quad - bnez $v0, .F4_textured - nop - - lw $v0, 0($t4) - lui $v1, 0x2800 - or $v0, $v1 - - b .sort_F4_pri - sw $v0, POLYF4_rgbc($a1) - -.F4_gouraud: - - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3800 - or $v0, $at - .set at - sw $v0, POLYG4_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG4_rgbc1($a1) - lw $v1, 12($t4) - sw $v0, POLYG4_rgbc2($a1) - b .sort_G4_pri - sw $v1, POLYG4_rgbc3($a1) - -.F4_textured: - - lw $v0, 0($t4) - lui $v1, 0x2c00 - or $v0, $v1 - sw $v0, POLYFT4_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0($t4) # Load texture coordinates - lhu $v1, 2($t4) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - - b .sort_FT4_pri - sh $v0, POLYFT4_clut($a1) - -.F4_light: - - lhu $v0, 0( $t4 ) # Load normal 0 - - srl $v1, $a3, 2 - andi $v1, $v1, 0x3 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - beq $v1, 0x2, .F4_light_smt - nop - - lw $v0, 4( $t4 ) - lui $v1, 0x2800 - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 8 - nop - - NCCS - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, 
.F4_light_tex - nop - - swc2 C2_RGB2, POLYF4_rgbc( $a1 ) - - b .sort_F4_pri - nop - -.F4_light_tex: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT4_clut( $a1 ) - - mfc2 $v0, C2_RGB2 - lui $v1, 0x2c00 - or $v0, $v1 - - b .sort_FT4_pri - sw $v0, POLYFT4_rgbc( $a1 ) - -.F4_light_smt: - - lhu $v0, 2( $t4 ) # Load normals 1 and 2 - lhu $v1, 4( $t4 ) - sll $v0, 3 - sll $v1, 3 - addu $v0, $t2 - addu $v1, $t2 - lwc2 C2_VXY1, 0( $v0 ) - lwc2 C2_VZ1 , 4( $v0 ) - lwc2 C2_VXY2, 0( $v1 ) - lwc2 C2_VZ2 , 4( $v1 ) - - lw $v0, 8( $t4 ) - lui $v1, 0x3800 # Load color - or $v0, $v1 - mtc2 $v0, C2_RGB - - nNCCT - - lhu $v0, 6( $t4 ) # Load normal 3 - - addiu $t4, 12 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F4_light_tex_smt - nop - - swc2 C2_RGB0, POLYG4_rgbc0( $a1 ) - swc2 C2_RGB1, POLYG4_rgbc1( $a1 ) - swc2 C2_RGB2, POLYG4_rgbc2( $a1 ) - - nNCCS - - swc2 C2_RGB2, POLYG4_rgbc3( $a1 ) - - b .sort_G4_pri - nop - -.F4_light_tex_smt: - - mfc2 $v0, C2_RGB0 - lui $v1, 0x3400 - or $v0, $v1 - sw $v0, POLYGT4_rgbc0( $a1 ) - swc2 C2_RGB1, POLYGT4_rgbc1( $a1 ) - swc2 C2_RGB2, POLYGT4_rgbc2( $a1 ) - - NCCS - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYGT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYGT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYGT4_uv2( $a1 ) - sh $v1, POLYGT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) - - andi $v0, $v1, 0xffff - sh $v0, POLYGT4_tpage( $a1 ) - srl $v0, $v1, 16 - - b .sort_GT4_pri - sh $v0, POLYGT4_clut( $a1 ) - -.sort_F4_pri: - - sw $t6, POLYF4_xy0($a1) - swc2 C2_SXY0, POLYF4_xy1($a1) - swc2 C2_SXY1, 
POLYF4_xy2($a1) - swc2 C2_SXY2, POLYF4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF4_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0600 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF4_len - -.sort_FT4_pri: - - sw $t6, POLYFT4_xy0($a1) - swc2 C2_SXY0, POLYFT4_xy1($a1) - swc2 C2_SXY1, POLYFT4_xy2($a1) - swc2 C2_SXY2, POLYFT4_xy3($a1) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT4_len - -.sort_G4_pri: - - sw $t6, POLYG4_xy0($a1) - swc2 C2_SXY0, POLYG4_xy1($a1) - swc2 C2_SXY1, POLYG4_xy2($a1) - swc2 C2_SXY2, POLYG4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG4_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG4_len - -.sort_GT4_pri: - - sw $t6, POLYGT4_xy0($a1) - swc2 C2_SXY0, POLYGT4_xy1($a1) - swc2 C2_SXY1, POLYGT4_xy2($a1) - swc2 C2_SXY2, POLYGT4_xy3($a1) - - .set noat - - lui $v1, 0x0c00 - lw $v0, 0($t5) - lui $at, 0xff00 - and 
$v1, $at
- lui $at, 0x00ff
- or $at, 0xffff
- and $v0, $at
- or $v1, $v0
- sw $v1, 0($a1)
- lw $v0, 0($t5)
- and $a1, $at
- lui $at, 0xff00
- and $v0, $at
- or $v0, $a1
- sw $v0, 0($t5)
-
- .set at
-
- lui $v0, 0x8000
- or $a1, $v0
-
- b .sort_loop
- addiu $a1, POLYGT4_len
-
-.skip_prim:
-
- b .sort_loop
- nop
-
-.exit:
-
- lw $s0, 0( $sp )
- lw $s1, 4( $sp )
- lw $s2, 8( $sp )
- lw $s3, 12( $sp )
- addiu $sp, 16
- jr $ra
- move $v0, $a1
-
-.section .bss._smd_tpage_base, "w"
-.comm _smd_tpage_base, 4, 4
-
-.section .bss._sc_clip, "w"
-.comm _sc_clip, 8, 4
diff --git a/libpsn00b/smd/smd_cel.S b/libpsn00b/smd/smd_cel.S
new file mode 100644
index 0000000..e8cf911
--- /dev/null
+++ b/libpsn00b/smd/smd_cel.S
@@ -0,0 +1,1078 @@
+# PSn00bSDK .SMD model parser library
+# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed
+
+.set noreorder
+
+#include "gtereg.inc"
+#include "inline_s.inc"
+#include "smd/smd_s.inc"
+
+.section .text.smdSetCelTex, "ax", @progbits
+.global smdSetCelTex
+.type smdSetCelTex, @function
+# Packs the cel-shading TPage and CLUT into one word at _smd_cel_tpage.
+smdSetCelTex:
+    # a0 - TPage value (only the low 16 bits are kept)
+    # a1 - CLUT value (placed in the upper 16 bits of the stored word)
+    la      $v0, _smd_cel_tpage
+    andi    $a0, 0xffff             # keep the TPage halfword only
+    sll     $a1, 16                 # move the CLUT into the upper halfword
+    or      $a0, $a1                # a0 = (clut << 16) | tpage
+    jr      $ra
+    sw      $a0, 0($v0)             # branch delay slot: executes before the return
+
+.section .text.smdSetCelParam, "ax", @progbits
+.global smdSetCelParam
+.type smdSetCelParam, @function
+# Stores the packed U/V parameters at _smd_cel_param and the color at _smd_cel_col.
+smdSetCelParam:
+    # a0 - Shading texture U offset (low 8 bits kept; smdSortModelCel reads it back as a right-shift count)
+    # a1 - Shading texture V offset (low 8 bits kept, packed into bits 8-15)
+    # a2 - Shading primitive color (stored with 0x02000000 merged in)
+    andi    $a1, 0xff
+    sll     $a1, 8
+    andi    $a0, 0xff
+    or      $a0, $a1                # a0 = (v << 8) | u
+    la      $v0, _smd_cel_param
+    sw      $a0, 0($v0)
+    la      $v0, _smd_cel_col
+    lui     $v1, 0x0200             # NOTE(review): presumably the GPU semi-transparency bit - confirm
+    or      $a2, $v1                # fixed: the OR used to target a3, which is never stored, so the flag was dropped
+    jr      $ra
+    sw      $a2, 0($v0)             # branch delay slot: stores the color including the merged flag
+
+.section .text.smdSortModelCel, "ax", @progbits
+.global smdSortModelCel
+.type smdSortModelCel, @function
+
+smdSortModelCel:
+ # a0 - Pointer SC_OT structure
+ # a1 - Pointer to next primitive
+ # a2 - Pointer to SMD data address
+ # v0 - New pointer of primitive buffer (return)
+
+ addiu $sp, -16
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+
+ la $v0, _sc_clip
+ lw $t8, 0($v0)
+
lw $t9, 4($v0) + + lw $t0, OT_LEN($a0) + lw $a0, OT_ADDR($a0) + lw $t1, SMD_HEAD_PVERTS($a2) + lw $t2, SMD_HEAD_PNORMS($a2) + lw $t3, SMD_HEAD_PPRIMS($a2) + +.sort_loop: + + nop + lw $a3, 0($t3) # Get primitive ID word + move $t4, $t3 + + beqz $a3, .exit # Check if terminator (just zero) + addiu $t4, 4 + + lhu $t5, 0( $t4 ) # Load vertices + lhu $t6, 2( $t4 ) + lhu $t7, 4( $t4 ) + sll $t5, 3 + sll $t6, 3 + sll $t7, 3 + addu $t5, $t1 + addu $t6, $t1 + addu $t7, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + lwc2 C2_VXY1, 0( $t6 ) + lwc2 C2_VZ1 , 4( $t6 ) + lwc2 C2_VXY2, 0( $t7 ) + lwc2 C2_VZ2 , 4( $t7 ) + + srl $v1, $a3, 24 # Get primitive size + addu $t3, $v1 # Step main pointer to next primitive + + RTPT + + cfc2 $v0, C2_FLAG # Get GTE flag value + nop + + bltz $v0, .skip_prim # Skip primitive if Z overflow + andi $v0, $a3, 0x3 + + NCLIP # Backface culling + + srl $v1, $a3, 12 + andi $v1, 1 + + bnez $v1, .no_culling + nop + + mfc2 $v1, C2_MAC0 + nop + bltz $v1, .skip_prim + nop + +.no_culling: + + beq $v0, 0x1, .prim_tri # If primitive is a triangle + nop + beq $v0, 0x2, .prim_quad # If primitive is a quad + nop + + b .skip_prim + nop + +## Triangles + +.prim_tri: # Triangle processing + + addiu $t4, 8 # Advance from indices + + AVSZ3 # Calculate average Z + + srl $v0, $t0, 16 # Get Z divisor from OT_LEN value + andi $v0, 0xff + + mfc2 $t5, C2_OTZ # Get AVSZ3 result + + sra $v1, $t0, 24 # Get Z offset from OT_LEN value + + srl $t5, $v0 # Apply divisor and offset + sub $t5, $v1 + + blez $t5, .skip_prim # Skip primitive if less than zero + andi $v1, $t0, 0xffff + bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length + sll $t5, 2 + addu $t5, $a0 # Append OTZ to OT address + + ClipTestTri + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw + and $v0, $s2, $s0 # v2 & v0 + beqz $v0, .do_draw + nop + b .skip_prim + nop + +.do_draw: + + + srl $v0, $a3, 2 # Lighting enabled? 
+ andi $v0, 0x3 + bnez $v0, .F3_light + nop + + andi $v0, $a3, 0x10 # Gouraud shaded + bnez $v0, .F3_gouraud + nop + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_textured + nop + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2000 + or $v0, $v1 + + b .sort_F3_pri + sw $v0, POLYF3_rgbc( $a1 ) + +.F3_gouraud: + + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3000 + or $v0, $at + .set at + sw $v0, POLYG3_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG3_rgbc1($a1) + b .sort_G3_pri + sw $v0, POLYG3_rgbc2($a1) + +.F3_textured: + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2400 + or $v0, $v1 + sw $v0, POLYFT3_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v0, 8( $t4 ) # Tpage + CLUT + nop + andi $v1, $v0, 0xffff + sh $v1, POLYFT3_tpage( $a1 ) + srl $v0, 16 + + b .sort_FT3_pri + sh $v0, POLYFT3_clut( $a1 ) + +.F3_light: + + lhu $v0, 0( $t4 ) # Load normal 0 + + srl $v1, $a3, 2 + andi $v1, $v1, 0x3 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + beq $v1, 0x2, .F3_light_smt + nop + + lw $v0, 4( $t4 ) + lui $v1, 0x2000 + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 8 + nop + + NCCS + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_light_tex + nop + + swc2 C2_RGB2, POLYF3_rgbc( $a1 ) + + b .sort_F3_pri + nop + +.F3_light_tex: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT3_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT3_clut( $a1 ) + + mfc2 $v0, C2_RGB2 + lui $v1, 0x2400 + or $v0, $v1 + + b .sort_FT3_pri + sw $v0, POLYFT3_rgbc( $a1 ) + +.F3_light_smt: + + lhu $v0, 2($t4) # Load normals 1 and 2 + lhu $v1, 4($t4) + sll $v0, 3 + sll 
$v1, 3 + addu $v0, $t2 + addu $v1, $t2 + lwc2 C2_VXY1, 0($v0) + lwc2 C2_VZ1 , 4($v0) + #la $v0, _smd_cel_col + #lw $v0, 0($v0) + lwc2 C2_VXY2, 0($v1) + lwc2 C2_VZ2 , 4($v1) + mtc2 $v0, C2_RGB + + swc2 C2_SXY0, POLYFT3_xy0($a1) + swc2 C2_SXY1, POLYFT3_xy1($a1) + swc2 C2_SXY2, POLYFT3_xy2($a1) + + la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values + lw $v0, 0($v0) + + NCT + + andi $v1, $v0, 0xffff + sh $v1, POLYFT3_tpage($a1) + srl $v1, $v0, 16 + sh $v1, POLYFT3_clut($a1) + + # Usable regs: v0, v1, at, t6, t7 + + .set noat + + la $at, _smd_cel_param # Load cel shader parameters + lhu $at, 0($at) + + mfc2 $t7, C2_RGB0 # Get first shaded color + andi $v1, $at, 0xff # Get U divisor value + andi $t7, 0xffff # Only keep R and G colors + + andi $v0, $t7, 0xff # U0 + srl $v0, $v1 + sb $v0, POLYFT3_uv0($a1) + srl $v0, $t7, 8 # V0 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT3_uv0+1($a1) + + mfc2 $t7, C2_RGB1 + andi $v1, $at, 0xff + andi $t7, 0xffff + andi $v0, $t7, 0xff # U1 + srl $v0, $v1 + sb $v0, POLYFT3_uv1($a1) + srl $v0, $t7, 8 # V1 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT3_uv1+1($a1) + + mfc2 $t7, C2_RGB2 + andi $v1, $at, 0xff + andi $t7, 0xffff + andi $v0, $t7, 0xff # U2 + srl $v0, $v1 + sb $v0, POLYFT3_uv2($a1) + srl $v0, $t7, 8 # V2 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT3_uv2+1($a1) + + la $v0, _smd_cel_col + lw $v0, 0($v0) + lui $v1, 0x2600 + or $v0, $v1 + sw $v0, POLYFT3_rgbc($a1) + + lw $t7, 8($t4) + addiu $t4, 12 + + lui $v1, 0x0700 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $v1, $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $v1 + sw $v0, 0($t5) + + lui $v0, 0x8000 + or $a1, $v0 + addiu $a1, POLYFT3_len + + .set at + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_light_tex_smt + nop + + #swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) + #swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) + #swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) + + 
lui $v0, 0x2000 + or $t7, $v0 + b .sort_F3_pri + sw $t7, POLYF3_rgbc($a1) + +.F3_light_tex_smt: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT3_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT3_clut( $a1 ) + + lui $v1, 0x2400 + or $t7, $v1 + + b .sort_FT3_pri + sw $t7, POLYFT3_rgbc( $a1 ) + +.sort_F3_pri: + + swc2 C2_SXY0, POLYF3_xy0($a1) + swc2 C2_SXY1, POLYF3_xy1($a1) + swc2 C2_SXY2, POLYF3_xy2($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0500 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF3_len + +.sort_FT3_pri: + + swc2 C2_SXY0, POLYFT3_xy0( $a1 ) + swc2 C2_SXY1, POLYFT3_xy1( $a1 ) + swc2 C2_SXY2, POLYFT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT3_len + +.sort_G3_pri: + + swc2 C2_SXY0, POLYG3_xy0( $a1 ) + swc2 C2_SXY1, POLYG3_xy1( $a1 ) + swc2 C2_SXY2, POLYG3_xy2( $a1 ) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($t5) + lui 
$at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG3_len + +.sort_GT3_pri: + + swc2 C2_SXY0, POLYGT3_xy0( $a1 ) + swc2 C2_SXY1, POLYGT3_xy1( $a1 ) + swc2 C2_SXY2, POLYGT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT3_len + +## Quads + +.prim_quad: # Quad processing + + mfc2 $t6, C2_SXY0 # Retrieve first projected vertex + + lhu $t5, 6( $t4 ) # Project the last vertex + addiu $t4, 8 + sll $t5, 3 + addu $t5, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + + nRTPS + + cfc2 $v1, C2_FLAG # Get GTE flag value + + srl $v0, $t0, 16 # Get Z divisor from OT_LEN value + + bltz $v1, .skip_prim + nop + + AVSZ4 + + andi $v0, 0xff + + mfc2 $t5, C2_OTZ + + sra $v1, $t0, 24 # Get Z offset from OT_LEN value + + srl $t5, $v0 # Apply divisor and offset + sub $t5, $v1 + + blez $t5, .skip_prim # Skip primitive if less than zero + andi $v1, $t0, 0xffff + bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length + sll $t5, 2 + addu $t5, $a0 # Append OTZ to OT address + + # no touch: + # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0) + + ClipTestQuad + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw_q + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw_q + and $v0, $s2, $s3 # v2 & v3 + beqz $v0, .do_draw_q + and $v0, $s3, $s0 # v3 & v0 + beqz $v0, .do_draw_q + and $v0, $s0, $s2 # v0 & v2 + beqz $v0, .do_draw_q + and $v0, $s1, $s3 # v1 & v3 + beqz $v0, .do_draw_q + nop + b .skip_prim + nop + +.do_draw_q: + + srl $v0, $a3, 2 # 
Lighting enabled? + andi $v0, 0x3 + bnez $v0, .F4_light + nop + + andi $v0, $a3, 0x10 # Gouraud quad + bnez $v0, .F4_gouraud + nop + + andi $v0, $a3, 0x20 # Textured quad + bnez $v0, .F4_textured + nop + + lw $v0, 0($t4) + lui $v1, 0x2800 + or $v0, $v1 + + b .sort_F4_pri + sw $v0, POLYF4_rgbc($a1) + +.F4_gouraud: + + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3800 + or $v0, $at + .set at + sw $v0, POLYG4_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG4_rgbc1($a1) + lw $v1, 12($t4) + sw $v0, POLYG4_rgbc2($a1) + b .sort_G4_pri + sw $v1, POLYG4_rgbc3($a1) + +.F4_textured: + + lw $v0, 0($t4) + lui $v1, 0x2c00 + or $v0, $v1 + sw $v0, POLYFT4_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0($t4) # Load texture coordinates + lhu $v1, 2($t4) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + + b .sort_FT4_pri + sh $v0, POLYFT4_clut($a1) + +.F4_light: + + lhu $v0, 0( $t4 ) # Load normal 0 + + srl $v1, $a3, 2 + andi $v1, $v1, 0x3 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + beq $v1, 0x2, .F4_light_smt + nop + + lw $v0, 4( $t4 ) + lui $v1, 0x2800 + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 8 + nop + + NCS + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F4_light_tex + nop + + swc2 C2_RGB2, POLYF4_rgbc( $a1 ) + + b .sort_F4_pri + nop + +.F4_light_tex: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT4_clut( $a1 ) + + mfc2 $v0, C2_RGB2 + lui $v1, 0x2c00 + or $v0, $v1 + + b .sort_FT4_pri + nop + sw $v0, POLYFT4_rgbc( $a1 ) + 
+.F4_light_smt: + + lhu $v0, 2( $t4 ) # Load normals 1 and 2 + lhu $v1, 4( $t4 ) + sll $v0, 3 + sll $v1, 3 + addu $v0, $t2 + addu $v1, $t2 + lwc2 C2_VXY1, 0( $v0 ) + lwc2 C2_VZ1 , 4( $v0 ) + lwc2 C2_VXY2, 0( $v1 ) + lwc2 C2_VZ2 , 4( $v1 ) + + sw $t6, POLYFT4_xy0($a1) + swc2 C2_SXY0, POLYFT4_xy1($a1) + swc2 C2_SXY1, POLYFT4_xy2($a1) + swc2 C2_SXY2, POLYFT4_xy3($a1) + + la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values + lw $v0, 0($v0) + + NCT + + andi $v1, $v0, 0xffff + sh $v1, POLYFT4_tpage($a1) + srl $v1, $v0, 16 + sh $v1, POLYFT4_clut($a1) + + # Usable regs: v0, v1, at, t7 + + .set noat + + la $at, _smd_cel_param # Load cel shader parameters + lhu $at, 0($at) + + mfc2 $t7, C2_RGB0 + andi $v1, $at, 0xff # Get U divisor value + andi $t7, 0xffff # Only keep R and G colors + + andi $v0, $t7, 0xff # U0 + srl $v0, $v1 + sb $v0, POLYFT4_uv0($a1) + srl $v0, $t7, 8 # V0 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT4_uv0+1($a1) + + mfc2 $t7, C2_RGB1 + andi $v1, $at, 0xff + andi $t7, 0xffff + andi $v0, $t7, 0xff # U1 + srl $v0, $v1 + sb $v0, POLYFT4_uv1($a1) + srl $v0, $t7, 8 # V1 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT4_uv1+1($a1) + + mfc2 $t7, C2_RGB2 + andi $v1, $at, 0xff + andi $t7, 0xffff + andi $v0, $t7, 0xff # U2 + srl $v0, $v1 + sb $v0, POLYFT4_uv2($a1) + srl $v0, $t7, 8 # V2 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT4_uv2+1($a1) + + la $v0, _smd_cel_col + lw $v0, 0($v0) + lui $v1, 0x2E00 + or $v0, $v1 + sw $v0, POLYFT4_rgbc($a1) + + lw $t7, 8($t4) + + lhu $v0, 6($t4) # Load normal 3 + addiu $t4, 12 + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + nNCS + + mfc2 $s0, C2_RGB2 + andi $v1, $at, 0xff + andi $s0, 0xffff + andi $v0, $s0, 0xff # U3 + srl $v0, $v1 + sb $v0, POLYFT4_uv3($a1) + srl $v0, $s0, 8 # V3 + srl $v1, $at, 8 + srl $v0, $v1 + sb $v0, POLYFT4_uv3+1($a1) + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + 
sw $v1, 0($a1) + lw $v0, 0($t5) + and $v1, $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $v1 + sw $v0, 0($t5) + + lui $v0, 0x8000 + or $a1, $v0 + addiu $a1, POLYFT4_len + + .set at + + andi $v0, $a3, 0x20 # Textured quad + bnez $v0, .F4_light_tex_smt + nop + + lui $v0, 0x2800 + or $t7, $v0 + b .sort_F4_pri + sw $t7, POLYF4_rgbc($a1) + +.F4_light_tex_smt: + + lhu $v0, 0($t4) # Load texture coordinates + lhu $v1, 2($t4) + sh $v0, POLYFT4_uv0($a1) + lhu $v0, 4($t4) + sh $v1, POLYFT4_uv1($a1) + lhu $v1, 6($t4) + sh $v0, POLYFT4_uv2($a1) + sh $v1, POLYFT4_uv3($a1) + + lw $v1, 8($t4) + + lui $v0, 0x2E00 + or $t7, $v0 + sw $t7, POLYFT4_rgbc($a1) + + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage($a1) + srl $v0, $v1, 16 + + b .sort_FT4_pri + sh $v0, POLYFT4_clut($a1) + +.sort_F4_pri: + + sw $t6, POLYF4_xy0($a1) + swc2 C2_SXY0, POLYF4_xy1($a1) + swc2 C2_SXY1, POLYF4_xy2($a1) + swc2 C2_SXY2, POLYF4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0600 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF4_len + +.sort_FT4_pri: + + sw $t6, POLYFT4_xy0($a1) + swc2 C2_SXY0, POLYFT4_xy1($a1) + swc2 C2_SXY1, POLYFT4_xy2($a1) + swc2 C2_SXY2, POLYFT4_xy3($a1) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT4_len + +.sort_G4_pri: + + sw $t6, 
POLYG4_xy0($a1) + swc2 C2_SXY0, POLYG4_xy1($a1) + swc2 C2_SXY1, POLYG4_xy2($a1) + swc2 C2_SXY2, POLYG4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG4_len + +.sort_GT4_pri: + + sw $t6, POLYGT4_xy0($a1) + swc2 C2_SXY0, POLYGT4_xy1($a1) + swc2 C2_SXY1, POLYGT4_xy2($a1) + swc2 C2_SXY2, POLYGT4_xy3($a1) + + .set noat + + lui $v1, 0x0c00 + lw $v0, 0($t5) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($t5) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($t5) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT4_len + +.skip_prim: + + b .sort_loop + nop + +.exit: + + lw $s0, 0( $sp ) + lw $s1, 4( $sp ) + lw $s2, 8( $sp ) + lw $s3, 12( $sp ) + addiu $sp, 16 + jr $ra + move $v0, $a1 + +.section .bss._smd_cel_col, "w" +.comm _smd_cel_col, 4, 4 # STP shading polygon color + +.section .bss._smd_cel_param, "w" +.comm _smd_cel_param, 4, 4 # U divisor, V divisor, shading clip + +.section .bss._smd_cel_tpage, "w" +.comm _smd_cel_tpage, 4, 4 # CEL shader texture page & CLUT diff --git a/libpsn00b/smd/smd_cel.s b/libpsn00b/smd/smd_cel.s deleted file mode 100644 index 8a39f01..0000000 --- a/libpsn00b/smd/smd_cel.s +++ /dev/null @@ -1,1078 +0,0 @@ -# PSn00bSDK .SMD model parser library -# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed - -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" -.include "smd/smd_s.inc" - -.section 
.text.smdSetCelTex, "ax", @progbits -.global smdSetCelTex -.type smdSetCelTex, @function - -smdSetCelTex: - # a0 - TPage value - # a1 - CLUT value - la $v0, _smd_cel_tpage - andi $a0, 0xffff - sll $a1, 16 - or $a0, $a1 - jr $ra - sw $a0, 0($v0) - -.section .text.smdSetCelParam, "ax", @progbits -.global smdSetCelParam -.type smdSetCelParam, @function - -smdSetCelParam: - # a0 - Shading texture U offset - # a1 - Shading texture V offset - # a2 - Shading primitive color - andi $a1, 0xff - sll $a1, 8 - andi $a0, 0xff - or $a0, $a1 - la $v0, _smd_cel_param - sw $a0, 0($v0) - la $v0, _smd_cel_col - lui $v1, 0x0200 - or $a3, $v1 - jr $ra - sw $a2, 0($v0) - -.section .text.smdSortModelCel, "ax", @progbits -.global smdSortModelCel -.type smdSortModelCel, @function - -smdSortModelCel: - # a0 - Pointer SC_OT structure - # a1 - Pointer to next primitive - # a2 - Pointer to SMD data address - # v0 - New pointer of primitive buffer (return) - - addiu $sp, -16 - sw $s0, 0($sp) - sw $s1, 4($sp) - sw $s2, 8($sp) - sw $s3, 12($sp) - - la $v0, _sc_clip - lw $t8, 0($v0) - lw $t9, 4($v0) - - lw $t0, OT_LEN($a0) - lw $a0, OT_ADDR($a0) - lw $t1, SMD_HEAD_PVERTS($a2) - lw $t2, SMD_HEAD_PNORMS($a2) - lw $t3, SMD_HEAD_PPRIMS($a2) - -.sort_loop: - - nop - lw $a3, 0($t3) # Get primitive ID word - move $t4, $t3 - - beqz $a3, .exit # Check if terminator (just zero) - addiu $t4, 4 - - lhu $t5, 0( $t4 ) # Load vertices - lhu $t6, 2( $t4 ) - lhu $t7, 4( $t4 ) - sll $t5, 3 - sll $t6, 3 - sll $t7, 3 - addu $t5, $t1 - addu $t6, $t1 - addu $t7, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - lwc2 C2_VXY1, 0( $t6 ) - lwc2 C2_VZ1 , 4( $t6 ) - lwc2 C2_VXY2, 0( $t7 ) - lwc2 C2_VZ2 , 4( $t7 ) - - srl $v1, $a3, 24 # Get primitive size - addu $t3, $v1 # Step main pointer to next primitive - - RTPT - - cfc2 $v0, C2_FLAG # Get GTE flag value - nop - - bltz $v0, .skip_prim # Skip primitive if Z overflow - andi $v0, $a3, 0x3 - - NCLIP # Backface culling - - srl $v1, $a3, 12 - andi $v1, 1 - - bnez $v1, 
.no_culling - nop - - mfc2 $v1, C2_MAC0 - nop - bltz $v1, .skip_prim - nop - -.no_culling: - - beq $v0, 0x1, .prim_tri # If primitive is a triangle - nop - beq $v0, 0x2, .prim_quad # If primitive is a quad - nop - - b .skip_prim - nop - -## Triangles - -.prim_tri: # Triangle processing - - addiu $t4, 8 # Advance from indices - - AVSZ3 # Calculate average Z - - srl $v0, $t0, 16 # Get Z divisor from OT_LEN value - andi $v0, 0xff - - mfc2 $t5, C2_OTZ # Get AVSZ3 result - - sra $v1, $t0, 24 # Get Z offset from OT_LEN value - - srl $t5, $v0 # Apply divisor and offset - sub $t5, $v1 - - blez $t5, .skip_prim # Skip primitive if less than zero - andi $v1, $t0, 0xffff - bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length - sll $t5, 2 - addu $t5, $a0 # Append OTZ to OT address - - ClipTestTri - - and $v0, $s0, $s1 # v0 & v1 - beqz $v0, .do_draw - and $v0, $s1, $s2 # v1 & v2 - beqz $v0, .do_draw - and $v0, $s2, $s0 # v2 & v0 - beqz $v0, .do_draw - nop - b .skip_prim - nop - -.do_draw: - - - srl $v0, $a3, 2 # Lighting enabled? 
- andi $v0, 0x3 - bnez $v0, .F3_light - nop - - andi $v0, $a3, 0x10 # Gouraud shaded - bnez $v0, .F3_gouraud - nop - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_textured - nop - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2000 - or $v0, $v1 - - b .sort_F3_pri - sw $v0, POLYF3_rgbc( $a1 ) - -.F3_gouraud: - - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3000 - or $v0, $at - .set at - sw $v0, POLYG3_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG3_rgbc1($a1) - b .sort_G3_pri - sw $v0, POLYG3_rgbc2($a1) - -.F3_textured: - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2400 - or $v0, $v1 - sw $v0, POLYFT3_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v0, 8( $t4 ) # Tpage + CLUT - nop - andi $v1, $v0, 0xffff - sh $v1, POLYFT3_tpage( $a1 ) - srl $v0, 16 - - b .sort_FT3_pri - sh $v0, POLYFT3_clut( $a1 ) - -.F3_light: - - lhu $v0, 0( $t4 ) # Load normal 0 - - srl $v1, $a3, 2 - andi $v1, $v1, 0x3 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - beq $v1, 0x2, .F3_light_smt - nop - - lw $v0, 4( $t4 ) - lui $v1, 0x2000 - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 8 - nop - - NCCS - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_light_tex - nop - - swc2 C2_RGB2, POLYF3_rgbc( $a1 ) - - b .sort_F3_pri - nop - -.F3_light_tex: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT3_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT3_clut( $a1 ) - - mfc2 $v0, C2_RGB2 - lui $v1, 0x2400 - or $v0, $v1 - - b .sort_FT3_pri - sw $v0, POLYFT3_rgbc( $a1 ) - -.F3_light_smt: - - lhu $v0, 2($t4) # Load normals 1 and 2 - lhu $v1, 4($t4) - sll $v0, 3 - sll 
$v1, 3 - addu $v0, $t2 - addu $v1, $t2 - lwc2 C2_VXY1, 0($v0) - lwc2 C2_VZ1 , 4($v0) - #la $v0, _smd_cel_col - #lw $v0, 0($v0) - lwc2 C2_VXY2, 0($v1) - lwc2 C2_VZ2 , 4($v1) - mtc2 $v0, C2_RGB - - swc2 C2_SXY0, POLYFT3_xy0($a1) - swc2 C2_SXY1, POLYFT3_xy1($a1) - swc2 C2_SXY2, POLYFT3_xy2($a1) - - la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values - lw $v0, 0($v0) - - NCT - - andi $v1, $v0, 0xffff - sh $v1, POLYFT3_tpage($a1) - srl $v1, $v0, 16 - sh $v1, POLYFT3_clut($a1) - - # Usable regs: v0, v1, at, t6, t7 - - .set noat - - la $at, _smd_cel_param # Load cel shader parameters - lhu $at, 0($at) - - mfc2 $t7, C2_RGB0 # Get first shaded color - andi $v1, $at, 0xff # Get U divisor value - andi $t7, 0xffff # Only keep R and G colors - - andi $v0, $t7, 0xff # U0 - srl $v0, $v1 - sb $v0, POLYFT3_uv0($a1) - srl $v0, $t7, 8 # V0 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT3_uv0+1($a1) - - mfc2 $t7, C2_RGB1 - andi $v1, $at, 0xff - andi $t7, 0xffff - andi $v0, $t7, 0xff # U1 - srl $v0, $v1 - sb $v0, POLYFT3_uv1($a1) - srl $v0, $t7, 8 # V1 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT3_uv1+1($a1) - - mfc2 $t7, C2_RGB2 - andi $v1, $at, 0xff - andi $t7, 0xffff - andi $v0, $t7, 0xff # U2 - srl $v0, $v1 - sb $v0, POLYFT3_uv2($a1) - srl $v0, $t7, 8 # V2 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT3_uv2+1($a1) - - la $v0, _smd_cel_col - lw $v0, 0($v0) - lui $v1, 0x2600 - or $v0, $v1 - sw $v0, POLYFT3_rgbc($a1) - - lw $t7, 8($t4) - addiu $t4, 12 - - lui $v1, 0x0700 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $v1, $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $v1 - sw $v0, 0($t5) - - lui $v0, 0x8000 - or $a1, $v0 - addiu $a1, POLYFT3_len - - .set at - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_light_tex_smt - nop - - #swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) - #swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) - #swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) - - 
lui $v0, 0x2000 - or $t7, $v0 - b .sort_F3_pri - sw $t7, POLYF3_rgbc($a1) - -.F3_light_tex_smt: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT3_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT3_clut( $a1 ) - - lui $v1, 0x2400 - or $t7, $v1 - - b .sort_FT3_pri - sw $t7, POLYFT3_rgbc( $a1 ) - -.sort_F3_pri: - - swc2 C2_SXY0, POLYF3_xy0($a1) - swc2 C2_SXY1, POLYF3_xy1($a1) - swc2 C2_SXY2, POLYF3_xy2($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0500 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF3_len - -.sort_FT3_pri: - - swc2 C2_SXY0, POLYFT3_xy0( $a1 ) - swc2 C2_SXY1, POLYFT3_xy1( $a1 ) - swc2 C2_SXY2, POLYFT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT3_len - -.sort_G3_pri: - - swc2 C2_SXY0, POLYG3_xy0( $a1 ) - swc2 C2_SXY1, POLYG3_xy1( $a1 ) - swc2 C2_SXY2, POLYG3_xy2( $a1 ) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($t5) - lui 
$at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG3_len - -.sort_GT3_pri: - - swc2 C2_SXY0, POLYGT3_xy0( $a1 ) - swc2 C2_SXY1, POLYGT3_xy1( $a1 ) - swc2 C2_SXY2, POLYGT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYGT3_len - -## Quads - -.prim_quad: # Quad processing - - mfc2 $t6, C2_SXY0 # Retrieve first projected vertex - - lhu $t5, 6( $t4 ) # Project the last vertex - addiu $t4, 8 - sll $t5, 3 - addu $t5, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - - nRTPS - - cfc2 $v1, C2_FLAG # Get GTE flag value - - srl $v0, $t0, 16 # Get Z divisor from OT_LEN value - - bltz $v1, .skip_prim - nop - - AVSZ4 - - andi $v0, 0xff - - mfc2 $t5, C2_OTZ - - sra $v1, $t0, 24 # Get Z offset from OT_LEN value - - srl $t5, $v0 # Apply divisor and offset - sub $t5, $v1 - - blez $t5, .skip_prim # Skip primitive if less than zero - andi $v1, $t0, 0xffff - bge $t5, $v1, .skip_prim # Skip primitive if greater than OT length - sll $t5, 2 - addu $t5, $a0 # Append OTZ to OT address - - # no touch: - # a0, a1, a2, a3, t0, t1, t2, t3, t4, t5(ot), t6(sxy0) - - ClipTestQuad - - and $v0, $s0, $s1 # v0 & v1 - beqz $v0, .do_draw_q - and $v0, $s1, $s2 # v1 & v2 - beqz $v0, .do_draw_q - and $v0, $s2, $s3 # v2 & v3 - beqz $v0, .do_draw_q - and $v0, $s3, $s0 # v3 & v0 - beqz $v0, .do_draw_q - and $v0, $s0, $s2 # v0 & v2 - beqz $v0, .do_draw_q - and $v0, $s1, $s3 # v1 & v3 - beqz $v0, .do_draw_q - nop - b .skip_prim - nop - -.do_draw_q: - - srl $v0, $a3, 2 # 
Lighting enabled? - andi $v0, 0x3 - bnez $v0, .F4_light - nop - - andi $v0, $a3, 0x10 # Gouraud quad - bnez $v0, .F4_gouraud - nop - - andi $v0, $a3, 0x20 # Textured quad - bnez $v0, .F4_textured - nop - - lw $v0, 0($t4) - lui $v1, 0x2800 - or $v0, $v1 - - b .sort_F4_pri - sw $v0, POLYF4_rgbc($a1) - -.F4_gouraud: - - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3800 - or $v0, $at - .set at - sw $v0, POLYG4_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG4_rgbc1($a1) - lw $v1, 12($t4) - sw $v0, POLYG4_rgbc2($a1) - b .sort_G4_pri - sw $v1, POLYG4_rgbc3($a1) - -.F4_textured: - - lw $v0, 0($t4) - lui $v1, 0x2c00 - or $v0, $v1 - sw $v0, POLYFT4_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0($t4) # Load texture coordinates - lhu $v1, 2($t4) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - - b .sort_FT4_pri - sh $v0, POLYFT4_clut($a1) - -.F4_light: - - lhu $v0, 0( $t4 ) # Load normal 0 - - srl $v1, $a3, 2 - andi $v1, $v1, 0x3 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - beq $v1, 0x2, .F4_light_smt - nop - - lw $v0, 4( $t4 ) - lui $v1, 0x2800 - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 8 - nop - - NCS - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F4_light_tex - nop - - swc2 C2_RGB2, POLYF4_rgbc( $a1 ) - - b .sort_F4_pri - nop - -.F4_light_tex: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT4_clut( $a1 ) - - mfc2 $v0, C2_RGB2 - lui $v1, 0x2c00 - or $v0, $v1 - - b .sort_FT4_pri - nop - sw $v0, POLYFT4_rgbc( $a1 ) - 
-.F4_light_smt: - - lhu $v0, 2( $t4 ) # Load normals 1 and 2 - lhu $v1, 4( $t4 ) - sll $v0, 3 - sll $v1, 3 - addu $v0, $t2 - addu $v1, $t2 - lwc2 C2_VXY1, 0( $v0 ) - lwc2 C2_VZ1 , 4( $v0 ) - lwc2 C2_VXY2, 0( $v1 ) - lwc2 C2_VZ2 , 4( $v1 ) - - sw $t6, POLYFT4_xy0($a1) - swc2 C2_SXY0, POLYFT4_xy1($a1) - swc2 C2_SXY1, POLYFT4_xy2($a1) - swc2 C2_SXY2, POLYFT4_xy3($a1) - - la $v0, _smd_cel_tpage # Load cel shader TPage and CLUT values - lw $v0, 0($v0) - - NCT - - andi $v1, $v0, 0xffff - sh $v1, POLYFT4_tpage($a1) - srl $v1, $v0, 16 - sh $v1, POLYFT4_clut($a1) - - # Usable regs: v0, v1, at, t7 - - .set noat - - la $at, _smd_cel_param # Load cel shader parameters - lhu $at, 0($at) - - mfc2 $t7, C2_RGB0 - andi $v1, $at, 0xff # Get U divisor value - andi $t7, 0xffff # Only keep R and G colors - - andi $v0, $t7, 0xff # U0 - srl $v0, $v1 - sb $v0, POLYFT4_uv0($a1) - srl $v0, $t7, 8 # V0 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT4_uv0+1($a1) - - mfc2 $t7, C2_RGB1 - andi $v1, $at, 0xff - andi $t7, 0xffff - andi $v0, $t7, 0xff # U1 - srl $v0, $v1 - sb $v0, POLYFT4_uv1($a1) - srl $v0, $t7, 8 # V1 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT4_uv1+1($a1) - - mfc2 $t7, C2_RGB2 - andi $v1, $at, 0xff - andi $t7, 0xffff - andi $v0, $t7, 0xff # U2 - srl $v0, $v1 - sb $v0, POLYFT4_uv2($a1) - srl $v0, $t7, 8 # V2 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT4_uv2+1($a1) - - la $v0, _smd_cel_col - lw $v0, 0($v0) - lui $v1, 0x2E00 - or $v0, $v1 - sw $v0, POLYFT4_rgbc($a1) - - lw $t7, 8($t4) - - lhu $v0, 6($t4) # Load normal 3 - addiu $t4, 12 - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - nNCS - - mfc2 $s0, C2_RGB2 - andi $v1, $at, 0xff - andi $s0, 0xffff - andi $v0, $s0, 0xff # U3 - srl $v0, $v1 - sb $v0, POLYFT4_uv3($a1) - srl $v0, $s0, 8 # V3 - srl $v1, $at, 8 - srl $v0, $v1 - sb $v0, POLYFT4_uv3+1($a1) - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - 
sw $v1, 0($a1) - lw $v0, 0($t5) - and $v1, $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $v1 - sw $v0, 0($t5) - - lui $v0, 0x8000 - or $a1, $v0 - addiu $a1, POLYFT4_len - - .set at - - andi $v0, $a3, 0x20 # Textured quad - bnez $v0, .F4_light_tex_smt - nop - - lui $v0, 0x2800 - or $t7, $v0 - b .sort_F4_pri - sw $t7, POLYF4_rgbc($a1) - -.F4_light_tex_smt: - - lhu $v0, 0($t4) # Load texture coordinates - lhu $v1, 2($t4) - sh $v0, POLYFT4_uv0($a1) - lhu $v0, 4($t4) - sh $v1, POLYFT4_uv1($a1) - lhu $v1, 6($t4) - sh $v0, POLYFT4_uv2($a1) - sh $v1, POLYFT4_uv3($a1) - - lw $v1, 8($t4) - - lui $v0, 0x2E00 - or $t7, $v0 - sw $t7, POLYFT4_rgbc($a1) - - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage($a1) - srl $v0, $v1, 16 - - b .sort_FT4_pri - sh $v0, POLYFT4_clut($a1) - -.sort_F4_pri: - - sw $t6, POLYF4_xy0($a1) - swc2 C2_SXY0, POLYF4_xy1($a1) - swc2 C2_SXY1, POLYF4_xy2($a1) - swc2 C2_SXY2, POLYF4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF4_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0600 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF4_len - -.sort_FT4_pri: - - sw $t6, POLYFT4_xy0($a1) - swc2 C2_SXY0, POLYFT4_xy1($a1) - swc2 C2_SXY1, POLYFT4_xy2($a1) - swc2 C2_SXY2, POLYFT4_xy3($a1) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT4_len - -.sort_G4_pri: - - sw $t6, 
POLYG4_xy0($a1) - swc2 C2_SXY0, POLYG4_xy1($a1) - swc2 C2_SXY1, POLYG4_xy2($a1) - swc2 C2_SXY2, POLYG4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG4_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG4_len - -.sort_GT4_pri: - - sw $t6, POLYGT4_xy0($a1) - swc2 C2_SXY0, POLYGT4_xy1($a1) - swc2 C2_SXY1, POLYGT4_xy2($a1) - swc2 C2_SXY2, POLYGT4_xy3($a1) - - .set noat - - lui $v1, 0x0c00 - lw $v0, 0($t5) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($t5) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($t5) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYGT4_len - -.skip_prim: - - b .sort_loop - nop - -.exit: - - lw $s0, 0( $sp ) - lw $s1, 4( $sp ) - lw $s2, 8( $sp ) - lw $s3, 12( $sp ) - addiu $sp, 16 - jr $ra - move $v0, $a1 - -.section .bss._smd_cel_col, "w" -.comm _smd_cel_col, 4, 4 # STP shading polygon color - -.section .bss._smd_cel_param, "w" -.comm _smd_cel_param, 4, 4 # U divisor, V divisor, shading clip - -.section .bss._smd_cel_tpage, "w" -.comm _smd_cel_tpage, 4, 4 # CEL shader texture page & CLUT diff --git a/libpsn00b/smd/smd_flat.S b/libpsn00b/smd/smd_flat.S new file mode 100644 index 0000000..b9235e9 --- /dev/null +++ b/libpsn00b/smd/smd_flat.S @@ -0,0 +1,833 @@ +# PSn00bSDK .SMD model parser library +# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + +.set noreorder + +#include "gtereg.inc" +#include "inline_s.inc" +#include "smd/smd_s.inc" + +.section 
.text.smdSortModelFlat, "ax", @progbits +.global smdSortModelFlat +.type smdSortModelFlat, @function + +smdSortModelFlat: + # a0 - Pointer SC_OT structure + # a1 - Pointer to next primitive + # a2 - Pointer to SMD data address + # v0 - New pointer of primitive buffer (return) + + addiu $sp, -16 + sw $s0, 0( $sp ) + sw $s1, 4( $sp ) + sw $s2, 8( $sp ) + sw $s3, 12( $sp ) + + la $v0, _sc_clip + lw $t8, 0($v0) + lw $t9, 4($v0) + + lw $t1, SMD_HEAD_PVERTS( $a2 ) + lw $t2, SMD_HEAD_PNORMS( $a2 ) + lw $t3, SMD_HEAD_PPRIMS( $a2 ) + +.sort_loop: + + nop + lw $a3, 0($t3) # Get primitive ID word + move $t4, $t3 + + beqz $a3, .exit # Check if terminator (just zero) + addiu $t4, 4 + + lhu $t5, 0( $t4 ) # Load vertices + lhu $t6, 2( $t4 ) + lhu $t7, 4( $t4 ) + sll $t5, 3 + sll $t6, 3 + sll $t7, 3 + addu $t5, $t1 + addu $t6, $t1 + addu $t7, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + lwc2 C2_VXY1, 0( $t6 ) + lwc2 C2_VZ1 , 4( $t6 ) + lwc2 C2_VXY2, 0( $t7 ) + lwc2 C2_VZ2 , 4( $t7 ) + + srl $v1, $a3, 24 # Get primitive size + addu $t3, $v1 # Step main pointer to next primitive + + RTPT + + cfc2 $v0, C2_FLAG # Get GTE flag value + nop + + bltz $v0, .skip_prim # Skip primitive if Z overflow + nop + + #NCLIP # Backface culling + + #mfc2 $v1, C2_MAC0 + + andi $v0, $a3, 0x3 + + #bltz $v1, .skip_prim + #nop + + beq $v0, 0x1, .prim_tri # If primitive is a triangle + nop + beq $v0, 0x2, .prim_quad # If primitive is a quad + nop + + b .skip_prim + nop + +## Triangles + +.prim_tri: # Triangle processing + + addiu $t4, 8 # Advance from indices + + #AVSZ3 # Calculate average Z + + ClipTestTri + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw + and $v0, $s2, $s0 # v2 & v0 + beqz $v0, .do_draw + nop + b .skip_prim + nop + +.do_draw: + + #srl $v0, $a3, 2 # Lighting enabled? 
+ #andi $v0, 0x3 + #bnez $v0, .F3_light + #nop + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F3_textured + nop + + andi $v0, $a3, 0x10 # Gouraud shaded + bnez $v0, .F3_gouraud + nop + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2000 + or $v0, $v1 + + b .sort_F3_pri + sw $v0, POLYF3_rgbc( $a1 ) + +.F3_textured: + + lw $v0, 0( $t4 ) # Flat color, no lighting + lui $v1, 0x2400 + or $v0, $v1 + sw $v0, POLYFT3_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT3_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT3_uv1( $a1 ) + sh $v0, POLYFT3_uv2( $a1 ) + + lw $v0, 8( $t4 ) # Tpage + CLUT + nop + andi $v1, $v0, 0xffff + sh $v1, POLYFT3_tpage( $a1 ) + srl $v0, 16 + + b .sort_FT3_pri + sh $v0, POLYFT3_clut( $a1 ) + +.F3_gouraud: + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3000 + or $v0, $at + .set at + sw $v0, POLYG3_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG3_rgbc1($a1) + b .sort_G3_pri + sw $v0, POLYG3_rgbc2($a1) + +#.F3_light: + + #lhu $v0, 0( $t4 ) # Load normal 0 + + #srl $v1, $a3, 2 + #andi $v1, $v1, 0x3 + + #sll $v0, 3 + #addu $v0, $t2 + #lwc2 C2_VXY0, 0( $v0 ) + #lwc2 C2_VZ0 , 4( $v0 ) + + #beq $v1, 0x2, .F3_light_smt + #nop + + #lw $v0, 4( $t4 ) + #lui $v1, 0x2000 + #or $v0, $v1 + #mtc2 $v0, C2_RGB + + #addiu $t4, 8 + #nop + + #NCCS + + #andi $v0, $a3, 0x20 # Textured triangle + #bnez $v0, .F3_light_tex + #nop + + #swc2 C2_RGB2, POLYF3_rgbc( $a1 ) + + #b .sort_F3_pri + #nop + +#.F3_light_tex: + + #lhu $v0, 0( $t4 ) # Load texture coordinates + #lhu $v1, 2( $t4 ) + #sh $v0, POLYFT3_uv0( $a1 ) + #lhu $v0, 4( $t4 ) + #sh $v1, POLYFT3_uv1( $a1 ) + #sh $v0, POLYFT3_uv2( $a1 ) + + #lw $v1, 8( $t4 ) + #nop + #andi $v0, $v1, 0xffff + #sh $v0, POLYFT3_tpage( $a1 ) + #srl $v0, $v1, 16 + #sh $v0, POLYFT3_clut( $a1 ) + + #mfc2 $v0, C2_RGB2 + #lui $v1, 0x2400 + #or $v0, $v1 + + #b .sort_FT3_pri + #sw $v0, POLYFT3_rgbc( $a1 ) + +#.F3_light_smt: + + #lhu $v0, 2( $t4 ) # Load 
normals 1 and 2 + #lhu $v1, 4( $t4 ) + #sll $v0, 3 + #sll $v1, 3 + #addu $v0, $t2 + #addu $v1, $t2 + #lwc2 C2_VXY1, 0( $v0 ) + #lwc2 C2_VZ1 , 4( $v0 ) + #lw $v0, 8( $t4 ) + #lwc2 C2_VXY2, 0( $v1 ) + #lwc2 C2_VZ2 , 4( $v1 ) + #lui $v1, 0x3000 # Load color + #or $v0, $v1 + #mtc2 $v0, C2_RGB + + #addiu $t4, 12 + #nop + + #NCCT + + #andi $v0, $a3, 0x20 # Textured triangle + #bnez $v0, .F3_light_tex_smt + #nop + + #swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) + #swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) + #swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) + + #b .sort_G3_pri + #nop + +# .F3_light_tex_smt: + + # lhu $v0, 0( $t4 ) # Load texture coordinates + # lhu $v1, 2( $t4 ) + # sh $v0, POLYGT3_uv0( $a1 ) + # lhu $v0, 4( $t4 ) + # sh $v1, POLYGT3_uv1( $a1 ) + # sh $v0, POLYGT3_uv2( $a1 ) + + # lw $v1, 8( $t4 ) + # nop + # andi $v0, $v1, 0xffff + # sh $v0, POLYGT3_tpage( $a1 ) + # srl $v0, $v1, 16 + # sh $v0, POLYGT3_clut( $a1 ) + + # mfc2 $v0, C2_RGB0 + # lui $v1, 0x3400 + # or $v0, $v1 + + # swc2 C2_RGB1, POLYGT3_rgbc1( $a1 ) + # swc2 C2_RGB2, POLYGT3_rgbc2( $a1 ) + + # b .sort_GT3_pri + # sw $v0, POLYGT3_rgbc0( $a1 ) + +.sort_F3_pri: + + swc2 C2_SXY0, POLYF3_xy0($a1) + swc2 C2_SXY1, POLYF3_xy1($a1) + swc2 C2_SXY2, POLYF3_xy2($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0500 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF3_len + +.sort_FT3_pri: + + swc2 C2_SXY0, POLYFT3_xy0( $a1 ) + swc2 C2_SXY1, POLYFT3_xy1( $a1 ) + swc2 C2_SXY2, POLYFT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 
0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT3_len + +.sort_G3_pri: + + swc2 C2_SXY0, POLYG3_xy0( $a1 ) + swc2 C2_SXY1, POLYG3_xy1( $a1 ) + swc2 C2_SXY2, POLYG3_xy2( $a1 ) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG3_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0700 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG3_len + +.sort_GT3_pri: + + swc2 C2_SXY0, POLYGT3_xy0( $a1 ) + swc2 C2_SXY1, POLYGT3_xy1( $a1 ) + swc2 C2_SXY2, POLYGT3_xy2( $a1 ) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT3_len + +## Quads + +.prim_quad: # Quad processing + + mfc2 $t6, C2_SXY0 # Retrieve first projected vertex + + lhu $t5, 6( $t4 ) # Project the last vertex + addiu $t4, 8 + sll $t5, 3 + addu $t5, $t1 + lwc2 C2_VXY0, 0( $t5 ) + lwc2 C2_VZ0 , 4( $t5 ) + + nRTPS + + cfc2 $v1, C2_FLAG # Get GTE flag value + + nop + + bltz $v1, .skip_prim + nop + + ClipTestQuad + + and $v0, $s0, $s1 # v0 & v1 + beqz $v0, .do_draw_q + and $v0, $s1, $s2 # v1 & v2 + beqz $v0, .do_draw_q + and $v0, $s2, $s3 # v2 & v3 + beqz $v0, .do_draw_q + and $v0, $s3, $s0 # v3 & v0 + beqz $v0, .do_draw_q + and $v0, $s0, $s2 # v0 & v2 + beqz $v0, .do_draw_q + 
and $v0, $s1, $s3 # v1 & v3 + beqz $v0, .do_draw_q + nop + b .skip_prim + nop + +.do_draw_q: + + srl $v0, $a3, 2 # Lighting enabled? + andi $v0, 0x3 + bnez $v0, .F4_light + nop + + andi $v0, $a3, 0x10 # Gouraud quad + bnez $v0, .F4_gouraud + nop + + andi $v0, $a3, 0x20 # Textured quad + bnez $v0, .F4_textured + nop + + lw $v0, 0($t4) + lui $v1, 0x2800 + or $v0, $v1 + + b .sort_F4_pri + sw $v0, POLYF4_rgbc($a1) + +.F4_textured: + + lw $v0, 0($t4) + lui $v1, 0x2c00 + or $v0, $v1 + sw $v0, POLYFT4_rgbc( $a1 ) + addiu $t4, 4 + + lhu $v0, 0($t4) # Load texture coordinates + lhu $v1, 2($t4) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + + b .sort_FT4_pri + sh $v0, POLYFT4_clut($a1) + +.F4_gouraud: + + lw $v0, 0($t4) + lw $v1, 4($t4) + .set noat + lui $at, 0x3800 + or $v0, $at + .set at + sw $v0, POLYG4_rgbc0($a1) + lw $v0, 8($t4) + sw $v1, POLYG4_rgbc1($a1) + lw $v1, 12($t4) + sw $v0, POLYG4_rgbc2($a1) + b .sort_G4_pri + sw $v1, POLYG4_rgbc3($a1) + + +.F4_light: + + lhu $v0, 0( $t4 ) # Load normal 0 + + srl $v1, $a3, 2 + andi $v1, $v1, 0x3 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + beq $v1, 0x2, .F4_light_smt + nop + + lw $v0, 4( $t4 ) + lui $v1, 0x2800 + or $v0, $v1 + mtc2 $v0, C2_RGB + + addiu $t4, 8 + nop + + NCCS + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F4_light_tex + nop + + swc2 C2_RGB2, POLYF4_rgbc( $a1 ) + + b .sort_F4_pri + nop + +.F4_light_tex: + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYFT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYFT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYFT4_uv2( $a1 ) + sh $v1, POLYFT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + nop + andi $v0, $v1, 0xffff + sh $v0, POLYFT4_tpage( $a1 ) + srl $v0, $v1, 16 + sh $v0, POLYFT4_clut( $a1 ) + 
+ mfc2 $v0, C2_RGB2 + lui $v1, 0x2c00 + or $v0, $v1 + + b .sort_FT4_pri + sw $v0, POLYFT4_rgbc( $a1 ) + +.F4_light_smt: + + lhu $v0, 2( $t4 ) # Load normals 1 and 2 + lhu $v1, 4( $t4 ) + sll $v0, 3 + sll $v1, 3 + addu $v0, $t2 + addu $v1, $t2 + lwc2 C2_VXY1, 0( $v0 ) + lwc2 C2_VZ1 , 4( $v0 ) + lwc2 C2_VXY2, 0( $v1 ) + lwc2 C2_VZ2 , 4( $v1 ) + + lw $v0, 8( $t4 ) + lui $v1, 0x3800 # Load color + or $v0, $v1 + mtc2 $v0, C2_RGB + + nNCCT + + lhu $v0, 6( $t4 ) # Load normal 3 + + addiu $t4, 12 + + sll $v0, 3 + addu $v0, $t2 + lwc2 C2_VXY0, 0( $v0 ) + lwc2 C2_VZ0 , 4( $v0 ) + + andi $v0, $a3, 0x20 # Textured triangle + bnez $v0, .F4_light_tex_smt + nop + + swc2 C2_RGB0, POLYG4_rgbc0( $a1 ) + swc2 C2_RGB1, POLYG4_rgbc1( $a1 ) + swc2 C2_RGB2, POLYG4_rgbc2( $a1 ) + + nNCCS + + swc2 C2_RGB2, POLYG4_rgbc3( $a1 ) + + b .sort_G4_pri + nop + +.F4_light_tex_smt: + + mfc2 $v0, C2_RGB0 + lui $v1, 0x3400 + or $v0, $v1 + sw $v0, POLYGT4_rgbc0( $a1 ) + swc2 C2_RGB1, POLYGT4_rgbc1( $a1 ) + swc2 C2_RGB2, POLYGT4_rgbc2( $a1 ) + + NCCS + + lhu $v0, 0( $t4 ) # Load texture coordinates + lhu $v1, 2( $t4 ) + sh $v0, POLYGT4_uv0( $a1 ) + lhu $v0, 4( $t4 ) + sh $v1, POLYGT4_uv1( $a1 ) + lhu $v1, 6( $t4 ) + sh $v0, POLYGT4_uv2( $a1 ) + sh $v1, POLYGT4_uv3( $a1 ) + + lw $v1, 8( $t4 ) + swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) + + andi $v0, $v1, 0xffff + sh $v0, POLYGT4_tpage( $a1 ) + srl $v0, $v1, 16 + + b .sort_GT4_pri + sh $v0, POLYGT4_clut( $a1 ) + +.sort_F4_pri: + + sw $t6, POLYF4_xy0($a1) + swc2 C2_SXY0, POLYF4_xy1($a1) + swc2 C2_SXY1, POLYF4_xy2($a1) + swc2 C2_SXY2, POLYF4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYF4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0600 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and 
$v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYF4_len + +.sort_FT4_pri: + + sw $t6, POLYFT4_xy0($a1) + swc2 C2_SXY0, POLYFT4_xy1($a1) + swc2 C2_SXY1, POLYFT4_xy2($a1) + swc2 C2_SXY2, POLYFT4_xy3($a1) + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYFT4_len + +.sort_G4_pri: + + sw $t6, POLYG4_xy0($a1) + swc2 C2_SXY0, POLYG4_xy1($a1) + swc2 C2_SXY1, POLYG4_xy2($a1) + swc2 C2_SXY2, POLYG4_xy3($a1) + + la $v0, _smd_tpage_base + lhu $v0, 0($v0) + srl $v1, $a3, 6 # Get blend mode + andi $v1, 0x3 + sll $v1, 5 + or $v0, $v1 + lui $v1, 0xe100 + or $v0, $v1 + sw $v0, POLYG4_tpage($a1) # Store TPage + + .set noat + + lui $v1, 0x0900 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYG4_len + +.sort_GT4_pri: + + sw $t6, POLYGT4_xy0($a1) + swc2 C2_SXY0, POLYGT4_xy1($a1) + swc2 C2_SXY1, POLYGT4_xy2($a1) + swc2 C2_SXY2, POLYGT4_xy3($a1) + + .set noat + + lui $v1, 0x0c00 + lw $v0, 0($a0) + lui $at, 0xff00 + and $v1, $at + lui $at, 0x00ff + or $at, 0xffff + and $v0, $at + or $v1, $v0 + sw $v1, 0($a1) + lw $v0, 0($a0) + and $a1, $at + lui $at, 0xff00 + and $v0, $at + or $v0, $a1 + sw $v0, 0($a0) + + .set at + + lui $v0, 0x8000 + or $a1, $v0 + + b .sort_loop + addiu $a1, POLYGT4_len + +.skip_prim: + + b .sort_loop + nop + +.exit: + + lw $s0, 0( $sp ) + lw $s1, 4( $sp ) + lw $s2, 8( $sp ) + lw $s3, 12( $sp ) + addiu $sp, 16 + jr $ra + move $v0, $a1 diff --git 
a/libpsn00b/smd/smd_flat.s b/libpsn00b/smd/smd_flat.s deleted file mode 100644 index 843b8d3..0000000 --- a/libpsn00b/smd/smd_flat.s +++ /dev/null @@ -1,833 +0,0 @@ -# PSn00bSDK .SMD model parser library -# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed - -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" -.include "smd/smd_s.inc" - -.section .text.smdSortModelFlat, "ax", @progbits -.global smdSortModelFlat -.type smdSortModelFlat, @function - -smdSortModelFlat: - # a0 - Pointer SC_OT structure - # a1 - Pointer to next primitive - # a2 - Pointer to SMD data address - # v0 - New pointer of primitive buffer (return) - - addiu $sp, -16 - sw $s0, 0( $sp ) - sw $s1, 4( $sp ) - sw $s2, 8( $sp ) - sw $s3, 12( $sp ) - - la $v0, _sc_clip - lw $t8, 0($v0) - lw $t9, 4($v0) - - lw $t1, SMD_HEAD_PVERTS( $a2 ) - lw $t2, SMD_HEAD_PNORMS( $a2 ) - lw $t3, SMD_HEAD_PPRIMS( $a2 ) - -.sort_loop: - - nop - lw $a3, 0($t3) # Get primitive ID word - move $t4, $t3 - - beqz $a3, .exit # Check if terminator (just zero) - addiu $t4, 4 - - lhu $t5, 0( $t4 ) # Load vertices - lhu $t6, 2( $t4 ) - lhu $t7, 4( $t4 ) - sll $t5, 3 - sll $t6, 3 - sll $t7, 3 - addu $t5, $t1 - addu $t6, $t1 - addu $t7, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - lwc2 C2_VXY1, 0( $t6 ) - lwc2 C2_VZ1 , 4( $t6 ) - lwc2 C2_VXY2, 0( $t7 ) - lwc2 C2_VZ2 , 4( $t7 ) - - srl $v1, $a3, 24 # Get primitive size - addu $t3, $v1 # Step main pointer to next primitive - - RTPT - - cfc2 $v0, C2_FLAG # Get GTE flag value - nop - - bltz $v0, .skip_prim # Skip primitive if Z overflow - nop - - #NCLIP # Backface culling - - #mfc2 $v1, C2_MAC0 - - andi $v0, $a3, 0x3 - - #bltz $v1, .skip_prim - #nop - - beq $v0, 0x1, .prim_tri # If primitive is a triangle - nop - beq $v0, 0x2, .prim_quad # If primitive is a quad - nop - - b .skip_prim - nop - -## Triangles - -.prim_tri: # Triangle processing - - addiu $t4, 8 # Advance from indices - - #AVSZ3 # Calculate average Z - - ClipTestTri - - and $v0, $s0, $s1 # v0 & v1 - 
beqz $v0, .do_draw - and $v0, $s1, $s2 # v1 & v2 - beqz $v0, .do_draw - and $v0, $s2, $s0 # v2 & v0 - beqz $v0, .do_draw - nop - b .skip_prim - nop - -.do_draw: - - #srl $v0, $a3, 2 # Lighting enabled? - #andi $v0, 0x3 - #bnez $v0, .F3_light - #nop - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F3_textured - nop - - andi $v0, $a3, 0x10 # Gouraud shaded - bnez $v0, .F3_gouraud - nop - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2000 - or $v0, $v1 - - b .sort_F3_pri - sw $v0, POLYF3_rgbc( $a1 ) - -.F3_textured: - - lw $v0, 0( $t4 ) # Flat color, no lighting - lui $v1, 0x2400 - or $v0, $v1 - sw $v0, POLYFT3_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT3_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT3_uv1( $a1 ) - sh $v0, POLYFT3_uv2( $a1 ) - - lw $v0, 8( $t4 ) # Tpage + CLUT - nop - andi $v1, $v0, 0xffff - sh $v1, POLYFT3_tpage( $a1 ) - srl $v0, 16 - - b .sort_FT3_pri - sh $v0, POLYFT3_clut( $a1 ) - -.F3_gouraud: - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3000 - or $v0, $at - .set at - sw $v0, POLYG3_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG3_rgbc1($a1) - b .sort_G3_pri - sw $v0, POLYG3_rgbc2($a1) - -#.F3_light: - - #lhu $v0, 0( $t4 ) # Load normal 0 - - #srl $v1, $a3, 2 - #andi $v1, $v1, 0x3 - - #sll $v0, 3 - #addu $v0, $t2 - #lwc2 C2_VXY0, 0( $v0 ) - #lwc2 C2_VZ0 , 4( $v0 ) - - #beq $v1, 0x2, .F3_light_smt - #nop - - #lw $v0, 4( $t4 ) - #lui $v1, 0x2000 - #or $v0, $v1 - #mtc2 $v0, C2_RGB - - #addiu $t4, 8 - #nop - - #NCCS - - #andi $v0, $a3, 0x20 # Textured triangle - #bnez $v0, .F3_light_tex - #nop - - #swc2 C2_RGB2, POLYF3_rgbc( $a1 ) - - #b .sort_F3_pri - #nop - -#.F3_light_tex: - - #lhu $v0, 0( $t4 ) # Load texture coordinates - #lhu $v1, 2( $t4 ) - #sh $v0, POLYFT3_uv0( $a1 ) - #lhu $v0, 4( $t4 ) - #sh $v1, POLYFT3_uv1( $a1 ) - #sh $v0, POLYFT3_uv2( $a1 ) - - #lw $v1, 8( $t4 ) - #nop - #andi $v0, $v1, 0xffff - #sh $v0, POLYFT3_tpage( $a1 ) - #srl $v0, 
$v1, 16 - #sh $v0, POLYFT3_clut( $a1 ) - - #mfc2 $v0, C2_RGB2 - #lui $v1, 0x2400 - #or $v0, $v1 - - #b .sort_FT3_pri - #sw $v0, POLYFT3_rgbc( $a1 ) - -#.F3_light_smt: - - #lhu $v0, 2( $t4 ) # Load normals 1 and 2 - #lhu $v1, 4( $t4 ) - #sll $v0, 3 - #sll $v1, 3 - #addu $v0, $t2 - #addu $v1, $t2 - #lwc2 C2_VXY1, 0( $v0 ) - #lwc2 C2_VZ1 , 4( $v0 ) - #lw $v0, 8( $t4 ) - #lwc2 C2_VXY2, 0( $v1 ) - #lwc2 C2_VZ2 , 4( $v1 ) - #lui $v1, 0x3000 # Load color - #or $v0, $v1 - #mtc2 $v0, C2_RGB - - #addiu $t4, 12 - #nop - - #NCCT - - #andi $v0, $a3, 0x20 # Textured triangle - #bnez $v0, .F3_light_tex_smt - #nop - - #swc2 C2_RGB0, POLYG3_rgbc0( $a1 ) - #swc2 C2_RGB1, POLYG3_rgbc1( $a1 ) - #swc2 C2_RGB2, POLYG3_rgbc2( $a1 ) - - #b .sort_G3_pri - #nop - -# .F3_light_tex_smt: - - # lhu $v0, 0( $t4 ) # Load texture coordinates - # lhu $v1, 2( $t4 ) - # sh $v0, POLYGT3_uv0( $a1 ) - # lhu $v0, 4( $t4 ) - # sh $v1, POLYGT3_uv1( $a1 ) - # sh $v0, POLYGT3_uv2( $a1 ) - - # lw $v1, 8( $t4 ) - # nop - # andi $v0, $v1, 0xffff - # sh $v0, POLYGT3_tpage( $a1 ) - # srl $v0, $v1, 16 - # sh $v0, POLYGT3_clut( $a1 ) - - # mfc2 $v0, C2_RGB0 - # lui $v1, 0x3400 - # or $v0, $v1 - - # swc2 C2_RGB1, POLYGT3_rgbc1( $a1 ) - # swc2 C2_RGB2, POLYGT3_rgbc2( $a1 ) - - # b .sort_GT3_pri - # sw $v0, POLYGT3_rgbc0( $a1 ) - -.sort_F3_pri: - - swc2 C2_SXY0, POLYF3_xy0($a1) - swc2 C2_SXY1, POLYF3_xy1($a1) - swc2 C2_SXY2, POLYF3_xy2($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0500 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF3_len - -.sort_FT3_pri: - - swc2 C2_SXY0, 
POLYFT3_xy0( $a1 ) - swc2 C2_SXY1, POLYFT3_xy1( $a1 ) - swc2 C2_SXY2, POLYFT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT3_len - -.sort_G3_pri: - - swc2 C2_SXY0, POLYG3_xy0( $a1 ) - swc2 C2_SXY1, POLYG3_xy1( $a1 ) - swc2 C2_SXY2, POLYG3_xy2( $a1 ) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG3_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0700 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG3_len - -.sort_GT3_pri: - - swc2 C2_SXY0, POLYGT3_xy0( $a1 ) - swc2 C2_SXY1, POLYGT3_xy1( $a1 ) - swc2 C2_SXY2, POLYGT3_xy2( $a1 ) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYGT3_len - -## Quads - -.prim_quad: # Quad processing - - mfc2 $t6, C2_SXY0 # Retrieve first projected vertex - - lhu $t5, 6( $t4 ) # Project the last vertex - addiu $t4, 8 - sll $t5, 3 - addu $t5, $t1 - lwc2 C2_VXY0, 0( $t5 ) - lwc2 C2_VZ0 , 4( $t5 ) - - nRTPS - - cfc2 $v1, C2_FLAG # Get GTE flag value - - nop - - bltz $v1, .skip_prim - nop - - ClipTestQuad - - and $v0, $s0, $s1 # v0 & v1 - beqz $v0, .do_draw_q - and $v0, 
$s1, $s2 # v1 & v2 - beqz $v0, .do_draw_q - and $v0, $s2, $s3 # v2 & v3 - beqz $v0, .do_draw_q - and $v0, $s3, $s0 # v3 & v0 - beqz $v0, .do_draw_q - and $v0, $s0, $s2 # v0 & v2 - beqz $v0, .do_draw_q - and $v0, $s1, $s3 # v1 & v3 - beqz $v0, .do_draw_q - nop - b .skip_prim - nop - -.do_draw_q: - - srl $v0, $a3, 2 # Lighting enabled? - andi $v0, 0x3 - bnez $v0, .F4_light - nop - - andi $v0, $a3, 0x10 # Gouraud quad - bnez $v0, .F4_gouraud - nop - - andi $v0, $a3, 0x20 # Textured quad - bnez $v0, .F4_textured - nop - - lw $v0, 0($t4) - lui $v1, 0x2800 - or $v0, $v1 - - b .sort_F4_pri - sw $v0, POLYF4_rgbc($a1) - -.F4_textured: - - lw $v0, 0($t4) - lui $v1, 0x2c00 - or $v0, $v1 - sw $v0, POLYFT4_rgbc( $a1 ) - addiu $t4, 4 - - lhu $v0, 0($t4) # Load texture coordinates - lhu $v1, 2($t4) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - - b .sort_FT4_pri - sh $v0, POLYFT4_clut($a1) - -.F4_gouraud: - - lw $v0, 0($t4) - lw $v1, 4($t4) - .set noat - lui $at, 0x3800 - or $v0, $at - .set at - sw $v0, POLYG4_rgbc0($a1) - lw $v0, 8($t4) - sw $v1, POLYG4_rgbc1($a1) - lw $v1, 12($t4) - sw $v0, POLYG4_rgbc2($a1) - b .sort_G4_pri - sw $v1, POLYG4_rgbc3($a1) - - -.F4_light: - - lhu $v0, 0( $t4 ) # Load normal 0 - - srl $v1, $a3, 2 - andi $v1, $v1, 0x3 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - beq $v1, 0x2, .F4_light_smt - nop - - lw $v0, 4( $t4 ) - lui $v1, 0x2800 - or $v0, $v1 - mtc2 $v0, C2_RGB - - addiu $t4, 8 - nop - - NCCS - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F4_light_tex - nop - - swc2 C2_RGB2, POLYF4_rgbc( $a1 ) - - b .sort_F4_pri - nop - -.F4_light_tex: - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYFT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYFT4_uv1( $a1 ) - lhu 
$v1, 6( $t4 ) - sh $v0, POLYFT4_uv2( $a1 ) - sh $v1, POLYFT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - nop - andi $v0, $v1, 0xffff - sh $v0, POLYFT4_tpage( $a1 ) - srl $v0, $v1, 16 - sh $v0, POLYFT4_clut( $a1 ) - - mfc2 $v0, C2_RGB2 - lui $v1, 0x2c00 - or $v0, $v1 - - b .sort_FT4_pri - sw $v0, POLYFT4_rgbc( $a1 ) - -.F4_light_smt: - - lhu $v0, 2( $t4 ) # Load normals 1 and 2 - lhu $v1, 4( $t4 ) - sll $v0, 3 - sll $v1, 3 - addu $v0, $t2 - addu $v1, $t2 - lwc2 C2_VXY1, 0( $v0 ) - lwc2 C2_VZ1 , 4( $v0 ) - lwc2 C2_VXY2, 0( $v1 ) - lwc2 C2_VZ2 , 4( $v1 ) - - lw $v0, 8( $t4 ) - lui $v1, 0x3800 # Load color - or $v0, $v1 - mtc2 $v0, C2_RGB - - nNCCT - - lhu $v0, 6( $t4 ) # Load normal 3 - - addiu $t4, 12 - - sll $v0, 3 - addu $v0, $t2 - lwc2 C2_VXY0, 0( $v0 ) - lwc2 C2_VZ0 , 4( $v0 ) - - andi $v0, $a3, 0x20 # Textured triangle - bnez $v0, .F4_light_tex_smt - nop - - swc2 C2_RGB0, POLYG4_rgbc0( $a1 ) - swc2 C2_RGB1, POLYG4_rgbc1( $a1 ) - swc2 C2_RGB2, POLYG4_rgbc2( $a1 ) - - nNCCS - - swc2 C2_RGB2, POLYG4_rgbc3( $a1 ) - - b .sort_G4_pri - nop - -.F4_light_tex_smt: - - mfc2 $v0, C2_RGB0 - lui $v1, 0x3400 - or $v0, $v1 - sw $v0, POLYGT4_rgbc0( $a1 ) - swc2 C2_RGB1, POLYGT4_rgbc1( $a1 ) - swc2 C2_RGB2, POLYGT4_rgbc2( $a1 ) - - NCCS - - lhu $v0, 0( $t4 ) # Load texture coordinates - lhu $v1, 2( $t4 ) - sh $v0, POLYGT4_uv0( $a1 ) - lhu $v0, 4( $t4 ) - sh $v1, POLYGT4_uv1( $a1 ) - lhu $v1, 6( $t4 ) - sh $v0, POLYGT4_uv2( $a1 ) - sh $v1, POLYGT4_uv3( $a1 ) - - lw $v1, 8( $t4 ) - swc2 C2_RGB2, POLYGT4_rgbc3( $a1 ) - - andi $v0, $v1, 0xffff - sh $v0, POLYGT4_tpage( $a1 ) - srl $v0, $v1, 16 - - b .sort_GT4_pri - sh $v0, POLYGT4_clut( $a1 ) - -.sort_F4_pri: - - sw $t6, POLYF4_xy0($a1) - swc2 C2_SXY0, POLYF4_xy1($a1) - swc2 C2_SXY1, POLYF4_xy2($a1) - swc2 C2_SXY2, POLYF4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYF4_tpage($a1) # Store TPage - - .set noat - - 
lui $v1, 0x0600 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYF4_len - -.sort_FT4_pri: - - sw $t6, POLYFT4_xy0($a1) - swc2 C2_SXY0, POLYFT4_xy1($a1) - swc2 C2_SXY1, POLYFT4_xy2($a1) - swc2 C2_SXY2, POLYFT4_xy3($a1) - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYFT4_len - -.sort_G4_pri: - - sw $t6, POLYG4_xy0($a1) - swc2 C2_SXY0, POLYG4_xy1($a1) - swc2 C2_SXY1, POLYG4_xy2($a1) - swc2 C2_SXY2, POLYG4_xy3($a1) - - la $v0, _smd_tpage_base - lhu $v0, 0($v0) - srl $v1, $a3, 6 # Get blend mode - andi $v1, 0x3 - sll $v1, 5 - or $v0, $v1 - lui $v1, 0xe100 - or $v0, $v1 - sw $v0, POLYG4_tpage($a1) # Store TPage - - .set noat - - lui $v1, 0x0900 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, POLYG4_len - -.sort_GT4_pri: - - sw $t6, POLYGT4_xy0($a1) - swc2 C2_SXY0, POLYGT4_xy1($a1) - swc2 C2_SXY1, POLYGT4_xy2($a1) - swc2 C2_SXY2, POLYGT4_xy3($a1) - - .set noat - - lui $v1, 0x0c00 - lw $v0, 0($a0) - lui $at, 0xff00 - and $v1, $at - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at - or $v1, $v0 - sw $v1, 0($a1) - lw $v0, 0($a0) - and $a1, $at - lui $at, 0xff00 - and $v0, $at - or $v0, $a1 - sw $v0, 0($a0) - - .set at - - lui $v0, 0x8000 - or $a1, $v0 - - b .sort_loop - addiu $a1, 
POLYGT4_len - -.skip_prim: - - b .sort_loop - nop - -.exit: - - lw $s0, 0( $sp ) - lw $s1, 4( $sp ) - lw $s2, 8( $sp ) - lw $s3, 12( $sp ) - addiu $sp, 16 - jr $ra - move $v0, $a1 diff --git a/libpsn00b/smd/smdparser.S b/libpsn00b/smd/smdparser.S new file mode 100644 index 0000000..857f861 --- /dev/null +++ b/libpsn00b/smd/smdparser.S @@ -0,0 +1,130 @@ +# PSn00bSDK .SMD model parser library +# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + +.set noreorder + +#include "smd/smd_s.inc" + +.set SMD_PRI_ID, 0 +.set SMD_PRI_v0, 4 +.set SMD_PRI_v1, 6 +.set SMD_PRI_v2, 8 +.set SMD_PRI_v3, 10 +.set SMD_PRI_n0, 12 +.set SMD_PRI_n1, 14 +.set SMD_PRI_n2, 16 +.set SMD_PRI_n3, 18 +.set SMD_PRI_rgbc0, 20 +.set SMD_PRI_rgbc1, 24 +.set SMD_PRI_rgbc2, 28 +.set SMD_PRI_rgbc3, 32 +.set SMD_PRI_tuv0, 36 +.set SMD_PRI_tuv1, 38 +.set SMD_PRI_tuv2, 40 +.set SMD_PRI_tuv3, 42 +.set SMD_PRI_tpage, 44 +.set SMD_PRI_clut, 46 + +.section .text.OpenSMD, "ax", @progbits +.global OpenSMD +.type OpenSMD, @function + +OpenSMD: + lw $v0, SMD_HEAD_ID($a0) + li $v1, 0x01444d53 + + bne $v0, $v1, .not_smd + nop + + lw $v0, SMD_HEAD_PPRIMS($a0) + la $v1, _smd_parse_addr + sw $v0, 0($v1) + + jr $ra + lhu $v0, SMD_HEAD_NPRIMS($a0) + +.not_smd: + jr $ra + move $v0, $0 + +.section .text.ReadSMD, "ax", @progbits +.global ReadSMD +.type ReadSMD, @function + +ReadSMD: + la $v0, _smd_parse_addr + lw $v0, 0($v0) + nop + + lw $a2, 0($v0) # Load primitive ID + addiu $a1, $v0, 4 + + sw $a2, SMD_PRI_ID($a0) + + beqz $a2, $end_prim + nop + + srl $v1, $a2, 24 # Get primitive size + addu $v0, $v1 + la $v1, _smd_parse_addr + sw $v0, 0($v1) + + lw $v0, 0($a1) # Copy vertex coords + lw $v1, 4($a1) + sw $v0, SMD_PRI_v0($a0) + sw $v1, SMD_PRI_v2($a0) + addiu $a1, 8 + + srl $v0, $a2, 2 # Lighting enabled? 
+ andi $v0, 0x3 + bnez $v0, $light + nop + + b $no_light + nop + +$light: + srl $v1, $a2, 2 + lw $v0, 0($a1) # Copy vertex coords + andi $v1, 0x3 + sw $v0, SMD_PRI_n0($a0) + + bne $v1, 0x2, $light_flat + addiu $a1, 4 + + lw $v1, 0($a1) + addiu $a1, 4 + sw $v1, SMD_PRI_n2($a0) + +$light_flat: +$no_light: + + lw $v0, 0($a1) + nop + sw $v0, SMD_PRI_rgbc0($a0) + addiu $a1, 4 + + srl $v0, $a2, 5 + andi $v0, 0x1 + beqz $v0, $not_textured + nop + + lw $v0, 0($a1) + lw $v1, 4($a1) + sw $v0, SMD_PRI_tuv0($a0) + lw $v0, 8($a1) + sw $v1, SMD_PRI_tuv2($a0) + sw $v0, SMD_PRI_tpage($a0) + +$not_textured: + + jr $ra + move $v0, $a0 + +$end_prim: + + jr $ra + move $v0, $0 + +.section .bss._smd_parse_addr, "w" +.comm _smd_parse_addr, 4, 4 diff --git a/libpsn00b/smd/smdparser.s b/libpsn00b/smd/smdparser.s deleted file mode 100644 index 656e509..0000000 --- a/libpsn00b/smd/smdparser.s +++ /dev/null @@ -1,130 +0,0 @@ -# PSn00bSDK .SMD model parser library -# (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed - -.set noreorder - -.include "smd/smd_s.inc" - -.set SMD_PRI_ID, 0 -.set SMD_PRI_v0, 4 -.set SMD_PRI_v1, 6 -.set SMD_PRI_v2, 8 -.set SMD_PRI_v3, 10 -.set SMD_PRI_n0, 12 -.set SMD_PRI_n1, 14 -.set SMD_PRI_n2, 16 -.set SMD_PRI_n3, 18 -.set SMD_PRI_rgbc0, 20 -.set SMD_PRI_rgbc1, 24 -.set SMD_PRI_rgbc2, 28 -.set SMD_PRI_rgbc3, 32 -.set SMD_PRI_tuv0, 36 -.set SMD_PRI_tuv1, 38 -.set SMD_PRI_tuv2, 40 -.set SMD_PRI_tuv3, 42 -.set SMD_PRI_tpage, 44 -.set SMD_PRI_clut, 46 - -.section .text.OpenSMD, "ax", @progbits -.global OpenSMD -.type OpenSMD, @function - -OpenSMD: - lw $v0, SMD_HEAD_ID($a0) - li $v1, 0x01444d53 - - bne $v0, $v1, .not_smd - nop - - lw $v0, SMD_HEAD_PPRIMS($a0) - la $v1, _smd_parse_addr - sw $v0, 0($v1) - - jr $ra - lhu $v0, SMD_HEAD_NPRIMS($a0) - -.not_smd: - jr $ra - move $v0, $0 - -.section .text.ReadSMD, "ax", @progbits -.global ReadSMD -.type ReadSMD, @function - -ReadSMD: - la $v0, _smd_parse_addr - lw $v0, 0($v0) - nop - - lw $a2, 0($v0) # Load primitive ID - addiu 
$a1, $v0, 4 - - sw $a2, SMD_PRI_ID($a0) - - beqz $a2, $end_prim - nop - - srl $v1, $a2, 24 # Get primitive size - addu $v0, $v1 - la $v1, _smd_parse_addr - sw $v0, 0($v1) - - lw $v0, 0($a1) # Copy vertex coords - lw $v1, 4($a1) - sw $v0, SMD_PRI_v0($a0) - sw $v1, SMD_PRI_v2($a0) - addiu $a1, 8 - - srl $v0, $a2, 2 # Lighting enabled? - andi $v0, 0x3 - bnez $v0, $light - nop - - b $no_light - nop - -$light: - srl $v1, $a2, 2 - lw $v0, 0($a1) # Copy vertex coords - andi $v1, 0x3 - sw $v0, SMD_PRI_n0($a0) - - bne $v1, 0x2, $light_flat - addiu $a1, 4 - - lw $v1, 0($a1) - addiu $a1, 4 - sw $v1, SMD_PRI_n2($a0) - -$light_flat: -$no_light: - - lw $v0, 0($a1) - nop - sw $v0, SMD_PRI_rgbc0($a0) - addiu $a1, 4 - - srl $v0, $a2, 5 - andi $v0, 0x1 - beqz $v0, $not_textured - nop - - lw $v0, 0($a1) - lw $v1, 4($a1) - sw $v0, SMD_PRI_tuv0($a0) - lw $v0, 8($a1) - sw $v1, SMD_PRI_tuv2($a0) - sw $v0, SMD_PRI_tpage($a0) - -$not_textured: - - jr $ra - move $v0, $a0 - -$end_prim: - - jr $ra - move $v0, $0 - -.section .bss._smd_parse_addr, "w" -.comm _smd_parse_addr, 4, 4 -- cgit v1.2.3