diff options
| author | John "Lameguy" Wilbert Villamor <lameguy64@gmail.com> | 2022-09-26 16:49:56 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-09-26 16:49:56 +0800 |
| commit | c4a2533d21dfd05cde841ea48c67b05e0e6a853f (patch) | |
| tree | c7ef61653b157b69fb0956709366996ddbc4ecfa /libpsn00b | |
| parent | a8b404b3400c3ebd8e0b923dcaefcc49ea563e36 (diff) | |
| parent | 86f0064afb8200e60dd80827535cac30d0eab028 (diff) | |
| download | psn00bsdk-c4a2533d21dfd05cde841ea48c67b05e0e6a853f.tar.gz | |
Merge pull request #55 from spicyjpeg/psxmdec
Full MDEC support, C library refactors, cleanups and bugfixes (v0.20)
Diffstat (limited to 'libpsn00b')
104 files changed, 5059 insertions, 4058 deletions
diff --git a/libpsn00b/CMakeLists.txt b/libpsn00b/CMakeLists.txt index f9c4f9d..a662448 100644 --- a/libpsn00b/CMakeLists.txt +++ b/libpsn00b/CMakeLists.txt @@ -40,6 +40,8 @@ foreach(_library IN LISTS PSN00BSDK_LIBRARIES) psn00bsdk_add_library(${_library} STATIC ${_sources}) endforeach() +psn00bsdk_target_incbin(psxgpu PRIVATE _gpu_debug_font psxgpu/dbugfont.tim) + # Extract libgcc's contents and merge them into libc after building. # Unfortunately glob expressions won't work on Windows, so we have to manually # enumerate the contents of libgcc and save the list to a temporary file (as it diff --git a/libpsn00b/cmake/flags.cmake b/libpsn00b/cmake/flags.cmake index e31773f..5d9c751 100644 --- a/libpsn00b/cmake/flags.cmake +++ b/libpsn00b/cmake/flags.cmake @@ -42,6 +42,8 @@ target_compile_options( -march=r3000 -mtune=r3000 -mabi=32 + -mno-mt + -mno-llsc -mdivide-breaks -O2 # Standard library options @@ -49,6 +51,7 @@ target_compile_options( -fno-builtin -nostdlib # Other options + -g -fdata-sections -ffunction-sections -fsigned-char diff --git a/libpsn00b/cmake/sdk.cmake b/libpsn00b/cmake/sdk.cmake index d6d9bcd..8965e79 100644 --- a/libpsn00b/cmake/sdk.cmake +++ b/libpsn00b/cmake/sdk.cmake @@ -38,6 +38,8 @@ find_program( HINTS ${PSN00BSDK_TC}/bin ${PSN00BSDK_TC}/../bin + # Same as ${CMAKE_INSTALL_PREFIX}/bin + ${CMAKE_CURRENT_LIST_DIR}/../../../bin # Same as ${CMAKE_INSTALL_PREFIX}/${PSN00BSDK_TARGET}/bin ${CMAKE_CURRENT_LIST_DIR}/../../../${PSN00BSDK_TARGET}/bin PATHS diff --git a/libpsn00b/include/ctype.h b/libpsn00b/include/ctype.h index b79498a..24ee9d9 100644 --- a/libpsn00b/include/ctype.h +++ b/libpsn00b/include/ctype.h @@ -1,7 +1,20 @@ -#ifndef _CTYPE_H -#define _CTYPE_H +/* + * PSn00bSDK standard library + * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ -extern int tolower(int chr); -extern int toupper(int chr); +#ifndef __CTYPE_H +#define __CTYPE_H -#endif
\ No newline at end of file +#ifdef __cplusplus +extern "C" { +#endif + +int tolower(int chr); +int toupper(int chr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libpsn00b/include/gtereg.h b/libpsn00b/include/gtereg.inc index 0d051fc..0d051fc 100644 --- a/libpsn00b/include/gtereg.h +++ b/libpsn00b/include/gtereg.inc diff --git a/libpsn00b/include/hwregs_a.h b/libpsn00b/include/hwregs_a.inc index 8a504f5..c78b41a 100644 --- a/libpsn00b/include/hwregs_a.h +++ b/libpsn00b/include/hwregs_a.inc @@ -1,10 +1,11 @@ -# Hardware register definitions for GNU assembler (as) -# -# Part of the PSn00bSDK Project by Lameguy64 -# 2019 Meido-Tek Productions +# PSn00bSDK hardware registers definitions +# (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed +## Constants -.set IOBASE, 0x1f80 # IO segment base +.set IOBASE, 0x1f80 +.set F_CPU, 33868800 +.set F_GPU, 53222400 ## GPU diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h index e533c56..b205b87 100644 --- a/libpsn00b/include/hwregs_c.h +++ b/libpsn00b/include/hwregs_c.h @@ -14,116 +14,117 @@ /* Constants */ +#define IOBASE 0xbf800000 #define F_CPU 33868800UL #define F_GPU 53222400UL /* GPU */ -#define GPU_GP0 _MMIO32(0x1f801810) -#define GPU_GP1 _MMIO32(0x1f801814) +#define GPU_GP0 _MMIO32(IOBASE | 0x1810) +#define GPU_GP1 _MMIO32(IOBASE | 0x1814) /* CD drive */ -#define CD_STAT _MMIO8(0x1f801800) -#define CD_CMD _MMIO8(0x1f801801) -#define CD_DATA _MMIO8(0x1f801802) -#define CD_IRQ _MMIO8(0x1f801803) +#define CD_STAT _MMIO8(IOBASE | 0x1800) +#define CD_CMD _MMIO8(IOBASE | 0x1801) +#define CD_DATA _MMIO8(IOBASE | 0x1802) +#define CD_IRQ _MMIO8(IOBASE | 0x1803) -#define CD_REG(N) _MMIO8(0x1f801800 + (N)) +#define CD_REG(N) _MMIO8(IOBASE | 0x1800 + (N)) /* SPU */ -#define SPU_MASTER_VOL_L _MMIO16(0x1f801d80) -#define SPU_MASTER_VOL_R _MMIO16(0x1f801d82) -#define SPU_REVERB_VOL_L _MMIO16(0x1f801d84) -#define SPU_REVERB_VOL_R _MMIO16(0x1f801d86) -#define SPU_KEY_ON _MMIO32(0x1f801d88) -#define 
SPU_KEY_OFF _MMIO32(0x1f801d8c) -#define SPU_FM_MODE _MMIO32(0x1f801d90) -#define SPU_NOISE_MODE _MMIO32(0x1f801d94) -#define SPU_REVERB_ON _MMIO32(0x1f801d98) -#define SPU_CHAN_STATUS _MMIO32(0x1f801d9c) - -#define SPU_REVERB_ADDR _MMIO16(0x1f801da2) -#define SPU_IRQ_ADDR _MMIO16(0x1f801da4) -#define SPU_ADDR _MMIO16(0x1f801da6) -#define SPU_DATA _MMIO16(0x1f801da8) - -#define SPU_CTRL _MMIO16(0x1f801daa) -#define SPU_DMA_CTRL _MMIO16(0x1f801dac) -#define SPU_STAT _MMIO16(0x1f801dae) - -#define SPU_CD_VOL_L _MMIO16(0x1f801db0) -#define SPU_CD_VOL_R _MMIO16(0x1f801db2) -#define SPU_EXT_VOL_L _MMIO16(0x1f801db4) -#define SPU_EXT_VOL_R _MMIO16(0x1f801db6) -#define SPU_CURRENT_VOL_L _MMIO16(0x1f801db8) -#define SPU_CURRENT_VOL_R _MMIO16(0x1f801dba) +#define SPU_MASTER_VOL_L _MMIO16(IOBASE | 0x1d80) +#define SPU_MASTER_VOL_R _MMIO16(IOBASE | 0x1d82) +#define SPU_REVERB_VOL_L _MMIO16(IOBASE | 0x1d84) +#define SPU_REVERB_VOL_R _MMIO16(IOBASE | 0x1d86) +#define SPU_KEY_ON _MMIO32(IOBASE | 0x1d88) +#define SPU_KEY_OFF _MMIO32(IOBASE | 0x1d8c) +#define SPU_FM_MODE _MMIO32(IOBASE | 0x1d90) +#define SPU_NOISE_MODE _MMIO32(IOBASE | 0x1d94) +#define SPU_REVERB_ON _MMIO32(IOBASE | 0x1d98) +#define SPU_CHAN_STATUS _MMIO32(IOBASE | 0x1d9c) + +#define SPU_REVERB_ADDR _MMIO16(IOBASE | 0x1da2) +#define SPU_IRQ_ADDR _MMIO16(IOBASE | 0x1da4) +#define SPU_ADDR _MMIO16(IOBASE | 0x1da6) +#define SPU_DATA _MMIO16(IOBASE | 0x1da8) + +#define SPU_CTRL _MMIO16(IOBASE | 0x1daa) +#define SPU_DMA_CTRL _MMIO16(IOBASE | 0x1dac) +#define SPU_STAT _MMIO16(IOBASE | 0x1dae) + +#define SPU_CD_VOL_L _MMIO16(IOBASE | 0x1db0) +#define SPU_CD_VOL_R _MMIO16(IOBASE | 0x1db2) +#define SPU_EXT_VOL_L _MMIO16(IOBASE | 0x1db4) +#define SPU_EXT_VOL_R _MMIO16(IOBASE | 0x1db6) +#define SPU_CURRENT_VOL_L _MMIO16(IOBASE | 0x1db8) +#define SPU_CURRENT_VOL_R _MMIO16(IOBASE | 0x1dba) // These are not named SPU_VOICE_* to avoid name clashes with SPU attribute // flags defined in psxspu.h. 
-#define SPU_CH_VOL_L(N) _MMIO16(0x1f801c00 + 16 * (N)) -#define SPU_CH_VOL_R(N) _MMIO16(0x1f801c02 + 16 * (N)) -#define SPU_CH_FREQ(N) _MMIO16(0x1f801c04 + 16 * (N)) -#define SPU_CH_ADDR(N) _MMIO16(0x1f801c06 + 16 * (N)) -#define SPU_CH_ADSR(N) _MMIO32(0x1f801c08 + 16 * (N)) -#define SPU_CH_LOOP_ADDR(N) _MMIO16(0x1f801c0e + 16 * (N)) +#define SPU_CH_VOL_L(N) _MMIO16(IOBASE | 0x1c00 + 16 * (N)) +#define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N)) +#define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N)) +#define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N)) +#define SPU_CH_ADSR(N) _MMIO32(IOBASE | 0x1c08 + 16 * (N)) +#define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N)) /* MDEC */ -#define MDEC0 _MMIO32(0x1f801820) -#define MDEC1 _MMIO32(0x1f801824) +#define MDEC0 _MMIO32(IOBASE | 0x1820) +#define MDEC1 _MMIO32(IOBASE | 0x1824) /* SPI controller port */ // IMPORTANT: even though JOY_TXRX is a 32-bit register, it should only be // accessed as 8-bit. Reading it as 16 or 32-bit works fine on real hardware, // but leads to problems in some emulators. 
-#define JOY_TXRX _MMIO8(0x1f801040) -#define JOY_STAT _MMIO16(0x1f801044) -#define JOY_MODE _MMIO16(0x1f801048) -#define JOY_CTRL _MMIO16(0x1f80104a) -#define JOY_BAUD _MMIO16(0x1f80104e) +#define JOY_TXRX _MMIO8 (IOBASE | 0x1040) +#define JOY_STAT _MMIO16(IOBASE | 0x1044) +#define JOY_MODE _MMIO16(IOBASE | 0x1048) +#define JOY_CTRL _MMIO16(IOBASE | 0x104a) +#define JOY_BAUD _MMIO16(IOBASE | 0x104e) /* Serial port */ -#define SIO_TXRX _MMIO8(0x1f801050) -#define SIO_STAT _MMIO16(0x1f801054) -#define SIO_MODE _MMIO16(0x1f801058) -#define SIO_CTRL _MMIO16(0x1f80105a) -#define SIO_BAUD _MMIO16(0x1f80105e) +#define SIO_TXRX _MMIO8 (IOBASE | 0x1050) +#define SIO_STAT _MMIO16(IOBASE | 0x1054) +#define SIO_MODE _MMIO16(IOBASE | 0x1058) +#define SIO_CTRL _MMIO16(IOBASE | 0x105a) +#define SIO_BAUD _MMIO16(IOBASE | 0x105e) /* IRQ controller */ -#define IRQ_STAT _MMIO32(0x1f801070) -#define IRQ_MASK _MMIO32(0x1f801074) +#define IRQ_STAT _MMIO16(IOBASE | 0x1070) +#define IRQ_MASK _MMIO16(IOBASE | 0x1074) /* DMA */ -#define DMA_DPCR _MMIO32(0x1f8010f0) -#define DMA_DICR _MMIO32(0x1f8010f4) +#define DMA_DPCR _MMIO32(IOBASE | 0x10f0) +#define DMA_DICR _MMIO32(IOBASE | 0x10f4) -#define DMA_MADR(N) _MMIO32(0x1f801080 + 16 * (N)) -#define DMA_BCR(N) _MMIO32(0x1f801084 + 16 * (N)) -#define DMA_CHCR(N) _MMIO32(0x1f801088 + 16 * (N)) +#define DMA_MADR(N) _MMIO32(IOBASE | 0x1080 + 16 * (N)) +#define DMA_BCR(N) _MMIO32(IOBASE | 0x1084 + 16 * (N)) +#define DMA_CHCR(N) _MMIO32(IOBASE | 0x1088 + 16 * (N)) /* Timers */ -#define TIMER_VALUE(N) _MMIO32(0x1f801100 + 16 * (N)) -#define TIMER_CTRL(N) _MMIO32(0x1f801104 + 16 * (N)) -#define TIMER_RELOAD(N) _MMIO32(0x1f801108 + 16 * (N)) +#define TIMER_VALUE(N) _MMIO32(IOBASE | 0x1100 + 16 * (N)) +#define TIMER_CTRL(N) _MMIO32(IOBASE | 0x1104 + 16 * (N)) +#define TIMER_RELOAD(N) _MMIO32(IOBASE | 0x1108 + 16 * (N)) /* Memory control */ -#define EXP1_ADDR _MMIO32(0x1f801000) -#define EXP2_ADDR _MMIO32(0x1f801004) -#define EXP1_DELAY_SIZE 
_MMIO32(0x1f801008) -#define EXP3_DELAY_SIZE _MMIO32(0x1f80100c) -#define BIOS_DELAY_SIZE _MMIO32(0x1f801010) -#define SPU_DELAY_SIZE _MMIO32(0x1f801014) -#define CD_DELAY_SIZE _MMIO32(0x1f801018) -#define EXP2_DELAY_SIZE _MMIO32(0x1f80101c) -#define COM_DELAY_CFG _MMIO32(0x1f801020) -#define RAM_SIZE_CFG _MMIO32(0x1f801060) +#define EXP1_ADDR _MMIO32(IOBASE | 0x1000) +#define EXP2_ADDR _MMIO32(IOBASE | 0x1004) +#define EXP1_DELAY_SIZE _MMIO32(IOBASE | 0x1008) +#define EXP3_DELAY_SIZE _MMIO32(IOBASE | 0x100c) +#define BIOS_DELAY_SIZE _MMIO32(IOBASE | 0x1010) +#define SPU_DELAY_SIZE _MMIO32(IOBASE | 0x1014) +#define CD_DELAY_SIZE _MMIO32(IOBASE | 0x1018) +#define EXP2_DELAY_SIZE _MMIO32(IOBASE | 0x101c) +#define COM_DELAY_CFG _MMIO32(IOBASE | 0x1020) +#define RAM_SIZE_CFG _MMIO32(IOBASE | 0x1060) #endif diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h index 177faf1..c5eaa59 100644 --- a/libpsn00b/include/inline_c.h +++ b/libpsn00b/include/inline_c.h @@ -1,32 +1,23 @@ -/* Inline GTE macros for the GNU C compiler. - * - * Part of the PSn00bSDK Project by Lameguy64. - * 2019 Meido-Tek Production - * - * All GTE commands can be used without having to pass your object file - * through some stupid tool such as DMPSX. Perhaps it was Sony's attempt - * to prevent people from quickly discovering the GTE commands from the - * official SDK easily? Though people could just extract the cop2 opcodes - * of an object file after it has been passed through DMPSX. - * - * Todo: A couple of GTE operation macros are still missing such as - * gte_rtv*() though they appear to be just variants of gte_mvmva more or - * less (gte_rtv0() is actually gte_mvmva(1, 0, 0, 3, 0) for example). +/* + * PSn00bSDK GTE macros + * (C) 2019 Lameguy64 + * (C) 2021-2022 Soapy (tweaked by spicyjpeg) * + * This header is basically identical to Nugget's inline_n.h. All GTE commands + * can be used right away without having to run DMPSX or any other tool on + * object files. 
*/ #ifndef _INLINE_C_H #define _INLINE_C_H -/* - * GTE load macros - */ +/* GTE load macros */ /* Load a SVECTOR (passed as a pointer) to GTE V0 */ #define gte_ldv0( r0 ) __asm__ volatile ( \ - "lwc2 $0 , 0( %0 );" \ - "lwc2 $1 , 4( %0 );" \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ : \ : "r"( r0 ) \ : "$t0" ) @@ -34,8 +25,8 @@ /* Load a SVECTOR (passed as a pointer) to GTE V1 */ #define gte_ldv1( r0 ) __asm__ volatile ( \ - "lwc2 $2 , 0( %0 );" \ - "lwc2 $3 , 4( %0 );" \ + "lwc2 $2, 0( %0 );" \ + "lwc2 $3, 4( %0 );" \ : \ : "r"( r0 ) \ : "$t0" ) @@ -43,8 +34,8 @@ /* Load a SVECTOR (passed as a pointer) to GTE V2 */ #define gte_ldv2( r0 ) __asm__ volatile ( \ - "lwc2 $4 , 0( %0 );" \ - "lwc2 $5 , 4( %0 );" \ + "lwc2 $4, 0( %0 );" \ + "lwc2 $5, 4( %0 );" \ : \ : "r"( r0 ) \ : "$t0" ) @@ -52,20 +43,187 @@ /* Load three SVECTORs (passed as a pointer) to the GTE at once */ #define gte_ldv3( r0, r1, r2 ) __asm__ volatile ( \ - "lwc2 $0 , 0( %0 );" \ - "lwc2 $1 , 4( %0 );" \ - "lwc2 $2 , 0( %1 );" \ - "lwc2 $3 , 4( %1 );" \ - "lwc2 $4 , 0( %2 );" \ - "lwc2 $5 , 4( %2 );" \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 0( %1 );" \ + "lwc2 $3, 4( %1 );" \ + "lwc2 $4, 0( %2 );" \ + "lwc2 $5, 4( %2 );" \ : \ : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) - + +#define gte_ldv3c( r0 ) __asm__ volatile ( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 8( %0 );" \ + "lwc2 $3, 12( %0 );" \ + "lwc2 $4, 16( %0 );" \ + "lwc2 $5, 20( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldv3c_vertc( r0 ) __asm__ volatile ( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 12( %0 );" \ + "lwc2 $3, 16( %0 );" \ + "lwc2 $4, 24( %0 );" \ + "lwc2 $5, 28( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldv01( r0, r1 ) __asm__ volatile ( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + "lwc2 $2, 0( %1 );" \ + "lwc2 $3, 4( %1 );" \ + : \ + : "r"( r0 ), "r"( r1 ) ) + +#define gte_ldv01c( r0 ) __asm__ volatile ( \ + "lwc2 $0, 0( %0 );" \ + "lwc2 $1, 4( %0 );" \ + 
"lwc2 $2, 8( %0 );" \ + "lwc2 $3, 12( %0 );" \ + : \ + : "r"( r0 ) ) + #define gte_ldrgb( r0 ) __asm__ volatile ( \ "lwc2 $6 , 0( %0 );" \ : \ : "r"( r0 ) ) - + +#define gte_ldrgb3( r0, r1, r2 ) __asm__ volatile ( \ + "lwc2 $20, 0( %0 );" \ + "lwc2 $21, 0( %1 );" \ + "lwc2 $22, 0( %2 );" \ + "lwc2 $6, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_ldrgb3c( r0 ) __asm__ volatile ( \ + "lwc2 $20, 0( %0 );" \ + "lwc2 $21, 4( %0 );" \ + "lwc2 $22, 8( %0 );" \ + "lwc2 $6, 8( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldlv0( r0 ) __asm__ volatile ( \ + "lhu $13, 4( %0 );" \ + "lhu $12, 0( %0 );" \ + "sll $13, $13, 16;" \ + "or $12, $12, $13;" \ + "mtc2 $12, $0;" \ + "lwc2 $1, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +#define gte_ldlvl( r0 ) __asm__ volatile ( \ + "lwc2 $9, 0( %0 );" \ + "lwc2 $10, 4( %0 );" \ + "lwc2 $11, 8( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldsv( r0 ) __asm__ volatile ( \ + "lhu $12, 0( %0 );" \ + "lhu $13, 2( %0 );" \ + "lhu $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldbv( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +#define gte_ldcv( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "lbu $14, 2( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldclmv( r0 ) __asm__ volatile ( \ + "lhu $12, 0( %0 );" \ + "lhu $13, 6( %0 );" \ + "lhu $14, 12( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldsxy0( r0 ) __asm__ volatile ( \ + "mtc2 %0, $12;" \ + : \ + : "r"( r0 ) ) + +#define gte_ldsxy1( r0 ) __asm__ volatile ( \ + "mtc2 %0, $13;" \ + : \ + : "r"( r0 ) ) + +#define gte_ldsxy2( r0 ) __asm__ volatile ( 
\ + "mtc2 %0, $14;" \ + : \ + : "r"( r0 ) ) + +#define gte_ldsxy3( r0, r1, r2 ) __asm__ volatile ( \ + "mtc2 %0, $12;" \ + "mtc2 %2, $14;" \ + "mtc2 %1, $13;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_ldsxy3c( r0 ) __asm__ volatile ( \ + "lwc2 $12, 0( %0 );" \ + "lwc2 $13, 4( %0 );" \ + "lwc2 $14, 8( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldsz3( r0, r1, r2 ) __asm__ volatile ( \ + "mtc2 %0, $17;" \ + "mtc2 %1, $18;" \ + "mtc2 %2, $19;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_ldsz4( r0, r1, r2, r3 ) __asm__ volatile ( \ + "mtc2 %0, $16;" \ + "mtc2 %1, $17;" \ + "mtc2 %2, $18;" \ + "mtc2 %3, $19;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ), "r"( r3 ) ) + +#define gte_ldopv1( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "lw $14, 8( %0 );" \ + "ctc2 $13, $2;" \ + "ctc2 $14, $4;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + #define gte_ldopv2( r0 ) __asm__ volatile ( \ "lwc2 $11, 8( %0 );" \ "lwc2 $9 , 0( %0 );" \ @@ -77,9 +235,53 @@ "mtc2 %0, $8;" \ : \ : "r"( r0 ) ) - -/* Sets the GTE offset - */ + +#define gte_ldlzc( r0 ) __asm__ volatile ( \ + "mtc2 %0, $30;" \ + : \ + : "r"( r0 ) ) + +#define gte_SetRGBcd( r0 ) __asm__ volatile ( \ + "lwc2 $6, 0( %0 );" \ + : \ + : "r"( r0 ) ) + +#define gte_ldbkdir( r0, r1, r2 ) __asm__ volatile ( \ + "ctc2 %0, $13;" \ + "ctc2 %1, $14;" \ + "ctc2 %2, $15;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \ + "sll $t0, %0, 4;" \ + "sll $t1, %1, 4;" \ + "sll $t2, %2, 4;" \ + "ctc2 $t0, $13;" \ + "ctc2 $t1, $14;" \ + "ctc2 $t2, $15;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$t0", "$t1", "$t2" ) + +#define gte_ldfcdir( r0, r1, r2 ) __asm__ volatile ( \ + "ctc2 %0, $21;" \ + "ctc2 %1, $22;" \ + "ctc2 %2, $23;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_SetFarColor( r0, r1, r2 ) __asm__ volatile ( \ + "sll $12, %0, 4;" \ + "sll $13, %1, 4;" \ + "sll 
$14, %2, 4;" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "$13", "$14" ) + #define gte_SetGeomOffset( r0, r1 ) __asm__ volatile ( \ "sll $t0, %0, 16;" \ "sll $t1, %1, 16;" \ @@ -88,23 +290,21 @@ : \ : "r"( r0 ), "r"( r1 ) \ : "$t0", "$t1" ) - + #define gte_SetGeomScreen( r0 ) __asm__ volatile ( \ "ctc2 %0, $26;" \ : \ : "r"( r0 ) ) -#define gte_SetTransMatrix( r0 ) __asm__ volatile ( \ - "lw $t0, 20( %0 );" \ - "lw $t1, 24( %0 );" \ - "ctc2 $t0, $5;" \ - "lw $t2, 28( %0 );" \ - "ctc2 $t1, $6;" \ - "ctc2 $t2, $7;" \ +#define gte_ldsvrtrow0( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $0;" \ + "ctc2 $13, $1;" \ : \ : "r"( r0 ) \ - : "$t2" ) - + : "$12", "$13" ) + #define gte_SetRotMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -120,6 +320,15 @@ : "r"( r0 ) \ : "$t2" ) +#define gte_ldsvllrow0( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $8;" \ + "ctc2 $13, $9;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + #define gte_SetLightMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -134,7 +343,16 @@ : \ : "r"( r0 ) \ : "$t2" ) - + +#define gte_ldsvlcrow0( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "ctc2 $12, $16;" \ + "ctc2 $13, $17;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + #define gte_SetColorMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -149,44 +367,153 @@ : \ : "r"( r0 ) \ : "$t2" ) - -#define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \ - "sll $t0, %0, 4;" \ - "sll $t1, %1, 4;" \ - "sll $t2, %2, 4;" \ - "ctc2 $t0, $13;" \ - "ctc2 $t1, $14;" \ - "ctc2 $t2, $15;" \ + +#define gte_SetTransMatrix( r0 ) __asm__ volatile ( \ + "lw $t0, 20( %0 );" \ + "lw $t1, 24( %0 );" \ + "ctc2 $t0, $5;" \ + "lw $t2, 28( %0 );" \ + "ctc2 $t1, $6;" \ + "ctc2 $t2, $7;" \ : \ - : "r"( r0 ), "r"( r1 ), "r"( 
r2 ) \ - : "$t0", "$t1", "$t2" ) - -/* - * GTE store macros - */ - -#define gte_otz( r0 ) __asm__ volatile ( \ - "swc2 $7, 0( %0 );" \ + : "r"( r0 ) \ + : "$t2" ) + +#define gte_ldtr( r0, r1, r2 ) __asm__ volatile ( \ + "ctc2 %0, $5;" \ + "ctc2 %1, $6;" \ + "ctc2 %2, $7;" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) + +#define gte_SetTransVector( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "lw $14, 8( %0 );" \ + "ctc2 $12, $5;" \ + "ctc2 $13, $6;" \ + "ctc2 $14, $7;" \ : \ : "r"( r0 ) \ - : "memory" ) - -#define gte_stflg( r0 ) __asm__ volatile ( \ - "cfc2 $t0, $31;" \ - "nop;" \ - "sw $t0, 0( %0 );" \ + : "$12", "$13", "$14" ) + +#define gte_ld_intpol_uv0( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ : \ : "r"( r0 ) \ - : "memory" ) - + : "$12", "$13" ) + +#define gte_ld_intpol_uv1( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +#define gte_ld_intpol_bv0( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +#define gte_ld_intpol_bv1( r0 ) __asm__ volatile ( \ + "lbu $12, 0( %0 );" \ + "lbu $13, 1( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13" ) + +#define gte_ld_intpol_sv0( r0 ) __asm__ volatile ( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ld_intpol_sv1( r0 ) __asm__ volatile ( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldfc( r0 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 4( %0 );" \ + "lw 
$14, 8( %0 );" \ + "ctc2 $12, $21;" \ + "ctc2 $13, $22;" \ + "ctc2 $14, $23;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldopv2SV( r0 ) __asm__ volatile ( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "lh $14, 4( %0 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "mtc2 $14, $11;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +#define gte_ldopv1SV( r0 ) __asm__ volatile ( \ + "lh $12, 0( %0 );" \ + "lh $13, 2( %0 );" \ + "ctc2 $12, $0;" \ + "lh $14, 4( %0 );" \ + "ctc2 $13, $2;" \ + "ctc2 $14, $4;" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14" ) + +/* GTE store macros */ + #define gte_stsxy( r0 ) __asm__ volatile ( \ "swc2 $14, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - -#define gte_stsxy0( r0 ) __asm__ volatile ( \ + +#define gte_stsxy3( r0, r1, r2 ) __asm__ volatile ( \ "swc2 $12, 0( %0 );" \ + "swc2 $13, 0( %1 );" \ + "swc2 $14, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "memory" ) + +#define gte_stsxy3c( r0 ) __asm__ volatile ( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 4( %0 );" \ + "swc2 $14, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsxy2( r0 ) __asm__ volatile ( \ + "swc2 $14, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) @@ -197,140 +524,526 @@ : "r"( r0 ) \ : "memory" ) -#define gte_stsxy2( r0 ) __asm__ volatile ( \ - "swc2 $14, 0( %0 );" \ +#define gte_stsxy0( r0 ) __asm__ volatile ( \ + "swc2 $12, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3( r0, r1, r2 ) __asm__ volatile ( \ +#define gte_stsxy01( r0, r1 ) __asm__ volatile ( \ "swc2 $12, 0( %0 );" \ "swc2 $13, 0( %1 );" \ - "swc2 $14, 0( %2 );" \ : \ - : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "r"( r0 ), "r"( r1 ) \ + : "memory" ) + +#define gte_stsxy01c( r0 ) __asm__ volatile ( \ + "swc2 $12, 0( %0 );" \ + "swc2 $13, 4( %0 );" \ + : \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_f3( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_f3( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 12( %0 );" \ - "swc2 
$14, 16( %0 )" \ + "swc2 $14, 16( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_g3( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_g3( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 16( %0 );" \ - "swc2 $14, 24( %0 )" \ + "swc2 $14, 24( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_ft3( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_ft3( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 16( %0 );" \ - "swc2 $14, 24( %0 )" \ + "swc2 $14, 24( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_gt3( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_gt3( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 20( %0 );" \ - "swc2 $14, 32( %0 )" \ + "swc2 $14, 32( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_f4( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_f4( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 12( %0 );" \ - "swc2 $14, 16( %0 )" \ + "swc2 $14, 16( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_g4( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_g4( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 16( %0 );" \ - "swc2 $14, 24( %0 )" \ + "swc2 $14, 24( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_ft4( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_ft4( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 16( %0 );" \ - "swc2 $14, 24( %0 )" \ + "swc2 $14, 24( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) -#define gte_stsxy3_gt4( r0 ) __asm__ volatile ( \ +#define gte_stsxy3_gt4( r0 ) __asm__ volatile ( \ "swc2 $12, 8( %0 );" \ "swc2 $13, 20( %0 );" \ - "swc2 $14, 32( %0 )" \ + "swc2 $14, 32( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) +#define gte_stdp( r0 ) __asm__ volatile ( \ + "swc2 $8, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stflg( r0 ) __asm__ volatile ( \ + "cfc2 $12, 
$31;" \ + "nop;" \ + "sw $12, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +#define gte_stflg_4( r0 ) __asm__ volatile ( \ + "cfc2 $12, $31;" \ + "addi $13, $0, 4;" \ + "sll $13, $13, 16;" \ + "and $12, $12, $13;" \ + "sw $12, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "memory" ) + #define gte_stsz( r0 ) __asm__ volatile ( \ "swc2 $19, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - + +#define gte_stsz3( r0, r1, r2 ) __asm__ volatile ( \ + "swc2 $17, 0( %0 );" \ + "swc2 $18, 0( %1 );" \ + "swc2 $19, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "memory" ) + +#define gte_stsz4( r0, r1, r2, r3 ) __asm__ volatile ( \ + "swc2 $16, 0( %0 );" \ + "swc2 $17, 0( %1 );" \ + "swc2 $18, 0( %2 );" \ + "swc2 $19, 0( %3 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ), "r"( r3 ) \ + : "memory" ) + +#define gte_stsz3c( r0 ) __asm__ volatile ( \ + "swc2 $17, 0( %0 );" \ + "swc2 $18, 4( %0 );" \ + "swc2 $19, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsz4c( r0 ) __asm__ volatile ( \ + "swc2 $16, 0( %0 );" \ + "swc2 $17, 4( %0 );" \ + "swc2 $18, 8( %0 );" \ + "swc2 $19, 12( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stszotz( r0 ) __asm__ volatile ( \ + "mfc2 $12, $19;" \ + "nop;" \ + "sra $12, $12, 2;" \ + "sw $12, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + #define gte_stotz( r0 ) __asm__ volatile ( \ "swc2 $7, 0( %0 );" \ : \ - : "r"( r0 ) \ + : "r"( r0 ) \ : "memory" ) - + #define gte_stopz( r0 ) __asm__ volatile ( \ "swc2 $24, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - + +#define gte_stlvl( r0 ) __asm__ volatile ( \ + "swc2 $9, 0( %0 );" \ + "swc2 $10, 4( %0 );" \ + "swc2 $11, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stlvnl( r0 ) __asm__ volatile ( \ + "swc2 $25, 0( %0 );" \ + "swc2 $26, 4( %0 );" \ + "swc2 $27, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stlvnl0( r0 ) __asm__ volatile ( \ + "swc2 $25, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) 
+ +#define gte_stlvnl1( r0 ) __asm__ volatile ( \ + "swc2 $26, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stlvnl2( r0 ) __asm__ volatile ( \ + "swc2 $27, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stsv( r0 ) __asm__ volatile ( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sh $12, 0( %0 );" \ + "sh $13, 2( %0 );" \ + "sh $14, 4( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_stclmv( r0 ) __asm__ volatile ( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sh $12, 0( %0 );" \ + "sh $13, 6( %0 );" \ + "sh $14, 12( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_stbv( r0 ) __asm__ volatile ( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sb $12, 0( %0 );" \ + "sb $13, 1( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "memory" ) + +#define gte_stcv( r0 ) __asm__ volatile ( \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "mfc2 $14, $11;" \ + "sb $12, 0( %0 );" \ + "sb $13, 1( %0 );" \ + "sb $14, 2( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + #define gte_strgb( r0 ) __asm__ volatile ( \ "swc2 $22, 0( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - + #define gte_strgb3( r0, r1, r2 ) __asm__ volatile ( \ "swc2 $20, 0( %0 );" \ "swc2 $21, 0( %1 );" \ "swc2 $22, 0( %2 );" \ : \ - : "r"( r0 ), "r"( r1 ), "r" ( r2 ) \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ : "memory" ) -#define gte_stsv( r0 ) __asm__ volatile ( \ - "mfc2 $t0, $9;" \ - "mfc2 $t1, $10;" \ - "mfc2 $t2, $11;" \ - "sh $t0, 0( %0 );" \ - "sh $t1, 2( %0 );" \ - "sh $t2, 4( %0 );" \ +#define gte_strgb3_g3( r0 ) __asm__ volatile ( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 12( %0 );" \ + "swc2 $22, 20( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - -#define gte_stlvnl( r0 ) __asm__ volatile ( \ - "swc2 $25, 0( %0 );" \ - "swc2 $26, 4( %0 );" \ - "swc2 $27, 8( %0 );" \ + +#define gte_strgb3_gt3( r0 ) __asm__ volatile ( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 16( %0 
);" \ + "swc2 $22, 28( %0 );" \ : \ : "r"( r0 ) \ : "memory" ) - - -/* - * GTE operation macros (does not need a stupid tool such as dmpsx!) - */ - + +#define gte_strgb3_g4( r0 ) __asm__ volatile ( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 12( %0 );" \ + "swc2 $22, 20( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_strgb3_gt4( r0 ) __asm__ volatile ( \ + "swc2 $20, 4( %0 );" \ + "swc2 $21, 16( %0 );" \ + "swc2 $22, 28( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_ReadGeomOffset( r0, r1 ) __asm__ volatile ( \ + "cfc2 $12, $24;" \ + "cfc2 $13, $25;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "sw $12, 0( %0 );" \ + "sw $13, 0( %1 );" \ + : \ + : "r"( r0 ), "r"( r1 ) \ + : "$12", "$13", "memory" ) + +#define gte_ReadGeomScreen( r0 ) __asm__ volatile ( \ + "cfc2 $12, $26;" \ + "nop;" \ + "sw $12, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "memory" ) + +#define gte_ReadRotMatrix( r0 ) __asm__ volatile ( \ + "cfc2 $12, $0;" \ + "cfc2 $13, $1;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $2;" \ + "cfc2 $13, $3;" \ + "cfc2 $14, $4;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $5;" \ + "cfc2 $13, $6;" \ + "cfc2 $14, $7;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_sttr( r0 ) __asm__ volatile ( \ + "cfc2 $12, $5;" \ + "cfc2 $13, $6;" \ + "cfc2 $14, $7;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "sw $14, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_ReadLightMatrix( r0 ) __asm__ volatile ( \ + "cfc2 $12, $8;" \ + "cfc2 $13, $9;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $10;" \ + "cfc2 $13, $11;" \ + "cfc2 $14, $12;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $13;" \ + "cfc2 $13, $14;" \ + "cfc2 $14, $15;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 
);" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_ReadColorMatrix( r0 ) __asm__ volatile ( \ + "cfc2 $12, $16;" \ + "cfc2 $13, $17;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "cfc2 $12, $18;" \ + "cfc2 $13, $19;" \ + "cfc2 $14, $20;" \ + "sw $12, 8( %0 );" \ + "sw $13, 12( %0 );" \ + "sw $14, 16( %0 );" \ + "cfc2 $12, $21;" \ + "cfc2 $13, $22;" \ + "cfc2 $14, $23;" \ + "sw $12, 20( %0 );" \ + "sw $13, 24( %0 );" \ + "sw $14, 28( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_stlzc( r0 ) __asm__ volatile ( \ + "swc2 $31, 0( %0 );" \ + : \ + : "r"( r0 ) \ + : "memory" ) + +#define gte_stfc( r0 ) __asm__ volatile ( \ + "cfc2 $12, $21;" \ + "cfc2 $13, $22;" \ + "cfc2 $14, $23;" \ + "sw $12, 0( %0 );" \ + "sw $13, 4( %0 );" \ + "sw $14, 8( %0 );" \ + : \ + : "r"( r0 ) \ + : "$12", "$13", "$14", "memory" ) + +#define gte_mvlvtr() __asm__ volatile ( \ + "mfc2 $12, $25;" \ + "mfc2 $13, $26;" \ + "mfc2 $14, $27;" \ + "ctc2 $12, $5;" \ + "ctc2 $13, $6;" \ + "ctc2 $14, $7;" \ + : \ + : \ + : "$12", "$13", "$14" ) + +/*#define gte_nop() __asm__ volatile ( \ + "nop;" )*/ + +#define gte_subdvl( r0, r1, r2 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "subu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sw $15, 4( %2 );" \ + "subu $12, $12, $13;" \ + "sw $12, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "$13", "$14", "$15", "memory" ) + +#define gte_subdvd( r0, r1, r2 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "subu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sh $15, 2( %2 );" \ + "subu $12, $12, $13;" \ + "sh $12, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "$13", "$14", "$15", "memory" ) + +#define 
gte_adddvl( r0, r1, r2 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "addu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sw $15, 4( %2 );" \ + "addu $12, $12, $13;" \ + "sw $12, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "$13", "$14", "$15", "memory" ) + +#define gte_adddvd( r0, r1, r2 ) __asm__ volatile ( \ + "lw $12, 0( %0 );" \ + "lw $13, 0( %1 );" \ + "mtc2 $12, $9;" \ + "mtc2 $13, $10;" \ + "sra $12, $12, 16;" \ + "sra $13, $13, 16;" \ + "addu $15, $12, $13;" \ + "mfc2 $12, $9;" \ + "mfc2 $13, $10;" \ + "sh $15, 2( %2 );" \ + "addu $12, $12, $13;" \ + "sh $12, 0( %2 );" \ + : \ + : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ + : "$12", "$13", "$14", "$15", "memory" ) + +#define gte_FlipRotMatrixX() __asm__ volatile ( \ + "cfc2 $12, $0;" \ + "cfc2 $13, $1;" \ + "sll $14, $12, 16;" \ + "sra $14, $14, 16;" \ + "subu $14, $0, $14;" \ + "sra $15, $12, 16;" \ + "subu $15, $0, $15;" \ + "sll $15, $15, 16;" \ + "sll $14, $14, 16;" \ + "srl $14, $14, 16;" \ + "or $14, $14, $15;" \ + "ctc2 $14, $0;" \ + "sll $14, $13, 16;" \ + "sra $14, $14, 16;" \ + "subu $14, $0, $14;" \ + "sra $15, $13, 16;" \ + "sll $15, $15, 16;" \ + "sll $14, $14, 16;" \ + "srl $14, $14, 16;" \ + "or $14, $14, $15;" \ + "ctc2 $14, $1;" \ + : \ + : \ + : "$12", "$13", "$14", "$15" ) + +#define gte_FlipTRX() __asm__ volatile ( \ + "cfc2 $12, $5;" \ + "nop;" \ + "subu $12, $0, $12;" \ + "ctc2 $12, $5;" \ + : \ + : \ + : "$12" ) + +/* GTE operation macros */ + #define gte_rtps() __asm__ volatile ( \ "nop;" \ "nop;" \ @@ -340,169 +1053,405 @@ "nop;" \ "nop;" \ "cop2 0x0280030;" ) - -#define gte_nclip() __asm__ volatile ( \ + +#define gte_rt() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x1400006;" ) - -#define gte_avsz3() __asm__ volatile ( \ + "cop2 0x0480012;" ) + +#define gte_rtv0() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x158002D;" ) - -#define 
gte_avsz4() __asm__ volatile ( \ + "cop2 0x0486012;" ) + +#define gte_rtv1() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x168002E;" ) - -#define gte_sqr0() __asm__ volatile ( \ + "cop2 0x048E012;" ) + +#define gte_rtv2() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0A00428;" ) - -#define gte_sqr12() __asm__ volatile ( \ + "cop2 0x0496012;" ) + +#define gte_rtir() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0A80428;" ) - -#define gte_op0() __asm__ volatile ( \ + "cop2 0x049E012;" ) + +#define gte_rtir_sf0() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x170000C;" ) + "cop2 0x041E012;" ) -#define gte_op12() __asm__ volatile ( \ +#define gte_rtv0tr() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x178000C;" ) - -#define gte_ncs() __asm__ volatile ( \ + "cop2 0x0480012;" ) + +#define gte_rtv1tr() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0C8041E;" ) - -#define gte_nct() __asm__ volatile ( \ + "cop2 0x0488012;" ) + +#define gte_rtv2tr() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0D80420;" ) - -#define gte_nccs() __asm__ volatile ( \ + "cop2 0x0490012;" ) + +#define gte_rtirtr() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x108041B;" ) \ - -#define gte_ncct() __asm__ volatile ( \ + "cop2 0x0498012;" ) + +#define gte_rtv0bk() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x118043F;" ) - -#define gte_ncds() __asm__ volatile ( \ + "cop2 0x0482012;" ) + +#define gte_rtv1bk() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0E80413;" ) - -#define gte_ncdt() __asm__ volatile ( \ + "cop2 0x048A012;" ) + +#define gte_rtv2bk() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0F80416;" ) - -#define gte_cc() __asm__ volatile ( \ + "cop2 0x0492012;" ) + +#define gte_rtirbk() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x138041C;" ) - -#define gte_cdp() __asm__ volatile ( \ + "cop2 0x049A012;" ) + +#define gte_ll() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04A6412;" ) + +#define gte_llv0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + 
"cop2 0x04A6012;" ) + +#define gte_llv1() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04AE012;" ) + +#define gte_llv2() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04B6012;" ) + +#define gte_llir() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04BE012;" ) + +#define gte_llv0tr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04A0012;" ) + +#define gte_llv1tr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04A8012;" ) + +#define gte_llv2tr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04B0012;" ) + +#define gte_llirtr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04B8012;" ) + +#define gte_llv0bk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04A2012;" ) + +#define gte_llv1bk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04AA012;" ) + +#define gte_llv2bk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04B2012;" ) + +#define gte_llirbk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04BA012;" ) + + +#define gte_lc() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04DA412;" ) + +#define gte_lcv0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04C6012;" ) + +#define gte_lcv1() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04CE012;" ) + +#define gte_lcv2() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04D6012;" ) + +#define gte_lcir() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x1280414;" ) - -#define gte_dcpl() __asm__ volatile ( \ + "cop2 0x04DE012;" ) + +#define gte_lcv0tr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04C0012;" ) + +#define gte_lcv1tr() __asm__ volatile ( \ + "nop;" \ "nop;" \ + "cop2 0x04C8012;" ) + +#define gte_lcv2tr() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04D0012;" ) + +#define gte_lcirtr() __asm__ volatile ( \ "nop;" \ - "cop2 0x0680029;" ) - + "nop;" \ + "cop2 0x04D8012;" ) + +#define gte_lcv0bk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04C2012;" ) + +#define gte_lcv1bk() __asm__ volatile ( \ 
+ "nop;" \ + "nop;" \ + "cop2 0x04CA012;" ) + +#define gte_lcv2bk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04D2012;" ) + +#define gte_lcirbk() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x04DA012;" ) + +#define gte_dpcl() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0680029;" ) + #define gte_dpcs() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0780010;" ) - + "cop2 0x0780010;" ) + #define gte_dpct() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0180001;" ) + "cop2 0x0F8002A;" ) #define gte_intpl() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x0980011;" ) + "cop2 0x0980011;" ) -#define gte_gpf0() __asm__ volatile ( \ +#define gte_sqr12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0A80428;" ) + +#define gte_sqr0() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x190003D;" ) + "cop2 0x0A00428;" ) + +#define gte_ncs() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0C8041E;" ) + +#define gte_nct() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0D80420;" ) + +#define gte_ncds() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0E80413;" ) + +#define gte_ncdt() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0F80416;" ) + +#define gte_nccs() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0108041B;" ) + +#define gte_ncct() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0118043F;" ) + +#define gte_cdp() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x01280414;" ) + +#define gte_cc() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0138041C;" ) + +#define gte_nclip() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x01400006;" ) + +#define gte_avsz3() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0158002D;" ) + +#define gte_avsz4() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0168002E;" ) + +#define gte_op12() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0178000C;" ) + +#define gte_op0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x0170000C;" ) #define 
gte_gpf12() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x198003D;" ) - -#define gte_gpl0() __asm__ volatile ( \ + "cop2 0x0198003D;" ) + +#define gte_gpf0() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x1A0003E;" ) + "cop2 0x0190003D;" ) #define gte_gpl12() __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 0x1A8003E;" ) + "cop2 0x01A8003E;" ) + +#define gte_gpl0() __asm__ volatile ( \ + "nop;" \ + "nop;" \ + "cop2 0x01A0003E;" ) #define gte_mvmva_core( r0 ) __asm__ volatile ( \ "nop;" \ "nop;" \ - "cop2 %0" \ + "cop2 %0;" \ : \ : "g"( r0 ) ) - + #define gte_mvmva(sf, mx, v, cv, lm) gte_mvmva_core( 0x0400012 | \ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) - - -/* - * GTE operation macros without leading nops - * - * Checking assembler output when using these is advised. - */ + +/* GTE operation macros without leading nops */ -#define gte_rtps_b() __asm__ volatile ( "cop2 0x0180001;" ) -#define gte_rtpt_b() __asm__ volatile ( "cop2 0x0280030;" ) -#define gte_nclip_b() __asm__ volatile ( "cop2 0x1400006;" ) -#define gte_avsz3_b() __asm__ volatile ( "cop2 0x158002D;" ) -#define gte_avsz4_b() __asm__ volatile ( "cop2 0x168002E;" ) -#define gte_sqr0_b() __asm__ volatile ( "cop2 0x0A00428;" ) -#define gte_sqr12_b() __asm__ volatile ( "cop2 0x0A80428;" ) -#define gte_op0_b() __asm__ volatile ( "cop2 0x170000C;" ) -#define gte_op12_b() __asm__ volatile ( "cop2 0x178000C;" ) -#define gte_ncs_b() __asm__ volatile ( "cop2 0x0C8041E;" ) -#define gte_nct_b() __asm__ volatile ( "cop2 0x0D80420;" ) -#define gte_nccs_b() __asm__ volatile ( "cop2 0x108041B;" ) -#define gte_ncct_b() __asm__ volatile ( "cop2 0x118043F;" ) -#define gte_ncds_b() __asm__ volatile ( "cop2 0x0E80413;" ) -#define gte_ncdt_b() __asm__ volatile ( "cop2 0x0F80416;" ) -#define gte_cc_b() __asm__ volatile ( "cop2 0x138041C;" ) -#define gte_cdp_b() __asm__ volatile ( "cop2 0x1280414;" ) -#define gte_dcpl_b() __asm__ volatile ( "cop2 0x0680029;" ) -#define gte_dpcs_b() __asm__ volatile ( 
"cop2 0x0780010;" ) -#define gte_dpct_b() __asm__ volatile ( "cop2 0x0180001;" ) -#define gte_intpl_b() __asm__ volatile ( "cop2 0x0980011;" ) -#define gte_gpf0_b() __asm__ volatile ( "cop2 0x190003D;" ) -#define gte_gpf12_b() __asm__ volatile ( "cop2 0x198003D;" ) -#define gte_gpl0_b() __asm__ volatile ( "cop2 0x1A0003E;" ) -#define gte_gpl12_b() __asm__ volatile ( "cop2 0x1A8003E;" ) +#define gte_rtps_b() __asm__ volatile ( "cop2 0x0180001;" ) +#define gte_rtpt_b() __asm__ volatile ( "cop2 0x0280030;" ) +#define gte_rt_b() __asm__ volatile ( "cop2 0x0480012;" ) +#define gte_rtv0_b() __asm__ volatile ( "cop2 0x0486012;" ) +#define gte_rtv1_b() __asm__ volatile ( "cop2 0x048E012;" ) +#define gte_rtv2_b() __asm__ volatile ( "cop2 0x0496012;" ) +#define gte_rtir_b() __asm__ volatile ( "cop2 0x049E012;" ) +#define gte_rtir_sf0_b() __asm__ volatile ( "cop2 0x041E012;" ) +#define gte_rtv0tr_b() __asm__ volatile ( "cop2 0x0480012;" ) +#define gte_rtv1tr_b() __asm__ volatile ( "cop2 0x0488012;" ) +#define gte_rtv2tr_b() __asm__ volatile ( "cop2 0x0490012;" ) +#define gte_rtirtr_b() __asm__ volatile ( "cop2 0x0498012;" ) +#define gte_rtv0bk_b() __asm__ volatile ( "cop2 0x0482012;" ) +#define gte_rtv1bk_b() __asm__ volatile ( "cop2 0x048A012;" ) +#define gte_rtv2bk_b() __asm__ volatile ( "cop2 0x0492012;" ) +#define gte_rtirbk_b() __asm__ volatile ( "cop2 0x049A012;" ) +#define gte_ll_b() __asm__ volatile ( "cop2 0x04A6412;" ) +#define gte_llv0_b() __asm__ volatile ( "cop2 0x04A6012;" ) +#define gte_llv1_b() __asm__ volatile ( "cop2 0x04AE012;" ) +#define gte_llv2_b() __asm__ volatile ( "cop2 0x04B6012;" ) +#define gte_llir_b() __asm__ volatile ( "cop2 0x04BE012;" ) +#define gte_llv0tr_b() __asm__ volatile ( "cop2 0x04A0012;" ) +#define gte_llv1tr_b() __asm__ volatile ( "cop2 0x04A8012;" ) +#define gte_llv2tr_b() __asm__ volatile ( "cop2 0x04B0012;" ) +#define gte_llirtr_b() __asm__ volatile ( "cop2 0x04B8012;" ) +#define gte_llv0bk_b() __asm__ volatile ( "cop2 0x04A2012;" 
) +#define gte_llv1bk_b() __asm__ volatile ( "cop2 0x04AA012;" ) +#define gte_llv2bk_b() __asm__ volatile ( "cop2 0x04B2012;" ) +#define gte_llirbk_b() __asm__ volatile ( "cop2 0x04BA012;" ) +#define gte_lc_b() __asm__ volatile ( "cop2 0x04DA412;" ) +#define gte_lcv0_b() __asm__ volatile ( "cop2 0x04C6012;" ) +#define gte_lcv1_b() __asm__ volatile ( "cop2 0x04CE012;" ) +#define gte_lcv2_b() __asm__ volatile ( "cop2 0x04D6012;" ) +#define gte_lcir_b() __asm__ volatile ( "cop2 0x04DE012;" ) +#define gte_lcv0tr_b() __asm__ volatile ( "cop2 0x04C0012;" ) +#define gte_lcv1tr_b() __asm__ volatile ( "cop2 0x04C8012;" ) +#define gte_lcv2tr_b() __asm__ volatile ( "cop2 0x04D0012;" ) +#define gte_lcirtr_b() __asm__ volatile ( "cop2 0x04D8012;" ) +#define gte_lcv0bk_b() __asm__ volatile ( "cop2 0x04C2012;" ) +#define gte_lcv1bk_b() __asm__ volatile ( "cop2 0x04CA012;" ) +#define gte_lcv2bk_b() __asm__ volatile ( "cop2 0x04D2012;" ) +#define gte_lcirbk_b() __asm__ volatile ( "cop2 0x04DA012;" ) +#define gte_dpcl_b() __asm__ volatile ( "cop2 0x0680029;" ) +#define gte_dpcs_b() __asm__ volatile ( "cop2 0x0780010;" ) +#define gte_dpct_b() __asm__ volatile ( "cop2 0x0F8002A;" ) +#define gte_intpl_b() __asm__ volatile ( "cop2 0x0980011;" ) +#define gte_sqr12_b() __asm__ volatile ( "cop2 0x0A80428;" ) +#define gte_sqr0_b() __asm__ volatile ( "cop2 0x0A00428;" ) +#define gte_ncs_b() __asm__ volatile ( "cop2 0x0C8041E;" ) +#define gte_nct_b() __asm__ volatile ( "cop2 0x0D80420;" ) +#define gte_ncds_b() __asm__ volatile ( "cop2 0x0E80413;" ) +#define gte_ncdt_b() __asm__ volatile ( "cop2 0x0F80416;" ) +#define gte_nccs_b() __asm__ volatile ( "cop2 0x0108041B;" ) +#define gte_ncct_b() __asm__ volatile ( "cop2 0x0118043F;" ) +#define gte_cdp_b() __asm__ volatile ( "cop2 0x01280414;" ) +#define gte_cc_b() __asm__ volatile ( "cop2 0x0138041C;" ) +#define gte_nclip_b() __asm__ volatile ( "cop2 0x01400006;" ) +#define gte_avsz3_b() __asm__ volatile ( "cop2 0x0158002D;" ) +#define 
gte_avsz4_b() __asm__ volatile ( "cop2 0x0168002E;" ) +#define gte_op12_b() __asm__ volatile ( "cop2 0x0178000C;" ) +#define gte_op0_b() __asm__ volatile ( "cop2 0x0170000C;" ) +#define gte_gpf12_b() __asm__ volatile ( "cop2 0x0198003D;" ) +#define gte_gpf0_b() __asm__ volatile ( "cop2 0x0190003D;" ) +#define gte_gpl12_b() __asm__ volatile ( "cop2 0x01A8003E;" ) +#define gte_gpl0_b() __asm__ volatile ( "cop2 0x01A0003E;" ) #define gte_mvmva_core_b( r0 ) __asm__ volatile ( \ - "cop2 %0" \ + "cop2 %0;" \ : \ - : "g"( r0 ) ) + : "g"( r0 ) ) #define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) - -#endif // _INLINE_C_H
\ No newline at end of file + +#endif // _INLINE_C_H diff --git a/libpsn00b/include/inline_s.h b/libpsn00b/include/inline_s.inc index 68e0d07..68e0d07 100644 --- a/libpsn00b/include/inline_s.h +++ b/libpsn00b/include/inline_s.inc diff --git a/libpsn00b/include/lzp/lzp.h b/libpsn00b/include/lzp/lzp.h index cfeeb72..456de02 100644 --- a/libpsn00b/include/lzp/lzp.h +++ b/libpsn00b/include/lzp/lzp.h @@ -16,7 +16,7 @@ #ifndef _LZPACK_H #define _LZPACK_H -#include <sys/types.h> +#include <stdint.h> #ifdef _WIN32 #include <windows.h> #endif @@ -64,9 +64,9 @@ typedef struct { //! File ID (must always be 'LZP') - char id[3]; + char id[3]; //! File count - u_char numFiles; + uint8_t numFiles; } LZP_HEAD; @@ -74,15 +74,15 @@ typedef struct { typedef struct { //! File name - char fileName[16]; + char fileName[16]; //! CRC32 checksum of file - u_int crc; + uint32_t crc; //! Original size of file in bytes - u_int fileSize; + uint32_t fileSize; //! Compressed size of file - u_int packedSize; + uint32_t packedSize; //! File data offset - u_int offset; + uint32_t offset; } LZP_FILE; @@ -162,7 +162,7 @@ void lzResetHashSizes(); * * \returns CRC16 hash of specified buffer. */ -unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc); +uint16_t lzCRC16(const void* buff, int bytes, uint16_t crc); /*! Calculates a CRC32 hash of the specified buffer. * @@ -172,7 +172,7 @@ unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc); * * \returns CRC32 hash of specified buffer. */ -unsigned int lzCRC32(const void* buff, int bytes, unsigned int crc); +uint32_t lzCRC32(const void* buff, int bytes, uint32_t crc); /*! 
@} */ diff --git a/libpsn00b/include/lzp/lzqlp.h b/libpsn00b/include/lzp/lzqlp.h index 5b70b40..32ce0d7 100644 --- a/libpsn00b/include/lzp/lzqlp.h +++ b/libpsn00b/include/lzp/lzqlp.h @@ -1,7 +1,7 @@ #ifndef _QLP_H #define _QLP_H -#include <sys/types.h> +#include <stdint.h> #ifdef _WIN32 #include <windows.h> #endif @@ -13,14 +13,14 @@ #define PACK_ERR_READ_FAULT -4 typedef struct { - char id[3]; - u_char numfiles; + char id[3]; + uint8_t numfiles; } QLP_HEAD; typedef struct { - char name[16]; - u_int size; - u_int offs; + char name[16]; + uint32_t size; + uint32_t offs; } QLP_FILE; int qlpFileCount(const QLP_HEAD* qlpfile); diff --git a/libpsn00b/include/psxapi.h b/libpsn00b/include/psxapi.h index 1298d29..5d1097e 100644 --- a/libpsn00b/include/psxapi.h +++ b/libpsn00b/include/psxapi.h @@ -1,5 +1,15 @@ -#ifndef __PSXAPI__ -#define __PSXAPI__ +/* + * PSn00bSDK kernel API library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ + +#ifndef __PSXAPI_H +#define __PSXAPI_H + +#include <stdint.h> +#include <stddef.h> + +/* Definitions */ #define DescHW 0xf0000000 #define DescSW 0xf4000000 @@ -32,29 +42,31 @@ #define RCntMdFR 0x0000 #define RCntMdGATE 0x0010 +/* Structure definitions */ + typedef struct { // Thread control block - int status; - int mode; + int status; + int mode; union { - unsigned int reg[37]; + uint32_t reg[37]; struct { - unsigned int zero, at; - unsigned int v0, v1; - unsigned int a0, a1, a2, a3; - unsigned int t0, t1, t2, t3, t4, t5, t6, t7; - unsigned int s0, s1, s2, s3, s4, s5, s6, s7; - unsigned int t8, t9; - unsigned int k0, k1; - unsigned int gp, sp, fp, ra; - - unsigned int cop0r14; - unsigned int hi; - unsigned int lo; - unsigned int cop0r12; - unsigned int cop0r13; + uint32_t zero, at; + uint32_t v0, v1; + uint32_t a0, a1, a2, a3; + uint32_t t0, t1, t2, t3, t4, t5, t6, t7; + uint32_t s0, s1, s2, s3, s4, s5, s6, s7; + uint32_t t8, t9; + uint32_t k0, k1; + uint32_t gp, sp, fp, ra; + + uint32_t cop0r14; + uint32_t hi; + uint32_t lo; + 
uint32_t cop0r12; + uint32_t cop0r13; }; }; - int _reserved[9]; + int _reserved[9]; } TCB; typedef struct { // Process control block @@ -85,17 +97,17 @@ typedef struct { // Device control block } DCB; typedef struct { // File control block - int status; - unsigned int diskid; - void *trns_addr; - unsigned int trns_len; - unsigned int filepos; - unsigned int flags; - unsigned int lasterr; - DCB *dcb; - unsigned int filesize; - unsigned int lba; - unsigned int fcbnum; + int status; + uint32_t diskid; + void *trns_addr; + uint32_t trns_len; + uint32_t filepos; + uint32_t flags; + uint32_t lasterr; + DCB *dcb; + uint32_t filesize; + uint32_t lba; + uint32_t fcbnum; } FCB; struct DIRENTRY { // Directory entry @@ -108,34 +120,31 @@ struct DIRENTRY { // Directory entry }; struct EXEC { - unsigned int pc0; - unsigned int gp0; - unsigned int t_addr; - unsigned int t_size; - unsigned int d_addr; - unsigned int d_size; - unsigned int b_addr; - unsigned int b_size; - unsigned int s_addr; - unsigned int s_size; - unsigned int sp,fp,rp,ret,base; + uint32_t pc0, gp0; + uint32_t t_addr, t_size; + uint32_t d_addr, d_size; + uint32_t b_addr, b_size; + uint32_t s_addr, s_size; + uint32_t sp, fp, rp, ret, base; }; struct JMP_BUF { - unsigned int ra, sp, fp; - unsigned int s0, s1, s2, s3, s4, s5, s6, s7; - unsigned int gp; + uint32_t ra, sp, fp; + uint32_t s0, s1, s2, s3, s4, s5, s6, s7; + uint32_t gp; }; // Not recommended to use these functions to install IRQ handlers typedef struct { - unsigned int* next; - unsigned int* func2; - unsigned int* func1; - unsigned int pad; + uint32_t *next; + uint32_t *func2; + uint32_t *func1; + int _reserved; } INT_RP; +/* API */ + #ifdef __cplusplus extern "C" { #endif @@ -143,30 +152,27 @@ extern "C" { void SysEnqIntRP(int pri, INT_RP *rp); void SysDeqIntRP(int pri, INT_RP *rp); -// Event handler stuff - -int OpenEvent(unsigned int cl, int spec, int mode, void (*func)()); -int CloseEvent(int ev_desc); -int EnableEvent(int ev_desc); -int 
DisableEvent(int ev_desc); - -// BIOS file functions +int OpenEvent(uint32_t cl, uint32_t spec, int mode, void (*func)()); +int CloseEvent(int event); +int WaitEvent(int event); +int TestEvent(int event); +int EnableEvent(int event); +int DisableEvent(int event); +void DeliverEvent(uint32_t cl, uint32_t spec); +void UnDeliverEvent(uint32_t cl, uint32_t spec); int open(const char *name, int mode); int close(int fd); -int seek(int fd, unsigned int offset, int mode); -int read(int fd, char *buff, unsigned int len); -int write(int fd, const char *buff, unsigned int len); +int seek(int fd, uint32_t offset, int mode); +int read(int fd, uint8_t *buff, size_t len); +int write(int fd, const uint8_t *buff, size_t len); int ioctl(int fd, int cmd, int arg); struct DIRENTRY *firstfile(const char *wildcard, struct DIRENTRY *entry); struct DIRENTRY *nextfile(struct DIRENTRY *entry); int erase(const char *name); int chdir(const char *path); -//#define delete( p ) erase( p ) // May conflict with delete operator in C++ -#define cd( p ) chdir( p ) // For compatibility - -// BIOS device functions +//#define cd(p) chdir(p) int AddDev(DCB *dcb); int DelDev(const char *name); @@ -175,18 +181,17 @@ void AddDummyTty(void); void EnterCriticalSection(void); void ExitCriticalSection(void); +void SwEnterCriticalSection(void); +void SwExitCriticalSection(void); -// BIOS CD functions void _InitCd(void); void _96_init(void); void _96_remove(void); -// BIOS pad functions -void InitPAD(char *buff1, int len1, char *buff2, int len2); +void InitPAD(uint8_t *buff1, int len1, uint8_t *buff2, int len2); void StartPAD(void); void StopPAD(void); -// BIOS memory card functions void InitCARD(int pad_enable); void StartCARD(void); void StopCARD(void); @@ -198,32 +203,32 @@ int _card_status(int chan); int _card_wait(int chan); int _card_clear(int chan); int _card_chan(void); -int _card_read(int chan, int sector, unsigned char *buf); -int _card_write(int chan, int sector, unsigned char *buf); +int 
_card_read(int chan, int sector, uint8_t *buf); +int _card_write(int chan, int sector, uint8_t *buf); void _new_card(void); -// Timers -int SetRCnt(int spec, unsigned short target, int mode); +int SetRCnt(int spec, uint16_t target, int mode); int GetRCnt(int spec); int StartRCnt(int spec); int StopRCnt(int spec); int ResetRCnt(int spec); -// BIOS IRQ acknowledge control void ChangeClearPAD(int mode); void ChangeClearRCnt(int t, int m); -// Executable functions +uint32_t OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp); +int CloseTh(uint32_t thread); +int ChangeTh(uint32_t thread); + int Exec(struct EXEC *exec, int argc, char **argv); void FlushCache(void); -// BIOS setjmp functions void b_setjmp(struct JMP_BUF *buf); -void b_longjmp(struct JMP_BUF *buf, int param); +void b_longjmp(const struct JMP_BUF *buf, int param); void SetDefaultExitFromException(void); -void SetCustomExitFromException(struct JMP_BUF *buf); +void SetCustomExitFromException(const struct JMP_BUF *buf); +void ReturnFromException(void); -// Misc functions int GetSystemInfo(int index); void *GetB0Table(void); void *GetC0Table(void); diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h index 3336963..03ee792 100644 --- a/libpsn00b/include/psxcd.h +++ b/libpsn00b/include/psxcd.h @@ -1,7 +1,12 @@ -#ifndef _LIBPSXCD_H -#define _LIBPSXCD_H +/* + * PSn00bSDK CD-ROM drive library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ -#include <sys/types.h> +#ifndef __PSXCD_H +#define __PSXCD_H + +#include <stdint.h> /* * CD-ROM control commands @@ -74,18 +79,18 @@ #define CdlIsoInvalidFs 0x03 #define CdlIsoLidOpen 0x04 -#define btoi(b) ((b)/16*10+(b)%16) /* Convert BCD value to integer */ -#define itob(i) ((i)/10*16+(i)%10) /* Convert integer to BCD value */ +#define btoi(b) ((b)/16*10+(b)%16) /* Convert BCD value to integer */ +#define itob(i) ((i)/10*16+(i)%10) /* Convert integer to BCD value */ /* * CD-ROM disc location struct */ typedef struct _CdlLOC { - u_char minute; - u_char 
second; - u_char sector; - u_char track; + uint8_t minute; + uint8_t second; + uint8_t sector; + uint8_t track; } CdlLOC; /* @@ -93,10 +98,10 @@ typedef struct _CdlLOC */ typedef struct _CdlATV { - u_char val0; /* L -> SPU L */ - u_char val1; /* L -> SPU R */ - u_char val2; /* R -> SPU R */ - u_char val3; /* R -> SPU L */ + uint8_t val0; /* L -> SPU L */ + uint8_t val1; /* L -> SPU R */ + uint8_t val2; /* R -> SPU R */ + uint8_t val3; /* R -> SPU L */ } CdlATV; /* @@ -104,68 +109,68 @@ typedef struct _CdlATV */ typedef struct _CdlFILE { - CdlLOC pos; - u_long size; - char name[16]; + CdlLOC pos; + uint32_t size; + char name[16]; } CdlFILE; typedef struct _CdlFILTER { - u_char file; - u_char chan; - u_short pad; + uint8_t file; + uint8_t chan; + uint16_t pad; } CdlFILTER; /* Directory query context */ typedef void* CdlDIR; /* Data callback */ -typedef void (*CdlCB)(int, u_char *); +typedef void (*CdlCB)(int, uint8_t *); #ifdef __cplusplus extern "C" { #endif -int CdInit(void); +int CdInit(void); -CdlLOC* CdIntToPos(int i, CdlLOC *p); -int CdPosToInt(CdlLOC *p); -int CdGetToc(CdlLOC *toc); +CdlLOC* CdIntToPos(int i, CdlLOC *p); +int CdPosToInt(CdlLOC *p); +int CdGetToc(CdlLOC *toc); -int CdControl(u_char com, const void *param, u_char *result); -int CdControlB(u_char com, const void *param, u_char *result); -int CdControlF(u_char com, const void *param); -int CdSync(int mode, u_char *result); -u_long CdSyncCallback(CdlCB func); +int CdControl(uint8_t com, const void *param, uint8_t *result); +int CdControlB(uint8_t com, const void *param, uint8_t *result); +int CdControlF(uint8_t com, const void *param); +int CdSync(int mode, uint8_t *result); +uint32_t CdSyncCallback(CdlCB func); -long CdReadyCallback(CdlCB func); -int CdGetSector(void *madr, int size); +long CdReadyCallback(CdlCB func); +int CdGetSector(void *madr, int size); CdlFILE* CdSearchFile(CdlFILE *loc, const char *filename); -int CdRead(int sectors, u_long *buf, int mode); -int CdReadSync(int mode, u_char 
*result); -u_long CdReadCallback(CdlCB func); +int CdRead(int sectors, uint32_t *buf, int mode); +int CdReadSync(int mode, uint8_t *result); +uint32_t CdReadCallback(CdlCB func); -int CdStatus(void); -int CdMode(void); +int CdStatus(void); +int CdMode(void); -int CdMix(CdlATV *vol); +int CdMix(CdlATV *vol); /* ORIGINAL CODE */ -CdlDIR* CdOpenDir(const char* path); -int CdReadDir(CdlDIR* dir, CdlFILE* file); -void CdCloseDir(CdlDIR* dir); +CdlDIR* CdOpenDir(const char* path); +int CdReadDir(CdlDIR* dir, CdlFILE* file); +void CdCloseDir(CdlDIR* dir); -int CdGetVolumeLabel(char* label); +int CdGetVolumeLabel(char* label); -long* CdAutoPauseCallback(void(*func)()); -int CdIsoError(); +long* CdAutoPauseCallback(void(*func)()); +int CdIsoError(); -int CdLoadSession(int session); +int CdLoadSession(int session); #ifdef __cplusplus } #endif -#endif /* _LIBPSXCD_H */ +#endif diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h index a55e593..24485d9 100644 --- a/libpsn00b/include/psxetc.h +++ b/libpsn00b/include/psxetc.h @@ -1,15 +1,25 @@ -#ifndef _PSXETC_H -#define _PSXETC_H +/* + * PSn00bSDK interrupt management library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ + +#ifndef __PSXETC_H +#define __PSXETC_H + +/* Public API */ #ifdef __cplusplus extern "C" { #endif -// Interrupt callback functions -void *DMACallback(int dma, void (*func)(void)); void *InterruptCallback(int irq, void (*func)(void)); -void *GetInterruptCallback(int irq); // Original -void RestartCallback(); +void *GetInterruptCallback(int irq); +void *DMACallback(int dma, void (*func)(void)); +void *GetDMACallback(int dma); + +int ResetCallback(void); +void RestartCallback(void); +void StopCallback(void); #ifdef __cplusplus } diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index f061219..0e7ec00 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -1,623 +1,519 @@ +/* + * PSn00bSDK GPU library + * (C) 2019-2022 Lameguy64, spicyjpeg - 
MPL licensed + */ + #ifndef __PSXGPU_H #define __PSXGPU_H -#include <sys/types.h> - -// Low-level display parameters for DISPENV_RAW. A leftover from prototyping -#define DISP_WIDTH_256 0 -#define DISP_WIDTH_320 1 -#define DISP_WIDTH_384 64 -#define DISP_WIDTH_512 2 -#define DISP_WIDTH_640 3 - -#define DISP_HEIGHT_LOW 0 // Could be 240 for NTSC, 256 for PAL -#define DISP_HEIGHT_HIGH 4 // Could be 480 for NTSC, 512 for PAL -#define DISP_INTERLACE 32 -#define DISP_24BIT_COLOR 16 -#define DISP_MODE_NTSC 0 -#define DISP_MODE_PAL 8 +#include <stdint.h> +#include <stddef.h> +/* Definitions */ -#define MODE_NTSC 0 -#define MODE_PAL 1 +typedef enum _GPU_DispFlags { + DISP_WIDTH_256 = 0, + DISP_WIDTH_320 = 1, + DISP_WIDTH_512 = 2, + DISP_WIDTH_640 = 3, + DISP_HEIGHT_HIGH = 1 << 2, + DISP_MODE_PAL = 1 << 3, + DISP_24BIT_COLOR = 1 << 4, + DISP_INTERLACE = 1 << 5, + DISP_WIDTH_384 = 1 << 6 +} GPU_DispFlags; +typedef enum _GPU_VideoMode { + MODE_NTSC = 0, + MODE_PAL = 1 +} GPU_VideoMode; -// Vector macros +/* Structure macros */ -#define setVector( v, _x, _y, _z ) \ +#define setVector(v, _x, _y, _z) \ (v)->vx = _x, (v)->vy = _y, (v)->vz = _z -#define setRECT( v, _x, _y, _w, _h ) \ +#define setRECT(v, _x, _y, _w, _h) \ (v)->x = _x, (v)->y = _y, (v)->w = _w, (v)->h = _h +#define setTPage(p, tp, abr, x, y) ((p)->tpage = getTPage(tp, abr, x, y)) +#define setClut(p, x, y) ((p)->clut = getClut(x, y)) -// Primitive macros - -#define setDrawTPage( p, dfe, dtd, tpage ) \ - ( (p)->code[0] = tpage|(dfe<<10)|(dtd<<9), \ - setlen( p, 1 ), setcode( p, 0xe1 ) ) - -/*#define setVram2Vram( p ) ( setlen( p, 8 ), setcode( p, 0x80 ), \ - (p)->nop[0] = 0, (p)->nop[1] = 0, (p)->nop[2] = 0, (p)->nop[3] = 0 )*/ - -/* - -#define setTPagePri2( p, dth, tp, abr, x, y ) \ - ( (p)->code[0] = getTPage( tp, abr, x, y )|(dth<<9), \ - setlen( p, 1 ), setcode( p, 0xe1 ) )*/ - -/* - * Set primitive attributes - */ -#define setTPage( p, tp, abr, x, y ) \ - ( (p)->tpage = getTPage( tp, abr, x, y ) ) - -#define 
setClut( p, x, y ) \ - ( (p)->clut = getClut(x, y) ) - - -/* - * Set primitive colors - */ -#define setRGB0( p, r, g, b ) ( (p)->r0 = r, (p)->g0 = g, (p)->b0 = b ) -#define setRGB1( p, r, g, b ) ( (p)->r1 = r, (p)->g1 = g, (p)->b1 = b ) -#define setRGB2( p, r, g, b ) ( (p)->r2 = r, (p)->g2 = g, (p)->b2 = b ) -#define setRGB3( p, r, g, b ) ( (p)->r3 = r, (p)->g3 = g, (p)->b3 = b ) - +#define setRGB0(p, r, g, b) ((p)->r0 = r, (p)->g0 = g, (p)->b0 = b) +#define setRGB1(p, r, g, b) ((p)->r1 = r, (p)->g1 = g, (p)->b1 = b) +#define setRGB2(p, r, g, b) ((p)->r2 = r, (p)->g2 = g, (p)->b2 = b) +#define setRGB3(p, r, g, b) ((p)->r3 = r, (p)->g3 = g, (p)->b3 = b) -/* - * Set primitive screen coordinates - */ -#define setXY0( p, _x0, _y0 ) \ +#define setXY0(p, _x0, _y0) \ (p)->x0 = _x0, (p)->y0 = _y0 -#define setXY2( p, _x0, _y0, _x1, _y1 ) \ +#define setXY2(p, _x0, _y0, _x1, _y1) \ (p)->x0 = _x0, (p)->y0 = _y0, \ (p)->x1 = _x1, (p)->y1 = _y1 - -#define setXY3( p, _x0, _y0, _x1, _y1, _x2, _y2 ) \ + +#define setXY3(p, _x0, _y0, _x1, _y1, _x2, _y2) \ (p)->x0 = _x0, (p)->y0 = _y0, \ (p)->x1 = _x1, (p)->y1 = _y1, \ (p)->x2 = _x2, (p)->y2 = _y2 -#define setXY4( p, _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3 ) \ +#define setXY4(p, _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3) \ (p)->x0 = _x0, (p)->y0 = _y0, \ (p)->x1 = _x1, (p)->y1 = _y1, \ (p)->x2 = _x2, (p)->y2 = _y2, \ (p)->x3 = _x3, (p)->y3 = _y3 -#define setWH( p, _w, _h ) \ +#define setWH(p, _w, _h) \ (p)->w = _w, (p)->h = _h -/* - * Set texture coordinates - */ -#define setUV0( p, _u0, _v0 ) \ +#define setXYWH(p, _x0, _y0, _w, _h) \ + (p)->x0 = _x0, (p)->y0 = _y0, \ + (p)->x1 = (_x0 + (_w)), (p)->y1 = _y0, \ + (p)->x2 = _x0, (p)->y2 = (_y0 + (_h)), \ + (p)->x3 = (_x0 + (_w)), (p)->y3 = (_y0 + (_h)) + +#define setUV0(p, _u0, _v0) \ (p)->u0 = _u0, (p)->v0 = _v0 -#define setUV3( p, _u0, _v0, _u1, _v1, _u2, _v2 ) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = _u1, (p)->v1 = _v1, \ +#define setUV3(p, _u0, _v0, _u1, _v1, _u2, _v2) \ + 
(p)->u0 = _u0, (p)->v0 = _v0, \ + (p)->u1 = _u1, (p)->v1 = _v1, \ (p)->u2 = _u2, (p)->v2 = _v2 -#define setUV4( p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3 ) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = _u1, (p)->v1 = _v1, \ - (p)->u2 = _u2, (p)->v2 = _v2, \ +#define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \ + (p)->u0 = _u0, (p)->v0 = _v0, \ + (p)->u1 = _u1, (p)->v1 = _v1, \ + (p)->u2 = _u2, (p)->v2 = _v2, \ (p)->u3 = _u3, (p)->v3 = _v3 -#define setUVWH( p, _u0, _v0, _w, _h ) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = _u0+(_w), (p)->v1 = _v0, \ - (p)->u2 = _u0, (p)->v2 = _v0+(_h), \ - (p)->u3 = _u0+(_w), (p)->v3 = _v0+(_h) - - -/* - * Primitive handling macros - */ -#define setlen( p, _len ) ( ((P_TAG*)(p))->len = (unsigned char)(_len) ) -#define setaddr( p, _addr ) ( ((P_TAG*)(p))->addr = (unsigned int)(_addr) ) -#define setcode( p, _code ) ( ((P_TAG*)(p))->code = (unsigned char)(_code) ) - -#define getlen( p ) ( ((P_TAG*)(p))->len ) -#define getaddr( p ) ( ((P_TAG*)(p))->addr ) -#define getcode( p ) ( ((P_TAG*)(p))->code ) - -#define nextPrim( p ) (void*)((((P_TAG*)(p))->addr)|0x80000000) -#define isendprim( p ) ((((P_TAG*)(p))->addr)==0xffffff) - -#define addPrim( ot, p ) setaddr( p, getaddr( ot ) ), setaddr( ot, p ) -#define addPrims( ot, p0, p1 ) setaddr( p1, getaddr( ot ) ), setaddr( ot, p0 ) - -#define catPrim( p0, p1 ) setaddr( p0, p1 ) -#define termPrim( p ) setaddr( p, 0xffffff ) - -#define setSemiTrans( p, abe ) \ - ( (abe)?setcode( p, getcode( p )|0x2 ):setcode( p, getcode( p )&~0x2 ) ) - -#define setShadeTex( p, tge ) \ - ( (tge)?setcode( p, getcode( p )|0x1 ):setcode( p, getcode( p )&~0x1 ) ) - - -/* ORIGINAl CODE */ -#define setDrawMask( p, sb, mt ) \ - setlen( p, 1 ), (p)->code[0] = sb|(mt<<1), \ - setcode( p, 0xe6 ) - - -#define getTPage( tp, abr, x, y ) \ - ( (((x)&0x3ff)>>6) | (((y)>>8)<<4) | (((abr)&0x3)<<5) | (((tp)&0x3)<<7) ) - -#define getClut( x, y ) \ - ( ((y)<<6)|(((x)>>4)&0x3f) ) - - -/* - * Primitive initializers - */ 
-#define setPolyF3( p ) setlen( p, 4 ), setcode( p, 0x20 ) -#define setPolyFT3( p ) setlen( p, 7 ), setcode( p, 0x24 ) -#define setPolyG3( p ) setlen( p, 6 ), setcode( p, 0x30 ) -#define setPolyGT3( p ) setlen( p, 9 ), setcode( p, 0x34 ) - -#define setPolyF4( p ) setlen( p, 5 ), setcode( p, 0x28 ) -#define setPolyFT4( p ) setlen( p, 9 ), setcode( p, 0x2c ) -#define setPolyG4( p ) setlen( p, 8 ), setcode( p, 0x38 ) -#define setPolyGT4( p ) setlen( p, 12 ), setcode( p, 0x3c ) - -#define setSprt8( p ) setlen( p, 3 ), setcode( p, 0x74 ) -#define setSprt16( p ) setlen( p, 3 ), setcode( p, 0x7c ) -#define setSprt( p ) setlen( p, 4 ), setcode( p, 0x64 ) - -#define setTile1( p ) setlen( p, 2 ), setcode( p, 0x68 ) -#define setTile8( p ) setlen( p, 2 ), setcode( p, 0x70 ) -#define setTile16( p ) setlen( p, 2 ), setcode( p, 0x78 ) -#define setTile( p ) setlen( p, 3 ), setcode( p, 0x60 ) - -#define setLineF2( p ) setlen( p, 3 ), setcode( p, 0x40 ) -#define setLineG2( p ) setlen( p, 4 ), setcode( p, 0x50 ) - -#define setLineF3( p ) setlen( p, 5 ), setcode( p, 0x48 ), (p)->pad = 0x55555555 -#define setLineG3( p ) setlen( p, 7 ), setcode( p, 0x58 ), (p)->pad = 0x55555555, \ - (p)->p1 = 0, (p)->p2 = 0 - -#define setLineF4( p ) setlen( p, 6 ), setcode( p, 0x4c ), (p)->pad = 0x55555555 -#define setLineG4( p ) setlen( p, 9 ), setcode( p, 0x5c ), (p)->pad = 0x55555555, \ - (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 - -#define setFill( p ) setlen( p, 3 ), setcode( p, 0x02 ) - -#define setDrawOffset( p, _x, _y ) \ - setlen( p, 1 ), \ - (p)->code[0] = (_x&0x3FF)|((_y&0x3FF)<<11), \ - ((char*)(p)->code)[3] = 0xE5 - -#define setDrawArea( p, r ) \ - setlen( p, 2 ), \ - (p)->code[0] = ((r)->x&0x3FF)|(((r)->y&0x1FF)<<10), \ - (p)->code[1] = (((r)->x+(r)->w-1)&0x3FF)|((((r)->y+(r)->h-1)&0x1FF)<<10), \ - ((char*)&(p)->code[0])[3] = 0xE3, \ - ((char*)&(p)->code[1])[3] = 0xE4 - -#define setTexWindow( p, r ) \ - setlen( p, 1 ), \ - (p)->code[0] = 
((r)->w&0x1F)|(((r)->h&0x1F)<<5)|(((r)->x&0x1F)<<10)|(((r)->y&0x1F)<<15), \ - ((char*)&(p)->code[0])[3] = 0xE2 - -/* - * Primitive definitions - */ -typedef struct _P_TAG -{ - unsigned addr:24; - unsigned len:8; - u_char r,g,b,code; +#define setUVWH(p, _u0, _v0, _w, _h) \ + (p)->u0 = _u0, (p)->v0 = _v0, \ + (p)->u1 = (_u0 + (_w)), (p)->v1 = _v0, \ + (p)->u2 = _u0, (p)->v2 = (_v0 + (_h)), \ + (p)->u3 = (_u0 + (_w)), (p)->v3 = (_v0 + (_h)) + +/* Primitive handling macros */ + +#define setlen(p, _len) (((P_TAG *) (p))->len = (uint8_t) (_len)) +#define setaddr(p, _addr) (((P_TAG *) (p))->addr = (uint32_t) (_addr)) +#define setcode(p, _code) (((P_TAG *) (p))->code = (uint8_t) (_code)) +#define getlen(p) (((P_TAG *) (p))->len) +#define getaddr(p) (((P_TAG *) (p))->addr) +#define getcode(p) (((P_TAG *) (p))->code) + +#define nextPrim(p) (void *) (0x80000000 | (((P_TAG *) (p))->addr)) +#define isendprim(p) ((((P_TAG *) (p))->addr) == 0xffffff) +#define addPrim(ot, p) setaddr(p, getaddr(ot)), setaddr(ot, p) +#define addPrims(ot, a, b) setaddr(b, getaddr(ot)), setaddr(ot, a) +#define catPrim(a, b) setaddr(a, b) +#define termPrim(p) setaddr(p, 0xffffff) + +#define setSemiTrans(p, abe) \ + ((abe) ? (getcode(p) |= 2) : (getcode(p) &= ~2)) + +#define setShadeTex(p, tge) \ + ((tge) ? 
(getcode(p) |= 1) : (getcode(p) &= ~1)) + +#define getTPage(tp, abr, x, y) \ + ((((x) & 0x3ff) >> 6) | (((y) >> 8) << 4) | (((abr) & 3) << 5) | (((tp) & 3) << 7)) + +#define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f)) + +/* Primitive initializer macros */ + +#define setPolyF3(p) setlen(p, 4), setcode(p, 0x20) +#define setPolyFT3(p) setlen(p, 7), setcode(p, 0x24) +#define setPolyG3(p) setlen(p, 6), setcode(p, 0x30) +#define setPolyGT3(p) setlen(p, 9), setcode(p, 0x34) +#define setPolyF4(p) setlen(p, 5), setcode(p, 0x28) +#define setPolyFT4(p) setlen(p, 9), setcode(p, 0x2c) +#define setPolyG4(p) setlen(p, 8), setcode(p, 0x38) +#define setPolyGT4(p) setlen(p, 12), setcode(p, 0x3c) +#define setSprt8(p) setlen(p, 3), setcode(p, 0x74) +#define setSprt16(p) setlen(p, 3), setcode(p, 0x7c) +#define setSprt(p) setlen(p, 4), setcode(p, 0x64) +#define setTile1(p) setlen(p, 2), setcode(p, 0x68) +#define setTile8(p) setlen(p, 2), setcode(p, 0x70) +#define setTile16(p) setlen(p, 2), setcode(p, 0x78) +#define setTile(p) setlen(p, 3), setcode(p, 0x60) +#define setLineF2(p) setlen(p, 3), setcode(p, 0x40) +#define setLineG2(p) setlen(p, 4), setcode(p, 0x50) +#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), \ + (p)->pad = 0x55555555 +#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), \ + (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0 +#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), \ + (p)->pad = 0x55555555 +#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), \ + (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 +#define setFill(p) setlen(p, 3), setcode(p, 0x02) +#define setVram2Vram(p) setlen(p, 8), setcode(p, 0x80), \ + (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0 + +#define setDrawTPage(p, dfe, dtd, tpage) \ + setlen(p, 1), \ + (p)->code[0] = 0xe1000000 | tpage | (dfe << 10) | (dtd << 9) + +#define setDrawOffset(p, _x, _y) \ + setlen(p, 1), \ + (p)->code[0] = 0xe5000000 | (_x & 0x3ff) | ((_y & 0x3ff) << 11) + +#define 
setDrawMask(p, sb, mt) \ + setlen(p, 1), \ + (p)->code[0] = 0xe6000000 | sb | (mt << 1) + +#define setDrawArea(p, r) \ + setlen(p, 2), \ + (p)->code[0] = 0xe3000000 | ((r)->x & 0x3ff) | (((r)->y & 0x1ff) << 10), \ + (p)->code[1] = 0xe4000000 | (((r)->x + (r)->w - 1) & 0x3ff) | ((((r)->y + (r)->h - 1) & 0x1ff) << 10) + +#define setTexWindow(p, r) \ + setlen(p, 1), \ + (p)->code[0] = 0xe2000000 | ((r)->w & 0x1f) | (((r)->h & 0x1f) << 5) | (((r)->x & 0x1f) << 10) | (((r)->y & 0x1f) << 15) + +/* Primitive structure definitions */ + +typedef struct _P_TAG { + uint32_t addr:24; + uint32_t len:8; + uint8_t r, g, b, code; } P_TAG; -/* - * Polygon primitive definitions - */ -typedef struct _POLY_F3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short x1,y1; - short x2,y2; +typedef struct _POLY_F3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t x1, y1; + int16_t x2, y2; } POLY_F3; -typedef struct _POLY_F4 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short x1,y1; - short x2,y2; - short x3,y3; +typedef struct _POLY_F4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t x1, y1; + int16_t x2, y2; + int16_t x3, y3; } POLY_F4; -typedef struct _POLY_FT3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; - short x1,y1; - u_char u1,v1; - u_short tpage; - short x2,y2; - u_char u2,v2; - u_short pad; +typedef struct _POLY_FT3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t u0, v0; + uint16_t clut; + int16_t x1, y1; + uint8_t u1, v1; + uint16_t tpage; + int16_t x2, y2; + uint8_t u2, v2; + uint16_t pad; } POLY_FT3; -typedef struct _POLY_FT4 -{ - u_long tag; - u_char r0,g0,b0,code; - u_short x0,y0; - u_char u0,v0; - u_short clut; - short x1,y1; - u_char u1,v1; - u_short tpage; - short x2,y2; - u_char u2,v2; - u_short pad0; - short x3,y3; - u_char u3,v3; - u_short pad1; +typedef struct _POLY_FT4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + uint16_t x0, 
y0; + uint8_t u0, v0; + uint16_t clut; + int16_t x1, y1; + uint8_t u1, v1; + uint16_t tpage; + int16_t x2, y2; + uint8_t u2, v2; + uint16_t pad0; + int16_t x3, y3; + uint8_t u3, v3; + uint16_t pad1; } POLY_FT4; -typedef struct _POLY_G3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char r1,g1,b1,pad0; - short x1,y1; - u_char r2,g2,b2,pad1; - short x2,y2; +typedef struct _POLY_G3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t r1, g1, b1, pad0; + int16_t x1, y1; + uint8_t r2, g2, b2, pad1; + int16_t x2, y2; } POLY_G3; -typedef struct _POLY_G4 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char r1,g1,b1,pad0; - short x1,y1; - u_char r2,g2,b2,pad1; - short x2,y2; - u_char r3,g3,b3,pad2; - short x3,y3; +typedef struct _POLY_G4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t r1, g1, b1, pad0; + int16_t x1, y1; + uint8_t r2, g2, b2, pad1; + int16_t x2, y2; + uint8_t r3, g3, b3, pad2; + int16_t x3, y3; } POLY_G4; -typedef struct _POLY_GT3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; - u_char r1,g1,b1,pad0; - short x1,y1; - u_char u1,v1; - u_short tpage; - u_char r2,g2,b2,pad1; - short x2,y2; - u_char u2,v2; - u_short pad2; +typedef struct _POLY_GT3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t u0, v0; + uint16_t clut; + uint8_t r1, g1, b1, pad0; + int16_t x1, y1; + uint8_t u1, v1; + uint16_t tpage; + uint8_t r2, g2, b2, pad1; + int16_t x2, y2; + uint8_t u2, v2; + uint16_t pad2; } POLY_GT3; -typedef struct _POLY_GT4 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; - u_char r1,g1,b1,pad0; - short x1,y1; - u_char u1,v1; - u_short tpage; - u_char r2,g2,b2,pad1; - short x2,y2; - u_char u2,v2; - u_short pad2; - u_char r3,g3,b3,pad3; - short x3,y3; - u_char u3,v3; - u_short pad4; +typedef struct _POLY_GT4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t u0, v0; + 
uint16_t clut; + uint8_t r1, g1, b1, pad0; + int16_t x1, y1; + uint8_t u1, v1; + uint16_t tpage; + uint8_t r2, g2, b2, pad1; + int16_t x2, y2; + uint8_t u2, v2; + uint16_t pad2; + uint8_t r3, g3, b3, pad3; + int16_t x3, y3; + uint8_t u3, v3; + uint16_t pad4; } POLY_GT4; -/* - * Line primitive definitions - */ -typedef struct _LINE_F2 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short x1,y1; +typedef struct _LINE_F2 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t x1, y1; } LINE_F2; -typedef struct _LINE_G2 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char r1,g1,b1,p1; - short x1,y1; +typedef struct _LINE_G2 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t r1, g1, b1, p1; + int16_t x1, y1; } LINE_G2; -typedef struct _LINE_F3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short x1,y1; - short x2,y2; - u_long pad; /* actually a terminator for line loops */ +typedef struct _LINE_F3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t x1, y1; + int16_t x2, y2; + uint32_t pad; } LINE_F3; -typedef struct _LINE_G3 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char r1,g1,b1,p1; - short x1,y1; - u_char r2,g2,b2,p2; - short x2,y2; - u_long pad; /* actually a terminator for line loops */ +typedef struct _LINE_G3 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t r1, g1, b1, p1; + int16_t x1, y1; + uint8_t r2, g2, b2, p2; + int16_t x2, y2; + uint32_t pad; } LINE_G3; -typedef struct _LINE_F4 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short x1,y1; - short x2,y2; - short x3,y3; - u_long pad; +typedef struct _LINE_F4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t x1, y1; + int16_t x2, y2; + int16_t x3, y3; + uint32_t pad; } LINE_F4; -typedef struct _LINE_G4 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char r1,g1,b1,p1; - short x1,y1; - u_char r2,g2,b2,p2; - short x2,y2; - u_char 
r3,g3,b3,p3; - short x3,y3; - u_long pad; +typedef struct _LINE_G4 { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t r1, g1, b1, p1; + int16_t x1, y1; + uint8_t r2, g2, b2, p2; + int16_t x2, y2; + uint8_t r3, g3, b3, p3; + int16_t x3, y3; + uint32_t pad; } LINE_G4; -/* - * Tile and sprite primitive definitions - */ -typedef struct _TILE -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - short w,h; +typedef struct _TILE { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + int16_t w, h; } TILE; -typedef struct _TILE_1 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; -} TILE_1; - -typedef struct _TILE_8 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; -} TILE_8; - -typedef struct _TILE_16 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; -} TILE_16; - -/* - * Sprite primitive definitions - */ -typedef struct _SPRT -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; - u_short w,h; +struct _TILE_FIXED { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; +}; +typedef struct _TILE_FIXED TILE_1; +typedef struct _TILE_FIXED TILE_8; +typedef struct _TILE_FIXED TILE_16; + +typedef struct _SPRT { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t u0, v0; + uint16_t clut; + uint16_t w, h; } SPRT; -typedef struct _SPRT_8 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; -} SPRT_8; - -typedef struct _SPRT_16 -{ - u_long tag; - u_char r0,g0,b0,code; - short x0,y0; - u_char u0,v0; - u_short clut; -} SPRT_16; - -/* - * VRAM fill and transfer primitive definitions - */ - -typedef struct _DR_ENV -{ - u_long tag; - u_long code[15]; +struct _SPRT_FIXED { + uint32_t tag; + uint8_t r0, g0, b0, code; + int16_t x0, y0; + uint8_t u0, v0; + uint16_t clut; +}; +typedef struct _SPRT_FIXED SPRT_8; +typedef struct _SPRT_FIXED SPRT_16; + +typedef struct _DR_ENV { + uint32_t tag; + uint32_t code[15]; } DR_ENV; -typedef struct 
_DR_AREA -{ - u_long tag; - u_long code[2]; +typedef struct _DR_AREA { + uint32_t tag; + uint32_t code[2]; } DR_AREA; -typedef struct _DR_OFFSET -{ - u_long tag; - u_long code[1]; +typedef struct _DR_OFFSET { + uint32_t tag; + uint32_t code[1]; } DR_OFFSET; -typedef struct _DR_TWIN -{ - u_long tag; - u_long code[2]; +typedef struct _DR_TWIN { + uint32_t tag; + uint32_t code[2]; } DR_TWIN; -typedef struct _DR_TPAGE -{ - u_long tag; - u_long code[1]; +typedef struct _DR_TPAGE { + uint32_t tag; + uint32_t code[1]; } DR_TPAGE; -typedef struct _DR_MASK /* ORIGINAL */ -{ - u_long tag; - u_long code[1]; +typedef struct _DR_MASK { + uint32_t tag; + uint32_t code[1]; } DR_MASK; -typedef struct _FILL /* ORIGINAL */ -{ - u_long tag; - u_char r0,g0,b0,code; - u_short x0,y0; // Note: coordinates must be in 16 pixel steps - u_short w,h; +typedef struct _FILL { + uint32_t tag; + uint8_t r0, g0, b0, code; + uint16_t x0, y0; // Note: coordinates must be in 16 pixel steps + uint16_t w, h; } FILL; -typedef struct _VRAM2VRAM /* ORIGINAL */ -{ - u_long tag; - u_char p0,p1,p2,code; - u_short x0,y0; - u_short x1,y1; - u_short w,h; - u_long nop[4]; +typedef struct _VRAM2VRAM { + uint32_t tag; + uint8_t p0, p1, p2, code; + uint16_t x0, y0; + uint16_t x1, y1; + uint16_t w, h; + uint32_t pad[4]; } VRAM2VRAM; -/* - * General structs - */ +/* Structure definitions */ -typedef struct _RECT -{ - short x,y; - short w,h; +typedef struct _RECT { + int16_t x, y, w, h; } RECT; -typedef struct _DISPENV_RAW /* obsolete */ -{ - unsigned int vid_mode; // Video mode - short vid_xpos,vid_ypos; // Video position (not framebuffer) - short fb_x,fb_y; // Framebuffer display position +typedef struct _DISPENV_RAW { + uint32_t vid_mode; + int16_t vid_xpos, vid_ypos; + int16_t fb_x, fb_y; } DISPENV_RAW; -typedef struct _DISPENV -{ - RECT disp; - RECT screen; - char isinter; - char isrgb24; - char reverse; - char pad; +typedef struct _DISPENV { + RECT disp, screen; + uint8_t isinter, isrgb24, reverse; + uint8_t 
_reserved; } DISPENV; -typedef struct _DRAWENV -{ - RECT clip; // Drawing area - short ofs[2]; // GPU draw offset (relative to draw area) - RECT tw; // Texture window (doesn't do anything atm) - u_short tpage; // Initial tpage value - u_char dtd; // Dither processing flag (simply OR'ed to tpage) - u_char dfe; // Drawing to display area blocked/allowed (simply OR'ed to tpage) - u_char isbg; // Clear draw area if non-zero - u_char r0,g0,b0; // Draw area clear color (if isbg iz nonzero) - DR_ENV dr_env; // Draw mode packet area (used by PutDrawEnv) +typedef struct _DRAWENV { + RECT clip; // Drawing area + int16_t ofs[2]; // GPU draw offset (relative to draw area) + RECT tw; // Texture window (doesn't do anything atm) + uint16_t tpage; // Initial tpage value + uint8_t dtd; // Dither processing flag (simply OR'ed to tpage) + uint8_t dfe; // Drawing to display area blocked/allowed (simply OR'ed to tpage) + uint8_t isbg; // Clear draw area if non-zero + uint8_t r0, g0, b0; // Draw area clear color (if isbg is non-zero) + DR_ENV dr_env; // Draw mode packet area (used by PutDrawEnv) } DRAWENV; -typedef struct _TIM_IMAGE -{ - u_long mode; - RECT *crect; - u_long *caddr; - RECT *prect; - u_long *paddr; +typedef struct _TIM_IMAGE { + uint32_t mode; + RECT *crect; + uint32_t *caddr; + RECT *prect; + uint32_t *paddr; } TIM_IMAGE; +typedef struct _GsIMAGE { + uint32_t pmode; + int16_t px, py, pw, ph; + uint32_t *pixel; + int16_t cx, cy, cw, ch; + uint32_t *clut; +} GsIMAGE; + +/* Public API */ #ifdef __cplusplus extern "C" { #endif -// Function definitions (asm) - void ResetGraph(int mode); -int GetVideoMode(void); -void SetVideoMode(int mode); - -int GetODE(void); - -void PutDispEnvRaw(DISPENV_RAW *disp); /* obsolete */ -void PutDispEnv(DISPENV *disp); -void PutDrawEnv(DRAWENV *draw); - +GPU_VideoMode GetVideoMode(void); +void SetVideoMode(GPU_VideoMode mode); void SetDispMask(int mask); -int VSync(int m); -int DrawSync(int m); -void WaitGPUcmd(void); -void WaitGPUdma(void); 
+//void PutDispEnvRaw(const DISPENV_RAW *env); +void PutDispEnv(const DISPENV *env); +void PutDrawEnv(DRAWENV *env); +void PutDrawEnvFast(DRAWENV *env); -// Callback hook functions +int GetODE(void); +int VSync(int mode); +int DrawSync(int mode); + +void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); void *DrawSyncCallback(void (*func)(void)); -void LoadImage(RECT *rect, u_long *data); -void StoreImage(RECT *rect, u_long *data); - -void ClearOTagR(u_long* ot, int n); -void DrawOTag(u_long* ot); -void DrawPrim(void *pri); - -void AddPrim(u_long* ot, void* pri); +void LoadImage(const RECT *rect, const uint32_t *data); +void StoreImage(const RECT *rect, uint32_t *data); -// Function definitions (C) +void ClearOTagR(uint32_t *ot, size_t length); +void ClearOTag(uint32_t *ot, size_t length); +void DrawOTag(const uint32_t *ot); +void DrawOTag2(const uint32_t *ot); +void DrawOTagEnv(const uint32_t *ot, DRAWENV *env); +void DrawPrim(const uint32_t *pri); -int GetTimInfo(const u_long *tim, TIM_IMAGE *timimg); /* ORIGINAL */ +void AddPrim(uint32_t *ot, const void *pri); -DISPENV *SetDefDispEnv(DISPENV *disp, int x, int y, int w, int h); -DRAWENV *SetDefDrawEnv(DRAWENV *draw, int x, int y, int w, int h); +int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info); +int GetTimInfo(const uint32_t *tim, TIM_IMAGE *info); -// Debug font functions +DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h); +DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h); void FntLoad(int x, int y); -char *FntSort(u_long *ot, char *pri, int x, int y, const char *text); +char *FntSort(uint32_t *ot, char *pri, int x, int y, const char *text); int FntOpen(int x, int y, int w, int h, int isbg, int n); int FntPrint(int id, const char *fmt, ...); char *FntFlush(int id); diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h index 7c10533..ddc988d 100644 --- a/libpsn00b/include/psxgte.h +++ b/libpsn00b/include/psxgte.h @@ -1,44 +1,45 @@ 
-#ifndef _PSXGTE_H -#define _PSXGTE_H +/* + * PSn00bSDK GTE library + * (C) 2019-2022 Lameguy64 - MPL licensed + */ +#ifndef __PSXGTE_H +#define __PSXGTE_H -#define ONE 4096 +#include <stdint.h> +#define ONE (1 << 12) -// For compatibility with official library syntax -#define csin(a) isin(a) -#define ccos(a) icos(a) -#define rsin(a) isin(a) -#define rcos(a) icos(a) - +/* Structure definitions */ -typedef struct MATRIX { - short m[3][3]; - int t[3]; +typedef struct _MATRIX { + int16_t m[3][3]; + int32_t t[3]; } MATRIX; -typedef struct VECTOR { - int vx, vy, vz; +typedef struct _VECTOR { + int32_t vx, vy, vz; } VECTOR; -typedef struct SVECTOR { - short vx, vy, vz, pad; +typedef struct _SVECTOR { + int16_t vx, vy, vz, pad; } SVECTOR; -typedef struct CVECTOR { - unsigned char r, g, b, cd; +typedef struct _CVECTOR { + uint8_t r, g, b, cd; } CVECTOR; -typedef struct DVECTOR { - short vx, vy; +typedef struct _DVECTOR { + int16_t vx, vy; } DVECTOR; +/* Public API */ #ifdef __cplusplus extern "C" { #endif -void InitGeom(); +void InitGeom(void); // Integer SIN/COS functions (4096 = 360 degrees) // Does not use tables! @@ -70,11 +71,16 @@ void VectorNormalS(VECTOR *v0, SVECTOR *v1); void Square0(VECTOR *v0, VECTOR *v1); -int SquareRoot12( int v ); -int SquareRoot0( int v ); +int SquareRoot12(int v); +int SquareRoot0(int v); + +#define csin(a) isin(a) +#define ccos(a) icos(a) +#define rsin(a) isin(a) +#define rcos(a) icos(a) #ifdef __cplusplus } #endif -#endif // _PSXGTE_H +#endif diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index b060170..2106a53 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -17,6 +17,28 @@ typedef struct _DECDCTENV { int16_t dct[64]; // Inverse DCT matrix (2.14 fixed-point) } DECDCTENV; +// This is the "small" lookup table used by DecDCTvlc(). It can be copied to +// the scratchpad. 
+typedef struct _DECDCTTAB { + uint16_t lut0[2]; + uint32_t lut2[8]; + uint32_t lut3[64]; + uint16_t lut4[8]; + uint16_t lut5[8]; + uint16_t lut7[16]; + uint16_t lut8[32]; + uint16_t lut9[32]; + uint16_t lut10[32]; + uint16_t lut11[32]; + uint16_t lut12[32]; +} DECDCTTAB; + +// This is the "large" table used by DecDCTvlc2(). +typedef struct _DECDCTTAB2 { + uint32_t lut[8192]; + uint32_t lut00[512]; +} DECDCTTAB2; + typedef enum _DECDCTMODE { DECDCT_MODE_24BPP = 1, DECDCT_MODE_16BPP = 0, @@ -24,6 +46,23 @@ typedef enum _DECDCTMODE { DECDCT_MODE_RAW = -1 } DECDCTMODE; +typedef struct _VLC_Context { + const uint32_t *input; + uint32_t window, next_window, remaining; + uint16_t quant_scale; + int8_t is_v3, bit_offset, block_index, coeff_index; +} VLC_Context; + +// Despite what some docs claim, the "number of 32-byte blocks" and "always +// 0x3800" fields are actually a single 32-bit field which is copied over to +// the output buffer, then parsed by DecDCTin() and written to the MDEC0 +// register. +typedef struct { + uint32_t mdec0_header; + uint16_t quant_scale; + uint16_t version; +} BS_Header; + /* Public API */ #ifdef __cplusplus @@ -40,7 +79,7 @@ extern "C" { * * @param mode */ -void DecDCTReset(int32_t mode); +void DecDCTReset(int mode); /** * @brief Uploads the specified decoding environment's quantization tables and @@ -61,15 +100,15 @@ void DecDCTReset(int32_t mode); * @param env Pointer to DECDCTENV or 0 for default tables * @param mono 0 for color (normal), 1 for monochrome */ -void DecDCTPutEnv(const DECDCTENV *env, int32_t mono); +void DecDCTPutEnv(const DECDCTENV *env, int mono); /** - * @brief Sets up the MDEC to start fetching and decoding a stream from the - * given address in main RAM. The first 32-bit word is initially copied to the - * MDEC0 register, then all subsequent data is read in 128-byte (32-word) - * chunks. 
The length of the stream (in 32-bit units, minus the first word) - * must be encoded in the lower 16 bits of the first word, as expected by the - * MDEC. + * @brief Sets up the MDEC to start fetching and decoding the given buffer. + * This function is meant to be used with buffers generated by DecDCTvlc(): the + * first 32-bit word of the buffer is initially copied to the MDEC0 register, + * then all subsequent data is read in 128-byte (32-word) chunks. The length of + * the stream (in 32-bit units, minus the first word) is encoded by DecDCTvlc() + * in the lower 16 bits of the first word. * * The mode argument optionally specifies the output color depth (0 for 16bpp, * 1 for 24bpp) if not already set in the first word. Passing -1 will result in @@ -79,7 +118,7 @@ void DecDCTPutEnv(const DECDCTENV *env, int32_t mono); * @param data * @param mode DECDCT_MODE_* or -1 */ -void DecDCTin(const uint32_t *data, int32_t mode); +void DecDCTin(const uint32_t *data, int mode); /** * @brief Configures the MDEC to automatically fetch data (the input stream, @@ -111,9 +150,9 @@ void DecDCTinRaw(const uint32_t *data, size_t length); * stream (usually a whole frame) is being written to main RAM. * * @param mode - * @return 0 or -1 in case of a timeout (mode = 0) / MDEC busy flag (mode = 1) + * @return 0 or -1 in case of a timeout (mode = 0), MDEC busy flag (mode = 1) */ -int32_t DecDCTinSync(int32_t mode); +int DecDCTinSync(int mode); /** * @brief Configures the MDEC to automatically transfer decoded image data in @@ -142,9 +181,204 @@ void DecDCTout(uint32_t *data, size_t length); * to register a callback that calls DecDCTin() to feed the MDEC. 
* * @param mode - * @return 0 or -1 in case of a timeout (mode = 0) / DMA busy flag (mode = 1) + * @return 0 or -1 in case of a timeout (mode = 0), DMA busy flag (mode = 1) + */ +int DecDCToutSync(int mode); + +/** + * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * frame) into a buffer that can be passed to DecDCTin(). This function uses a + * small (<1 KB) lookup table combined with the GTE to accelerate the process; + * performance is roughly on par with DecDCTvlcStart2() if the lookup table + * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTable(). The + * contents of the GTE's LZCR register, if any, will be destroyed. + * + * A VLC_Context object must be created and passed to this function, which will + * then proceed to initialize its fields. The max_size argument sets the + * maximum number of words that will be written to the output buffer; if more + * data needs to be written, this function will return 1. To continue decoding + * call DecDCTvlcContinue() with the same VLC_Context object (the output buffer + * can be different). If max_size = 0, the entire frame will always be decoded + * in one shot. + * + * Only bitstream version 2 is currently supported. + * + * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the + * first time. Attempting to call this function with the GTE disabled will + * result in a crash. + * + * @param ctx Pointer to VLC_Context structure (which will be initialized) + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @param bs + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); + +/** + * @brief Resumes the decompression process started by DecDCTvlcStart(). 
The + * state of the decompressor is contained entirely in the VLC_Context structure + * so an arbitrary number of bitstreams can be decoded concurrently (although + * the limited CPU power makes it impractical to do so) by keeping a separate + * context for each bitstream. + * + * This function behaves like DecDCTvlcStart(), returning 1 if more data has to + * be written or 0 otherwise. DecDCTvlcContinue() shall not be called after a + * previous call to DecDCTvlcStart() or DecDCTvlcContinue() with the same + * context returned 0; in that case the context shall be discarded or reused to + * decode another bitstream. + * + * The contents of the GTE's LZCR register, if any, will be destroyed. + * + * See DecDCTvlcStart() for more details. + * + * @param ctx Pointer to already initialized VLC_Context structure + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); + +/** + * A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for compatibility + * with the official SDK. This function uses an internal context; additionally, + * the maximum output buffer size is not passed as an argument but is instead + * set by calling DecDCTvlcSize(). + * + * This function behaves identically to DecDCTvlcContinue() if bs = 0 and + * DecDCTvlcStart() otherwise. + * + * See DecDCTvlcStart() for more details. + * + * WARNING: InitGeom() must be called prior to using DecDCTvlc() for the first + * time. Attempting to call this function with the GTE disabled will result in + * a crash. + * + * @param bs Pointer to bitstream data or 0 to resume decoding + * @param buf + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlc(const uint32_t *bs, uint32_t *buf); + +/** + * @brief Sets the maximum number of 32-bit words that a single call to + * DecDCTvlc() will output. 
If size = 0, the entire frame will always be + * decoded in one shot. + * + * @param size Maximum number of 32-bit words to output + * @return Previously set value + */ +size_t DecDCTvlcSize(size_t size); + +/** + * @brief Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified + * address. A copy of this table is always present in main RAM, however this + * function can be used to copy it to the scratchpad region to boost + * decompression performance. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart(), + * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table + * copied. Call DecDCTvlcCopyTable(0) to revert to using the library's internal + * table in main RAM. + * + * @param addr Pointer to free area in scratchpad region or 0 to reset + */ +void DecDCTvlcCopyTable(DECDCTTAB *addr); + +/** + * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * frame) into a buffer that can be passed to DecDCTin(). This function uses a + * large (34 KB) lookup table that must be loaded into main RAM beforehand by + * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad. + * Depending on the specific bitstream being decoded DecDCTvlcStart2() might be + * slightly faster or slower than DecDCTvlcStart() with its lookup table copied + * to the scratchpad (see DecDCTvlcCopyTable()). DecDCTvlcStart() with the + * table in main RAM tends to be much slower. + * + * A VLC_Context object must be created and passed to this function, which will + * then proceed to initialize its fields. The max_size argument sets the + * maximum number of words that will be written to the output buffer; if more + * data needs to be written, this function will return 1. To continue decoding + * call DecDCTvlcContinue2() with the same VLC_Context object (the output + * buffer can be different). 
If max_size = 0, the entire frame will always be + * decoded in one shot. + * + * Only bitstream version 2 is currently supported. + * + * @param ctx Pointer to VLC_Context structure (which will be initialized) + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @param bs + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); + +/** + * @brief Resumes the decompression process started by DecDCTvlcStart2(). The + * state of the decompressor is contained entirely in the VLC_Context structure + * so an arbitrary number of bitstreams can be decoded concurrently (although + * the limited CPU power makes it impractical to do so) by keeping a separate + * context for each bitstream. + * + * This function behaves like DecDCTvlcStart2(), returning 1 if more data has + * to be written or 0 otherwise. DecDCTvlcContinue2() shall not be called after + * a previous call to DecDCTvlcStart2() or DecDCTvlcContinue2() with the same + * context returned 0; in that case the context shall be discarded or reused to + * decode another bitstream. + * + * See DecDCTvlcStart2() for more details. + * + * @param ctx Pointer to already initialized VLC_Context structure + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); + +/** + * A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for + * compatibility with the official SDK. This function uses an internal context; + * additionally, the maximum output buffer size is not passed as an argument + * but is instead set by calling DecDCTvlcSize2(). + * + * This function behaves identically to DecDCTvlcContinue2() if bs = 0 and + * DecDCTvlcStart2() otherwise. 
The table argument can optionally be passed to + * use a custom lookup table. If zero, the last pointer passed to + * DecDCTvlcBuild() will be used. + * + * See DecDCTvlcStart2() for more details. + * + * @param bs Pointer to bitstream data or 0 to resume decoding + * @param buf + * @param table Pointer to decompressed table or 0 to use last table used + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table); + +/** + * @brief Sets the maximum number of 32-bit words that a single call to + * DecDCTvlc2() will output. If size = 0, the entire frame will always be + * decoded in one shot. + * + * @param size Maximum number of 32-bit words to output + * @return Previously set value + */ +size_t DecDCTvlcSize2(size_t size); + +/** + * @brief Generates the lookup table required by DecDCTvlcStart2(), + * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the + * specified buffer. Since the table is relatively large (34 KB), it is + * recommended to only generate it in a dynamically-allocated buffer when + * needed and deallocate the buffer afterwards. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart2() and + * DecDCTvlcContinue2() will automatically use the last table decompressed. 
+ * + * @param table */ -int32_t DecDCToutSync(int32_t mode); +void DecDCTvlcBuild(DECDCTTAB2 *table); #ifdef __cplusplus } diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h index da000e3..cf78e3d 100644 --- a/libpsn00b/include/psxspu.h +++ b/libpsn00b/include/psxspu.h @@ -1,138 +1,124 @@ +/* + * PSn00bSDK SPU library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ + #ifndef __PSXSPU_H #define __PSXSPU_H -#include <sys/types.h> - -// Mask settings bits for specifying voice channels - -#define SPU_00CH (1<<0) -#define SPU_01CH (1<<1) -#define SPU_02CH (1<<2) -#define SPU_03CH (1<<3) -#define SPU_04CH (1<<4) -#define SPU_05CH (1<<5) -#define SPU_06CH (1<<6) -#define SPU_07CH (1<<7) -#define SPU_08CH (1<<8) -#define SPU_09CH (1<<9) -#define SPU_10CH (1<<10) -#define SPU_11CH (1<<11) -#define SPU_12CH (1<<12) -#define SPU_13CH (1<<13) -#define SPU_14CH (1<<14) -#define SPU_15CH (1<<15) -#define SPU_16CH (1<<16) -#define SPU_17CH (1<<17) -#define SPU_18CH (1<<18) -#define SPU_19CH (1<<19) -#define SPU_20CH (1<<20) -#define SPU_21CH (1<<21) -#define SPU_22CH (1<<22) -#define SPU_23CH (1<<23) - -#define SPU_0CH SPU_00CH -#define SPU_1CH SPU_01CH -#define SPU_2CH SPU_02CH -#define SPU_3CH SPU_03CH -#define SPU_4CH SPU_04CH -#define SPU_5CH SPU_05CH -#define SPU_6CH SPU_06CH -#define SPU_7CH SPU_07CH -#define SPU_8CH SPU_08CH -#define SPU_9CH SPU_09CH - -#define SPU_KEYCH(x) (1<<(x)) -#define SPU_VOICECH(x) SPU_KEYCH(x) - - -// Mask setting bits for SpuVoiceAttr.mask - -#define SPU_VOICE_VOLL (1<<0) // Left volume -#define SPU_VOICE_VOLR (1<<1) // Right volume -#define SPU_VOICE_VOLMODEL (1<<2) // Left volume mode -#define SPU_VOICE_VOLMODER (1<<3) // Right volume mode -#define SPU_VOICE_PITCH (1<<4) // Pitch tone -#define SPU_VOICE_NOTE (1<<5) // Pitch note -#define SPU_VOICE_SAMPLE_NOTE (1<<6) // Sample base frequency? 
-#define SPU_VOICE_WDSA (1<<7) // Sample start address (in SPU RAM) -#define SPU_VOICE_ADSR_AMODE (1<<8) // ADSR attack mode -#define SPU_VOICE_ADSR_SMODE (1<<9) // ADSR sustain mode -#define SPU_VOICE_ADSR_RMODE (1<<10) // ADSR release mode -#define SPU_VOICE_ADSR_AR (1<<11) // ADSR attack rate -#define SPU_VOICE_ADSR_DR (1<<12) // ADSR decay rate -#define SPU_VOICE_ADSR_SR (1<<13) // ADSR sustain rate -#define SPU_VOICE_ADSR_RR (1<<14) // ADSR release rate -#define SPU_VOICE_ADSR_SL (1<<15) // ADSR sustain level -#define SPU_VOICE_LSAX (1<<16) // Loop start address (in SPU RAM) -#define SPU_VOICE_ADSR_ADSR1 (1<<17) // adsr1 for VagAtr (?) -#define SPU_VOICE_ADSR_ADSR2 (1<<18) // adsr2 for VagAtr (?) - - -#define SPU_TRANSFER_BY_DMA 0 - - -typedef struct { - short left; - short right; +#include <stdint.h> +#include <stddef.h> +#include <hwregs_c.h> + +/* Definitions */ + +typedef enum _SPU_AttrMask { + SPU_VOICE_VOLL = 1 << 0, // Left volume + SPU_VOICE_VOLR = 1 << 1, // Right volume + SPU_VOICE_VOLMODEL = 1 << 2, // Left volume mode + SPU_VOICE_VOLMODER = 1 << 3, // Right volume mode + SPU_VOICE_PITCH = 1 << 4, // Pitch tone + SPU_VOICE_NOTE = 1 << 5, // Pitch note + SPU_VOICE_SAMPLE_NOTE = 1 << 6, // Sample base frequency? 
+ SPU_VOICE_WDSA = 1 << 7, // Sample start address (in SPU RAM) + SPU_VOICE_ADSR_AMODE = 1 << 8, // ADSR attack mode + SPU_VOICE_ADSR_SMODE = 1 << 9, // ADSR sustain mode + SPU_VOICE_ADSR_RMODE = 1 << 10, // ADSR release mode + SPU_VOICE_ADSR_AR = 1 << 11, // ADSR attack rate + SPU_VOICE_ADSR_DR = 1 << 12, // ADSR decay rate + SPU_VOICE_ADSR_SR = 1 << 13, // ADSR sustain rate + SPU_VOICE_ADSR_RR = 1 << 14, // ADSR release rate + SPU_VOICE_ADSR_SL = 1 << 15, // ADSR sustain level + SPU_VOICE_LSAX = 1 << 16, // Loop start address (in SPU RAM) + SPU_VOICE_ADSR_ADSR1 = 1 << 17, + SPU_VOICE_ADSR_ADSR2 = 1 << 18 +} SPU_AttrMask; + +typedef enum _SPU_TransferMode { + SPU_TRANSFER_BY_DMA = 0, + SPU_TRANSFER_BY_IO = 1 +} SPU_TransferMode; + +typedef enum _SPU_WaitMode { + SPU_TRANSFER_PEEK = 0, + SPU_TRANSFER_WAIT = 1 +} SPU_WaitMode; + +/* Structure definitions */ + +typedef struct _SpuVolume { + int16_t left, right; } SpuVolume; -typedef struct { - SpuVolume vol; // 0 - unsigned short freq; // 4 - unsigned short addr; // 6 - unsigned short loop_addr; // 8 - unsigned short res; // 10 - unsigned int adsr_param; // 12 -} SpuVoiceRaw; - -typedef struct { - u_int voice; - u_int mask; - SpuVolume volume; - SpuVolume volmode; - SpuVolume volumex; - u_short pitch; - u_short note; - u_short sample_note; - short envx; - u_int addr; - u_int loop_addr; - int a_mode; - int s_mode; - int r_mode; - u_short ar; - u_short dr; - u_short sr; - u_short rr; - u_short sl; - u_short adsr1; - u_short adsr2; +typedef struct _SpuVoiceAttr { + uint32_t voice; + uint32_t mask; + SpuVolume volume, volmode, volumex; + uint16_t pitch, note, sample_note; + int16_t envx; + uint32_t addr, loop_addr; + int a_mode, s_mode, r_mode; + uint16_t ar, dr, sr, rr, sl; + uint16_t adsr1, adsr2; } SpuVoiceAttr; +typedef struct _SpuExtAttr { + SpuVolume volume; + int reverb, mix; +} SpuExtAttr; + +typedef struct _SpuCommonAttr { + uint32_t mask; + SpuVolume mvol, mvolmode, mvolx; + SpuExtAttr cd, ext; +} 
SpuCommonAttr; + +/* "Useless" macros for official SDK compatibility */ + +#define SpuSetCommonMasterVolume(left, right) \ + (SPU_MASTER_VOL_L = (left), SPU_MASTER_VOL_R = (right)) +#define SpuSetCommonCDVolume(left, right) \ + (SPU_CD_VOL_L = (left), SPU_CD_VOL_R = (right)) +#define SpuSetCommonCDReverb(enable) \ + ((enable) ? (SPU_CTRL |= 0x0004) : (SPU_CTRL &= 0xfffb)) +#define SpuSetCommonExtVolume(left, right) \ + (SPU_EXT_VOL_L = (left), SPU_EXT_VOL_R = (right)) +#define SpuSetCommonExtReverb(enable) \ + ((enable) ? (SPU_CTRL |= 0x0002) : (SPU_CTRL &= 0xfffd)) + +#define SpuSetReverbAddr(addr) \ + (SPU_REVERB_ADDR = ((addr) + 7) / 8) +#define SpuSetIRQAddr(addr) \ + (SPU_IRQ_ADDR = ((addr) + 7) / 8) + +#define SpuSetVoiceVolume(ch, left, right) \ + (SPU_CH_VOL_L(ch) = (left), SPU_CH_VOL_R(ch) = (right)) +#define SpuSetVoicePitch(ch, pitch) \ + (SPU_CH_FREQ(ch) = (pitch)) +#define SpuSetVoiceStartAddr(ch, addr) \ + (SPU_CH_ADDR(ch) = ((addr) + 7) / 8) +#define SpuSetVoiceADSR(ch, ar, dr, sr, rr, sl) \ + (SPU_CH_ADSR(ch) = ((sl)) | ((dr) << 4) | ((ar) << 8) | ((rr) << 16) | ((sr) << 22) | (1 << 30)) + +#define SpuSetKey(enable, voice_bit) \ + ((enable) ? 
(SPU_KEY_ON = (voice_bit)) : (SPU_KEY_OFF = (voice_bit))) + +/* Public API */ + #ifdef __cplusplus extern "C" { #endif -void SpuInit(); - -void SpuSetVoiceRaw( int voice, const SpuVoiceRaw* param ); -void SpuReverbOn( int voice ); -void SpuSetReverb(); - -void SpuSetReverbAddr( int addr ); -void SpuSetReverbVolume( int left, int right ); +void SpuInit(void); - -void SpuSetKey(int on_off, u_int voice_bit); - -// SPU transfer functions -int SpuSetTransferMode(int mode); -int SpuSetTransferStartAddr(int addr); -int SpuWrite(const unsigned char* addr, int size); -void SpuWait(); +void SpuRead(uint32_t *data, size_t size); +void SpuWrite(const uint32_t *data, size_t size); +SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode); +uint32_t SpuSetTransferStartAddr(uint32_t addr); +int SpuIsTransferCompleted(int mode); #ifdef __cplusplus } #endif -#endif
\ No newline at end of file +#endif diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h index 4c4fcd3..1888c69 100644 --- a/libpsn00b/include/stdlib.h +++ b/libpsn00b/include/stdlib.h @@ -1,36 +1,27 @@ /* - * stdlib.h - * - * Standard library functions - * - * Inherited from PSXSDK + * PSn00bSDK standard library + * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef _STDLIB_H -#define _STDLIB_H +#ifndef __STDLIB_H +#define __STDLIB_H -#define RAND_MAX 0x7fff +#include <stddef.h> -/* Conversion functions (not yet implemented) */ +/* Definitions */ -/* -extern int atoi(char *s); -extern long atol(char *s); -extern char atob(char *s); // Is this right? -*/ - -// Quick sort (not yet implemented) +#define RAND_MAX 0x7fff -//void qsort(void *base , int nel , int width , int (*cmp)(const void *,const void *)); +/* API */ #ifdef __cplusplus extern "C" { #endif -extern int __argc; -extern const char **__argv; +extern int __argc; +extern const char **__argv; -int rand(); +int rand(void); void srand(unsigned long seed); int abs(int j); @@ -39,16 +30,17 @@ long long strtoll(const char *nptr, char **endptr, int base); long strtol(const char *nptr, char **endptr, int base); long double strtold(const char *nptr, char **endptr); -// Note: these use floats internally! 
double strtod(const char *nptr, char **endptr); float strtof(const char *nptr, char **endptr); -// Memory allocation functions -void _mem_init(int ram_size, int stack_max_size); -void InitHeap(unsigned int *addr, int size); -int SetHeapSize(int size); -void *malloc(int size); -void *calloc(int number, int size); +void _mem_init(size_t ram_size, size_t stack_max_size); +void InitHeap(void *addr, size_t size); +//int SetHeapSize(size_t size); +void *sbrk(ptrdiff_t incr); + +void *malloc(size_t size); +void *calloc(size_t num, size_t size); +void *realloc(void *ptr, size_t size); void free(void *ptr); #ifdef __cplusplus @@ -56,4 +48,3 @@ void free(void *ptr); #endif #endif - diff --git a/libpsn00b/include/string.h b/libpsn00b/include/string.h index 365d238..ceee066 100644 --- a/libpsn00b/include/string.h +++ b/libpsn00b/include/string.h @@ -1,13 +1,10 @@ /* - * string.h - * - * Prototypes for string functions of the C library - * - * PSXSDK + * PSn00bSDK standard library + * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef _STRING_H -#define _STRING_H +#ifndef __STRING_H +#define __STRING_H #ifdef __cplusplus extern "C" { @@ -19,11 +16,6 @@ char *strpbrk(const char *dst , const char *src); char *strtok(char *s , char *set); char *strstr(const char *big , const char *little); -//int strspn(char *s , char *set); -//int strcspn(char *s , char *set); -//int index(char *s , int c); -//int rindex(char *s , int c); - char *strcat(char *s , const char *append); char *strncat(char *s , const char *append, int n); char *strcpy(char *dst , const char *src); @@ -34,9 +26,6 @@ char *strrchr(const char *s , int c); void *memmove(void *dst , const void *src , int n); void *memchr(void *s , int c , int n); - -// Functions converted to ASM - void *memcpy(void *dst , const void *src , int n); void *memset(void *dst , char c , int n); int memcmp(const void *b1 , const void *b2 , int n); @@ -46,4 +35,3 @@ int memcmp(const void *b1 , const void *b2 , int n); 
#endif #endif - diff --git a/libpsn00b/include/strings.h b/libpsn00b/include/strings.h index e5e88d4..7223ab9 100644 --- a/libpsn00b/include/strings.h +++ b/libpsn00b/include/strings.h @@ -1,18 +1,19 @@ -/* - * strings.h - * - * PSXSDK +/* + * PSn00bSDK standard library + * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef _STRINGS_H -#define _STRINGS_H +#ifndef __STRINGS_H +#define __STRINGS_H #include <string.h> -#define bcopy(src,dst,len) memmove(dst,src,len) -#define bzero(ptr, len) memset(ptr, 0, len) -#define bcmp(b1,b2,len) memcmp(b1,b2,len) -#define index(s, c) strchr(s, c) -#define rindex(s, c) strrchr(s, c) +/* Compatibility macros (this header is useless) */ + +#define bcopy(src, dst, len) memmove(dst, src, len) +#define bzero(ptr, len) memset(ptr, 0, len) +#define bcmp(b1, b2, len) memcmp(b1, b2, len) +#define index(s, c) strchr(s, c) +#define rindex(s, c) strrchr(s, c) #endif diff --git a/libpsn00b/ldscripts/exe.ld b/libpsn00b/ldscripts/exe.ld index 583d76a..a8dfccf 100644 --- a/libpsn00b/ldscripts/exe.ld +++ b/libpsn00b/ldscripts/exe.ld @@ -6,6 +6,10 @@ * using $gp to reference them) is fully supported; the block is made up of * sections .sdata and .sbss. Note that GP-relative addressing is not * compatible with dynamic linking, as DLLs require GP to be unused. + * + * This linker script assumes main RAM is 8 MB to allow executables meant for + * devkits or arcade systems to be built, however the executable conversion + * tool (elf2x) will throw a warning if the size exceeds 2 MB. 
*/ /*OUTPUT_FORMAT(elf32-littlemips)*/ @@ -15,7 +19,7 @@ ENTRY(_start) MEMORY { /* Mapped into KSEG0 */ KERNEL_RAM (rwx) : ORIGIN = 0x80000000, LENGTH = 0x010000 - APP_RAM (rwx) : ORIGIN = 0x80010000, LENGTH = 0x1f0000 + APP_RAM (rwx) : ORIGIN = 0x80010000, LENGTH = 0x7f0000 } SECTIONS { diff --git a/libpsn00b/libc/_start.s b/libpsn00b/libc/_start.s new file mode 100644 index 0000000..56075c8 --- /dev/null +++ b/libpsn00b/libc/_start.s @@ -0,0 +1,18 @@ +# PSn00bSDK _start() trampoline +# (C) 2022 spicyjpeg - MPL licensed +# +# This file provides a weak function that can be easily overridden to e.g. set +# $sp or perform additional initialization before the "real" _start() +# (_start_inner()) is called. + +.set noreorder +.section .text + +.global _start +.type _start, @function +.weak _start +_start: + la $gp, _gp + + j _start_inner + nop diff --git a/libpsn00b/libc/c++-support.cxx b/libpsn00b/libc/cpp_support.cpp index 38354dd..f451044 100644 --- a/libpsn00b/libc/c++-support.cxx +++ b/libpsn00b/libc/cpp_support.cpp @@ -1,8 +1,22 @@ +/* + * PSn00bSDK C++ support library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ #include <stdint.h> #include <stdlib.h> #include <stdio.h> +/* GCC builtins */ + +extern "C" void *__builtin_new(size_t size) { + return malloc(size); +} + +extern "C" void __builtin_delete(void *ptr) { + free(ptr); +} + /* Default new/delete operators */ void *operator new(size_t size) noexcept { @@ -21,20 +35,15 @@ void operator delete[](void *ptr) noexcept { free(ptr); } -/* - * https://en.cppreference.com/w/cpp/memory/new/operator_delete - * - * Called if a user-defined replacement is provided, except that it's - * unspecified whether other overloads or this overload is called when deleting - * objects of incomplete type and arrays of non-class and trivially - * destructible class types. - * - * A memory allocator can use the given size to be more efficient. 
- */ +// https://en.cppreference.com/w/cpp/memory/new/operator_delete void operator delete(void *ptr, size_t size) noexcept { free(ptr); } +void operator delete[](void *ptr, size_t size) noexcept { + free(ptr); +} + /* Placement new operators */ void *operator new(size_t size, void *ptr) noexcept { diff --git a/libpsn00b/libc/malloc.c b/libpsn00b/libc/malloc.c new file mode 100644 index 0000000..9d538cd --- /dev/null +++ b/libpsn00b/libc/malloc.c @@ -0,0 +1,235 @@ +/* + * PSn00bSDK default memory allocator + * (C) 2022 Nicolas Noble, spicyjpeg + * + * This code is based on psyqo's malloc implementation, available here: + * https://github.com/grumpycoders/pcsx-redux/blob/main/src/mips/psyqo/src/alloc.c + * + * Heap management and memory allocation are completely separate, with the + * latter being built on top of the former. This makes it possible to override + * only InitHeap() and sbrk() while still using the default allocator, or + * override malloc()/realloc()/free() while using the default heap manager. 
+ */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#define _align(x, n) (((x) + ((n) - 1)) & ~((n) - 1)) + +/* Private types */ + +typedef struct _BlockHeader { + struct _BlockHeader *prev, *next; + void *ptr; + size_t size; +} BlockHeader; + +/* Data */ + +static void *_heap_start, *_heap_end, *_heap_limit; +static void *_alloc_start = 0; +static BlockHeader *_alloc_head = 0, *_alloc_tail = 0; + +/* Heap management API */ + +__attribute__((weak)) void InitHeap(void *addr, size_t size) { + _heap_start = addr; + _heap_end = addr; + _heap_limit = (void *) ((uintptr_t) addr + size); +} + +__attribute__((weak)) void *sbrk(ptrdiff_t incr) { + void *old_end = _heap_end; + void *new_end = (void *) _align((uintptr_t) old_end + incr, 8); + + if (new_end > _heap_limit) + return 0; + + _heap_end = new_end; + return old_end; +} + +/* Memory allocator */ + +static BlockHeader *_find_fit(BlockHeader *head, size_t size) { + BlockHeader *prev = head; + + for (; prev; prev = prev->next) { + if (prev->next) { + uintptr_t next_bot = (uintptr_t) prev->next; + next_bot -= (uintptr_t) prev->ptr + prev->size; + + if (next_bot >= size) + return prev; + } + } + + return prev; +} + +__attribute__((weak)) void *malloc(size_t size) { + size_t _size = _align(size + sizeof(BlockHeader), 8); + + // Nothing's initialized yet? Let's just initialize the bottom of our heap, + // flag it as allocated. + if (!_alloc_head) { + if (!_alloc_start) + _alloc_start = sbrk(0); + + BlockHeader *new = (BlockHeader *) sbrk(_size); + if (!new) + return 0; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size - sizeof(BlockHeader); + new->prev = 0; + new->next = 0; + + _alloc_head = new; + _alloc_tail = new; + return ptr; + } + + // We *may* have the bottom of our heap that has shifted, because of a free. + // So let's check first if we have free space there, because I'm nervous + // about having an incomplete data structure. 
+ if (((uintptr_t) _alloc_start + _size) < ((uintptr_t) _alloc_head)) { + BlockHeader *new = (BlockHeader *) _alloc_start; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size - sizeof(BlockHeader); + new->prev = 0; + new->next = _alloc_head; + + _alloc_head->prev = new; + _alloc_head = new; + return ptr; + } + + // No luck at the beginning of the heap, let's walk the heap to find a fit. + BlockHeader *prev = _find_fit(_alloc_head, _size); + if (prev) { + BlockHeader *new = (BlockHeader *) ((uintptr_t) prev->ptr + prev->size); + + void *ptr = (void *)((uintptr_t) new + sizeof(BlockHeader)); + new->ptr = ptr; + new->size = _size - sizeof(BlockHeader); + new->prev = prev; + new->next = prev->next; + + (new->next)->prev = new; + prev->next = new; + return ptr; + } + + // Time to extend the size of the heap. + BlockHeader *new = (BlockHeader *) sbrk(_size); + if (!new) + return 0; + + void *ptr = (void *) &new[1]; + new->ptr = ptr; + new->size = _size - sizeof(BlockHeader); + new->prev = _alloc_tail; + new->next = 0; + + _alloc_tail->next = new; + _alloc_tail = new; + return ptr; +} + +__attribute__((weak)) void *calloc(size_t num, size_t size) { + return malloc(num * size); +} + +__attribute__((weak)) void *realloc(void *ptr, size_t size) { + if (!size) { + free(ptr); + return 0; + } + if (!ptr) + return malloc(size); + + size_t _size = _align(size + sizeof(BlockHeader), 8); + + BlockHeader *prev = (BlockHeader *) ((uintptr_t) ptr - sizeof(BlockHeader)); + + // New memory block shorter? + if (prev->size >= _size) { + prev->size = _size; + if (!prev->next) + sbrk((ptr - sbrk(0)) + _size); + + return ptr; + } + + // New memory block larger; is it the last one? + if (!prev->next) { + void *new = sbrk(_size - prev->size); + if (!new) + return 0; + + prev->size = _size; + return ptr; + } + + // Do we have free memory after it? + if (((prev->next)->ptr - ptr) > _size) { + prev->size = _size; + return ptr; + } + + // No luck. 
+ void *new = malloc(_size); + if (!new) + return 0; + + __builtin_memcpy(new, ptr, prev->size); + free(ptr); + return new; +} + +__attribute__((weak)) void free(void *ptr) { + if (!ptr || !_alloc_head) + return; + + // First block; bumping head ahead. + if (ptr == _alloc_head->ptr) { + size_t size = _alloc_head->size; + size += (uintptr_t) _alloc_head->ptr - (uintptr_t) _alloc_head; + _alloc_head = _alloc_head->next; + + if (_alloc_head) { + _alloc_head->prev = 0; + } else { + _alloc_tail = 0; + sbrk(-size); + } + + return; + } + + // Finding the proper block + BlockHeader *cur = _alloc_head; + for (cur = _alloc_head; ptr != cur->ptr; cur = cur->next) { + if (!cur->next) + return; + } + + if (cur->next) { + // In the middle, just unlink it + cur->next->prev = cur->prev; + } else { + // At the end, shrink heap + _alloc_tail = cur->prev; + + void *top = sbrk(0); + size_t size = (top - (cur->prev)->ptr) - (cur->prev)->size; + sbrk(-size); + } + + (cur->prev)->next = cur->next; +} diff --git a/libpsn00b/libc/malloc.s b/libpsn00b/libc/malloc.s deleted file mode 100644 index e441bbe..0000000 --- a/libpsn00b/libc/malloc.s +++ /dev/null @@ -1,242 +0,0 @@ -# Custom first-fit malloc routines by Lameguy64 -# Part of the PSn00bSDK Project -# -# NOTE: there reportedly is a GCC bug which messes up .weak functions written -# in assembly if LTO is enabled. I haven't tested but, according to the -# internet, this bug has never been fixed. 
-# https://gcc.gnu.org/legacy-ml/gcc-help/2019-10/msg00092.html - -.set noreorder - -.set ND_PREV, 0 # Address to previous block (NULL if starting block) -.set ND_NEXT, 4 # Address to next block (NULL if end block) -.set ND_SIZE, 8 # Size of block -.set ND_HSIZ, 12 - -.section .text - -# Stupid small function just to get bss end -# due to GCC insisting externs to be gp relative -.global GetBSSend -.type GetBSSend, @function -GetBSSend: - la $v0, _end - jr $ra - nop - - -# Initializes the heap for malloc -# a0 - Starting address of heap -# a1 - Size of memory heap -# -.global InitHeap -.type InitHeap, @function -.weak InitHeap -InitHeap: - la $v0, _malloc_addr - sw $a0, 0($v0) - la $v0, _malloc_size - sw $a1, 0($v0) - - sw $0 , ND_PREV($a0) # Set heap header - sw $0 , ND_NEXT($a0) - jr $ra - sw $0 , ND_SIZE($a0) - - -# Changes the heap size without clearing or relocating the heap -# a0 - Size of memory heap in bytes -.global SetHeapSize -.type SetHeapSize, @function -.weak SetHeapSize -SetHeapSize: - la $v1, _malloc_size - lw $v0, 0($v1) - jr $ra - sw $a1, 0($v1) - - -# Allocates a block of memory in the heap -# a0 - Size of memory block to allocate. 
-# -.global malloc -.type malloc, @function -.weak malloc -malloc: - addiu $a0, 3 # Round size to a multiple of 4 - srl $a0, 2 - - la $a2, _malloc_addr - lw $a2, 0($a2) - sll $a0, 2 - -.Lfind_next: - - move $a1, $a2 - - lw $a2, ND_NEXT($a1) # Get block header - lw $v1, ND_SIZE($a1) - - subu $v0, $a2, $a1 # Compute space between current and next - - beqz $v1, .Lempty_block # Occupy empty block (if size = 0) - nop - - beqz $a2, .Lnew_block # Allocate a new block (if no next) - nop - - addiu $v0, -(ND_HSIZ*2) # Compute remaining space of block - subu $v0, $v1 - - blt $v0, $a0, .Lfind_next # Search for the next block if space is not big enough - nop - - # Perform a block split using remaining space of current block - - addiu $v0, $a1, ND_HSIZ # Compute address for new header - addu $v0, $v1 - - sw $a1, ND_PREV($v0) # Set the new block header - sw $a2, ND_NEXT($v0) - sw $a0, ND_SIZE($v0) - - sw $v0, ND_NEXT($a1) # Update previous and next blocks - sw $v0, ND_PREV($a2) - - jr $ra - addiu $v0, ND_HSIZ - -.Lempty_block: # Occupy an empty block - - beqz $a2, .Lno_next # Skip size calculation if there's no next - nop - - addiu $v0, -ND_HSIZ - blt $v0, $a0, .Lfind_next - nop - - b .Lskip_space_check - nop - -.Lno_next: - - la $v1, _malloc_addr # Check if there's enough space for a block - lw $v1, 0($v1) - la $v0, _malloc_size - lw $v0, 0($v0) - - subu $v1, $a1, $v1 - addu $v1, $a0 - addiu $v1, ND_HSIZ - - bgt $v1, $v0, .Lno_space - nop - -.Lskip_space_check: - - sw $a0, ND_SIZE($a1) - jr $ra # Return address - addiu $v0, $a1, ND_HSIZ - -.Lnew_block: # Create a new block - - addu $a2, $a1, $v1 # Compute address for new block - addiu $a2, ND_HSIZ - - la $v1, _malloc_addr - lw $v1, 0($v1) - la $v0, _malloc_size - lw $v0, 0($v0) - - subu $v1, $a2, $v1 - addu $v1, $a0 - addiu $v1, ND_HSIZ - - bgt $v1, $v0, .Lno_space # Reject if it exceeds specified size - nop - - sw $a1, ND_PREV($a2) - sw $0 , ND_NEXT($a2) - sw $a0, ND_SIZE($a2) - - sw $a2, ND_NEXT($a1) - - jr $ra # Return 
address - addiu $v0, $a2, ND_HSIZ - -.Lno_space: # Return a null if no space can be found - - jr $ra - move $v0, $0 - - -# Allocates a block of memory in block units and zero fills the -# allocated block. -# a0 - Block size. -# a1 - Number of blocks to allocate -# -.global calloc -.type calloc, @function -.weak calloc -calloc: - mult $a0, $a1 - addiu $sp, -4 - sw $ra, 0($sp) - - jal malloc - mflo $a0 - - move $a0, $v0 - mflo $a1 - -.Lclear_loop: - - sw $0 , 0($a0) - addi $a1, 4 - bgtz $a1, .Lclear_loop - addiu $a0, 4 - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - - -# Deallocates an allocated block -# a0 - An address returned by malloc to deallocate -# -.global free -.type free, @function -.weak free -free: - - addiu $a0, -ND_HSIZ - lw $a1, ND_PREV($a0) - lw $a2, ND_NEXT($a0) - - beqz $a1, .Lis_start # Check if block is a starting block - nop - - beqz $a2, .Lis_end - nop - - # Unlink - - sw $a2, ND_NEXT($a1) - jr $ra - sw $a1, ND_PREV($a2) - -.Lis_end: # Unlinks the ending block - - jr $ra - sw $0 , ND_NEXT($a1) - -.Lis_start: # Simply set size to 0 if starting block - - jr $ra - sw $0 , ND_SIZE($a0) - - -# Internal variables -.comm _malloc_addr, 4, 4 -.comm _malloc_size, 4, 4 diff --git a/libpsn00b/libc/start.c b/libpsn00b/libc/start.c index fd6fe33..87ac951 100644 --- a/libpsn00b/libc/start.c +++ b/libpsn00b/libc/start.c @@ -62,10 +62,10 @@ extern uint8_t _end[]; // useful though to change the stack size and/or reinitialize the heap on // systems that have more than 2 MB of RAM (e.g. emulators, devkits, PS1-based // arcade boards). 
-void _mem_init(int ram_size, int stack_max_size) { - void *exe_end = _end + 4; - int exe_size = (int) exe_end - (int) __text_start; - int ram_used = (0x10000 + exe_size + stack_max_size) & 0xfffffffc; +void _mem_init(size_t ram_size, size_t stack_max_size) { + void *exe_end = _end + 4; + size_t exe_size = (size_t) exe_end - (size_t) __text_start; + size_t ram_used = (0x10000 + exe_size + stack_max_size) & 0xfffffffc; InitHeap(exe_end, ram_size - ram_used); } @@ -80,8 +80,8 @@ extern int main(int argc, const char* argv[]); // Even though _start() usually takes no arguments, this implementation allows // parent executables to pass args directly to child executables without having // to overwrite the arg strings in kernel RAM. -void _start(int32_t override_argc, const char **override_argv) { - __asm__ volatile("la $gp, _gp;"); +void _start_inner(int32_t override_argc, const char **override_argv) { + //__asm__ volatile("la $gp, _gp;"); // Clear BSS 4 bytes at a time. BSS is always aligned to 4 bytes by the // linker script. 
diff --git a/libpsn00b/lzp/crc.c b/libpsn00b/lzp/crc.c index 3c1ae57..6b00c8e 100644 --- a/libpsn00b/lzp/crc.c +++ b/libpsn00b/lzp/crc.c @@ -1,14 +1,15 @@ +#include <stdint.h> #include "lzp.h" -void initTable16(unsigned short* table) { +void initTable16(uint16_t *table) { int i, j; - unsigned short crc, c; + uint16_t crc, c; for (i=0; i<256; i++) { crc = 0; - c = (unsigned short) i; + c = (uint16_t) i; for (j=0; j<8; j++) { @@ -25,10 +26,10 @@ void initTable16(unsigned short* table) { } -void initTable32(unsigned int* table) { +void initTable32(uint32_t *table) { int i,j; - unsigned int crcVal; + uint32_t crcVal; for(i=0; i<256; i++) { @@ -49,17 +50,17 @@ void initTable32(unsigned int* table) { } -unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc) { +uint16_t lzCRC16(const void* buff, int bytes, uint16_t crc) { int i; - unsigned short tmp, short_c; - unsigned short crcTable[256]; + uint16_t tmp, short_c; + uint16_t crcTable[256]; initTable16(crcTable); for(i=0; i<bytes; i++) { - short_c = 0x00ff & (unsigned short)((const unsigned char*)buff)[i]; + short_c = 0x00ff & (uint16_t)((const uint8_t *)buff)[i]; tmp = crc ^ short_c; crc = (crc >> 8) ^ crcTable[tmp&0xff]; @@ -70,18 +71,18 @@ unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc) { } -unsigned int lzCRC32(const void* buff, int bytes, unsigned int crc) { +uint32_t lzCRC32(const void* buff, int bytes, uint32_t crc) { int i; - const unsigned char* byteBuff = (const unsigned char*)buff; - unsigned int byte; - unsigned int crcTable[256]; + const uint8_t *byteBuff = (const uint8_t *)buff; + uint32_t byte; + uint32_t crcTable[256]; initTable32(crcTable); for(i=0; i<bytes; i++) { - byte = 0x000000ffL&(unsigned int)byteBuff[i]; + byte = 0x000000ffL&(uint32_t)byteBuff[i]; crc = (crc>>8)^crcTable[(crc^byte)&0xff]; } diff --git a/libpsn00b/lzp/lzp.h b/libpsn00b/lzp/lzp.h index cfeeb72..456de02 100644 --- a/libpsn00b/lzp/lzp.h +++ b/libpsn00b/lzp/lzp.h @@ -16,7 +16,7 @@ #ifndef 
_LZPACK_H #define _LZPACK_H -#include <sys/types.h> +#include <stdint.h> #ifdef _WIN32 #include <windows.h> #endif @@ -64,9 +64,9 @@ typedef struct { //! File ID (must always be 'LZP') - char id[3]; + char id[3]; //! File count - u_char numFiles; + uint8_t numFiles; } LZP_HEAD; @@ -74,15 +74,15 @@ typedef struct { typedef struct { //! File name - char fileName[16]; + char fileName[16]; //! CRC32 checksum of file - u_int crc; + uint32_t crc; //! Original size of file in bytes - u_int fileSize; + uint32_t fileSize; //! Compressed size of file - u_int packedSize; + uint32_t packedSize; //! File data offset - u_int offset; + uint32_t offset; } LZP_FILE; @@ -162,7 +162,7 @@ void lzResetHashSizes(); * * \returns CRC16 hash of specified buffer. */ -unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc); +uint16_t lzCRC16(const void* buff, int bytes, uint16_t crc); /*! Calculates a CRC32 hash of the specified buffer. * @@ -172,7 +172,7 @@ unsigned short lzCRC16(const void* buff, int bytes, unsigned short crc); * * \returns CRC32 hash of specified buffer. */ -unsigned int lzCRC32(const void* buff, int bytes, unsigned int crc); +uint32_t lzCRC32(const void* buff, int bytes, uint32_t crc); /*! 
@} */ diff --git a/libpsn00b/lzp/lzqlp.h b/libpsn00b/lzp/lzqlp.h index 5b70b40..32ce0d7 100644 --- a/libpsn00b/lzp/lzqlp.h +++ b/libpsn00b/lzp/lzqlp.h @@ -1,7 +1,7 @@ #ifndef _QLP_H #define _QLP_H -#include <sys/types.h> +#include <stdint.h> #ifdef _WIN32 #include <windows.h> #endif @@ -13,14 +13,14 @@ #define PACK_ERR_READ_FAULT -4 typedef struct { - char id[3]; - u_char numfiles; + char id[3]; + uint8_t numfiles; } QLP_HEAD; typedef struct { - char name[16]; - u_int size; - u_int offs; + char name[16]; + uint32_t size; + uint32_t offs; } QLP_FILE; int qlpFileCount(const QLP_HEAD* qlpfile); diff --git a/libpsn00b/psxapi/_initcd.s b/libpsn00b/psxapi/_initcd.s index c3a2861..600d686 100644 --- a/libpsn00b/psxapi/_initcd.s +++ b/libpsn00b/psxapi/_initcd.s @@ -1,7 +1,7 @@ .section .text .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .global _InitCd .type _InitCd, @function diff --git a/libpsn00b/psxapi/_syscalls.s b/libpsn00b/psxapi/_syscalls.s new file mode 100644 index 0000000..24864f3 --- /dev/null +++ b/libpsn00b/psxapi/_syscalls.s @@ -0,0 +1,50 @@ +# PSn00bSDK syscall wrappers +# (C) 2022 spicyjpeg - MPL licensed + +.set noreorder + +.section .text.EnterCriticalSection +.global EnterCriticalSection +.type EnterCriticalSection, @function +EnterCriticalSection: + li $a0, 0x01 + syscall 0 + + jr $ra + nop + +.section .text.ExitCriticalSection +.global ExitCriticalSection +.type ExitCriticalSection, @function +ExitCriticalSection: + li $a0, 0x02 + syscall 0 + + jr $ra + nop + +.section .text.SwEnterCriticalSection +.global SwEnterCriticalSection +.type SwEnterCriticalSection, @function +SwEnterCriticalSection: + mfc0 $a0, $12 # cop0r12 &= ~0x00000401 + li $a1, -1026 + and $a0, $a1 + mtc0 $a0, $12 + nop + + jr $ra + nop + +.section .text.SwExitCriticalSection +.global SwExitCriticalSection +.type SwExitCriticalSection, @function +SwExitCriticalSection: + mfc0 $a0, $12 # cop0r12 |= 0x00000401 + nop + ori $a0, 0x0401 + mtc0 $a0, $12 + nop + + jr $ra + 
nop diff --git a/libpsn00b/psxapi/stubs.json b/libpsn00b/psxapi/stubs.json index 9198b06..200cbc7 100644 --- a/libpsn00b/psxapi/stubs.json +++ b/libpsn00b/psxapi/stubs.json @@ -133,6 +133,12 @@ }, { "type": "a", + "id": 156, + "name": "SetConf", + "file": "sys.s" + }, + { + "type": "a", "id": 160, "name": "_boot", "file": "sys.s" @@ -205,12 +211,36 @@ }, { "type": "b", + "id": 7, + "name": "DeliverEvent", + "file": "sys.s" + }, + { + "type": "b", "id": 8, "name": "OpenEvent", "file": "sys.s" }, { "type": "b", + "id": 9, + "name": "CloseEvent", + "file": "sys.s" + }, + { + "type": "b", + "id": 10, + "name": "WaitEvent", + "file": "sys.s" + }, + { + "type": "b", + "id": 11, + "name": "TestEvent", + "file": "sys.s" + }, + { + "type": "b", "id": 12, "name": "EnableEvent", "file": "sys.s" @@ -223,6 +253,24 @@ }, { "type": "b", + "id": 14, + "name": "OpenTh", + "file": "sys.s" + }, + { + "type": "b", + "id": 15, + "name": "CloseTh", + "file": "sys.s" + }, + { + "type": "b", + "id": 16, + "name": "ChangeTh", + "file": "sys.s" + }, + { + "type": "b", "id": 18, "name": "InitPAD", "file": "sys.s" @@ -259,6 +307,12 @@ }, { "type": "b", + "id": 32, + "name": "UnDeliverEvent", + "file": "sys.s" + }, + { + "type": "b", "id": 64, "name": "chdir", "file": "fs.s" @@ -391,20 +445,20 @@ }, { "type": "c", - "id": 10, - "name": "ChangeClearRCnt", + "id": 7, + "name": "InstallExceptionHandlers", "file": "sys.s" }, { - "type": "syscall", - "id": 1, - "name": "EnterCriticalSection", + "type": "c", + "id": 8, + "name": "SysInitMemory", "file": "sys.s" }, { - "type": "syscall", - "id": 2, - "name": "ExitCriticalSection", + "type": "c", + "id": 10, + "name": "ChangeClearRCnt", "file": "sys.s" } ] diff --git a/libpsn00b/psxapi/sys.s b/libpsn00b/psxapi/sys.s index e54bd98..e2505e1 100644 --- a/libpsn00b/psxapi/sys.s +++ b/libpsn00b/psxapi/sys.s @@ -6,7 +6,7 @@ .set noreorder -## A0 table functions (7) +## A0 table functions (8) .section .text.b_setjmp .global b_setjmp @@ -48,6 +48,14 @@ 
FlushCache: jr $t2 li $t1, 0x44 +.section .text.SetConf +.global SetConf +.type SetConf, @function +SetConf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9c + .section .text._boot .global _boot .type _boot, @function @@ -64,7 +72,7 @@ GetSystemInfo: jr $t2 li $t1, 0xb4 -## B0 table functions (19) +## B0 table functions (27) .section .text._kernel_malloc .global _kernel_malloc @@ -122,6 +130,14 @@ ResetRCnt: jr $t2 li $t1, 0x06 +.section .text.DeliverEvent +.global DeliverEvent +.type DeliverEvent, @function +DeliverEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x07 + .section .text.OpenEvent .global OpenEvent .type OpenEvent, @function @@ -130,6 +146,30 @@ OpenEvent: jr $t2 li $t1, 0x08 +.section .text.CloseEvent +.global CloseEvent +.type CloseEvent, @function +CloseEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x09 + +.section .text.WaitEvent +.global WaitEvent +.type WaitEvent, @function +WaitEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0a + +.section .text.TestEvent +.global TestEvent +.type TestEvent, @function +TestEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0b + .section .text.EnableEvent .global EnableEvent .type EnableEvent, @function @@ -146,6 +186,30 @@ DisableEvent: jr $t2 li $t1, 0x0d +.section .text.OpenTh +.global OpenTh +.type OpenTh, @function +OpenTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0e + +.section .text.CloseTh +.global CloseTh +.type CloseTh, @function +CloseTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x0f + +.section .text.ChangeTh +.global ChangeTh +.type ChangeTh, @function +ChangeTh: + li $t2, 0xb0 + jr $t2 + li $t1, 0x10 + .section .text.InitPAD .global InitPAD .type InitPAD, @function @@ -194,6 +258,14 @@ SetCustomExitFromException: jr $t2 li $t1, 0x19 +.section .text.UnDeliverEvent +.global UnDeliverEvent +.type UnDeliverEvent, @function +UnDeliverEvent: + li $t2, 0xb0 + jr $t2 + li $t1, 0x20 + .section .text.GetC0Table .global GetC0Table .type GetC0Table, @function @@ -218,7 +290,7 @@ ChangeClearPAD: jr $t2 li $t1, 0x5b -## C0 table functions (3) +## C0 table 
functions (5) .section .text.SysEnqIntRP .global SysEnqIntRP @@ -236,6 +308,22 @@ SysDeqIntRP: jr $t2 li $t1, 0x03 +.section .text.InstallExceptionHandlers +.global InstallExceptionHandlers +.type InstallExceptionHandlers, @function +InstallExceptionHandlers: + li $t2, 0xc0 + jr $t2 + li $t1, 0x07 + +.section .text.SysInitMemory +.global SysInitMemory +.type SysInitMemory, @function +SysInitMemory: + li $t2, 0xc0 + jr $t2 + li $t1, 0x08 + .section .text.ChangeClearRCnt .global ChangeClearRCnt .type ChangeClearRCnt, @function @@ -244,23 +332,3 @@ ChangeClearRCnt: jr $t2 li $t1, 0x0a -## Syscalls (2) - -.section .text.EnterCriticalSection -.global EnterCriticalSection -.type EnterCriticalSection, @function -EnterCriticalSection: - li $a0, 0x01 - syscall 0 - jr $ra - nop - -.section .text.ExitCriticalSection -.global ExitCriticalSection -.type ExitCriticalSection, @function -ExitCriticalSection: - li $a0, 0x02 - syscall 0 - jr $ra - nop - diff --git a/libpsn00b/psxcd/_cd_control.s b/libpsn00b/psxcd/_cd_control.s index c4153ff..5fa336a 100644 --- a/libpsn00b/psxcd/_cd_control.s +++ b/libpsn00b/psxcd/_cd_control.s @@ -1,6 +1,6 @@ .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .section .text diff --git a/libpsn00b/psxcd/cdgetsector.s b/libpsn00b/psxcd/cdgetsector.s index 9f38e7a..6a29069 100644 --- a/libpsn00b/psxcd/cdgetsector.s +++ b/libpsn00b/psxcd/cdgetsector.s @@ -1,6 +1,6 @@ .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .section .text diff --git a/libpsn00b/psxcd/cdmix.s b/libpsn00b/psxcd/cdmix.s index 745fb65..40cd181 100644 --- a/libpsn00b/psxcd/cdmix.s +++ b/libpsn00b/psxcd/cdmix.s @@ -1,6 +1,6 @@ .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .section .text diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c index d1c1b18..582b8d9 100644 --- a/libpsn00b/psxcd/isofs.c +++ b/libpsn00b/psxcd/isofs.c @@ -1,4 +1,4 @@ -#include <sys/types.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> 
#include <string.h> @@ -15,18 +15,18 @@ typedef struct _CdlDIR_INT { - u_long _pos; - u_long _len; - u_char* _dir; + uint32_t _pos; + uint32_t _len; + uint8_t *_dir; } CdlDIR_INT; extern int _cd_media_changed; static int _cd_iso_last_dir_lba; -static u_char _cd_iso_descriptor_buff[2048]; -static u_char* _cd_iso_pathtable_buff=NULL; -static u_char* _cd_iso_directory_buff=NULL; +static uint8_t _cd_iso_descriptor_buff[2048]; +static uint8_t *_cd_iso_pathtable_buff=NULL; +static uint8_t *_cd_iso_directory_buff=NULL; static int _cd_iso_directory_len; static int _cd_iso_error=0; @@ -68,7 +68,7 @@ static int _CdReadIsoDescriptor(int session_offs) // Seek to volume descriptor CdIntToPos(16+session_offs, &loc); - if( !CdControl(CdlSetloc, (u_char*)&loc, 0) ) + if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { #ifdef DEBUG printf("psxcd: Could not set seek destination.\n"); @@ -85,7 +85,7 @@ static int _CdReadIsoDescriptor(int session_offs) printf("psxcd: Read sectors.\n"); #endif // Read volume descriptor - CdRead(1, (u_long*)_cd_iso_descriptor_buff, CdlModeSpeed); + CdRead(1, (uint32_t*)_cd_iso_descriptor_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { @@ -122,7 +122,7 @@ static int _CdReadIsoDescriptor(int session_offs) { free(_cd_iso_pathtable_buff); } - _cd_iso_pathtable_buff = (u_char*)malloc(i); + _cd_iso_pathtable_buff = (uint8_t*)malloc(i); #ifdef DEBUG printf("psxcd_dbg: Allocated %d bytes for path table.\n", i); @@ -130,8 +130,8 @@ static int _CdReadIsoDescriptor(int session_offs) // Read path table CdIntToPos(descriptor->pathTable1Offs, &loc); - CdControl(CdlSetloc, (u_char*)&loc, 0); - CdRead(i>>11, (u_long*)_cd_iso_pathtable_buff, CdlModeSpeed); + CdControl(CdlSetloc, (uint8_t*)&loc, 0); + CdRead(i>>11, (uint32_t*)_cd_iso_pathtable_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { #ifdef DEBUG @@ -165,7 +165,7 @@ static int _CdReadIsoDirectory(int lba) #ifdef DEBUG printf("psxcd_dbg: Seek to sector %d\n", i); #endif - if( !CdControl(CdlSetloc, (u_char*)&loc, 0) ) + 
if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { #ifdef DEBUG printf("psxcd: Could not set seek destination.\n"); @@ -180,8 +180,8 @@ static int _CdReadIsoDirectory(int lba) } // Read first sector of directory record - _cd_iso_directory_buff = (u_char*)malloc(2048); - CdRead(1, (u_long*)_cd_iso_directory_buff, CdlModeSpeed); + _cd_iso_directory_buff = (uint8_t*)malloc(2048); + CdRead(1, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { #ifdef DEBUG @@ -201,7 +201,7 @@ static int _CdReadIsoDirectory(int lba) if( _cd_iso_directory_len > 2048 ) { - if( !CdControl(CdlSetloc, (u_char*)&loc, 0) ) + if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { #ifdef DEBUG printf("psxcd: Could not set seek destination.\n"); @@ -212,12 +212,12 @@ static int _CdReadIsoDirectory(int lba) free(_cd_iso_directory_buff); i = ((2047+_cd_iso_directory_len)>>11)<<11; - _cd_iso_directory_buff = (u_char*)malloc(i); + _cd_iso_directory_buff = (uint8_t*)malloc(i); #ifdef DEBUG printf("psxcd_dbg: Allocated %d bytes for directory record.\n", i); #endif - CdRead(i>>11, (u_long*)_cd_iso_directory_buff, CdlModeSpeed); + CdRead(i>>11, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { #ifdef DEBUG @@ -279,7 +279,7 @@ static void dump_directory(void) static void dump_pathtable(void) { - u_char *tbl_pos; + uint8_t *tbl_pos; ISO_PATHTABLE_ENTRY *tbl_entry; ISO_DESCRIPTOR *descriptor; char namebuff[16]; @@ -314,7 +314,7 @@ static void dump_pathtable(void) static int get_pathtable_entry(int entry, ISO_PATHTABLE_ENTRY *tbl, char *namebuff) { int i; - u_char *tbl_pos; + uint8_t *tbl_pos; ISO_PATHTABLE_ENTRY *tbl_entry; ISO_DESCRIPTOR *descriptor; diff --git a/libpsn00b/psxcd/psxcd.c b/libpsn00b/psxcd/psxcd.c index 8f19c8d..6340638 100644 --- a/libpsn00b/psxcd/psxcd.c +++ b/libpsn00b/psxcd/psxcd.c @@ -1,4 +1,4 @@ -#include <sys/types.h> +#include <stdint.h> #include <stdio.h> #include <psxgpu.h> #include "psxcd.h" @@ -6,16 +6,16 @@ #define READ_TIMEOUT 600 // 10 
seconds for NTSC extern volatile char _cd_ack_wait; -extern volatile u_char _cd_last_int; -extern volatile u_char _cd_last_mode; -extern volatile u_char _cd_status; +extern volatile uint8_t _cd_last_int; +extern volatile uint8_t _cd_last_mode; +extern volatile uint8_t _cd_status; extern volatile CdlCB _cd_callback_int1_data; -volatile u_char *_cd_result_ptr; +volatile uint8_t *_cd_result_ptr; // For read retry volatile CdlLOC _cd_last_setloc; -volatile u_long *_cd_last_read_addr; +volatile uint32_t *_cd_last_read_addr; volatile int _cd_last_sector_count; int _cd_media_changed; @@ -169,7 +169,7 @@ int CdSync(int mode, unsigned char *result) int CdGetToc(CdlLOC *toc) { - u_char track_info[8]; + uint8_t track_info[8]; int i,tracks; // Get number of tracks @@ -189,7 +189,7 @@ int CdGetToc(CdlLOC *toc) for(i=0; i<tracks; i++) { int t = itob(1+i); - if( !CdControl(CdlGetTD, (u_char*)&t, (u_char*)&toc[i]) ) + if( !CdControl(CdlGetTD, (uint8_t*)&t, (uint8_t*)&toc[i]) ) { return 0; } @@ -234,11 +234,11 @@ int CdMode(void) // CD data read routines volatile int _cd_sector_count = 0; -volatile u_long *_cd_read_addr; -volatile u_char _cd_read_result[8]; -volatile u_long _cd_read_oldcb; -volatile u_long _cd_read_sector_sz; -volatile u_long _cd_read_counter; +volatile uint32_t *_cd_read_addr; +volatile uint8_t _cd_read_result[8]; +volatile uint32_t _cd_read_oldcb; +volatile uint32_t _cd_read_sector_sz; +volatile uint32_t _cd_read_counter; @@ -279,7 +279,7 @@ static void _CdReadReadyCallback(int status, unsigned char *result) } } -int CdRead(int sectors, u_long *buf, int mode) +int CdRead(int sectors, uint32_t *buf, int mode) { // Set sectors to read count _cd_sector_count = sectors; @@ -307,10 +307,10 @@ int CdRead(int sectors, u_long *buf, int mode) _cd_read_oldcb = CdReadyCallback(_CdReadReadyCallback); // Set specified mode - CdControl(CdlSetmode, (u_char*)&mode, 0); + CdControl(CdlSetmode, (uint8_t*)&mode, 0); // Begin reading sectors - CdControl(CdlReadN, 0, 
(u_char*)_cd_read_result); + CdControl(CdlReadN, 0, (uint8_t*)_cd_read_result); return 0; } @@ -336,10 +336,10 @@ static void CdDoRetry() // Retry read CdControl(CdlSetloc, (void*)&_cd_last_setloc, 0); - CdControl(CdlReadN, 0, (u_char*)_cd_read_result); + CdControl(CdlReadN, 0, (uint8_t*)_cd_read_result); } -int CdReadSync(int mode, u_char *result) +int CdReadSync(int mode, uint8_t *result) { if( (VSync(-1)-_cd_read_counter) > READ_TIMEOUT ) { @@ -371,7 +371,7 @@ int CdReadSync(int mode, u_char *result) return 0; } -u_long CdReadCallback(CdlCB func) +uint32_t CdReadCallback(CdlCB func) { unsigned int old_func; diff --git a/libpsn00b/psxcd/psxcd_asm.s b/libpsn00b/psxcd/psxcd_asm.s index 129bc89..906ab32 100644 --- a/libpsn00b/psxcd/psxcd_asm.s +++ b/libpsn00b/psxcd/psxcd_asm.s @@ -1,6 +1,6 @@ .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .section .text diff --git a/libpsn00b/psxetc/_dl_resolve_wrapper.s b/libpsn00b/psxetc/_dl_resolve_wrapper.s index 01ebf3a..eedfa10 100644 --- a/libpsn00b/psxetc/_dl_resolve_wrapper.s +++ b/libpsn00b/psxetc/_dl_resolve_wrapper.s @@ -10,11 +10,11 @@ # - $t8 = index of the function in the .dynsym symbol table # - $t9 = _dl_resolve_wrapper itself's address -.set noreorder -.section .text +.set noreorder -.global _dl_resolve_wrapper -.type _dl_resolve_wrapper, @function +.section .text._dl_resolve_wrapper +.global _dl_resolve_wrapper +.type _dl_resolve_wrapper, @function _dl_resolve_wrapper: # Push the registers we're going to use onto the stack. 
addiu $sp, -16 @@ -47,10 +47,3 @@ _dl_resolve_wrapper: jr $t0 nop - -.section .data - -.global _dl_credits -.type _dl_credits, @object -_dl_credits: - .asciiz "psxetc runtime dynamic linker by spicyjpeg\n" diff --git a/libpsn00b/psxetc/dmacallback.s b/libpsn00b/psxetc/dmacallback.s deleted file mode 100644 index 8ea8ec0..0000000 --- a/libpsn00b/psxetc/dmacallback.s +++ /dev/null @@ -1,191 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -.global DMACallback -.type DMACallback, @function -DMACallback: - - # a0 - DMA channel - # a1 - Callback function - - addiu $sp, -8 - sw $ra, 0($sp) - - beqz $a1, .Lremove_cb # Remove callback if function is NULL - nop - - addiu $sp, -8 # Install IRQ handler for DMA handler - sw $a0, 0($sp) # if not set installed yet - sw $a1, 4($sp) - - jal GetInterruptCallback - li $a0, 3 - - bnez $v0, .Lskip_install - nop - - la $a1, _dma_handler - jal InterruptCallback - li $a0, 3 - -.Lskip_install: - - lw $a0, 0($sp) - lw $a1, 4($sp) - addiu $sp, 8 - - la $v0, _dma_func_table - sll $v1, $a0, 2 - addu $v0, $v1 - lw $v1, 0($v0) - sw $a1, 0($v0) - sw $v1, 4($sp) - - lui $a2, IOBASE - - lw $v0, DMA_DICR($a2) # Enable DMA interrupt - lui $v1, 0x1 - sll $v1, $a0 - or $v0, $v1 - lui $v1, 0x80 - or $v0, $v1 - sw $v0, DMA_DICR($a2) - - b .Lskip_remove - nop - -.Lremove_cb: - - la $v0, _dma_func_table # Set callback address - sll $v1, $a0, 2 - addu $v0, $v1 - lw $v1, 0($v0) - sw $a1, 0($v0) - sw $v1, 4($sp) - - lui $a2, IOBASE # Disable DMA interrupt - lw $v0, DMA_DICR($a2) - lui $v1, 0x1 - sll $v1, $a0 - .set noat - addiu $at, $0, -1 - xor $v1, $at - and $v0, $v1 - lui $v1, 0x7f00 - xor $v1, $at - and $v0, $v1 - .set at - sw $v0, DMA_DICR($a2) - - jal _dma_has_cb # Check if callbacks are present - nop - bnez $v0, .Lskip_remove - nop - sw $0 , DMA_DICR($a2) - - jal GetInterruptCallback # Check if callback is the DMA handler - li $a0, 3 - la $v1, _dma_handler - bne $v0, $v1, .Lskip_remove - nop - - li $a0, 3 - jal InterruptCallback - 
move $a1, $0 - -.Lskip_remove: - - lw $ra, 0($sp) - lw $v0, 4($sp) - jr $ra - addiu $sp, 8 - - -.type _dma_has_cb, @function -_dma_has_cb: - - la $v1, _dma_func_table - li $t0, 6 - -.Lscan_loop: - - lw $v0, 0($v1) - addiu $v1, 4 - bnez $v0, .Lhas_cb - nop - - bgtz $t0, .Lscan_loop - addiu $t0, -1 - - jr $ra - move $v0, $0 - -.Lhas_cb: - - jr $ra - li $v0, 1 - - -.type _dma_handler, @function -_dma_handler: - - addiu $sp, -12 - sw $ra, 0($sp) - sw $s0, 4($sp) - sw $s1, 8($sp) - - move $s0, $0 - la $s1, _dma_func_table - -.Lhandler_loop: - - lui $a0, IOBASE - lw $v0, DMA_DICR($a0) - li $v1, 24 - addu $v1, $s0 - srl $v0, $v1 - andi $v0, 0x1 - - lw $v1, 0($s1) - - beqz $v0, .Lno_irq - addiu $s1, 4 - - beqz $v1, .Lno_irq - nop - - jalr $v1 - nop - -.Lno_irq: - - blt $s0, 6, .Lhandler_loop - addi $s0, 1 - - lui $a0, IOBASE - lw $v0, DMA_DICR($a0) - nop - sw $v0, DMA_DICR($a0) - - lw $ra, 0($sp) - lw $s0, 4($sp) - lw $s1, 8($sp) - - jr $ra - addiu $sp, -12 - - -.section .data - -_dma_func_table: - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 -
\ No newline at end of file diff --git a/libpsn00b/psxetc/getinterruptcallback.s b/libpsn00b/psxetc/getinterruptcallback.s deleted file mode 100644 index 510447f..0000000 --- a/libpsn00b/psxetc/getinterruptcallback.s +++ /dev/null @@ -1,19 +0,0 @@ -.set noreorder - -.section .text - -.global GetInterruptCallback -.type GetInterruptCallback, @function -GetInterruptCallback: - - # a0 - Interrupt number - - la $a1, _irq_func_table - sll $a0, 2 - addu $a1, $a0 - lw $v0, 0($a1) - nop - jr $ra - nop - -
\ No newline at end of file diff --git a/libpsn00b/psxetc/interruptcallback.s b/libpsn00b/psxetc/interruptcallback.s deleted file mode 100644 index 78e5e6e..0000000 --- a/libpsn00b/psxetc/interruptcallback.s +++ /dev/null @@ -1,48 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -.global InterruptCallback -.type InterruptCallback, @function -InterruptCallback: - - # a0 - Interrupt number - # a1 - Callback function - - lui $a2, IOBASE - - beqz $a1, .Ldisable_irq - nop - - lw $v0, IRQ_MASK($a2) # Enable interrupt mask - li $v1, 1 - sll $v1, $a0 - or $v0, $v1 - - b .Lcont - sw $v0, IRQ_MASK($a2) - -.Ldisable_irq: - -.set noat - lw $v0, IRQ_MASK($a2) # Disable interrupt mask - li $v1, 1 - sll $v1, $a0 - addiu $at, $0 , -1 - xor $v1, $at -.set at - and $v0, $v1 - sw $v0, IRQ_MASK($a2) - -.Lcont: - - la $a2, _irq_func_table # Get address to IRQ function table - - sll $v1, $a0, 2 # Compute the slot - addu $v1, $a2, $v1 - lw $v0, 0($v1) # Get old handler address - - jr $ra # Return and set new IRQ handler - sw $a1, 0($v1) diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c new file mode 100644 index 0000000..859209a --- /dev/null +++ b/libpsn00b/psxetc/interrupts.c @@ -0,0 +1,225 @@ +/* + * PSn00bSDK interrupt management library + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <psxapi.h> +#include <psxetc.h> +#include <hwregs_c.h> + +#define NUM_IRQ_CHANNELS 11 +#define NUM_DMA_CHANNELS 7 +#define ISR_STACK_SIZE 0x1000 + +/* Internal globals */ + +static void (*_irq_handlers[NUM_IRQ_CHANNELS])(void); +static void (*_dma_handlers[NUM_DMA_CHANNELS])(void); +static int _num_dma_handlers = 0; + +static uint16_t _saved_irq_mask; +static uint32_t _saved_dma_dpcr, _saved_dma_dicr; +static int _isr_installed = 0; + +/* Custom ISR jmp_buf */ + +// The ISR and all functions called by it (thus, all callbacks registered using +// InterruptCallback() and DMACallback()) use an independent stack, isolated +// from the 
main thread's stack. As the size of this stack is limited, custom +// callbacks shall keep the number of nested subroutine calls to a minimum and +// avoid allocating large buffers (e.g. for receiving a sector from the CD +// drive) on the stack. +static uint8_t _isr_stack[ISR_STACK_SIZE]; + +extern uint8_t _gp[]; +static void _global_isr(void); + +static const struct JMP_BUF _isr_jmp_buf = { + .ra = (uint32_t) &_global_isr, + .sp = (uint32_t) &_isr_stack[ISR_STACK_SIZE], + .fp = 0, + .s0 = 0, + .s1 = 0, + .s2 = 0, + .s3 = 0, + .s4 = 0, + .s5 = 0, + .s6 = 0, + .s7 = 0, + .gp = (uint32_t) _gp +}; + +/* Internal IRQ and DMA handlers */ + +static void _global_isr(void) { + uint16_t stat = IRQ_STAT, mask = IRQ_MASK; + + // Clear all IRQ flags in one shot. This is not the "proper" way to do it + // but it's much faster than clearing one flag at a time. + IRQ_STAT = ~mask; + + //for (int i = 0; i < NUM_IRQ_CHANNELS; i++) { + for (int i = 0; stat; i++, stat >>= 1) { + if (!(stat & 1)) + continue; + + if (_irq_handlers[i]) + _irq_handlers[i](); + } + + ReturnFromException(); +} + +static void _global_dma_handler(void) { + uint32_t stat = DMA_DICR; + + // Clear all DMA IRQ flags in one shot (note that flags are cleared by + // writing 1 to them rather than 0). + stat &= 0x7fff0000; + DMA_DICR = stat; + stat >>= 24; + + //for (int i = 0; i < NUM_DMA_CHANNELS; i++) { + for (int i = 0; stat; i++, stat >>= 1) { + if (!(stat & 1)) + continue; + + if (_dma_handlers[i]) + _dma_handlers[i](); + } +} + +/* Callback registration API */ + +void *InterruptCallback(int irq, void (*func)(void)) { + if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) + return 0; + + void *old_callback = _irq_handlers[irq]; + _irq_handlers[irq] = func; + + // Enable or disable the IRQ in the IRQ_MASK register depending on whether + // the callback is being registered or removed. 
+ if (func) + IRQ_MASK |= 1 << irq; + else + IRQ_MASK &= ~(1 << irq); + + return old_callback; +} + +void *GetInterruptCallback(int irq) { + if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) + return 0; + + return _irq_handlers[irq]; +} + +void *DMACallback(int dma, void (*func)(void)) { + if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) + return 0; + + void *old_callback = _dma_handlers[dma]; + _dma_handlers[dma] = func; + + // Enable or disable the IRQ in the DMA_DICR register depending on whether + // the callback is being registered or removed. The main DMA IRQ dispatcher + // is also registered if this is the first DMA callback being configured, + // or disabled if it's the last one being removed. + if (func) { + DMA_DICR |= (0x10000 << dma) | (1 << 23); + + if (!(_num_dma_handlers++)) + InterruptCallback(3, &_global_dma_handler); + } else { + if (--_num_dma_handlers) { + DMA_DICR &= ~(0x10000 << dma); + } else { + DMA_DICR = 0; + InterruptCallback(3, 0); + } + } + + return old_callback; +} + +void *GetDMACallback(int dma) { + if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) + return 0; + + return _dma_handlers[dma]; +} + +/* Hook installation/removal API */ + +int ResetCallback(void) { + if (_isr_installed) + return -1; + + EnterCriticalSection(); + _saved_irq_mask = 1 << 3; // Enable DMA IRQ by default + _saved_dma_dpcr = 0x03333333; + _saved_dma_dicr = 0; + + for (int i = 0; i < NUM_IRQ_CHANNELS; i++) + _irq_handlers[i] = (void *) 0; + for (int i = 0; i < NUM_DMA_CHANNELS; i++) + _dma_handlers[i] = (void *) 0; + + // Set up the DMA IRQ handler. This handler shall *not* be overridden using + // InterruptCallback(). 
+ _irq_handlers[3] = &_global_dma_handler; + + _96_remove(); + RestartCallback(); + return 0; +} + +void RestartCallback(void) { + if (_isr_installed) + return; + + IRQ_STAT = 0; + IRQ_MASK = _saved_irq_mask; + DMA_DPCR = _saved_dma_dpcr; + DMA_DICR = _saved_dma_dicr; + + // Install the ISR hook and prevent the kernel's internal handlers from + // automatically acknowledging SPI and timer IRQs. + SetCustomExitFromException(&_isr_jmp_buf); + ChangeClearPAD(0); + ChangeClearRCnt(0, 0); + ChangeClearRCnt(1, 0); + ChangeClearRCnt(2, 0); + ChangeClearRCnt(3, 0); + + _isr_installed = 1; + ExitCriticalSection(); +} + +void StopCallback(void) { + if (!_isr_installed) + return; + + // Save the state of IRQ and DMA registers, then reset them and undo the + // changes that were made to the kernel's state. + EnterCriticalSection(); + _saved_irq_mask = IRQ_MASK; + _saved_dma_dpcr = DMA_DPCR; + _saved_dma_dicr = DMA_DICR; + + IRQ_STAT = 0; + IRQ_MASK = 0; + DMA_DPCR = _saved_dma_dpcr & 0x07777777; + DMA_DICR = 0; + + SetDefaultExitFromException(); + ChangeClearPAD(1); + ChangeClearRCnt(0, 1); + ChangeClearRCnt(1, 1); + ChangeClearRCnt(2, 1); + ChangeClearRCnt(3, 1); + + _isr_installed = 0; +} diff --git a/libpsn00b/psxetc/isr.s b/libpsn00b/psxetc/isr.s deleted file mode 100644 index 440be50..0000000 --- a/libpsn00b/psxetc/isr.s +++ /dev/null @@ -1,107 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.set ISR_STACK_SIZE, 4096 - - -.section .text - -# Global ISR handler of PSn00bSDK - -.set at - -.type _global_isr, @function -_global_isr: - -.Lisr_loop: - - #la $gp, _gp # Keep restoring GP since it gets - # changed elsewhere sometimes - - lui $a0, IOBASE # Get IRQ status - lw $v0, IRQ_MASK($a0) - nop - - srl $v0, $s1 # Check IRQ mask bit if set - andi $v0, 0x1 - - beqz $v0, .Lno_irq # Don't execute callback if IRQ not enabled - nop - - lw $v0, IRQ_STAT($a0) - nop - srl $v0, $s1 # Check IRQ status bit if set - andi $v0, 0x1 - beqz $v0, .Lno_irq # Don't execute callback if no IRQ 
- nop - - lw $v1, 0($s0) # Load IRQ callback function - nop - - lw $v0, IRQ_STAT($a0) # Acknowledge the IRQ (by writing a 0 bit) - li $a1, 1 - sll $a1, $s1 - addiu $a2, $0 , -1 - xor $a1, $a2 - sw $a1, IRQ_STAT($a0) - - beqz $v1, .Lno_irq # Don't execute if callback is not set - nop - - jalr $v1 # Call interrupt handler - nop - -.Lno_irq: - - addiu $s0, 4 - - blt $s1, 11, .Lisr_loop - addiu $s1, 1 - - j ReturnFromException - nop - - -.section .data - -# Global ISR callback table - -.global _irq_func_table -.type _irq_func_table, @object -_irq_func_table: - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - -# Global ISR hook structure -.global _custom_exit -.type _custom_exit, @object -_custom_exit: - .word _global_isr # pc - .word _isr_stack+ISR_STACK_SIZE # sp - .word 0 # fp - .word _irq_func_table # s0 - .word 0 # s1 - .word 0 # s2 - .word 0 # s3 - .word 0 # s4 - .word 0 # s5 - .word 0 # s6 - .word 0 # s7 - .word _gp # gp - -# Global ISR stack -# .fill 1024 -#_custom_exit_stack: -# .fill 4 -.comm _isr_stack, ISR_STACK_SIZE+4 diff --git a/libpsn00b/psxetc/restartcallback.s b/libpsn00b/psxetc/restartcallback.s deleted file mode 100644 index 036a5a0..0000000 --- a/libpsn00b/psxetc/restartcallback.s +++ /dev/null @@ -1,54 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -.global RestartCallback -.type RestartCallback, @function -RestartCallback: - - addiu $sp, -4 - sw $ra, 0($sp) - - la $a0, _custom_exit - jal SetCustomExitFromException - addiu $sp, -4 - addiu $sp, 4 - - move $a0, $0 - jal ChangeClearPAD - addiu $sp, -4 - addiu $sp, 4 - - li $a0, 3 - move $a1, $0 - jal ChangeClearRCnt - addiu $sp, -8 - addiu $sp, 8 - - la $a0, _irq_func_table - move $a1, $0 - move $v0, $0 - -.Lcheck_cbs: # Set up the interrupt masks - lw $v1, 0($a0) - nop - beqz $v1, .Lno_cb - addiu $a0, 4 - li $v1, 1 - sll $v1, $a1 - or $v0, $v1 -.Lno_cb: - blt $a1, 10, .Lcheck_cbs - addiu $a1, 1 - - lui $a0, 
IOBASE - sw $0 , IRQ_STAT($a0) - sw $v0, IRQ_MASK($a0) - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop -
\ No newline at end of file diff --git a/libpsn00b/psxgpu/addprim.s b/libpsn00b/psxgpu/addprim.s deleted file mode 100644 index 1b66274..0000000 --- a/libpsn00b/psxgpu/addprim.s +++ /dev/null @@ -1,26 +0,0 @@ -.set noreorder -.set noat - -.section .text - - -.global AddPrim -.type AddPrim, @function -AddPrim: - - lw $v0, 0($a0) # Load OT entry - lw $v1, 0($a1) # Set packet length value (in words) - lui $at, 0x00ff - or $at, 0xffff - and $v0, $at # Mask off the upper 8 bits of OT entry - or $v1, $v0 # OR values together - sw $v1, 0($a1) # Store new address to primitive tag - lw $v0, 0($a0) # Load OT entry - and $a1, $at # Mask off the upper 8 bits of primitive tag - lui $at, 0xff00 - and $v0, $at # Mask off the first 24 bits of OT entry - or $v0, $a1 # OR values together - - jr $ra - sw $v0, 0($a0) # Store result to OT - diff --git a/libpsn00b/psxgpu/clearotagr.s b/libpsn00b/psxgpu/clearotagr.s deleted file mode 100644 index 562cad4..0000000 --- a/libpsn00b/psxgpu/clearotagr.s +++ /dev/null @@ -1,21 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global ClearOTagR -.type ClearOTagR, @function -ClearOTagR: - lui $a2, IOBASE - addi $v0, $a1, -1 - sll $v0, 2 - addu $a0, $v0 - sw $a0, DMA6_MADR($a2) - andi $a1, 0xffff - sw $a1, DMA6_BCR($a2) - lui $v0, 0x1100 - addiu $v0, 2 - jr $ra - sw $v0, DMA6_CHCR($a2) diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c new file mode 100644 index 0000000..cef1508 --- /dev/null +++ b/libpsn00b/psxgpu/common.c @@ -0,0 +1,299 @@ +/* + * PSn00bSDK GPU library (common functions) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <stdio.h> +#include <psxetc.h> +#include <psxapi.h> +#include <psxgpu.h> +#include <hwregs_c.h> + +#define QUEUE_LENGTH 16 +#define DMA_CHUNK_LENGTH 8 +#define VSYNC_TIMEOUT 0x100000 + +static void _default_vsync_halt(void); + +/* Internal globals */ + +GPU_VideoMode _gpu_video_mode; + +static void (*_vsync_halt_func)(void) = &_default_vsync_halt; 
+static void (*_vsync_callback)(void) = (void *) 0; +static void (*_drawsync_callback)(void) = (void *) 0; + +static const uint32_t *volatile _draw_queue[QUEUE_LENGTH]; +static volatile uint8_t _queue_head, _queue_tail, _queue_length; +static volatile uint32_t _vblank_counter; +static volatile uint16_t _last_hblank; + +/* Interrupt handlers */ + +static void _vblank_handler(void) { + _vblank_counter++; + + if (_vsync_callback) + _vsync_callback(); +} + +static void _gpu_dma_handler(void) { + //while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + + if (_queue_length) { + DrawOTag2(_draw_queue[_queue_head++]); + + _queue_length--; + _queue_head %= QUEUE_LENGTH; + } else { + GPU_GP1 = 0x04000000; // Disable DMA request + + if (_drawsync_callback) + _drawsync_callback(); + } +} + +/* GPU reset and system initialization */ + +void ResetGraph(int mode) { + // Perform some basic system initialization when ResetGraph() is called for + // the first time. + if (!ResetCallback()) { + EnterCriticalSection(); + InterruptCallback(0, &_vblank_handler); + DMACallback(2, &_gpu_dma_handler); + + _gpu_video_mode = (GPU_GP1 >> 20) & 1; + ExitCriticalSection(); + + printf("psxgpu: setup done, default mode is %s\n", _gpu_video_mode ? 
"PAL" : "NTSC"); + } + + if (mode == 3) { + GPU_GP1 = 0x01000000; // Reset command buffer + return; + } + + DMA_DPCR |= 0x0b000b00; // Enable DMA2 and DMA6 + DMA_CHCR(2) = 0x00000201; // Stop DMA2 + DMA_CHCR(6) = 0x00000200; // Stop DMA6 + + if (mode == 1) { + GPU_GP1 = 0x01000000; // Reset command buffer + return; + } + + GPU_GP1 = 0x00000000; // Reset GPU + TIMER_CTRL(0) = 0x0500; + TIMER_CTRL(1) = 0x0500; + + _queue_head = 0; + _queue_tail = 0; + _queue_length = 0; + _vblank_counter = 0; + _last_hblank = 0; +} + +/* Syncing API */ + +// TODO: add support for no$psx's "halt" register +static void _default_vsync_halt(void) { + int counter = _vblank_counter; + + for (int i = VSYNC_TIMEOUT; i; i--) { + if (counter != _vblank_counter) + return; + } + + printf("psxgpu: VSync() timeout\n"); + ChangeClearPAD(0); + ChangeClearRCnt(3, 0); +} + +int VSync(int mode) { + uint16_t delta = (TIMER_VALUE(1) - _last_hblank) & 0xffff; + if (mode == 1) + return delta; + if (mode < 0) + return _vblank_counter; + + uint32_t status = GPU_GP1; + + // Wait for at least one vertical blank event to occur. + do { + _vsync_halt_func(); + + // If interlaced mode is enabled, wait until the GPU starts displaying + // the next field. + if (status & (1 << 22)) { + while (!((GPU_GP1 ^ status) & (1 << 31))) + __asm__ volatile(""); + } + } while ((--mode) > 0); + + _last_hblank = TIMER_VALUE(1); + return delta; +} + +int DrawSync(int mode) { + if (mode) + return (DMA_BCR(2) >> 16); + + // Wait for the queue to become empty. + // TODO: add a timeout + while (_queue_length) + __asm__ volatile(""); + + // Wait for any DMA transfer to finish if DMA is enabled. 
+	if (GPU_GP1 & (3 << 29)) {
+		while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24)))
+			__asm__ volatile("");
+	}
+
+	while (!(GPU_GP1 & (1 << 26)))
+		__asm__ volatile("");
+
+	return 0;
+}
+
+void *VSyncHaltFunction(void (*func)(void)) {
+	void *old_callback = _vsync_halt_func;
+	_vsync_halt_func = func;
+
+	return old_callback;
+}
+
+void *VSyncCallback(void (*func)(void)) {
+	EnterCriticalSection();
+
+	void *old_callback = _vsync_callback;
+	_vsync_callback = func;
+
+	ExitCriticalSection();
+	return old_callback;
+}
+
+void *DrawSyncCallback(void (*func)(void)) {
+	EnterCriticalSection();
+
+	void *old_callback = _drawsync_callback;
+	_drawsync_callback = func;
+
+	ExitCriticalSection();
+	return old_callback;
+}
+
+/* OT and primitive drawing API */
+
+// Starts a DMA6 transfer over `length` entries of the ordering table,
+// beginning at its last entry. The transfer is not waited for (the wait loop
+// below is commented out).
+void ClearOTagR(uint32_t *ot, size_t length) {
+	// An empty table has no last entry: &ot[length - 1] would point before
+	// the start of the table, so there is nothing valid to hand to the DMA.
+	if (!length)
+		return;
+
+	DMA_MADR(6) = (uint32_t) &ot[length - 1];
+	DMA_BCR(6) = length & 0xffff;
+	DMA_CHCR(6) = 0x11000002;
+
+	//while (DMA_CHCR(6) & (1 << 24))
+		//__asm__ volatile("");
+}
+
+// Software counterpart of ClearOTagR(): links every entry to the one that
+// follows it (low 24 bits of its address) and terminates the last entry with
+// 0x00ffffff.
+void ClearOTag(uint32_t *ot, size_t length) {
+	// DMA6 only supports writing to RAM in reverse order (last to first), so
+	// the OT has to be cleared in software here. This function is thus much
+	// slower than ClearOTagR().
+	// https://problemkaputt.de/psx-spx.htm#dmachannels
+
+	// length is unsigned, so with an empty table (length - 1) would wrap
+	// around and both the loop and the terminator write would go out of
+	// bounds.
+	if (!length)
+		return;
+
+	for (size_t i = 0; i < (length - 1); i++)
+		ot[i] = (uint32_t) &ot[i + 1] & 0x00ffffff;
+
+	ot[length - 1] = 0x00ffffff;
+}
+
+void DrawOTag(const uint32_t *ot) {
+	// If GPU DMA is currently busy, append the OT to the queue instead of
+	// drawing it immediately. Note that interrupts must be disabled *prior* to
+	// checking if DMA is busy; disabling them afterwards would create a race
+	// condition where the DMA transfer could end while interrupts are being
+	// disabled. Interrupts are disabled through the IRQ_MASK register rather
+	// than by calling EnterCriticalSection() for performance reasons.
+	uint16_t mask = IRQ_MASK;
+	IRQ_MASK = 0;
+
+	if (DMA_CHCR(2) & (1 << 24)) {
+		if (_queue_length < QUEUE_LENGTH) {
+			_draw_queue[_queue_tail++] = ot;
+
+			_queue_length++;
+			_queue_tail %= QUEUE_LENGTH;
+
+			IRQ_MASK = mask;
+			return;
+		}
+
+		IRQ_MASK = mask;
+		printf("psxgpu: DrawOTag() failed, draw queue full\n");
+		return;
+	}
+
+	IRQ_MASK = mask;
+	DrawOTag2(ot);
+}
+
+void DrawOTag2(const uint32_t *ot) {
+	GPU_GP1 = 0x04000002;
+
+	while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24)))
+		__asm__ volatile("");
+
+	DMA_MADR(2) = (uint32_t) ot;
+	DMA_BCR(2) = 0;
+	DMA_CHCR(2) = 0x01000401;
+}
+
+void DrawPrim(const uint32_t *pri) {
+	size_t length = getlen(pri);
+
+	DrawSync(0);
+	GPU_GP1 = 0x04000002;
+
+	// NOTE: if length >= DMA_CHUNK_LENGTH then it also has to be a multiple of
+	// DMA_CHUNK_LENGTH, otherwise the DMA channel will get stuck waiting for
+	// more data indefinitely.
+	DMA_MADR(2) = (uint32_t) &pri[1];
+	if (length < DMA_CHUNK_LENGTH)
+		DMA_BCR(2) = 0x00010000 | length;
+	else
+		DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+
+	DMA_CHCR(2) = 0x01000201;
+}
+
+void AddPrim(uint32_t *ot, const void *pri) {
+	addPrim(ot, pri);
+}
+
+/* Misc. functions */
+
+GPU_VideoMode GetVideoMode(void) {
+	return _gpu_video_mode;
+}
+
+// Switches the video standard. Bit 0 of `mode` is stored in _gpu_video_mode
+// and placed in bit 3 of a GP1 0x08 command; the remaining command bits are
+// copied from the current GPUSTAT value so they are left as they were.
+void SetVideoMode(GPU_VideoMode mode) {
+	uint32_t _mode, stat = GPU_GP1;
+
+	_gpu_video_mode = mode & 1;
+
+	_mode = (mode & 1) << 3;
+	_mode |= (stat >> 17) & 0x37; // GPUSTAT 17-22 -> cmd bits 0-5
+	_mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> cmd bit 6
+	_mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> cmd bit 7
+
+	// Send the word assembled above. Writing the raw `mode` argument here
+	// would drop every bit gathered from GPUSTAT and put the standard flag in
+	// bit 0 instead of bit 3.
+	GPU_GP1 = 0x08000000 | _mode;
+}
+
+int GetODE(void) {
+	return (GPU_GP1 >> 31);
+}
+
+void SetDispMask(int mask) {
+	GPU_GP1 = 0x03000000 | (mask ?
0 : 1); +} diff --git a/libpsn00b/psxgpu/dbugfont.c b/libpsn00b/psxgpu/dbugfont.c deleted file mode 100644 index ff21d84..0000000 --- a/libpsn00b/psxgpu/dbugfont.c +++ /dev/null @@ -1,144 +0,0 @@ -unsigned int dbugfont_size=2112; -unsigned char dbugfont[] = { -0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x00,0x00,0xe0, -0x01,0x10,0x00,0x01,0x00,0x00,0x00,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0c,0x08,0x00,0x00,0x80,0x02,0x00,0x00, -0x20,0x00,0x20,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x01,0x01, -0x00,0x10,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x11, -0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x11,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x01, -0x00,0x00,0x01,0x01,0x00,0x01,0x01,0x01,0x00,0x10,0x00,0x10,0x00,0x10,0x00, -0x01,0x00,0x00,0x11,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x01,0x00, -0x00,0x00,0x00,0x00,0x00,0x10,0x11,0x11,0x00,0x01,0x01,0x00,0x00,0x11,0x01, -0x01,0x00,0x10,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x10,0x00,0x00,0x00,0x00, -0x10,0x00,0x00,0x01,0x01,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x10,0x00,0x11, -0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x10,0x11, -0x00,0x00,0x10,0x10,0x00,0x00,0x01,0x01,0x01,0x00,0x00,0x00,0x00,0x00,0x10, -0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x11,0x00,0x00,0x11,0x11,0x01,0x00, -0x00,0x00,0x00,0x00,0x11,0x11,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00, -0x00,0x10,0x10,0x10,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x11, 
-0x11,0x00,0x00,0x01,0x01,0x00,0x00,0x01,0x01,0x00,0x01,0x10,0x01,0x00,0x00, -0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x01,0x01,0x01,0x00, -0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x01,0x00,0x00,0x10,0x01,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x01,0x01,0x00,0x01,0x01,0x01,0x00,0x10,0x10,0x11,0x00,0x01, -0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x11,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x00,0x01, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x10,0x11,0x00,0x00,0x01, -0x00,0x01,0x00,0x10,0x01,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00, -0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x11, -0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x11,0x01,0x00,0x00, -0x11,0x01,0x00,0x00,0x10,0x01,0x00,0x10,0x11,0x11,0x00,0x00,0x10,0x01,0x00, -0x10,0x11,0x11,0x00,0x00,0x11,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x10,0x00, -0x00,0x00,0x00,0x11,0x01,0x00,0x00,0x11,0x11,0x00,0x00,0x11,0x00,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x01,0x01,0x00,0x10,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x00,0x11,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x01,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x01,0x10, 
-0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x01,0x00, -0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x00,0x11,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x01, -0x00,0x00,0x10,0x11,0x11,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x10, -0x10,0x01,0x01,0x00,0x10,0x00,0x00,0x00,0x10,0x01,0x00,0x00,0x10,0x01,0x00, -0x10,0x11,0x11,0x00,0x10,0x11,0x01,0x00,0x10,0x11,0x01,0x00,0x00,0x00,0x01, -0x00,0x00,0x11,0x01,0x00,0x00,0x11,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x01,0x00,0x10,0x10,0x01,0x01,0x00,0x10,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x00,0x10,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x10,0x11,0x11,0x00,0x00, -0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x10,0x11,0x00,0x00,0x10,0x00,0x00, -0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x00,0x00, -0x01,0x00,0x00,0x11,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x10,0x11,0x11,0x00,0x10,0x11,0x11,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x01, -0x00,0x10,0x11,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0x10,0x00,0x00,0x00,0x11, -0x01,0x00,0x00,0x11,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x00, -0x00,0x11,0x11,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10, -0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
-0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x11,0x01, -0x00,0x00,0x11,0x01,0x00,0x10,0x11,0x01,0x00,0x10,0x11,0x11,0x00,0x10,0x11, -0x11,0x00,0x00,0x11,0x11,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x11,0x00,0x00, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x00,0x11,0x01,0x00,0x10,0x11,0x01,0x00,0x00,0x01,0x01, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00, -0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x00, -0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x00, -0x10,0x01,0x11,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10, -0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x01,0x00, -0x10,0x00,0x00,0x00,0x10,0x10,0x10,0x00,0x10,0x01,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x01,0x00,0x10,0x00, -0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x00,0x00,0x10,0x11,0x00,0x00,0x10, -0x10,0x11,0x00,0x10,0x11,0x11,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x10,0x00, -0x10,0x11,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x10,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x11,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x10, -0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00,0x00, -0x10,0x00,0x10,0x00,0x10,0x00,0x01,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x11,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x01,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00, -0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00, 
-0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00, -0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x01,0x00,0x00,0x11,0x01,0x00,0x10, -0x11,0x01,0x00,0x10,0x11,0x11,0x00,0x10,0x00,0x00,0x00,0x00,0x11,0x01,0x00, -0x10,0x00,0x10,0x00,0x10,0x11,0x11,0x00,0x00,0x11,0x01,0x00,0x10,0x00,0x10, -0x00,0x10,0x11,0x11,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x11, -0x01,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x01,0x00,0x10, -0x11,0x01,0x00,0x00,0x11,0x01,0x00,0x10,0x11,0x11,0x00,0x10,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x11,0x11,0x00,0x00,0x11,0x01,0x00,0x01,0x00,0x00,0x00,0x00,0x11, -0x01,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00,0x00, -0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x01,0x00,0x00,0x10,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x11,0x01,0x00,0x00,0x00,0x00,0x00,0x00, -0x11,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x00, -0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x00,0x01,0x01,0x00,0x10,0x00,0x10,0x00,0x00,0x00,0x01,0x00,0x00,0x01, -0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x01,0x00,0x10,0x01,0x11,0x00,0x00, -0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x11,0x01,0x00, -0x00,0x11,0x01,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x10, 
-0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x10,0x10,0x00, -0x10,0x00,0x10,0x00,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x10, -0x00,0x10,0x00,0x10,0x00,0x10,0x10,0x10,0x00,0x00,0x01,0x01,0x00,0x00,0x10, -0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x10,0x00,0x01,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00, -0x00,0x10,0x00,0x10,0x00,0x00,0x01,0x01,0x00,0x10,0x01,0x11,0x00,0x10,0x00, -0x10,0x00,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x00,0x10,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x11,0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x11,0x01, -0x00,0x00,0x10,0x00,0x00,0x00,0x11,0x01,0x00,0x00,0x10,0x00,0x00,0x10,0x00, -0x10,0x00,0x10,0x00,0x10,0x00,0x00,0x10,0x00,0x00,0x10,0x11,0x11,0x00,0x00, -0x11,0x01,0x00,0x00,0x00,0x00,0x01,0x00,0x11,0x01,0x00,0x00,0x00,0x00,0x00, -0x11,0x11,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -}; diff --git a/libpsn00b/psxgpu/drawotag.s b/libpsn00b/psxgpu/drawotag.s deleted file mode 100644 index 3cb0db0..0000000 --- a/libpsn00b/psxgpu/drawotag.s +++ /dev/null @@ -1,38 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global DrawOTag -.type DrawOTag, @function -DrawOTag: - addiu $sp, -4 - sw $ra, 0($sp) - - lui $a3, IOBASE # I/O segment base - - lui $v0, 0x0400 # Set DMA direction to CPUtoGPU - ori $v0, 0x2 - sw $v0, GPU_GP1($a3) - -.Lgpu_wait: # Wait for GPU to be ready for commands & DMA - jal ReadGPUstat - 
nop - srl $v0, 26 - andi $v0, 1 - beqz $v0, .Lgpu_wait - nop - - sw $a0, DMA2_MADR($a3) # Set DMA base address to specified OT - sw $0 , DMA2_BCR($a3) - - lui $v0, 0x0100 # Begin OT transfer! - ori $v0, 0x0401 - sw $v0, DMA2_CHCR($a3) - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop diff --git a/libpsn00b/psxgpu/drawprim.s b/libpsn00b/psxgpu/drawprim.s deleted file mode 100644 index d62c202..0000000 --- a/libpsn00b/psxgpu/drawprim.s +++ /dev/null @@ -1,41 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.text - -.global DrawPrim -.type DrawPrim, @function -DrawPrim: - - addiu $sp, -8 - sw $ra, 0($sp) - sw $s0, 4($sp) - - move $s0, $a0 # Wait for GPU to complete - jal DrawSync - move $a0, $0 - - lui $a3, IOBASE - lui $v0, 0x0400 # Set transfer direction to off - sw $v0, GPU_GP1($a3) - - move $a0, $s0 - lbu $a1, 3($a0) # Get length of primitive packet - addiu $a0, 4 - addiu $a1, -1 - -.Ltransfer_loop: - lw $v0, 0($a0) - addiu $a0, 4 - sw $v0, GPU_GP0($a3) - bgtz $a1, .Ltransfer_loop - addiu $a1, -1 - - jal DrawSync - move $a0, $0 - - lw $ra, 0($sp) - lw $s0, 4($sp) - jr $ra - addiu $sp, 8
\ No newline at end of file diff --git a/libpsn00b/psxgpu/drawsync.s b/libpsn00b/psxgpu/drawsync.s deleted file mode 100644 index b671b03..0000000 --- a/libpsn00b/psxgpu/drawsync.s +++ /dev/null @@ -1,67 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global DrawSync -.type DrawSync, @function -DrawSync: - - bnez $a0, .Lgetwords - lui $a0, IOBASE - - addiu $sp, -4 - sw $ra, 0($sp) - - jal ReadGPUstat # Check if DMA enabled - nop - srl $v0, 29 - andi $v0, 0x3 - - beqz $v0, .Lsimple_wait - nop - -.Ldma_wait: - lw $v0, DMA2_CHCR($a0) - nop - srl $v0, 24 - andi $v0, 0x1 - bnez $v0, .Ldma_wait - nop - -.Lgpu_wait: - jal ReadGPUstat - nop - srl $v0, 26 - andi $v0, 0x5 - bne $v0, 5, .Lgpu_wait - nop - - b .Lexit - nop - -.Lsimple_wait: # Wait for GPU to be ready for next DMA - jal ReadGPUstat - nop - srl $v0, 28 - andi $v0, 0x1 - beqz $v0, .Lsimple_wait - nop - -.Lexit: - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - -.Lgetwords: - - lw $v0, DMA2_BCR($a0) - nop - - jr $ra - srl $v0, 16 - diff --git a/libpsn00b/psxgpu/drawsynccallback.s b/libpsn00b/psxgpu/drawsynccallback.s deleted file mode 100644 index 22cfb7d..0000000 --- a/libpsn00b/psxgpu/drawsynccallback.s +++ /dev/null @@ -1,105 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.text - -.global DrawSyncCallback -.type DrawSyncCallback, @function -DrawSyncCallback: - - addiu $sp, -8 - sw $ra, 0($sp) - sw $a0, 4($sp) - - jal EnterCriticalSection - nop - - beqz $a0, .Luninstall - nop - - la $a1, _drawsync_handler - lw $a1, 4($sp) - jal DMACallback - li $a0, 2 - - b .Lcontinue - nop - -.Luninstall: - - move $a1, $0 - jal DMACallback - li $a0, 2 - -.Lcontinue: - - lw $a0, 4($sp) - la $v1, _drawsync_func - lw $v0, 0($v1) - sw $a0, 0($v1) - sw $v0, 4($sp) - -.Lexit: - - jal ExitCriticalSection - nop - - lw $ra, 0($sp) - lw $v0, 4($sp) - jr $ra - addiu $sp, 8 - - -.type _drawsync_handler, @function -_drawsync_handler: - -.Ldma_wait: - la $v0, _drawsync_func - lw $v0, 0($v0) - nop - beqz $v0, 
.Lskip - nop - - addiu $sp, -4 - sw $ra, 0($sp) - - lw $v0, DMA2_CHCR($a0) - nop - srl $v0, 24 - andi $v0, 0x1 - - bnez $v0, .Ldma_wait - nop - -.Lgpu_wait: - jal ReadGPUstat - nop - srl $v0, 28 - andi $v0, 0x1 - beqz $v0, .Lgpu_wait - nop - - la $v1, _drawsync_func - lw $v1, 0($v1) - - lui $v0, 0x0400 # Set DMA direction to off - sw $v0, GPU_GP1($a0) - - jalr $v1 - nop - - lw $ra, 0($sp) - addiu $sp, 4 - -.Lskip: - - jr $ra - nop - - -.data - -_drawsync_func: - .word 0 -
\ No newline at end of file diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c new file mode 100644 index 0000000..5642ad4 --- /dev/null +++ b/libpsn00b/psxgpu/env.c @@ -0,0 +1,212 @@ +/* + * PSn00bSDK GPU library (DRAWENV/DISPENV functions) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <psxgpu.h> +#include <hwregs_c.h> + +#define _min(x, y) (((x) < (y)) ? (x) : (y)) + +extern GPU_VideoMode _gpu_video_mode; + +/* Drawing API */ + +DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { + env->clip.x = x; + env->clip.y = y; + env->clip.w = w; + env->clip.h = h; + + env->ofs[0] = 0; + env->ofs[1] = 0; + + env->tw.x = 0; + env->tw.y = 0; + env->tw.w = 0; + env->tw.h = 0; + + env->tpage = 0x0a; + env->dtd = 1; + env->dfe = 0; + env->isbg = 0; + setRGB0(env, 0, 0, 0); + + env->dr_env.tag = 0; + return env; +} + +void DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { + DR_ENV *prim = &(env->dr_env); + + // All commands are grouped into a single display list packet for + // performance reasons (keep in mind that the GPU doesn't care about this + // as the display list is parsed by the DMA unit in the CPU and only the + // payload is sent to the GPU). 
+ setaddr(prim, ot); + setlen(prim, 4); + + // Set drawing area top left + prim->code[0] = 0xe3000000; + prim->code[0] |= env->clip.x & 0x3ff; + prim->code[0] |= (env->clip.y & 0x3ff) << 10; + + // Set drawing area bottom right + prim->code[1] = 0xe4000000; + prim->code[1] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff; + prim->code[1] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10; + + // Set drawing offset + prim->code[2] = 0xe5000000; + prim->code[2] |= (env->clip.x + env->ofs[0]) & 0x7ff; + prim->code[2] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11; + + // Texture page (reset active page and set dither/mask bits) + prim->code[3] = 0xe1000000; + prim->code[3] |= (env->dtd & 1) << 9; + prim->code[3] |= (env->dfe & 1) << 10; + + if (env->isbg) { + setlen(prim, 7); + + // Rectangle fill + // FIXME: reportedly this command doesn't accept height values >511... + prim->code[4] = 0x02000000; + //prim->code[4] |= env->r0 | (env->g0 << 8) | (env->b0 << 16); + prim->code[4] |= *((const uint32_t *) &(env->isbg)) >> 8; + //prim->code[5] = env->clip.x; + //prim->code[5] |= env->clip.y << 16; + prim->code[5] = *((const uint32_t *) &(env->clip.x)); + prim->code[6] = env->clip.w; + prim->code[6] |= _min(env->clip.h, 0x1ff) << 16; + } + + //while (!(GPU_GP1 & (1 << 26))) + //__asm__ volatile(""); + + DrawOTag((const uint32_t *) prim); +} + +void PutDrawEnv(DRAWENV *env) { + DrawOTagEnv((const uint32_t *) 0x00ffffff, env); +} + +// This function skips rebuilding the cached packet whenever possible and is +// useful if the DRAWENV structure is never modified (which is the case most of +// the time). 
+void PutDrawEnvFast(DRAWENV *env) { + if (!(env->dr_env.tag)) { + DrawOTagEnv((const uint32_t *) 0x00ffffff, env); + return; + } + + DrawOTag((const uint32_t *) &(env->dr_env)); +} + +/* Display API */ + +DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) { + env->disp.x = x; + env->disp.y = y; + env->disp.w = w; + env->disp.h = h; + + env->screen.x = 0; + env->screen.y = 0; + env->screen.w = 0; + env->screen.h = 0; + + env->isinter = 0; + env->isrgb24 = 0; + env->reverse = 0; + + return env; +} + +void PutDispEnv(const DISPENV *env) { + uint32_t h_range, v_range, mode, fb_pos; + + mode = _gpu_video_mode << 3; + mode |= (env->isrgb24 & 1) << 4; + mode |= (env->isinter & 1) << 5; + mode |= (env->reverse & 1) << 7; + + if (env->disp.h >= 256) + mode |= 1 << 2; + + // Calculate the horizontal display range values. The original code was + // this bad; in actual fact it was even worse due to being written in + // assembly and using slow multiplication even when not necessary. + int offset, span, default_span = 2560; + + if (env->disp.w > 560) { + // 640 pixels + mode |= 3; + offset = 620; + span = env->screen.w * 4; + } else if (env->disp.w > 400) { + // 512 pixels + mode |= 2; + offset = 615; + span = env->screen.w * 4 + env->screen.w; + } else if (env->disp.w > 352) { + // 384 pixels (this mode is weird) + mode |= 1 << 6; + offset = 539; + span = env->screen.w * 8 - env->screen.w; + default_span = 2688; + } else if (env->disp.w > 280) { + // 320 pixels + mode |= 1; + offset = 600; + span = env->screen.w * 8; + } else { + // 256 pixels + offset = 590; + span = env->screen.w * 8 + env->screen.w * 2; + } + + offset += env->screen.x * 4; + if (!span) + span = default_span; + + h_range = offset & 0xfff; + h_range |= ((offset + span) & 0xfff) << 12; + + // Calculate the vertical display range values. + offset = 16 + env->screen.y; + span = env->screen.h ? 
env->screen.h : 240;
+
+	v_range = offset & 0x3ff;
+	v_range |= ((offset + span) & 0x3ff) << 10;
+
+	fb_pos = env->disp.x & 0x3ff;
+	fb_pos |= (env->disp.y & 0x1ff) << 10;
+
+	GPU_GP1 = 0x06000000 | h_range; // Set horizontal display range
+	GPU_GP1 = 0x07000000 | v_range; // Set vertical display range
+	GPU_GP1 = 0x08000000 | mode; // Set video mode
+	GPU_GP1 = 0x05000000 | fb_pos; // Set VRAM location to display
+}
+
+/* Deprecated "raw" display API */
+
+// Programs the display ranges, video mode and framebuffer position straight
+// from a DISPENV_RAW. vid_xpos/vid_ypos are added as offsets to fixed range
+// values and vid_mode is passed through to the GP1 0x08 command as is.
+void PutDispEnvRaw(const DISPENV_RAW *env) {
+	uint32_t h_range, v_range, fb_pos;
+
+	// Both ends are 12-bit fields, same as in PutDispEnv(); mask them so an
+	// out-of-range vid_xpos cannot spill into the neighbouring field or the
+	// command byte.
+	h_range = (608 + env->vid_xpos) & 0xfff;
+	h_range |= ((3168 + env->vid_xpos) & 0xfff) << 12;
+
+	// FIXME: these hardcoded values are for NTSC displays.
+	// The end of the vertical range starts at bit 10 (see PutDispEnv()), not
+	// bit 12.
+	v_range = (136 - 120 + env->vid_ypos) & 0x3ff;
+	v_range |= ((136 + 120 + env->vid_ypos) & 0x3ff) << 10;
+
+	fb_pos = env->fb_x & 0x3ff;
+	fb_pos |= (env->fb_y & 0x1ff) << 10;
+
+	GPU_GP1 = 0x06000000 | h_range; // Set horizontal display range
+	GPU_GP1 = 0x07000000 | v_range; // Set vertical display range
+	GPU_GP1 = 0x08000000 | env->vid_mode; // Set video mode
+	GPU_GP1 = 0x05000000 | fb_pos; // Set VRAM location to display
+}
diff --git a/libpsn00b/psxgpu/fntsort.c b/libpsn00b/psxgpu/fntsort.c
deleted file mode 100644
index 9358793..0000000
--- a/libpsn00b/psxgpu/fntsort.c
+++ /dev/null
@@ -1,48 +0,0 @@
-#include <sys/types.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <psxgpu.h>
-
-extern unsigned short _font_tpage;
-extern unsigned short _font_clut;
-
-char *FntSort(u_long *ot, char *pri, int x, int y, const char *text) {
-
-	DR_TPAGE *tpage;
-	SPRT_8 *sprt = (SPRT_8*)pri;
-	int i;
-
-	while( *text != 0 ) {
-
-		i = toupper( *text )-32;
-
-		if( i > 0 ) {
-
-			i--;
-			setSprt8( sprt );
-			setRGB0( sprt, 128, 128, 128 );
-			setXY0( sprt, x, y );
-			setUV0( sprt, (i%16)<<3, (i>>4)<<3 );
-			sprt->clut = _font_clut;
-			addPrim( ot, sprt );
-			sprt++;
-
-		}
-
-		x += 8;
-		text++;
-
-	}
-
-	pri = (char*)sprt;
-
-	tpage = (DR_TPAGE*)pri;
-	tpage->code[0] = _font_tpage;
-	setlen( tpage, 1 );
-
setcode( tpage, 0xe1 ); - addPrim( ot, pri ); - pri += sizeof(DR_TPAGE); - - return pri; - -}
\ No newline at end of file diff --git a/libpsn00b/psxgpu/font.c b/libpsn00b/psxgpu/font.c index 4c715a9..2d4105f 100644 --- a/libpsn00b/psxgpu/font.c +++ b/libpsn00b/psxgpu/font.c @@ -1,4 +1,4 @@ -#include <sys/types.h> +#include <stdint.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -6,29 +6,28 @@ #include <psxgpu.h> typedef struct _fnt_stream { - char *txtbuff; - char *txtnext; - char *pribuff; - short x,y; - short w,h; - int bg; - int maxchars; + char *txtbuff; + char *txtnext; + char *pribuff; + int16_t x, y; + int16_t w, h; + int bg, maxchars; } _fnt_stream; static _fnt_stream _stream[8]; static int _nstreams = 0; -u_short _font_tpage; -u_short _font_clut; +uint16_t _font_tpage; +uint16_t _font_clut; -extern u_char dbugfont[]; +extern uint8_t _gpu_debug_font[]; void FntLoad(int x, int y) { RECT pos; TIM_IMAGE tim; - GetTimInfo( (u_long*)dbugfont, &tim ); + GetTimInfo( (const uint32_t *) _gpu_debug_font, &tim ); // Load font image pos = *tim.prect; @@ -215,7 +214,7 @@ char *FntFlush(int id) { // Draw the primitives DrawSync(0); - DrawOTag((u_long*)_stream[id].pribuff); + DrawOTag((uint32_t*)_stream[id].pribuff); DrawSync(0); _stream[id].txtnext = _stream[id].txtbuff; @@ -223,4 +222,45 @@ char *FntFlush(int id) { return _stream[id].pribuff; -}
\ No newline at end of file +} + +char *FntSort(uint32_t *ot, char *pri, int x, int y, const char *text) { + + DR_TPAGE *tpage; + SPRT_8 *sprt = (SPRT_8*)pri; + int i; + + while( *text != 0 ) { + + i = toupper( *text )-32; + + if( i > 0 ) { + + i--; + setSprt8( sprt ); + setRGB0( sprt, 128, 128, 128 ); + setXY0( sprt, x, y ); + setUV0( sprt, (i%16)<<3, (i>>4)<<3 ); + sprt->clut = _font_clut; + addPrim( ot, sprt ); + sprt++; + + } + + x += 8; + text++; + + } + + pri = (char*)sprt; + + tpage = (DR_TPAGE*)pri; + tpage->code[0] = _font_tpage; + setlen( tpage, 1 ); + setcode( tpage, 0xe1 ); + addPrim( ot, pri ); + pri += sizeof(DR_TPAGE); + + return pri; + +} diff --git a/libpsn00b/psxgpu/getode.s b/libpsn00b/psxgpu/getode.s deleted file mode 100644 index 5dc1e70..0000000 --- a/libpsn00b/psxgpu/getode.s +++ /dev/null @@ -1,20 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global GetODE -.type GetODE, @function -GetODE: - addiu $sp, -4 - sw $ra, 0($sp) - jal ReadGPUstat - nop - srl $v0, 31 - andi $v0, 1 - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop diff --git a/libpsn00b/psxgpu/gettimimage.c b/libpsn00b/psxgpu/gettimimage.c deleted file mode 100644 index 5598e07..0000000 --- a/libpsn00b/psxgpu/gettimimage.c +++ /dev/null @@ -1,40 +0,0 @@ -#include <sys/types.h> -#include <psxgpu.h> - -int GetTimInfo(const u_long *tim, TIM_IMAGE *timimg) { - - u_long *rtim; - - // Check ID - if( ( tim[0]&0xff ) != 0x10 ) { - return 1; - } - - // Check version - if( ( (tim[0]>>8)&0xff ) != 0x0 ) { - return 2; - } - - timimg->mode = tim[1]; - rtim = tim+2; - - // Clut present? 
- if( timimg->mode & 0x8 ) { - - timimg->crect = (RECT*)(rtim+1); - timimg->caddr = (u_long*)(rtim+3); - - rtim += rtim[0]>>2; - - } else { - - timimg->caddr = 0; - - } - - timimg->prect = (RECT*)(rtim+1); - timimg->paddr = (u_long*)(rtim+3); - - return 0; - -} diff --git a/libpsn00b/psxgpu/getvideomode.s b/libpsn00b/psxgpu/getvideomode.s deleted file mode 100644 index 6f1613c..0000000 --- a/libpsn00b/psxgpu/getvideomode.s +++ /dev/null @@ -1,14 +0,0 @@ -.set noreorder - - -.section .text - -.global GetVideoMode -.type GetVideoMode, @function -GetVideoMode: - - la $v0, _gpu_standard - lw $v0, 0($v0) - - jr $ra - nop diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c new file mode 100644 index 0000000..da51e7d --- /dev/null +++ b/libpsn00b/psxgpu/image.c @@ -0,0 +1,115 @@ +/* + * PSn00bSDK GPU library (image and VRAM transfer functions) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <stdio.h> +#include <psxgpu.h> +#include <hwregs_c.h> + +#define DMA_CHUNK_LENGTH 8 + +/* VRAM transfer API */ + +static void _load_store_image( + uint32_t command, + int mode, + const RECT *rect, + uint32_t *data +) { + size_t length = rect->w * rect->h; + if (length % 2) + printf("psxgpu: can't transfer an odd number of pixels\n"); + + length /= 2; + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { + printf("psxgpu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + length += DMA_CHUNK_LENGTH - 1; + } + + DrawSync(0); + GPU_GP1 = 0x04000000; // Disable DMA request + GPU_GP0 = 0x01000000; // Flush cache + + GPU_GP0 = command; + //GPU_GP0 = rect->x | (rect->y << 16); + GPU_GP0 = *((const uint32_t *) &(rect->x)); + //GPU_GP0 = rect->w | (rect->h << 16); + GPU_GP0 = *((const uint32_t *) &(rect->w)); + + // Enable DMA request, route to GP0 (2) or from GPU_READ (3) + GPU_GP1 = 0x04000000 | mode; + + DMA_MADR(2) = (uint32_t) data; + if (length < DMA_CHUNK_LENGTH) + DMA_BCR(2) = 0x00010000 | 
length; + else + DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + + DMA_CHCR(2) = 0x01000200 | ((mode & 1) ^ 1); +} + +void LoadImage(const RECT *rect, const uint32_t *data) { + _load_store_image(0xa0000000, 2, rect, (uint32_t *) data); +} + +void StoreImage(const RECT *rect, uint32_t *data) { + _load_store_image(0xc0000000, 3, rect, data); +} + +/* .TIM image parsers */ + +// This is the only libgs function PSn00bSDK is ever going to implement. The +// difference from GetTimInfo() is that it copies RECTs rather than merely +// returning pointers to them, which become useless once the .TIM file is +// unloaded from main RAM. +int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) { + if ((*(tim++) & 0xffff) != 0x0010) + return 1; + + info->pmode = *(tim++); + if (info->pmode & 8) { + const uint32_t *palette_end = tim; + palette_end += *(tim++) / 4; + + *((uint32_t *) &(info->cx)) = *(tim++); + *((uint32_t *) &(info->cw)) = *(tim++); + info->clut = (uint32_t *) tim; + + tim = palette_end; + } else { + info->clut = 0; + } + + tim++; + *((uint32_t *) &(info->px)) = *(tim++); + *((uint32_t *) &(info->pw)) = *(tim++); + info->pixel = (uint32_t *) tim; + + return 0; +} + +int GetTimInfo(const uint32_t *tim, TIM_IMAGE *info) { + if ((*(tim++) & 0xffff) != 0x0010) + return 1; + + info->mode = *(tim++); + if (info->mode & 8) { + const uint32_t *palette_end = tim; + palette_end += *(tim++) / 4; + + info->crect = (RECT *) tim; + info->caddr = (uint32_t *) &tim[2]; + + tim = palette_end; + } else { + info->caddr = 0; + } + + tim++; + info->prect = (RECT *) tim; + info->paddr = (uint32_t *) &tim[2]; + + return 0; +} diff --git a/libpsn00b/psxgpu/loadimage.s b/libpsn00b/psxgpu/loadimage.s deleted file mode 100644 index 45f152f..0000000 --- a/libpsn00b/psxgpu/loadimage.s +++ /dev/null @@ -1,70 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.set RECT_x, 0 -.set RECT_y, 2 -.set RECT_w, 4 -.set RECT_h, 6 - -.section .text - - -.global LoadImage -.type 
LoadImage, @function -LoadImage: - addiu $sp, -8 - sw $ra, 0($sp) - sw $s0, 4($sp) - - lui $s0, IOBASE # Set I/O segment base address - -.Lgpu_wait: # Wait for GPU to be ready for commands and DMA - jal ReadGPUstat - nop - srl $v0, 0x1a - andi $v0, 0x5 - li $v1, 5 - #srl $v0, 28 - #andi $v0, 1 - bne $v0, $v1, .Lgpu_wait - nop - - lui $v0, 0x400 # Set DMA direction to off - sw $v0, GPU_GP1($s0) - - lui $v0, 0x0100 # Clear GPU cache - sw $v0, GPU_GP0($s0) - - lui $v1, 0xa000 # Load image to VRAM - sw $v1, GPU_GP0($s0) - lw $v0, RECT_x($a0) # Set XY and dimensions of image - lw $v1, RECT_w($a0) - sw $v0, GPU_GP0($s0) - sw $v1, GPU_GP0($s0) - - lui $v0, 0x400 # Set DMA direction to CPUtoVRAM - ori $v0, 0x2 - sw $v0, GPU_GP1($s0) - - lhu $v0, RECT_w($a0) # Get rectangle size - lhu $v1, RECT_h($a0) - nop - mult $v0, $v1 # Calculate BCR value - mflo $v1 - srl $v1, 0x4 - sll $v1, 0x10 - ori $v1, 0x8 - - sw $a1, DMA2_MADR($s0) # Set DMA base address and transfer length - sw $v1, DMA2_BCR($s0) - - lui $v0, 0x100 # Start DMA transfer - ori $v0, 0x201 - sw $v0, DMA2_CHCR($s0) - - lw $ra, 0($sp) - lw $s0, 4($sp) - jr $ra - addiu $sp, 8 - diff --git a/libpsn00b/psxgpu/putdispenv.s b/libpsn00b/psxgpu/putdispenv.s deleted file mode 100644 index fc09454..0000000 --- a/libpsn00b/psxgpu/putdispenv.s +++ /dev/null @@ -1,174 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.set DISP_dx, 0 -.set DISP_dy, 2 -.set DISP_dw, 4 -.set DISP_dh, 6 -.set DISP_sx, 8 -.set DISP_sy, 10 -.set DISP_sw, 12 -.set DISP_sh, 14 -.set DISP_inter, 16 -.set DISP_isrgb24, 17 -.set DISP_reverse, 18 - -.section .text - - -.global PutDispEnv -.type PutDispEnv, @function -PutDispEnv: - - lui $a3, IOBASE - - # Horizontal resolution stuff - - lh $a2, DISP_dw($a0) # Get X resolution - - lh $v0, DISP_sx($a0) - lh $v1, DISP_sw($a0) # Get X screen width - - move $a1, $0 # To use as mode value - - bgt $a2, 560, .Lmode_640 - nop - bgt $a2, 400, .Lmode_512 - nop - bgt $a2, 352, .Lmode_384 - nop - bgt $a2, 280, 
.Lmode_320 - nop - - .set noat - -.Lmode_256: - li $at, 10 - mult $at, $v1 - li $a2, 0x24e - sll $v0, 2 - add $a2, $v0 - b .Lmode_end - li $v1, 0xa00 -.Lmode_320: - li $at, 8 - mult $at, $v1 - li $a2, 0x258 - ori $a1, 0x01 - sll $v0, 2 - add $a2, $v0 - b .Lmode_end - li $v1, 0xa00 -.Lmode_384: - li $at, 7 - mult $at, $v1 - li $a2, 0x21b - ori $a1, 0x64 - sll $v0, 2 - add $a2, $v0 - b .Lmode_end - li $v1, 0xa80 -.Lmode_512: - li $at, 5 - mult $at, $v1 - li $a2, 0x267 - ori $a1, 0x02 - sll $v0, 2 - add $a2, $v0 - b .Lmode_end - li $v1, 0xa00 -.Lmode_640: - li $at, 4 - mult $at, $v1 - li $a2, 0x26c - ori $a1, 0x03 - sll $v0, 2 - add $a2, $v0 - li $v1, 0xa00 -.Lmode_end: - - .set at - - mflo $v0 - bnez $v0, .Lno_default # Check if screen with is non zero - nop - move $v0, $v1 # Use default if screen width is 0 -.Lno_default: - - addu $v0, $a2 # Apply horizontal display coordinates - sll $v0, 12 - andi $a2, 0xfff - or $a2, $v0 - lui $v0, 0x0600 - or $v0, $a2 - sw $v0, GPU_GP1($a3) - - # Vertical resolution - - lh $v0, DISP_dh($a0) - li $a2, 0x10 - ble $v0, 256, .Lmode_low - nop - -.Lmode_high: - ori $a1, 0x04 -.Lmode_low: - lh $v0, DISP_sy($a0) - lh $v1, DISP_sh($a0) - add $a2, $v0 - bnez $v1, .Lno_default_vert - nop - li $v1, 0xf0 -.Lno_default_vert: - add $v1, $a2 - and $a2, 0x3ff - sll $v1, 10 - or $v1, $a2 - lui $v0, 0x0700 - or $v1, $v0 - sw $v1, GPU_GP1($a3) - - # Video mode - - la $v0, _gpu_standard - lbu $v0, 0($v0) - nop - beqz $v0, .Lconfig_ntsc - nop -.Lconfig_pal: - ori $a1, 0x08 -.Lconfig_ntsc: - - lbu $v0, DISP_inter($a0) - lbu $v1, DISP_isrgb24($a0) - beqz $v0, .Lno_inter - nop - or $a1, 0x20 -.Lno_inter: - beqz $v1, .Lno_rgb24 - nop - or $a1, 0x10 -.Lno_rgb24: - lbu $v0, DISP_inter($a0) - nop - beqz $v0, .Lno_reverse - nop - or $a1, 0x80 -.Lno_reverse: - - lui $v0, 0x800 # Apply mode - or $a1, $v0 - sw $a1, GPU_GP1($a3) - - lhu $v0, DISP_dx($a0) # Set VRAM XY offset - lhu $v1, DISP_dy($a0) - andi $v0, 0x3ff - andi $v1, 0x1ff - sll $v1, 10 - or $v0, $v1 - 
lui $v1, 0x500 - or $v0, $v1 - - jr $ra - sw $v0, GPU_GP1($a3) diff --git a/libpsn00b/psxgpu/putdispenvraw.s b/libpsn00b/psxgpu/putdispenvraw.s deleted file mode 100644 index 747796f..0000000 --- a/libpsn00b/psxgpu/putdispenvraw.s +++ /dev/null @@ -1,71 +0,0 @@ -.set noreorder -.set noat - -.include "hwregs_a.h" - -.set DISP_mode, 0 -.set DISP_vxpos, 4 -.set DISP_vypos, 6 -.set DISP_fbx, 8 -.set DISP_fby, 10 - -.section .text - - -.global PutDispEnvRaw -.type PutDispEnvRaw, @function -PutDispEnvRaw: - addiu $sp, -8 - sw $ra, 0($sp) - sw $s0, 4($sp) - - lui $s0, IOBASE - - lh $at, DISP_vxpos($a0) # Set horizontal display range - li $v0, 608 - add $v0, $at - li $v1, 3168 - add $v1, $at - sll $v1, 12 - or $v0, $v1 - lui $v1, 0x600 - or $v1, $v0 - sw $v1, GPU_GP1($s0) - - lh $at, DISP_vypos($a0) # Set vertical display range (for NTSC) - li $v1, 120 # (values differet for PAL modes) - sub $v1, $at - li $v0, 136 - sub $v0, $v1 - andi $v0, 0x1ff - li $v1, 120 - add $v1, $at - li $at, 136 - add $at, $v1 - andi $at, 0x1ff - sll $at, 10 - or $v0, $at - lui $at, 0x700 - or $v0, $at - sw $v0, GPU_GP1($s0) - - lw $v0, DISP_mode($a0) # Set video mode - lui $at, 0x800 - or $v0, $at - sw $v0, GPU_GP1($s0) - - lhu $v0, DISP_fbx($a0) # Set VRAM XY offset - lhu $v1, DISP_fby($a0) - andi $v0, 0x3ff - andi $v1, 0x1ff - sll $v1, 10 - or $v0, $v1 - lui $v1, 0x500 - or $v0, $v1 - sw $v0, GPU_GP1($s0) - - lw $ra, 0($sp) - lw $s0, 4($sp) - jr $ra - addiu $sp, 8 - diff --git a/libpsn00b/psxgpu/putdrawenv.s b/libpsn00b/psxgpu/putdrawenv.s deleted file mode 100644 index c0d5676..0000000 --- a/libpsn00b/psxgpu/putdrawenv.s +++ /dev/null @@ -1,142 +0,0 @@ -.set noreorder -.set noat - -.include "hwregs_a.h" - -.set DRAW_x, 0 # Drawing area -.set DRAW_y, 2 -.set DRAW_w, 4 -.set DRAW_h, 6 -.set DRAW_ofx, 8 # Draw offset -.set DRAW_ofy, 10 -.set DRAW_tx, 12 # Texture window -.set DRAW_ty, 14 -.set DRAW_tw, 16 -.set DRAW_th, 18 -.set DRAW_tpage, 20 # TPage values -.set DRAW_dtd, 22 -.set DRAW_dfe, 23 
-.set DRAW_isbg, 24 # Clear draw area -.set DRAW_r0, 25 -.set DRAW_g0, 26 -.set DRAW_b0, 27 -.set DRAW_env, 28 - - -.section .text - -.global PutDrawEnv -.type PutDrawEnv, @function -PutDrawEnv: - addiu $sp, -4 - sw $ra, 0($sp) - - addiu $a1, $a0, DRAW_env - - li $v0, 0x04ffffff # Packet header (length+terminator) - sw $v0, 0($a1) - - lhu $v0, DRAW_x($a0) # Set draw area top-left - lhu $v1, DRAW_y($a0) - andi $v0, 0x3ff - andi $v1, 0x1ff - sll $v1, 10 - or $v0, $v1 - lui $v1, 0xe300 - or $v0, $v1 - sw $v0, 4($a1) # 1 - - .set noat - - lhu $v0, DRAW_w($a0) # Set draw area bottom-right - lhu $at, DRAW_x($a0) - addiu $v0, -1 - addu $at, $v0 - andi $at, 0x3ff - lhu $v1, DRAW_h($a0) - lhu $v0, DRAW_y($a0) - addiu $v1, -1 - addu $v0, $v1 - andi $v0, 0x1ff - sll $v0, 10 - or $at, $v0 - lui $v0, 0xe400 - or $at, $v0 - sw $at, 8($a1) # 2 - - lhu $v0, DRAW_x($a0) # Set drawing offset - lhu $v1, DRAW_ofx($a0) - nop - add $v0, $v1 - andi $at, $v0, 0x7ff - lhu $v0, DRAW_y($a0) - lhu $v1, DRAW_ofy($a0) - nop - add $v0, $v1 - andi $v0, 0x7ff - sll $v0, 11 - or $at, $v0 - lui $v0, 0xe500 - or $at, $v0 - sw $at, 12($a1) # 3 - - lhu $at, DRAW_tpage($a0) # Set tpage - lbu $v0, DRAW_dtd($a0) - lbu $v1, DRAW_dfe($a0) - andi $v0, 1 - and $v1, 1 - sll $v0, 9 - sll $v1, 10 - or $at, $v0 - or $at, $v1 - lui $v0, 0xe100 - or $at, $v0 - sw $at, 16($a1) # 4 - - .set at - - lbu $v0, DRAW_isbg($a0) - nop - beqz $v0, .Lno_fillVRAM - nop - - lw $v0, DRAW_isbg($a0) # FillVRAM - lui $v1, 0x0200 - srl $v0, 8 - or $v0, $v1 - sw $v0, 20($a1) # 5 - lw $v0, DRAW_x($a0) - lw $v1, DRAW_w($a0) - sw $v0, 24($a1) # 6 - - srl $v0, $v1, 16 # Workaround as rectangle primitives - blt $v0, 511, .Lno_overflow # don't accept a height of 512 - nop - - li $v0, 511 - sll $v0, 16 - andi $v1, 0xffff - or $v1, $v0 - -.Lno_overflow: - sw $v1, 28($a1) # 7 - li $v0, 0x07ffffff # Packet header (length+terminator) - sw $v0, 0($a1) - -.Lno_fillVRAM: - -.Lgpu_wait: # Wait for GPU to become ready for commands and DMA - jal 
ReadGPUstat - nop - srl $v0, 26 - andi $v0, 1 - beqz $v0, .Lgpu_wait - nop - - jal DrawOTag - move $a0, $a1 - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop diff --git a/libpsn00b/psxgpu/readgpustat.s b/libpsn00b/psxgpu/readgpustat.s deleted file mode 100644 index ffff4d7..0000000 --- a/libpsn00b/psxgpu/readgpustat.s +++ /dev/null @@ -1,14 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global ReadGPUstat -.type ReadGPUstat, @function -ReadGPUstat: - lui $v0, IOBASE - lw $v0, GPU_GP1($v0) - jr $ra - nop diff --git a/libpsn00b/psxgpu/readme.txt b/libpsn00b/psxgpu/readme.txt index b626e1d..67aa110 100644 --- a/libpsn00b/psxgpu/readme.txt +++ b/libpsn00b/psxgpu/readme.txt @@ -3,25 +3,18 @@ PSX GPU library, part of PSn00bSDK Licensed under Mozilla Public License - Open source implementation of the GPU library written mostly in MIPS -assembly. Supports DMA transfers for ordering table draw and transferring -image data to and from VRAM. The syntax is intentionally made to closely -resemble Sony's syntax for familiarity and to make porting homebrew made -using the official SDK to PSn00bSDK a little easier. - +Open source implementation of the GPU library written entirely in C. Supports +DMA transfers for drawing OTs (with an internal queue so DrawOTag() can be +called even when another OT is being drawn) and transferring image data to and +from VRAM. The syntax is intentionally made to closely resemble Sony's syntax +for familiarity and to make porting homebrew made using the official SDK to +PSn00bSDK a little easier. Library developer(s): - Lameguy64 - + Lameguy64 (initial implementation in assembly, debug font API) + spicyjpeg Library header(s): - hwregs_a.h (GNU assembler port defs) psxgpu.h - - -Todo list: - - * ClearOTag() function (non reverse version of ClearOTagR()) yet to be - implemented (but should be trivial). 
diff --git a/libpsn00b/psxgpu/resetgraph.s b/libpsn00b/psxgpu/resetgraph.s deleted file mode 100644 index 6327f02..0000000 --- a/libpsn00b/psxgpu/resetgraph.s +++ /dev/null @@ -1,363 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - -.set ISR_STACK_SIZE, 4096 - -.global ResetGraph # Resets the GPU and installs a -.type ResetGraph, @function # VSync event handler -ResetGraph: - addiu $sp, -8 # C style stack allocation (required if - sw $ra, 0($sp) # you call BIOS functions from asm) - sw $a0, 4($sp) - - la $a0, resetgraph_msg - move $a1, $0 - move $a2, $0 - la $a1, _irq_func_table - la $a2, _custom_exit - jal printf - addiu $sp, -16 - addiu $sp, 16 - - la $a0, sr_msg - mfc0 $a1, $12 - jal printf - addiu $sp, -16 - addiu $sp, 16 - - la $v0, _hooks_installed # Skip installing hooks if this function - lbu $v0, 0($v0) # has already been called before once - nop - bnez $v0, .Lskip_hook_init - nop - - jal EnterCriticalSection # Disable interrupts as LoadExec() keeps - nop # interrupts enabled when transferring - # execution to the loaded program - - lui $a3, IOBASE # Base address for I/O - - lui $v0, 0x3b33 # Enables DMA channel 6 (for ClearOTag) - ori $v0, 0x3b33 # Enables DMA channel 2 - sw $v0, DMA_DPCR($a3) - sw $0 , DMA_DICR($a3) # Clear DICR (not needed) - - sw $0 , IRQ_MASK($a3) # Clear IRQ settings - - la $v0, _hooks_installed # Set installed flag - li $v1, 0x1 - sb $v1, 0($v0) - - la $v0, _vsync_cb_func # Clear VSync callback function - sw $0 , 0($v0) - - la $a1, _vsync_irq_callback # Install VSync interrupt callback - jal InterruptCallback - li $a0, 0 - - jal RestartCallback - nop - - la $a0, cbhooks_msg - jal printf - addiu $sp, -16 - addiu $sp, 16 - - jal _96_remove # Remove CD handling left by the BIOS - nop - - la $a0, abouttoen_msg - jal printf - addiu $sp, -16 - addiu $sp, 16 - - jal ExitCriticalSection # Re-enable interrupts - nop - - la $a0, enableint_msg - jal printf - addiu $sp, -16 - addiu $sp, 16 - -.Lskip_hook_init: - - lui $a3, 
IOBASE - - lw $v0, GPU_GP1($a3) # Get video standard - lui $v1, 0x0010 - and $v0, $v1 - la $v1, _gpu_standard - beqz $v0, .Lnot_pal - sw $0 , 0($v1) - li $v0, 1 - sw $v0, 0($v1) -.Lnot_pal: - - lw $a0, 4($sp) # Get argument value - - lui $a3, IOBASE # Set base I/O again (likely destroyed - # by previous calls) - - li $v0, 0x1d00 # Configure timer 1 as Hblank counter - sw $v0, TIMER1_CTRL($a3) # Set timer 1 value - - beq $a0, 1, .Lgpu_init_1 - nop - beq $a0, 3, .Lgpu_init_3 - nop - - sw $0 , GPU_GP1($a3) # Reset the GPU - - b .Linit_done - nop - -.Lgpu_init_1: - - sw $0 , DMA2_CHCR($a3) # Stop any DMA - -.Lgpu_init_3: - - li $v0, 0x1 # Reset the command buffer - sw $v0, GPU_GP1($a3) - -.Linit_done: - - lw $ra, 0($sp) - lw $a0, 4($sp) # Return - jr $ra - addiu $sp, 8 - - -.global VSync # VSync function -.type VSync, @function -VSync: - - addiu $sp, -12 - sw $ra, 0($sp) - sw $s0, 4($sp) - - lui $a3, IOBASE # Get GPU status (for interlace sync) - lw $s0, GPU_GP1($a3) - -.Lhwait_loop: # Get Hblank time - lw $v0, TIMER1_VALUE($a3) - nop - lw $v1, TIMER1_VALUE($a3) - nop - bne $v0, $v1, .Lhwait_loop - nop - - la $a3, _vsync_lasthblank # Calculate Hblank time since last - lw $v1, 0($a3) - nop - subu $v0, $v1 - andi $v0, 0xffff - - beq $a0, 1, .Lhblank_exit # Return Hblank time only, no VSync - sw $v0, 8($sp) # Stored as return value - - bgez $a0, .Lvsync # Vsync if argument is 0 and up - nop - - la $v0, _vsync_rcnt # Return VSync count only - lw $v0, 0($v0) - nop - b .Lvsync_exit - sw $v0, 8($sp) - -.Lvsync: - - bnez $a0, .Lnot_zero - nop - li $a0, 1 - -.Lnot_zero: - - la $v0, _vsync_rcnt # Call vsync sub function (with timeout) - lw $v0, 0($v0) - addiu $a1, $a0, 1 - jal _vsync_sub - addu $a0, $v0, $a0 - - lui $v0, 0x40 - and $v0, $s0, $v0 - beqz $v0, .Lhblank_exit - nop - - lui $a3, IOBASE # Interlace wait logic - - lw $v0, GPU_GP1($a3) - nop - xor $v0, $s0, $v0 - bltz $v0, .Lhblank_exit - lui $a0, 0x8000 - -.Linterlace_wait: - lw $v0, GPU_GP1($a3) - nop - xor $v0, $s0, 
$v0 - and $v0, $a0 - beqz $v0, .Linterlace_wait - nop - -.Lhblank_exit: # Set current Hblank as last value - - la $a2, _vsync_lasthblank - -.Lhwait2_loop: - lw $v0, TIMER1_VALUE($a3) - nop - lw $v1, TIMER1_VALUE($a3) - sw $v0, 0($a2) - bne $v0, $v1, .Lhwait2_loop - nop - -.Lvsync_exit: - - lw $ra, 0($sp) - lw $s0, 4($sp) - lw $v0, 8($sp) - jr $ra - addiu $sp, 12 - - -.type _vsync_sub, @function -_vsync_sub: - - # a0 - VSync destination count - # a1 - Timeout ratio (number of vsyncs to wait relative to vsync count) - - addiu $sp, -4 - sw $ra, 0($sp) - - sll $a1, 15 # Timeout counter - - la $v0, _vsync_rcnt - lw $v0, 0($v0) - nop - bge $v0, $a0, .Lvsync_sub_exit - nop - -.Lvsync_wait: - - addiu $a1, -1 - - la $v1, 0xffffffff - bne $a1, $v1, .Lnot_timeout - nop - - la $a0, vsynctimeout_msg - jal puts - addiu $sp, -8 - - jal ChangeClearPAD - move $a0, $0 - - li $a0, 3 - jal ChangeClearRCnt - move $a1, $0 - - addiu $sp, 8 - b .Lvsync_sub_exit - li $v0, -1 - -.Lnot_timeout: - - la $v0, _vsync_rcnt - lw $v0, 0($v0) - nop - blt $v0, $a0, .Lvsync_wait - nop - -.Lvsync_sub_exit: - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - move $v0, $0 - - - -.type _vsync_irq_callback, @function -_vsync_irq_callback: - - lui $a0, IOBASE - - la $v1, _vsync_rcnt # Increment VSync root counter - lw $v0, 0($v1) - nop - addiu $v0, 1 - sw $v0, 0($v1) - - la $v0, _vsync_cb_func # Check if a callback function is set - lw $v0, 0($v0) - nop - beqz $v0, .Lno_callback - nop - - addiu $sp, -4 # Save return address - sw $ra, 0($sp) - jalr $v0 # Execute user callback function - nop - lw $ra, 0($sp) # Restore previous return address - addiu $sp, 4 - - lui $a0, IOBASE - -.Lno_callback: - - jr $ra - nop - - -.section .data - -# VSync root counter -.type _vsync_rcnt, @object -_vsync_rcnt: - .word 0 - -.type _vsync_lasthblank, @object -_vsync_lasthblank: - .word 0 - -.comm _vsync_cb_func, 4, 4 - -.comm _gpu_standard, 4, 4 -.comm _gpu_current_field, 4, 4 -.comm _hooks_installed, 4, 4 - - -.type vsynctimeout_msg, 
@object -vsynctimeout_msg: - .asciiz "VSync: timeout\n" - -.type resetgraph_msg, @object -resetgraph_msg: - .asciiz "ResetGraph:itb=%08x,ehk=%08x\n" - -.type enableint_msg, @object -enableint_msg: - .asciiz "ResetGraph:Interrupts enabled!\n" - -.type cbhooks_msg, @object -cbhooks_msg: - .asciiz "ResetGraph:Interrupt hooks enabled.\n" - -.type abouttoen_msg, @object -abouttoen_msg: - .asciiz "ResetGraph:About to init interrupts.\n" - -.type sr_msg, @object -sr_msg: - .asciiz "ResetGraph:SR=%x\n" - -.global psxgpu_credits -.type psxgpu_credits, @object -psxgpu_credits: - .ascii "psxgpu programs by Lameguy64\n" - .asciiz "2020 PSn00bSDK Project / Meido-Tek Productions\n" -
\ No newline at end of file diff --git a/libpsn00b/psxgpu/setdefdispenv.c b/libpsn00b/psxgpu/setdefdispenv.c deleted file mode 100644 index 2d7b2b4..0000000 --- a/libpsn00b/psxgpu/setdefdispenv.c +++ /dev/null @@ -1,22 +0,0 @@ -#include <sys/types.h> -#include <psxgpu.h> - -DISPENV *SetDefDispEnv(DISPENV *disp, int x, int y, int w, int h) { - - disp->disp.x = x; - disp->disp.y = y; - disp->disp.w = w; - disp->disp.h = h; - - disp->screen.x = 0; - disp->screen.y = 0; - disp->screen.w = 0; - disp->screen.h = 0; - - disp->isinter = 0; - disp->isrgb24 = 0; - disp->reverse = 0; - - return disp; - -} diff --git a/libpsn00b/psxgpu/setdefdrawenv.c b/libpsn00b/psxgpu/setdefdrawenv.c deleted file mode 100644 index 6fd6086..0000000 --- a/libpsn00b/psxgpu/setdefdrawenv.c +++ /dev/null @@ -1,27 +0,0 @@ -#include <sys/types.h> -#include <psxgpu.h> - -DRAWENV *SetDefDrawEnv(DRAWENV *draw, int x, int y, int w, int h) { - - draw->clip.x = x; - draw->clip.y = y; - draw->clip.w = w; - draw->clip.h = h; - - draw->ofs[0] = 0; - draw->ofs[1] = 0; - - draw->tw.x = 0; - draw->tw.y = 0; - draw->tw.w = 0; - draw->tw.h = 0; - - draw->tpage = 0x0a; - draw->dtd = 1; - draw->dfe = 0; - draw->isbg = 0; - setRGB0( draw, 0, 0, 0 ); - - return draw; - -} diff --git a/libpsn00b/psxgpu/setdispmask.s b/libpsn00b/psxgpu/setdispmask.s deleted file mode 100644 index d79006c..0000000 --- a/libpsn00b/psxgpu/setdispmask.s +++ /dev/null @@ -1,19 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .text - - -.global SetDispMask -.type SetDispMask, @function -SetDispMask: - lui $v1, IOBASE - andi $a0, 0x1 - lui $v0, 0x300 - ori $v0, 0x1 - sub $v0, $a0 - sw $v0, GPU_GP1($v1) - jr $ra - nop - diff --git a/libpsn00b/psxgpu/setvideomode.s b/libpsn00b/psxgpu/setvideomode.s deleted file mode 100644 index b89b285..0000000 --- a/libpsn00b/psxgpu/setvideomode.s +++ /dev/null @@ -1,50 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - - -.section .text - -.global SetVideoMode -.type SetVideoMode, @function 
-SetVideoMode: - addiu $sp, -4 - sw $ra, 0($sp) - - jal ReadGPUstat - nop - - srl $a1, $v0, 17 - andi $a1, 0x1f - - srl $v1, $v0, 14 # Reverse flag - andi $v1, 1 - sll $v1, 6 - or $a1, $v1 - - srl $v1, $v0, 16 # Horizontal resolution 2 - andi $v1, 1 - sll $v1, 6 - or $a1, $v1 - - andi $a1, 0xf7 # Mask off PAL bit - - la $v0, _gpu_standard - beqz $a0, .Lset_done - sw $0 , 0($v0) - li $v1, 1 - sw $v1, 0($v0) - b .Lset_done - or $a1, 0x8 -.Lset_done: - - lui $v0, 0x800 # Apply new mode - or $a1, $v0 - lui $v0, IOBASE - sw $a1, GPU_GP1($v0) - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop - diff --git a/libpsn00b/psxgpu/storeimage.s b/libpsn00b/psxgpu/storeimage.s deleted file mode 100644 index 554e83c..0000000 --- a/libpsn00b/psxgpu/storeimage.s +++ /dev/null @@ -1,76 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.set RECT_x, 0 -.set RECT_y, 2 -.set RECT_w, 4 -.set RECT_h, 6 - -.section .text - - -.global StoreImage -.type StoreImage, @function -StoreImage: - addiu $sp, -8 - sw $ra, 0($sp) - sw $s0, 4($sp) - - lui $s0, IOBASE # Set I/O segment base address - -.Lgpu_wait: # Wait for GPU to be ready for commands and DMA - jal ReadGPUstat - nop - srl $v0, 0x1a - andi $v0, 0x5 - li $v1, 5 - bne $v0, $v1, .Lgpu_wait - nop - - lui $v0, 0x400 # Set DMA direction to off - sw $v0, GPU_GP1($s0) - - lui $v0, 0x0100 # Clear GPU cache - sw $v0, GPU_GP0($s0) - - lui $v1, 0xc000 # Store image from VRAM - sw $v1, GPU_GP0($s0) - lw $v0, RECT_x($a0) # Set XY and dimensions of image - lw $v1, RECT_w($a0) - sw $v0, GPU_GP0($s0) - sw $v1, GPU_GP0($s0) - - lui $v0, 0x400 # Set DMA direction to VRAMtoCPU - ori $v0, 0x3 - sw $v0, GPU_GP1($s0) - - lhu $v0, RECT_w($a0) # Get rectangle size - lhu $v1, RECT_h($a0) - nop - mult $v0, $v1 # Calculate BCR value - mflo $v1 - srl $v1, 0x4 - sll $v1, 0x10 - ori $v1, 0x8 - - sw $a1, DMA2_MADR($s0) # Set DMA base address and transfer length - sw $v1, DMA2_BCR($s0) - -.Lgpu_wait_2: # Wait for GPU to be ready for commands and DMA - jal ReadGPUstat - 
nop - srl $v0, 27 - andi $v0, 0x1 - beqz $v0, .Lgpu_wait_2 - nop - - lui $v0, 0x100 # Start DMA transfer - ori $v0, 0x200 - sw $v0, DMA2_CHCR($s0) - - lw $ra, 0($sp) - lw $s0, 4($sp) - jr $ra - addiu $sp, 8 - diff --git a/libpsn00b/psxgpu/vsynccallback.s b/libpsn00b/psxgpu/vsynccallback.s deleted file mode 100644 index 4be29c8..0000000 --- a/libpsn00b/psxgpu/vsynccallback.s +++ /dev/null @@ -1,25 +0,0 @@ -.set noreorder - -.section .text - -.global VSyncCallback -.type VSyncCallback, @function -VSyncCallback: - addiu $sp, -8 - sw $ra, 0($sp) - - jal EnterCriticalSection - sw $a0, 4($sp) - - lw $a0, 4($sp) - la $v0, _vsync_cb_func - sw $a0, 0($v0) - - jal ExitCriticalSection - nop - - lw $ra, 0($sp) - addiu $sp, 8 - jr $ra - nop - diff --git a/libpsn00b/psxgte/applymatrixlv.s b/libpsn00b/psxgte/applymatrixlv.s index 332a2f8..3180d0f 100644 --- a/libpsn00b/psxgte/applymatrixlv.s +++ b/libpsn00b/psxgte/applymatrixlv.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/compmatrixlv.s b/libpsn00b/psxgte/compmatrixlv.s index 95da5e9..2908eb9 100644 --- a/libpsn00b/psxgte/compmatrixlv.s +++ b/libpsn00b/psxgte/compmatrixlv.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .set MATRIX_r11r12, 0 .set MATRIX_r13r21, 4 diff --git a/libpsn00b/psxgte/initgeom.s b/libpsn00b/psxgte/initgeom.s index 14ca293..d004ecc 100644 --- a/libpsn00b/psxgte/initgeom.s +++ b/libpsn00b/psxgte/initgeom.s @@ -1,6 +1,6 @@ .set noreorder -.include "gtereg.h" +.include "gtereg.inc" .section .text diff --git a/libpsn00b/psxgte/mulmatrix.s b/libpsn00b/psxgte/mulmatrix.s index 19dabe8..08c79c2 100644 --- a/libpsn00b/psxgte/mulmatrix.s +++ b/libpsn00b/psxgte/mulmatrix.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git 
a/libpsn00b/psxgte/mulmatrix0.s b/libpsn00b/psxgte/mulmatrix0.s index 874226b..c2fd859 100644 --- a/libpsn00b/psxgte/mulmatrix0.s +++ b/libpsn00b/psxgte/mulmatrix0.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/pushpopmatrix.s b/libpsn00b/psxgte/pushpopmatrix.s index d10687a..ca6b992 100644 --- a/libpsn00b/psxgte/pushpopmatrix.s +++ b/libpsn00b/psxgte/pushpopmatrix.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/readme.txt b/libpsn00b/psxgte/readme.txt index 13067ee..3242474 100644 --- a/libpsn00b/psxgte/readme.txt +++ b/libpsn00b/psxgte/readme.txt @@ -21,9 +21,9 @@ Library developer(s): Library header(s): - gtereg.h + gtereg.inc inline_c.h - inline_s.h + inline_s.inc psxgte.h diff --git a/libpsn00b/psxgte/scalematrix.s b/libpsn00b/psxgte/scalematrix.s index 3e83800..1b2b6dd 100644 --- a/libpsn00b/psxgte/scalematrix.s +++ b/libpsn00b/psxgte/scalematrix.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/scalematrixl.s b/libpsn00b/psxgte/scalematrixl.s index 014b85a..53c2d14 100644 --- a/libpsn00b/psxgte/scalematrixl.s +++ b/libpsn00b/psxgte/scalematrixl.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/square0.s b/libpsn00b/psxgte/square0.s index d037b7e..a8ca107 100644 --- a/libpsn00b/psxgte/square0.s +++ b/libpsn00b/psxgte/square0.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/squareroot.s b/libpsn00b/psxgte/squareroot.s index a038e18..72198b4 100644 --- 
a/libpsn00b/psxgte/squareroot.s +++ b/libpsn00b/psxgte/squareroot.s @@ -1,7 +1,7 @@ .set noreorder -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxgte/vectornormals.s b/libpsn00b/psxgte/vectornormals.s index 939c4d1..85e94e6 100644 --- a/libpsn00b/psxgte/vectornormals.s +++ b/libpsn00b/psxgte/vectornormals.s @@ -1,8 +1,8 @@ .set noreorder .set noat -.include "gtereg.h" -.include "inline_s.h" +.include "gtereg.inc" +.include "inline_s.inc" .section .text diff --git a/libpsn00b/psxpress/README.md b/libpsn00b/psxpress/README.md new file mode 100644 index 0000000..a894874 --- /dev/null +++ b/libpsn00b/psxpress/README.md @@ -0,0 +1,42 @@ + +# PSn00bSDK MDEC library + +This is a fully open source reimplementation of the official SDK's "data +compression" library. This library is made up of two parts, the MDEC API and +functions to decompress Huffman-encoded bitstreams (.BS files, or frames in +.STR files) into data to be fed to the MDEC. FMV playback is not part of this +library (nor the official one) per se, but can be implemented by using these APIs +alongside some code to stream data from the CD drive. + +**Currently only version 1 and 2 bitstreams are supported**. + +## MDEC API + +The MDEC data input/output API is almost identical to the official one, with +only two minor differences: + +- `DecDCTPutEnv()` takes a second argument specifying whether the MDEC shall be + put into monochrome or color mode (the original API only supported color). +- A `DecDCTinRaw()` function was added for easier feeding of headerless data + buffers to the MDEC. This function does not affect how `DecDCTin()` works. + +## Decompression API + +The following functions are currently provided: + +- `DecDCTvlcStart()`, `DecDCTvlcContinue()`: a decompressor implementation that + uses a small (<1 KB) lookup table and leverages the GTE, written in assembly. 
+ `DecDCTvlcCopyTable()` can optionally be called to temporarily move the table + to the scratchpad region to improve decompression speed. +- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: a different implementation using + a large (34 KB) lookup table in main RAM, written in C. The table must be + decompressed ahead of time using `DecDCTvlcBuild()`, but can be deallocated + when no longer needed. +- `DecDCTvlc()`, `DecDCTvlc2()`: wrappers around the functions listed above, + for compatibility with the Sony SDK. Using them is not recommended. + +## SPU ADPCM encoding API + +The Sony library has functions that can be used to convert raw 16-bit PCM audio +data to SPU ADPCM. These are currently unimplemented due to their limited +usefulness, but might be added at some point. diff --git a/libpsn00b/psxpress/generate_lookup_table.py b/libpsn00b/psxpress/generate_lookup_table.py new file mode 100644 index 0000000..b40771f --- /dev/null +++ b/libpsn00b/psxpress/generate_lookup_table.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +# Huffman lookup table generator script for psxpress +# (C) 2022 spicyjpeg - MPL licensed + +import sys, json +from array import array +from itertools import repeat +from argparse import ArgumentParser, FileType + +HUFFMAN_TREE = { + "10": 0xfe00, # End of block + "11": ( 0, 1 ), + "01": { + "1": ( 1, 1 ), + "00": ( 0, 2 ), + "01": ( 2, 1 ) + }, + "001": { + "01": ( 0, 3 ), + "10": ( 4, 1 ), + "11": ( 3, 1 ), + "00000": ( 13, 1 ), + "00001": ( 0, 6 ), + "00010": ( 12, 1 ), + "00011": ( 11, 1 ), + "00100": ( 3, 2 ), + "00101": ( 1, 3 ), + "00110": ( 0, 5 ), + "00111": ( 10, 1 ) + }, + "0001": { + "00": ( 7, 1 ), + "01": ( 6, 1 ), + "10": ( 1, 2 ), + "11": ( 5, 1 ) + }, + "00001": { + "00": ( 2, 2 ), + "01": ( 9, 1 ), + "10": ( 0, 4 ), + "11": ( 8, 1 ) + }, + "0000001": { + "000": ( 16, 1 ), + "001": ( 5, 2 ), + "010": ( 0, 7 ), + "011": ( 2, 3 ), + "100": ( 1, 4 ), + "101": ( 15, 1 ), + "110": ( 14, 1 ), + "111": ( 4, 2 ) + }, + "00000001": { + 
"0000": ( 0, 11 ), + "0001": ( 8, 2 ), + "0010": ( 4, 3 ), + "0011": ( 0, 10 ), + "0100": ( 2, 4 ), + "0101": ( 7, 2 ), + "0110": ( 21, 2 ), + "0111": ( 20, 1 ), + "1000": ( 0, 9 ), + "1001": ( 19, 1 ), + "1010": ( 18, 1 ), + "1011": ( 1, 5 ), + "1100": ( 3, 3 ), + "1101": ( 0, 8 ), + "1110": ( 6, 2 ), + "1111": ( 17, 1 ) + }, + "000000001": { + "0000": ( 10, 2 ), + "0001": ( 9, 2 ), + "0010": ( 5, 3 ), + "0011": ( 3, 4 ), + "0100": ( 2, 5 ), + "0101": ( 1, 7 ), + "0110": ( 1, 6 ), + "0111": ( 0, 15 ), + "1000": ( 0, 14 ), + "1001": ( 0, 13 ), + "1010": ( 0, 12 ), + "1011": ( 26, 1 ), + "1100": ( 25, 1 ), + "1101": ( 24, 1 ), + "1110": ( 23, 1 ), + "1111": ( 22, 1 ) + }, + "0000000001": { + "0000": ( 0, 31 ), + "0001": ( 0, 30 ), + "0010": ( 0, 29 ), + "0011": ( 0, 28 ), + "0100": ( 0, 27 ), + "0101": ( 0, 26 ), + "0110": ( 0, 25 ), + "0111": ( 0, 24 ), + "1000": ( 0, 23 ), + "1001": ( 0, 22 ), + "1010": ( 0, 21 ), + "1011": ( 0, 20 ), + "1100": ( 0, 19 ), + "1101": ( 0, 18 ), + "1110": ( 0, 17 ), + "1111": ( 0, 16 ) + }, + "00000000001": { + "0000": ( 0, 40 ), + "0001": ( 0, 39 ), + "0010": ( 0, 38 ), + "0011": ( 0, 37 ), + "0100": ( 0, 36 ), + "0101": ( 0, 35 ), + "0110": ( 0, 34 ), + "0111": ( 0, 33 ), + "1000": ( 0, 32 ), + "1001": ( 1, 14 ), + "1010": ( 1, 13 ), + "1011": ( 1, 12 ), + "1100": ( 1, 11 ), + "1101": ( 1, 10 ), + "1110": ( 1, 9 ), + "1111": ( 1, 8 ) + }, + "000000000001": { + "0000": ( 1, 18 ), + "0001": ( 1, 17 ), + "0010": ( 1, 16 ), + "0011": ( 1, 15 ), + "0100": ( 6, 3 ), + "0101": ( 16, 2 ), + "0110": ( 15, 2 ), + "0111": ( 14, 2 ), + "1000": ( 13, 2 ), + "1001": ( 12, 2 ), + "1010": ( 11, 2 ), + "1011": ( 31, 1 ), + "1100": ( 30, 1 ), + "1101": ( 29, 1 ), + "1110": ( 28, 1 ), + "1111": ( 27, 1 ) + } +} + +## Utilities + +def to_int10(value): + clamped = min(max(int(value), -0x200), 0x1ff) + return clamped + (0 if clamped >= 0 else 0x400) + +def uint32_to_lines(data, indent = "\t", columns = 6): + for offset in range(0, len(data), columns): + 
line = f"{indent}0x{data[offset]:08x}" + + for item in data[(offset + 1):(offset + columns)]: + line += f", 0x{item:08x}" + + yield line + +## Table generation + +def iterate_tree(tree): + for code, value in tree.items(): + if type(value) is dict: + # Iterate through any subtree recursively. + for suffix, _value in iterate_tree(value): + yield f"{code}{suffix}", _value + + elif type(value) is tuple: + run_length, ac = value + yield f"{code}0", (run_length << 10) | to_int10(ac) + yield f"{code}1", (run_length << 10) | to_int10(-ac) + + else: + yield code, value + +def generate_table(codes, table_bits, prefix_bits = 0): + table = array("I", repeat(0, 2 ** table_bits)) + + for code, value in codes: + used_bits = len(code) + free_bits = table_bits - (used_bits - prefix_bits) + index = int(code[prefix_bits:], 2) << free_bits + + # Fill out every entry in the table whose index starts with the same + # string of bits. + for combo in range(2 ** free_bits): + table[index | combo] = (used_bits << 16) | value + + return table + +def compress_table(table): + values = [] + last_value = table[0] + run_length = 0 + + for value in table[1:]: + if value == last_value and run_length < 0x7ff: + run_length += 1 + continue + + # The run length is stored in the top 11 bits of each value, which are + # otherwise unused. + values.append((run_length << 21) | last_value) + last_value = value + run_length = 0 + + values.append((run_length << 21) | last_value) + return array("I", values) + +## Main + +UNCOMPRESSED_TEMPLATE = """static const DECDCTTAB {name} = {{ + .lut = {{ +{short} + }}, + .lut00 = {{ +{long} + }} +}}; +""" +COMPRESSED_TEMPLATE = """static const uint32_t {name}[{length}] = {{ +{table} +}}; +""" + +def get_args(): + parser = ArgumentParser( + description = "Generates a Huffman lookup table structure, to be used by DecDCTvlc2()." 
+ ) + parser.add_argument( + "-c", "--compress", + action = "store_true", + help = "generate run-length compressed data instead of a DECDCTTAB struct" + ) + parser.add_argument( + "-n", "--name", + type = str, + default = "_default_huffman_table", + help = "set the symbol name in the generated C source", + metavar = "file" + ) + parser.add_argument( + "-t", "--tree", + type = FileType("rt"), + help = "use a custom Huffman tree from the specified JSON file", + metavar = "json_file" + ) + parser.add_argument( + "-o", "--output", + type = FileType("wt"), + default = sys.stdout, + help = "where to output generated table (stdout by default)", + metavar = "file" + ) + + return parser.parse_args() + +def main(): + args = get_args() + tree = json.load(args.tree) if args.tree else HUFFMAN_TREE + + short_codes, short_bits = [], 0 + long_codes, long_bits = [], 0 + + for pair in iterate_tree(tree): + if (code := pair[0]).startswith("00000000"): + long_codes.append(pair) + long_bits = max(long_bits, len(code) - 8) + else: + short_codes.append(pair) + short_bits = max(short_bits, len(code)) + + short_table = generate_table(short_codes, short_bits, 0) + long_table = generate_table(long_codes, long_bits, 8) + + if args.compress: + short_table.extend(long_table) + table = compress_table(short_table) + + source = COMPRESSED_TEMPLATE.format( + name = args.name, + length = len(table), + table = ",\n".join(uint32_to_lines(table, "\t")) + ) + else: + source = UNCOMPRESSED_TEMPLATE.format( + name = args.name, + short = ",\n".join(uint32_to_lines(short_table, "\t\t")), + long = ",\n".join(uint32_to_lines(long_table, "\t\t")) + ) + + with args.output as _file: + _file.write(source) + +if __name__ == "__main__": + main() diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index ca4c75a..9c82d6b 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -9,7 +9,8 @@ #include <psxpress.h> #include <hwregs_c.h> -#define MDEC_SYNC_TIMEOUT 0x1000000 +#define 
DMA_CHUNK_LENGTH 32 +#define MDEC_SYNC_TIMEOUT 0x1000000 /* Default IDCT matrix and quantization tables */ @@ -82,7 +83,7 @@ static const DECDCTENV _default_mdec_env = { /* Public API */ -void DecDCTReset(int32_t mode) { +void DecDCTReset(int mode) { EnterCriticalSection(); DMA_DPCR |= 0x000000bb; // Enable DMA0 and DMA1 @@ -96,7 +97,7 @@ void DecDCTReset(int32_t mode) { DecDCTPutEnv(0, 0); } -void DecDCTPutEnv(const DECDCTENV *env, int32_t mono) { +void DecDCTPutEnv(const DECDCTENV *env, int mono) { const DECDCTENV *_env = env ? env : &_default_mdec_env; DecDCTinSync(0); @@ -109,14 +110,14 @@ void DecDCTPutEnv(const DECDCTENV *env, int32_t mono) { DecDCTinSync(0); } -void DecDCTin(const uint32_t *data, int32_t mode) { +void DecDCTin(const uint32_t *data, int mode) { uint32_t header = *data; if (mode == DECDCT_MODE_RAW) MDEC0 = header; else if (mode & DECDCT_MODE_24BPP) - MDEC0 = header | 0x30000000; + MDEC0 = 0x30000000 | (header & 0xffff); else - MDEC0 = header | 0x38000000 | ((mode & 2) << 24); // Bit 25 = mask + MDEC0 = 0x38000000 | (header & 0xffff) | ((mode & 2) << 24); // Bit 25 = mask DecDCTinRaw((const uint32_t *) &(data[1]), header & 0xffff); } @@ -125,20 +126,25 @@ void DecDCTin(const uint32_t *data, int32_t mode) { // data length as an argument rather than parsing it from the first 4 bytes of // the stream. 
void DecDCTinRaw(const uint32_t *data, size_t length) { + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { + printf("psxmdec: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + length += DMA_CHUNK_LENGTH - 1; + } + DMA_MADR(0) = (uint32_t) data; - if (length < 32) + if (length < DMA_CHUNK_LENGTH) DMA_BCR(0) = 0x00010000 | length; else - DMA_BCR(0) = 0x00000020 | ((length / 32) << 16); + DMA_BCR(0) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); DMA_CHCR(0) = 0x01000201; } -int32_t DecDCTinSync(int32_t mode) { +int DecDCTinSync(int mode) { if (mode) return (MDEC1 >> 29) & 1; - for (uint32_t i = MDEC_SYNC_TIMEOUT; i; i--) { + for (int i = MDEC_SYNC_TIMEOUT; i; i--) { if (!(MDEC1 & (1 << 29))) return 0; } @@ -151,19 +157,19 @@ void DecDCTout(uint32_t *data, size_t length) { DecDCToutSync(0); DMA_MADR(1) = (uint32_t) data; - if (length < 32) + if (length < DMA_CHUNK_LENGTH) DMA_BCR(1) = 0x00010000 | length; else - DMA_BCR(1) = 0x00000020 | ((length / 32) << 16); + DMA_BCR(1) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); DMA_CHCR(1) = 0x01000200; } -int32_t DecDCToutSync(int32_t mode) { +int DecDCToutSync(int mode) { if (mode) return (DMA_CHCR(1) >> 24) & 1; - for (uint32_t i = MDEC_SYNC_TIMEOUT; i; i--) { + for (int i = MDEC_SYNC_TIMEOUT; i; i--) { if (!(DMA_CHCR(1) & (1 << 24))) return 0; } diff --git a/libpsn00b/psxpress/vlc.c b/libpsn00b/psxpress/vlc.c new file mode 100644 index 0000000..4e3e283 --- /dev/null +++ b/libpsn00b/psxpress/vlc.c @@ -0,0 +1,130 @@ +/* + * PSn00bSDK MDEC library (support code for the main VLC decompressor) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <psxpress.h> + +/* Huffman code lookup table */ + +#define _val1(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff)) +#define _val2(rl, dc, len) (_val1(rl, dc) | (len << 16)) + +#define _pair(rl, dc) _val1(rl, dc), _val1(rl, -(dc)) +#define 
_pair2(rl, dc, len) _val2(rl, dc, len), _val2(rl, -(dc), len) +#define _pair3(rl, dc, len) \ + _val2(rl, dc, len), _val2(rl, dc, len), \ + _val2(rl, -(dc), len), _val2(rl, -(dc), len) +#define _pair4(rl, dc, len) \ + _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ + _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ + _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), \ + _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len) + +// This table isn't compressed since it makes no sense to compress less than a +// kilobyte's worth of data. +static const DECDCTTAB _default_huffman_table = { + .lut0 = { + // 11 x + _pair( 0, 1) + }, + .lut2 = { + // 01 0xx + _pair2( 0, 2, 5), _pair2( 2, 1, 5), + // 01 1x- + _pair3( 1, 1, 4) + }, + .lut3 = { + // 001 00xxxx + _pair2(13, 1, 9), _pair2( 0, 6, 9), _pair2(12, 1, 9), _pair2(11, 1, 9), + _pair2( 3, 2, 9), _pair2( 1, 3, 9), _pair2( 0, 5, 9), _pair2(10, 1, 9), + // 001 xxx--- + _pair4( 0, 3, 6), _pair4( 4, 1, 6), _pair4( 3, 1, 6) + }, + .lut4 = { + // 0001 xxx + _pair( 7, 1), _pair( 6, 1), _pair( 1, 2), _pair( 5, 1) + }, + .lut5 = { + // 00001 xxx + _pair( 2, 2), _pair( 9, 1), _pair( 0, 4), _pair( 8, 1) + }, + .lut7 = { + // 0000001 xxxx + _pair(16, 1), _pair( 5, 2), _pair( 0, 7), _pair( 2, 3), + _pair( 1, 4), _pair(15, 1), _pair(14, 1), _pair( 4, 2) + }, + .lut8 = { + // 00000001 xxxxx + _pair( 0, 11), _pair( 8, 2), _pair( 4, 3), _pair( 0, 10), + _pair( 2, 4), _pair( 7, 2), _pair(21, 1), _pair(20, 1), + _pair( 0, 9), _pair(19, 1), _pair(18, 1), _pair( 1, 5), + _pair( 3, 3), _pair( 0, 8), _pair( 6, 2), _pair(17, 1) + }, + .lut9 = { + // 000000001 xxxxx + _pair(10, 2), _pair( 9, 2), _pair( 5, 3), _pair( 3, 4), + _pair( 2, 5), _pair( 1, 7), _pair( 1, 6), _pair( 0, 15), + _pair( 0, 14), _pair( 0, 13), _pair( 0, 12), _pair(26, 1), + _pair(25, 1), _pair(24, 1), _pair(23, 1), _pair(22, 1) + }, + 
.lut10 = { + // 0000000001 xxxxx + _pair( 0, 31), _pair( 0, 30), _pair( 0, 29), _pair( 0, 28), + _pair( 0, 27), _pair( 0, 26), _pair( 0, 25), _pair( 0, 24), + _pair( 0, 23), _pair( 0, 22), _pair( 0, 21), _pair( 0, 20), + _pair( 0, 19), _pair( 0, 18), _pair( 0, 17), _pair( 0, 16) + }, + .lut11 = { + // 00000000001 xxxxx + _pair( 0, 40), _pair( 0, 39), _pair( 0, 38), _pair( 0, 37), + _pair( 0, 36), _pair( 0, 35), _pair( 0, 34), _pair( 0, 33), + _pair( 0, 32), _pair( 1, 14), _pair( 1, 13), _pair( 1, 12), + _pair( 1, 11), _pair( 1, 10), _pair( 1, 9), _pair( 1, 8) + }, + .lut12 = { + // 000000000001 xxxxx + _pair( 1, 18), _pair( 1, 17), _pair( 1, 16), _pair( 1, 15), + _pair( 6, 3), _pair(16, 2), _pair(15, 2), _pair(14, 2), + _pair(13, 2), _pair(12, 2), _pair(11, 2), _pair(31, 1), + _pair(30, 1), _pair(29, 1), _pair(28, 1), _pair(27, 1) + } +}; + +/* Internal globals */ + +// Note that DecDCTvlc() and DecDCTvlc2() do *not* share the same variables. +static VLC_Context _default_context; +static size_t _max_buffer_size = 0; + +const DECDCTTAB *_vlc_huffman_table = &_default_huffman_table; + +/* Stateful VLC decoder API (for Sony SDK compatibility) */ + +int DecDCTvlc(const uint32_t *bs, uint32_t *buf) { + if (bs) + return DecDCTvlcStart(&_default_context, buf, _max_buffer_size, bs); + else + return DecDCTvlcContinue(&_default_context, buf, _max_buffer_size); +} + +size_t DecDCTvlcSize(size_t size) { + size_t old_size = _max_buffer_size; + _max_buffer_size = size; + + return old_size; +} + +/* Lookup table relocation API */ + +void DecDCTvlcCopyTable(DECDCTTAB *addr) { + if (addr) { + _vlc_huffman_table = addr; + memcpy(addr, &_default_huffman_table, sizeof(DECDCTTAB)); + } else { + _vlc_huffman_table = &_default_huffman_table; + } +} diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s new file mode 100644 index 0000000..fe51642 --- /dev/null +++ b/libpsn00b/psxpress/vlc.s @@ -0,0 +1,404 @@ +# PSn00bSDK MDEC library (GTE-accelerated VLC decompressor) +# (C) 
2022 spicyjpeg - MPL licensed +# +# Register map: +# - $a0 = ctx +# - $a1 = output +# - $a2 = max_size +# - $a3 = input +# - $t0 = window +# - $t1 = next_window +# - $t2 = remaining +# - $t3 = quant_scale +# - $t4 = is_v3 +# - $t5 = bit_offset +# - $t6 = block_index +# - $t7 = coeff_index +# - $t8 = _vlc_huffman_table +# - $t9 = &ac_jump_area + +.set noreorder + +.set VLC_Context_input, 0 +.set VLC_Context_window, 4 +.set VLC_Context_next_window, 8 +.set VLC_Context_remaining, 12 +.set VLC_Context_quant_scale, 16 +.set VLC_Context_is_v3, 18 +.set VLC_Context_bit_offset, 19 +.set VLC_Context_block_index, 20 +.set VLC_Context_coeff_index, 21 + +.set DECDCTSMALLTAB_lut0, 0 +.set DECDCTSMALLTAB_lut2, 4 +.set DECDCTSMALLTAB_lut3, 36 +.set DECDCTSMALLTAB_lut4, 292 +.set DECDCTSMALLTAB_lut5, 308 +.set DECDCTSMALLTAB_lut7, 324 +.set DECDCTSMALLTAB_lut8, 356 +.set DECDCTSMALLTAB_lut9, 420 +.set DECDCTSMALLTAB_lut10, 484 +.set DECDCTSMALLTAB_lut11, 548 +.set DECDCTSMALLTAB_lut12, 612 + +.section .text.DecDCTvlcStart +.global DecDCTvlcStart +.type DecDCTvlcStart, @function +DecDCTvlcStart: + # Create a new context on-the-fly without writing it to memory then jump + # into DecDCTvlcContinue(), skipping context loading. 
+ lw $t0, 8($a3) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) + nop + srl $v0, $t0, 16 + sll $t0, 16 + + lw $t1, 12($a3) # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) + or $t0, $v0 + srl $v0, $t1, 16 + sll $t1, 16 + + lhu $t2, 0($a3) # remaining = bs->uncomp_length * 2 + or $t1, $v0 + + lhu $t3, 4($a3) # quant_scale = (bs->quant_scale & 63) << 10 + sll $t2, 1 + andi $t3, 63 + + lhu $t4, 6($a3) # is_v3 = !(bs->version < 3) + sll $t3, 10 + sltiu $t4, $t4, 3 + xori $t4, 1 + + li $t5, 32 # bit_offset = 32 + li $t6, 5 # block_index = 5 + li $t7, 0 # coeff_index = 0 + j _vlc_skip_context_load + addiu $a3, 16 # input = &(bs->data[2]) + +.section .text.DecDCTvlcContinue +.global DecDCTvlcContinue +.type DecDCTvlcContinue, @function +DecDCTvlcContinue: + lw $a3, VLC_Context_input($a0) + lw $t0, VLC_Context_window($a0) + lw $t1, VLC_Context_next_window($a0) + lw $t2, VLC_Context_remaining($a0) + lhu $t3, VLC_Context_quant_scale($a0) + lb $t4, VLC_Context_is_v3($a0) + lb $t5, VLC_Context_bit_offset($a0) + lb $t6, VLC_Context_block_index($a0) + lb $t7, VLC_Context_coeff_index($a0) + +_vlc_skip_context_load: + # Determine how many bytes to output. This whole block of code basically + # does this: + # max_size = min((max_size - 1) * 2, remaining) + # remaining -= max_size + bgtz $a2, .Lmax_size_valid # if (max_size <= 0) max_size = 0x7ffe0000 + addiu $a2, -1 # else max_size = (max_size - 1) * 2 + lui $a2, 0x3fff +.Lmax_size_valid: + sll $a2, 1 + + blt $a2, $t2, .Lmax_size_ok # if (max_size > remaining) max_size = remaining + lui $v1, 0x3800 + move $a2, $t2 +.Lmax_size_ok: + subu $t2, $a2 # remaining -= max_size + + # Write the length of the data that will be decoded to first 4 bytes of the + # output buffer, which will be then parsed by DecDCTin(). 
+ srl $v0, $a2, 1 # output[0] = 0x38000000 | (max_size / 2) + or $v0, $v1 + sw $v0, 0($a1) + + # Obtain the addresses of the lookup table and jump area in advance so that + # they don't have to be retrieved for each coefficient decoded. + lw $t8, _vlc_huffman_table + la $t9, .Lac_jump_area + + beqz $a2, .Lstop_processing + addiu $a1, 4 # output = (uint16_t *) &output[1] + +.Lprocess_next_code_loop: # while (max_size) + # This is the "hot" part of the decoder, executed for each code in the + # bitstream. The first step is to determine if the next code is a DC or AC + # coefficient. The GTE is also given the task of counting the number of + # leading zeroes/ones, which takes 2 more cycles. + bnez $t7, .Lprocess_ac_coefficient + mtc2 $t0, $30 + bnez $t4, .Lprocess_dc_v3_coefficient + #nop + +.Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) + # The DC coefficient in version 2 frames is not compressed. + srl $v0, $t0, 22 # *output = (window >> (32 - 10)) | quant_scale + or $v0, $t3 + addiu $t7, 1 # coeff_index++ + sll $t0, 10 # window <<= 10 + addiu $t5, -10 # bit_offset -= 10 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) + # TODO: version 3 is currently not supported. + jr $ra + li $v0, -1 + #b .Lwrite_value + +.Lprocess_ac_coefficient: # if (coeff_index) + # Check whether the prefix code is one of the shorter, more common ones. + srl $v0, $t0, 30 + li $v1, 3 + beq $v0, $v1, .Lac_prefix_11 + li $v1, 2 + beq $v0, $v1, .Lac_prefix_10 + li $v1, 1 + beq $v0, $v1, .Lac_prefix_01 + #srl $v0, $t0, 29 + #beq $v0, $v1, .Lac_prefix_001 + #nop + + # If the code is longer, retrieve the number of leading zeroes from the GTE + # and use it as an index into the jump area. Each block in the area is 8 + # instructions long and handles decoding a specific prefix. 
+ mfc2 $v0, $31 + nop + andi $v0, 15 # jump_addr = &ac_jump_area[(prefix % 16) * 8 * sizeof(u32)] + sll $v0, 5 + addu $v0, $t9 + jr $v0 + nop + +.Lac_prefix_11: + # Prefix 11 is followed by a single bit. + srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16) + andi $v0, 2 + addu $v0, $t8 # value = table->lut0[index] + lhu $v0, DECDCTSMALLTAB_lut0($v0) + sll $t0, 3 # window <<= 3 + addiu $t5, -3 # bit_offset -= 3 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_jump_area: +.Lac_prefix_10: + # Prefix 10 marks the end of a block. + li $v0, 0xfe00 # value = 0xfe00 + sll $t0, 2 # window <<= 2 + addiu $t5, -2 # bit_offset -= 2 + addiu $t6, -1 # block_index-- + bgez $t6, .Lwrite_value + li $t7, 0 # coeff_index = 0 + b .Lwrite_value + li $t6, 5 # if (block_index < 0) block_index = 5 + +.Lac_prefix_01: + # Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a + # 3-bit lookup index starting with 0. A 32-bit lookup table is used, + # containing both MDEC codes and lengths. + srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32) + andi $v0, 28 + addu $v0, $t8 # value = table->lut2[index] + lw $v0, DECDCTSMALLTAB_lut2($v0) + addiu $t7, 1 # coeff_index++ + b .Lupdate_window_and_write + srl $v1, $v0, 16 # length = value >> 16 + .word 0 + +.Lac_prefix_001: + # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a + # 3-bit lookup index starting with 01/10/11. + srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32) + andi $v0, 252 + addu $v0, $t8 # value = table->lut3[index] + lw $v0, DECDCTSMALLTAB_lut3($v0) + addiu $t7, 1 # coeff_index++ + b .Lupdate_window_and_write + srl $v1, $v0, 16 # length = value >> 16 + .word 0 + +.Lac_prefix_0001: + # Prefix 0001 is followed by a 3-bit lookup index. 
+ srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16) + andi $v0, 14 + addu $v0, $t8 # value = table->lut4[index] + lhu $v0, DECDCTSMALLTAB_lut4($v0) + sll $t0, 7 # window <<= 4 + 3 + addiu $t5, -7 # bit_offset -= 4 + 3 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_00001: + # Prefix 00001 is followed by a 3-bit lookup index. + srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16) + andi $v0, 14 + addu $v0, $t8 # value = table->lut5[index] + lhu $v0, DECDCTSMALLTAB_lut5($v0) + sll $t0, 8 # window <<= 5 + 3 + addiu $t5, -8 # bit_offset -= 5 + 3 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_000001: + # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. + srl $v0, $t0, 10 # value = window >> (32 - 6 - 16) + sll $t0, 22 # window <<= 6 + 16 + addiu $t5, -22 # bit_offset -= 6 + 16 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + .word 0, 0, 0 + +.Lac_prefix_0000001: + # Prefix 0000001 is followed by a 4-bit lookup index. + srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16) + andi $v0, 30 + addu $v0, $t8 # value = table->lut7[index] + lhu $v0, DECDCTSMALLTAB_lut7($v0) + sll $t0, 11 # window <<= 7 + 4 + addiu $t5, -11 # bit_offset -= 7 + 4 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_00000001: + # Prefix 00000001 is followed by a 5-bit lookup index. + srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16) + andi $v0, 62 + addu $v0, $t8 # value = table->lut8[index] + lhu $v0, DECDCTSMALLTAB_lut8($v0) + sll $t0, 13 # window <<= 8 + 5 + addiu $t5, -13 # bit_offset -= 8 + 5 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_000000001: + # Prefix 000000001 is followed by a 5-bit lookup index. 
+ srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16) + andi $v0, 62 + addu $v0, $t8 # value = table->lut9[index] + lhu $v0, DECDCTSMALLTAB_lut9($v0) + sll $t0, 14 # window <<= 9 + 5 + addiu $t5, -14 # bit_offset -= 9 + 5 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_0000000001: + # Prefix 0000000001 is followed by a 5-bit lookup index. + srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16) + andi $v0, 62 + addu $v0, $t8 # value = table->lut10[index] + lhu $v0, DECDCTSMALLTAB_lut10($v0) + sll $t0, 15 # window <<= 10 + 5 + addiu $t5, -15 # bit_offset -= 10 + 5 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_00000000001: + # Prefix 00000000001 is followed by a 5-bit lookup index. + srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16) + andi $v0, 62 + addu $v0, $t8 # value = table->lut11[index] + lhu $v0, DECDCTSMALLTAB_lut11($v0) + sll $t0, 16 # window <<= 11 + 5 + addiu $t5, -16 # bit_offset -= 11 + 5 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + +.Lac_prefix_000000000001: + # Prefix 000000000001 is followed by a 5-bit lookup index. + srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16) + andi $v0, 62 + addu $v0, $t8 # value = table->lut12[index] + lhu $v0, DECDCTSMALLTAB_lut12($v0) + sll $t0, 17 # window <<= 12 + 5 + addiu $t5, -17 # bit_offset -= 12 + 5 + b .Lwrite_value + addiu $t7, 1 # coeff_index++ + + # Prefix 0000000000001 is not valid. + beqz $t0, .Lstop_processing + nop + jr $ra + li $v0, -1 + .word 0, 0, 0, 0 + + # Prefix 00000000000001 is not valid. + beqz $t0, .Lstop_processing + nop + jr $ra + li $v0, -1 + .word 0, 0, 0, 0 + + # Prefix 000000000000001 is not valid. + beqz $t0, .Lstop_processing + nop + jr $ra + li $v0, -1 + .word 0, 0, 0, 0 + + # Prefix 0000000000000001 is not valid. 
+ beqz $t0, .Lstop_processing + nop + jr $ra + li $v0, -1 + #.word 0, 0, 0, 0 + +.Lupdate_window_and_write: + sllv $t0, $t0, $v1 # window <<= length + subu $t5, $v1 # bit_offset -= length +.Lwrite_value: + sh $v0, 0($a1) +.Lfeed_bitstream: + # Update the window. This makes sure the next iteration of the loop will be + # able to read up to 32 bits from the bitstream. + bgez $t5, .Lskip_feeding # if (bit_offset < 0) + addiu $a2, -1 # max_size-- + + subu $v0, $0, $t5 # window = next_window << (-bit_offset) + sllv $t0, $t1, $v0 + lw $t1, 0($a3) # next_window = (*input << 16) | (*input >> 16) + addiu $t5, 32 # bit_offset += 32 + srl $v0, $t1, 16 + sll $t1, 16 + or $t1, $v0 + addiu $a3, 4 # input++ + +.Lskip_feeding: + srlv $v0, $t1, $t5 # window |= next_window >> bit_offset + or $t0, $v0 + + bnez $a2, .Lprocess_next_code_loop + addiu $a1, 2 # output++ + +.Lstop_processing: + # If remaining = 0, skip flushing the context, pad the output buffer with + # end-of-block codes if necessary and return 0. Otherwise flush the context + # and return 1. 
+ beqz $t2, .Lpad_output_buffer + nop + + sw $a3, VLC_Context_input($a0) + sw $t0, VLC_Context_window($a0) + sw $t1, VLC_Context_next_window($a0) + sw $t2, VLC_Context_remaining($a0) + sh $t3, VLC_Context_quant_scale($a0) + sb $t4, VLC_Context_is_v3($a0) + sb $t5, VLC_Context_bit_offset($a0) + sb $t6, VLC_Context_block_index($a0) + sb $t7, VLC_Context_coeff_index($a0) + + jr $ra + li $v0, 1 + +.Lpad_output_buffer: + beqz $a2, .Lreturn_zero + li $v0, 0xfe00 +.Lpad_output_buffer_loop: # while (max_size) + sh $v0, 0($a1) # *output = 0xfe00 + addiu $a2, -1 # max_size-- + bnez $a2, .Lpad_output_buffer_loop + addiu $a1, 2 # output++ + +.Lreturn_zero: + jr $ra + li $v0, 0 diff --git a/libpsn00b/psxpress/vlc2.c b/libpsn00b/psxpress/vlc2.c new file mode 100644 index 0000000..73b54b2 --- /dev/null +++ b/libpsn00b/psxpress/vlc2.c @@ -0,0 +1,240 @@ +/* + * PSn00bSDK MDEC library (alternate VLC decompressor and support code) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <stddef.h> +#include <psxpress.h> + +#define _min(x, y) (((x) < (y)) ? (x) : (y)) + +/* Huffman code lookup table */ + +#define TABLE_LENGTH 226 + +// This table is run-length compressed, with the number of repetitions of each +// value stored in the upper 11 bits which would be otherwise unused. It is +// decompressed at runtime by DecDCTvlcBuild(). 
+static const uint32_t _compressed_table[TABLE_LENGTH] = { + 0x03e00000, 0x000d000b, 0x000d03f5, 0x000d2002, 0x000d23fe, 0x000d1003, + 0x000d13fd, 0x000d000a, 0x000d03f6, 0x000d0804, 0x000d0bfc, 0x000d1c02, + 0x000d1ffe, 0x000d5402, 0x000d57fe, 0x000d5001, 0x000d53ff, 0x000d0009, + 0x000d03f7, 0x000d4c01, 0x000d4fff, 0x000d4801, 0x000d4bff, 0x000d0405, + 0x000d07fb, 0x000d0c03, 0x000d0ffd, 0x000d0008, 0x000d03f8, 0x000d1802, + 0x000d1bfe, 0x000d4401, 0x000d47ff, 0x006b4001, 0x006b43ff, 0x006b1402, + 0x006b17fe, 0x006b0007, 0x006b03f9, 0x006b0803, 0x006b0bfd, 0x006b0404, + 0x006b07fc, 0x006b3c01, 0x006b3fff, 0x006b3801, 0x006b3bff, 0x006b1002, + 0x006b13fe, 0x0fe00000, 0x03e80802, 0x03e80bfe, 0x03e82401, 0x03e827ff, + 0x03e80004, 0x03e803fc, 0x03e82001, 0x03e823ff, 0x07e71c01, 0x07e71fff, + 0x07e71801, 0x07e71bff, 0x07e70402, 0x07e707fe, 0x07e71401, 0x07e717ff, + 0x01e93401, 0x01e937ff, 0x01e90006, 0x01e903fa, 0x01e93001, 0x01e933ff, + 0x01e92c01, 0x01e92fff, 0x01e90c02, 0x01e90ffe, 0x01e90403, 0x01e907fd, + 0x01e90005, 0x01e903fb, 0x01e92801, 0x01e92bff, 0x0fe60003, 0x0fe603fd, + 0x0fe61001, 0x0fe613ff, 0x0fe60c01, 0x0fe60fff, 0x1fe50002, 0x1fe503fe, + 0x1fe50801, 0x1fe50bff, 0x3fe40401, 0x3fe407ff, 0xffe2fe00, 0x7fe30001, + 0x7fe303ff, 0x03e00000, 0x00110412, 0x001107ee, 0x00110411, 0x001107ef, + 0x00110410, 0x001107f0, 0x0011040f, 0x001107f1, 0x00111803, 0x00111bfd, + 0x00114002, 0x001143fe, 0x00113c02, 0x00113ffe, 0x00113802, 0x00113bfe, + 0x00113402, 0x001137fe, 0x00113002, 0x001133fe, 0x00112c02, 0x00112ffe, + 0x00117c01, 0x00117fff, 0x00117801, 0x00117bff, 0x00117401, 0x001177ff, + 0x00117001, 0x001173ff, 0x00116c01, 0x00116fff, 0x00300028, 0x003003d8, + 0x00300027, 0x003003d9, 0x00300026, 0x003003da, 0x00300025, 0x003003db, + 0x00300024, 0x003003dc, 0x00300023, 0x003003dd, 0x00300022, 0x003003de, + 0x00300021, 0x003003df, 0x00300020, 0x003003e0, 0x0030040e, 0x003007f2, + 0x0030040d, 0x003007f3, 0x0030040c, 0x003007f4, 0x0030040b, 0x003007f5, + 0x0030040a, 
/* Internal globals */

// Note that DecDCTvlc() and DecDCTvlc2() do *not* share the same variables.
static VLC_Context _default_context;
static size_t _max_buffer_size = 0;

// Lookup table used by the decoder. It is null until a table is provided
// through DecDCTvlc2() or DecDCTvlcBuild().
const DECDCTTAB2 *_vlc_huffman_table2 = 0;

/* VLC decoder */

// These macros operate on the "window" and "bit_offset" locals of
// DecDCTvlcContinue2(). _advance_window() expands to two statements, so it
// must only be used where a full statement list is allowed (i.e. inside
// braces).
#define _get_bits_unsigned(length) (((uint32_t) window) >> (32 - (length)))
#define _get_bits_signed(length) (((int32_t) window) >> (32 - (length)))
#define _advance_window(num) \
	window <<= (num); \
	bit_offset -= (num);

// Resumes decoding the bitstream described by ctx. buf[0] receives a header
// word (0x38000000 | number of 32-bit words that follow), followed by the
// decoded 16-bit codes. A max_size of zero is treated as 0x7fffffff. Returns
// 0 once no data is left to decode, 1 if there is more (in which case ctx is
// updated so that decoding can be resumed by another call) or -1 if the
// bitstream is version 3, which is not supported yet.
int __attribute__((optimize(3))) DecDCTvlcContinue2(
	VLC_Context *ctx, uint32_t *buf, size_t max_size
) {
	const uint32_t *input = ctx->input;
	uint32_t remaining = ctx->remaining;
	uint32_t window = ctx->window;
	uint32_t next_window = ctx->next_window;
	uint16_t quant_scale = ctx->quant_scale;
	int block_index = ctx->block_index;
	int coeff_index = ctx->coeff_index;
	int bit_offset = ctx->bit_offset;
	int is_v3 = ctx->is_v3;

	//if (!_vlc_huffman_table2)
		//return -1;
	if (!max_size)
		max_size = 0x7fffffff;

	// Write the length of the data that will be decoded to the first 4 bytes
	// of the output buffer, which will then be parsed by DecDCTin(). From this
	// point on max_size is the number of 16-bit codes to output (the header
	// word is excluded, hence the "- 1").
	max_size = _min((max_size - 1) * 2, remaining);
	remaining -= max_size;

	*buf = 0x38000000 | (max_size / 2);
	uint16_t *output = (uint16_t *) &buf[1];

	for (; max_size; max_size--) {
		uint32_t value;

		if (coeff_index) {
			// Parse the next AC coefficient. Most codes are decompressed via
			// the lookup table, however some need special handling.
			if ((window >> 30) == 0b10) {
				// Prefix 10 marks the end of a block.
				*output = 0xfe00;
				_advance_window(2);

				// coeff_index is incremented below, so this resets it to 0
				// and makes the next code be parsed as a DC coefficient.
				coeff_index = -1;
				block_index++;
				if (block_index > 5)
					block_index = 0;
			} else if ((window >> 26) == 0b000001) {
				// Prefix 000001 is an escape code followed by a full 16-bit
				// MDEC value.
				*output = (uint16_t) _get_bits_unsigned(22);
				_advance_window(22);
			} else if (window >> 24) {
				// The first lookup table is for codes that do not start with
				// 00000000. The upper 16 bits of each entry hold the length of
				// the code, the lower 16 bits hold the value to output.
				value = _vlc_huffman_table2->lut[_get_bits_unsigned(13)];
				_advance_window(value >> 16);
				*output = (uint16_t) value;
			} else {
				// If the code starts with 00000000, use the second lookup
				// table.
				value = _vlc_huffman_table2->lut00[_get_bits_unsigned(17)];
				_advance_window(value >> 16);
				*output = (uint16_t) value;
			}
		} else {
			// Parse the DC (first) coefficient for this block. Version 2
			// simply stores the signed 10-bit value as-is, while version 3
			// uses a delta encoding combined with a compression method similar
			// to exp-Golomb.
			if (is_v3) {
				// TODO: version 3 is currently not supported.
				return -1;
			} else {
				value = _get_bits_unsigned(10);
				*output = value | quant_scale;
				_advance_window(10);
			}
		}

		output++;
		coeff_index++;

		// Update the bitstream window. For whatever reason Sony's DecDCTvlc()
		// implementation inefficiently reads the input stream 16 bits at a
		// time and processes each 16-bit word starting from the MSB, so an
		// endianness conversion is necessary to preserve bit order when
		// reading 32 bits at a time. Also note that the PS1 CPU is not capable
		// of shifting by more than 31 bits - it will shift by 0 bits instead!
		if (bit_offset < 0) {
			window = next_window << (-bit_offset);
			bit_offset += 32;
			next_window = (*input << 16) | (*input >> 16);
			input++;
		};
		window |= next_window >> bit_offset;
	}

	// Pad the buffer with end-of-block codes if necessary.
	// NOTE(review): the loop above only exits once max_size is zero, so this
	// loop does not appear to ever run in its current form - confirm.
	for (; max_size; max_size--)
		*(output++) = 0xfe00;

	if (!remaining)
		return 0;

	// Flush the state back to the context. quant_scale and is_v3 are never
	// modified while decoding, so they do not need to be written back.
	ctx->input = input;
	ctx->remaining = remaining;
	ctx->window = window;
	ctx->next_window = next_window;
	ctx->block_index = block_index;
	ctx->coeff_index = coeff_index;
	ctx->bit_offset = bit_offset;
	return 1;
}

// Initializes ctx from the header of the bitstream pointed to by bs, then
// starts decoding it through DecDCTvlcContinue2(). Returns -1 without
// touching ctx if no lookup table has been set or if the header's version
// field is greater than 3, otherwise returns whatever DecDCTvlcContinue2()
// returns.
int DecDCTvlcStart2(
	VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs
) {
	const BS_Header *header = (const BS_Header *) bs;
	const uint32_t *input = (const uint32_t *) &header[1];

	if (!_vlc_huffman_table2)
		return -1;
	if (header->version > 3)
		return -1;

	// The first two words of the bitstream are preloaded (with their 16-bit
	// halves swapped, see DecDCTvlcContinue2()) into the window.
	ctx->input = &input[2];
	ctx->remaining = (header->mdec0_header & 0xffff) * 2;
	ctx->window = (input[0] << 16) | (input[0] >> 16);
	ctx->next_window = (input[1] << 16) | (input[1] >> 16);
	ctx->quant_scale = (header->quant_scale & 63) << 10;
	ctx->block_index = 0;
	ctx->coeff_index = 0;
	ctx->bit_offset = 32;
	ctx->is_v3 = (header->version == 3);

	return DecDCTvlcContinue2(ctx, buf, max_size);
}

/* Stateful VLC decoder API (for Sony SDK compatibility) */

// Starts decoding a new bitstream when bs is non-null, otherwise resumes
// decoding the previous one, using the internal context and the size limit
// set through DecDCTvlcSize2(). If table is non-null it replaces the lookup
// table currently in use.
int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table) {
	if (table)
		_vlc_huffman_table2 = table;

	if (bs)
		return DecDCTvlcStart2(&_default_context, buf, _max_buffer_size, bs);
	else
		return DecDCTvlcContinue2(&_default_context, buf, _max_buffer_size);
}

// Sets the output size limit passed to the decoder by DecDCTvlc2() and
// returns the previously set value.
size_t DecDCTvlcSize2(size_t size) {
	size_t old_size = _max_buffer_size;
	_max_buffer_size = size;

	return old_size;
}
_vlc_huffman_table2 = table; + + for (int i = 0; i < TABLE_LENGTH; i++) { + uint32_t value = _compressed_table[i] & 0x001fffff; + + for (int j = (_compressed_table[i] >> 21); j >= 0; j--) + *(output++) = value; + } +} diff --git a/libpsn00b/psxsio/_sio_control.s b/libpsn00b/psxsio/_sio_control.s index bf3b9b9..6378def 100644 --- a/libpsn00b/psxsio/_sio_control.s +++ b/libpsn00b/psxsio/_sio_control.s @@ -1,6 +1,6 @@ .set noreorder -.include "hwregs_a.h" +.include "hwregs_a.inc" .section .text diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c new file mode 100644 index 0000000..55a3dba --- /dev/null +++ b/libpsn00b/psxspu/common.c @@ -0,0 +1,157 @@ +/* + * PSn00bSDK SPU library (common functions) + * (C) 2022 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <stdio.h> +#include <psxspu.h> +#include <hwregs_c.h> + +#define WRITABLE_AREA_ADDR 0x200 +#define DMA_CHUNK_LENGTH 16 +#define STATUS_TIMEOUT 0x100000 + +/* Internal globals */ + +static SPU_TransferMode _transfer_mode = SPU_TRANSFER_BY_DMA; +static uint16_t _transfer_addr = WRITABLE_AREA_ADDR; + +/* SPU initialization */ + +static void _wait_status(uint16_t mask, uint16_t value) { + for (int i = STATUS_TIMEOUT; i; i--) { + if ((SPU_STAT & mask) == value) + return; + } + + printf("psxspu: status register timeout (0x%04x)\n", SPU_STAT); +} + +void SpuInit(void) { + SPU_CTRL = 0x0000; // SPU disabled + _wait_status(0x001f, 0x0000); + + SPU_MASTER_VOL_L = 0; + SPU_MASTER_VOL_R = 0; + SPU_REVERB_VOL_L = 0; + SPU_REVERB_VOL_R = 0; + SPU_KEY_OFF = 0x00ffffff; + SPU_FM_MODE = 0; + SPU_NOISE_MODE = 0; + SPU_REVERB_ON = 0; + SPU_REVERB_ADDR = 0xfffe; + SPU_CD_VOL_L = 0; + SPU_CD_VOL_R = 0; + SPU_EXT_VOL_L = 0; + SPU_EXT_VOL_R = 0; + + DMA_DPCR |= 0x000b0000; // Enable DMA4 + DMA_CHCR(4) = 0x00000201; // Stop DMA4 + + SPU_CTRL = 0xc011; // Enable SPU, DAC, CD audio, set manual transfer mode + _wait_status(0x001f, 0x0011); + + // Upload a dummy ADPCM block to the first 16 bytes of SPU RAM. 
This may be + // freely used or overwritten. + SPU_ADDR = WRITABLE_AREA_ADDR; + _wait_status(0x0400, 0x0000); + + SPU_DATA = 0x0500; + for (int i = 7; i; i--) + SPU_DATA = 0x0000; + + // "Play" the dummy block on all channels. This will reset the start + // address and ADSR envelope status of each channel. + for (int i = 0; i < 24; i++) { + SPU_CH_VOL_L(i) = 0; + SPU_CH_VOL_R(i) = 0; + SPU_CH_FREQ(i) = 0x1000; + SPU_CH_ADDR(i) = WRITABLE_AREA_ADDR; + } + + // Sony's implementation leaves everything muted, however it makes sense to + // turn up at least the master and CD audio volume by default. + SPU_KEY_ON = 0x00ffffff; + SPU_MASTER_VOL_L = 0x3fff; + SPU_MASTER_VOL_R = 0x3fff; + SPU_CD_VOL_L = 0x3fff; + SPU_CD_VOL_R = 0x3fff; +} + +/* SPU RAM transfer API */ + +static void _load_store_data(uint32_t *data, size_t length, int mode) { + if (length % 4) + printf("psxspu: can't transfer a number of bytes that isn't multiple of 4\n"); + + length /= 4; + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { + printf("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + length += DMA_CHUNK_LENGTH - 1; + } + + SPU_CTRL &= 0xffcf; // Disable DMA request + _wait_status(0x0030, 0x0000); + + // Enable DMA request for writing (2) or reading (3) + SPU_ADDR = _transfer_addr; + SPU_CTRL |= mode << 4; + _wait_status(0x0400, 0x0000); + + DMA_MADR(4) = (uint32_t) data; + if (length < DMA_CHUNK_LENGTH) + DMA_BCR(4) = 0x00010000 | length; + else + DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + + DMA_CHCR(4) = 0x01000200 | ((mode & 1) ^ 1); +} + +void SpuRead(uint32_t *data, size_t size) { + _load_store_data(data, size, 3); +} + +void SpuWrite(const uint32_t *data, size_t size) { + if (_transfer_addr < WRITABLE_AREA_ADDR) + return; + + // I/O transfer mode is not that useful, but whatever. 
+ if (_transfer_mode) { + SPU_ADDR = _transfer_addr; + SPU_CTRL = (SPU_CTRL & 0xffcf) | 0x0010; // Manual transfer mode + _wait_status(0x0400, 0x0000); + + for (int i = size; i; i -= 4) { + uint32_t value = *(data++); + + SPU_DATA = (uint16_t) value; + SPU_DATA = (uint16_t) (value >> 16); + } + + return; + } + + _load_store_data((uint32_t *) data, size, 2); +} + +SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) { + _transfer_mode = mode; + return mode; +} + +uint32_t SpuSetTransferStartAddr(uint32_t addr) { + if (addr > 0x7ffff) + return 0; + + _transfer_addr = (addr + 7) / 8; + return addr; +} + +int SpuIsTransferCompleted(int mode) { + if (!mode) + return ((SPU_STAT >> 10) & 1) ^ 1; + + _wait_status(0x0400, 0x0000); + return 1; +} diff --git a/libpsn00b/psxspu/readme.txt b/libpsn00b/psxspu/readme.txt index 3ed90d0..07bbf97 100644 --- a/libpsn00b/psxspu/readme.txt +++ b/libpsn00b/psxspu/readme.txt @@ -3,29 +3,24 @@ PSX SPU Library, part of PSn00bSDK Licensed under Mozilla Public License - Open source implementation of the SPU library written mostly in MIPS -assembly. Currently only supports SPU init, uploading sample data using DMA -transfer and basic sample playback but is currently lacking a bunch of -important functions. - - Very work in progress currently. - +Open source implementation of the SPU library written entirely in C. Currently +only supports SPU initialization, reading/writing SPU RAM using DMA and basic +sample playback. Most of the official API is not going to be implemented as the +vast majority of it is just inefficient wrappers around accessing SPU registers +directly, which can be done already using the macros defined in hwregs_c.h. Library developer(s): - Lameguy64 - - + Lameguy64 (initial implementation in assembly) + spicyjpeg + Library header(s): psxspu.h - Todo list: * SPU RAM allocation routines yet to be implemented (heap must only be stored in main RAM and not SPU RAM like in the official SDK). 
- * SpuKeyOn() is actually not part of the official library. - * SPU reverb configuration functions yet to be implemented. diff --git a/libpsn00b/psxspu/spuinit.s b/libpsn00b/psxspu/spuinit.s deleted file mode 100644 index 6966213..0000000 --- a/libpsn00b/psxspu/spuinit.s +++ /dev/null @@ -1,124 +0,0 @@ -.set noreorder -.set noat - -.include "hwregs_a.h" - -.section .text - - -.global SpuInit -.type SpuInit, @function -SpuInit: - - addiu $sp, -4 - sw $ra, 0($sp) - - lui $v1, IOBASE - - # Stop and mute everything - - sh $0 , SPU_CTRL($v1) # Clear control settings - jal SpuCtrlSync - move $a0, $0 - - sh $0 , SPU_MASTER_VOL_L($v1) # Clear master volume - sh $0 , SPU_MASTER_VOL_R($v1) - - sh $0 , SPU_REVERB_VOL_L($v1) # Clear reverb volume - sh $0 , SPU_REVERB_VOL_R($v1) - - sh $0 , SPU_CD_VOL_L($v1) # Clear CD volume - sh $0 , SPU_CD_VOL_R($v1) - - sh $0 , SPU_EXT_VOL_L($v1) # Clear external audio volume - sh $0 , SPU_EXT_VOL_R($v1) - - sw $0 , SPU_FM_MODE($v1) # Turn off FM modes - sw $0 , SPU_NOISE_MODE($v1) # Turn off noise modes - sw $0 , SPU_REVERB_ON($v1) # Turn off reverb modes - - li $v0, 0xfffe - sh $v0, SPU_REVERB_ADDR($v1) - - lui $v0, 0x0200; - ori $v0, 0x3fff; - - # Clear all voices - - addiu $a1, $sp, -20 - sw $0 , 0($a1) - sw $0 , 4($a1) - sw $0 , 8($a1) - sw $0 , 12($a1) - - li $a2, 23 - -.Lclear_voices: - jal SpuSetVoiceRaw - move $a0, $a2 - addiu $a2, -1 - bgez $a2, .Lclear_voices - nop - - addiu $v0, $0, -1 # Set all keys to off - sw $v0, SPU_KEY_OFF($v1) - - li $v0, 0x4 # Set SPU data transfer control - sh $v0, SPU_DMA_CTRL($v1) # (usually always 0x4) - - lw $v0, DMA_DPCR($v1) # Enable DMA channel 4 (SPU DMA) - lui $at, 0xb - or $v0, $at - sw $v0, DMA_DPCR($v1) - - li $v0, 0xC001 # Enable SPU - sh $v0, SPU_CTRL($v1) - jal SpuCtrlSync - move $a0, $v0 - - li $v0, 0x3fff # Activate master volume - sh $v0, SPU_MASTER_VOL_L($v1) - sh $v0, SPU_MASTER_VOL_R($v1) - - sh $v0, SPU_CD_VOL_L($v1) # Activate CD volume - sh $v0, SPU_CD_VOL_R($v1) - - lw $ra, 
0($sp) - addiu $sp, 4 - jr $ra - nop - - -# Waits until bits 0-5 of SPUSTAT are equal to SPUCNT -# -# Destroys v0, v1, a0 -# -.global SpuCtrlSync -.type SpuCtrlSync, @function -SpuCtrlSync: - lui $v1, IOBASE - andi $a0, 0x3f -.Lctrl_wait: - lhu $v0, SPU_STAT($v1) # Get SPUSTAT value - nop - andi $v0, 0x3f - bne $v0, $a0, .Lctrl_wait # Wait until SPUCNT and SPUSTAT are equal - nop - jr $ra - nop - - -# Waits until SPU has finished transfers -# -.global SpuWait -.type SpuWait, @function -SpuWait: - lui $v0, IOBASE - lhu $v0, SPU_STAT($v0) - nop - andi $v0, 0x400 - bnez $v0, SpuWait - nop - jr $ra - nop -
\ No newline at end of file diff --git a/libpsn00b/psxspu/spukeyon.s b/libpsn00b/psxspu/spukeyon.s deleted file mode 100644 index 33592c2..0000000 --- a/libpsn00b/psxspu/spukeyon.s +++ /dev/null @@ -1,17 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .data - - -.global SpuKeyOn -.type SpuKeyOn, @function -SpuKeyOn: - lui $v1, IOBASE - li $v0, 1 - sll $v0, $a0 - sw $v0, SPU_KEY_ON($v1) - sw $v0, SPU_KEY_ON($v1) - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/psxspu/spureverbon.s b/libpsn00b/psxspu/spureverbon.s deleted file mode 100644 index 635fac3..0000000 --- a/libpsn00b/psxspu/spureverbon.s +++ /dev/null @@ -1,16 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .data - - -.global SpuReverbOn -.type SpuReverbOn, @function -SpuReverbOn: - lui $v1, IOBASE - li $v0, 1 - sll $v0, $a0 - sw $v0, SPU_REVERB_ON($v1) - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/psxspu/spusetkey.s b/libpsn00b/psxspu/spusetkey.s deleted file mode 100644 index 4ad0cff..0000000 --- a/libpsn00b/psxspu/spusetkey.s +++ /dev/null @@ -1,26 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .data - - -.global SpuSetKey -.type SpuSetKey, @function -SpuSetKey: - # a0 - 0: key off, 1: key on - # a1 - Voice bit mask - - lui $a2, IOBASE - - beqz $a0, .Lkey_off - nop - - jr $ra - sw $a1, SPU_KEY_ON($a2) - -.Lkey_off: - - jr $ra - sw $a1, SPU_KEY_OFF($a2) -
\ No newline at end of file diff --git a/libpsn00b/psxspu/spusetreverb.s b/libpsn00b/psxspu/spusetreverb.s deleted file mode 100644 index 8257812..0000000 --- a/libpsn00b/psxspu/spusetreverb.s +++ /dev/null @@ -1,25 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - -.section .data - - -.global SpuSetReverb -.type SpuSetReverb, @function -SpuSetReverb: - addiu $sp, -4 - sw $ra, 0($sp) - - lui $v1, IOBASE - lhu $v0, SPU_CTRL($v1) - nop - ori $v0, 0x80 # Enable reverb - sh $v0, SPU_CTRL($v1) - jal SpuCtrlSync - move $a0, $v0 - - lw $ra, 0($sp) - addiu $sp, 4 - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/psxspu/spusetreverbaddr.s b/libpsn00b/psxspu/spusetreverbaddr.s deleted file mode 100644 index 089a91a..0000000 --- a/libpsn00b/psxspu/spusetreverbaddr.s +++ /dev/null @@ -1,25 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - - -.section .text - -.global SpuSetReverbAddr -.type SpuSetReverbAddr, @function -SpuSetReverbAddr: - lui $a3, IOBASE - srl $a0, 3 - sh $a0, SPU_REVERB_ADDR($a3) - jr $ra - nop - - -.global SpuSetReverbVolume -.type SpuSetReverbVolume, @function -SpuSetReverbVolume: - lui $a3, IOBASE - sh $a0, SPU_REVERB_VOL_L($a3) - sh $a1, SPU_REVERB_VOL_R($a3) - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/psxspu/spusetvoiceraw.s b/libpsn00b/psxspu/spusetvoiceraw.s deleted file mode 100644 index 43450f6..0000000 --- a/libpsn00b/psxspu/spusetvoiceraw.s +++ /dev/null @@ -1,60 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - - -.set PARAM_L, 0 -.set PARAM_R, 2 -.set PARAM_FREQ, 4 -.set PARAM_ADDR, 6 -.set PARAM_LOOP, 8 -.set PARAM_RES, 10 -.set PARAM_ADSR, 12 - - -.section .text - -.global SpuSetVoiceRaw -.type SpuSetVoiceRaw, @function -SpuSetVoiceRaw: - - # a0 - Voice number - # a1 - Address to parameters - - sll $a0, 4 - addiu $a0, SPU_VOICE_BASE - - lui $v1, IOBASE - or $a0, $v1 - - lhu $v0, PARAM_L($a1) - nop - sh $v0, SPU_VOICE_VOL_L($a0) - - lhu $v0, PARAM_R($a1) - nop - sh $v0, SPU_VOICE_VOL_R($a0) - - lhu $v0, PARAM_FREQ($a1) - nop - sh $v0, SPU_VOICE_FREQ($a0) - - lhu $v0, PARAM_ADDR($a1) - nop - sh $v0, SPU_VOICE_ADDR($a0) - - lhu $v0, PARAM_LOOP($a1) - nop - sh $v0, SPU_VOICE_LOOP($a0) - - - lw $v0, PARAM_ADSR($a1) - nop - sh $v0, SPU_VOICE_ADSR_L($a0) - srl $v0, 16 - sh $v0, SPU_VOICE_ADSR_H($a0) - - - jr $ra - nop -
\ No newline at end of file diff --git a/libpsn00b/psxspu/transfer.s b/libpsn00b/psxspu/transfer.s deleted file mode 100644 index adcdb33..0000000 --- a/libpsn00b/psxspu/transfer.s +++ /dev/null @@ -1,108 +0,0 @@ -.set noreorder - -.include "hwregs_a.h" - - -.section .text - -.global SpuSetTransferMode -.type SpuSetTransferMode, @function -SpuSetTransferMode: - la $v0, _spu_transfer_mode - sb $a0, 0($v0) - jr $ra - move $v0, $a0 - - -.global SpuSetTransferStartAddr -.type SpuSetTransferStartAddr, @function -SpuSetTransferStartAddr: - li $v0, 0x1000 # Check if value is valid - blt $a0, $v0, .Lbad_value - nop - lui $v0, 8 # 0x7ffff = (8<<16)-1 - addiu $v0, -1 - bgt $a0, $v0, .Lbad_value - nop - - la $v1, _spu_transfer_addr - srl $v0, $a0, 3 # Set transfer destination address - sh $v0, 0($v1) - - jr $ra - move $v0, $a0 - -.Lbad_value: - jr $ra - move $v0, $0 - - -.global SpuWrite -.type SpuWrite, @function -SpuWrite: - addiu $sp, -8 - sw $ra, 0($sp) - sw $a0, 4($sp) - - lui $a3, IOBASE - - lhu $v0, SPU_CTRL($a3) # Set transfer mode to Stop - nop - andi $v0, 0xffcf - sh $v0, SPU_CTRL($a3) - jal SpuCtrlSync - move $a0, $v0 - - la $v1, _spu_transfer_addr # Set SPU write address - lhu $v1, 0($v1) - nop - sh $v1, SPU_ADDR($a3) - - lhu $v0, SPU_CTRL($a3) # Set transfer mode to DMA write - nop - ori $v0, 0x20 - sh $v0, SPU_CTRL($a3) - #jal SpuCtrlSync # Locks up on most emulators (bit 5 in - #move $a0, $v0 # SPUSTAT likely not updating, seems to - # be okay to not wait for it on real HW) - - lw $a0, 4($sp) - -.Ldma_wait: # Wait for SPU to be ready for DMA - lhu $v0, SPU_STAT($a3) - nop - andi $v0, 0x400 # Bit 8 in SPUSTAT never changes to 1 on - bnez $v0, .Ldma_wait # emulators so use bit 10 instead - nop - - sw $a0, DMA4_MADR($a3) # Set DMA source address - - li $v0, 0x10 # 16 words per block (64 bytes) - addiu $a1, 63 # Add by 63 to ensure all bytes get sent - srl $a1, 6 # Equivalent to divide by 64 - andi $a1, 0xffff - sll $a1, 16 - or $v0, $a1 - sw $v0, DMA4_BCR($a3) - - 
lui $v0, 0x0100 # Commence transfer - ori $v0, 0x0201 - sw $v0, DMA4_CHCR($a3) - - lw $ra, 0($sp) - addiu $sp, 8 - jr $ra - nop - - -.section .data - -.global _spu_transfer_mode -.type _spu_transfer_mode, @object -_spu_transfer_mode: - .word 0x0 - -.global _spu_transfer_addr -.type _spu_transfer_addr, @object -_spu_transfer_addr: - .word 0x200
\ No newline at end of file |
