diff options
| author | spicyjpeg <thatspicyjpeg@gmail.com> | 2023-05-11 23:42:43 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-11 23:42:43 +0200 |
| commit | 04d7728350cbd04dd86cd894e906c98673e3f9a7 (patch) | |
| tree | 08e8c7dd495d1c4c6fcf5f7ba6b4b10693dc42f6 /libpsn00b | |
| parent | eaec942f56ceec9c14de5c4185a02602abadd50a (diff) | |
| parent | 58a8306d24fe29d965aa8b40ddc37c3163c0a2f9 (diff) | |
| download | psn00bsdk-04d7728350cbd04dd86cd894e906c98673e3f9a7.tar.gz | |
Merge pull request #70 from Lameguy64/v0.23-wip
Header cleanups, PCDRV, more safety checks, libc and mkpsxiso fixes (v0.23)
Diffstat (limited to 'libpsn00b')
69 files changed, 2925 insertions, 1674 deletions
diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake index 9e7a4d3..e34ff63 100644 --- a/libpsn00b/cmake/internal_setup.cmake +++ b/libpsn00b/cmake/internal_setup.cmake @@ -280,23 +280,22 @@ function(psn00bsdk_add_cd_image name image_name config_file) cmake_path(HASH config_file _hash) - set(CD_IMAGE_NAME ${image_name}) - set(CD_CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml) - configure_file("${config_file}" ${CD_CONFIG_FILE}) + set(_xml_file ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml) + configure_file("${config_file}" ${_xml_file}) add_custom_command( - OUTPUT ${CD_IMAGE_NAME}.bin ${CD_IMAGE_NAME}.cue + OUTPUT ${image_name}.bin ${image_name}.cue COMMAND ${MKPSXISO} -y - -o ${CD_IMAGE_NAME}.bin -c ${CD_IMAGE_NAME}.cue ${CD_CONFIG_FILE} - COMMENT "Building CD image ${CD_IMAGE_NAME}" + -o ${image_name}.bin -c ${image_name}.cue ${_xml_file} + COMMENT "Building CD image ${image_name}" VERBATIM ${ARGN} ) add_custom_target( ${name} ALL DEPENDS - ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.bin - ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.cue + ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.bin + ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.cue ) endfunction() diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h index 1b2bda2..8f8df74 100644 --- a/libpsn00b/include/assert.h +++ b/libpsn00b/include/assert.h @@ -1,13 +1,12 @@ /* * PSn00bSDK assert macro and internal logging - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * - * Note that the _sdk_log() macro is used internally by PSn00bSDK to output - * debug messages and warnings. + * The _sdk_*() macros are used internally by PSn00bSDK to output messages when + * building in debug mode. */ -#ifndef __ASSERT_H -#define __ASSERT_H +#pragma once #include <stdio.h> @@ -25,6 +24,9 @@ void _assert_abort(const char *file, int line, const char *expr); #define assert(expr) #define _sdk_log(fmt, ...) 
+#define _sdk_assert(expr, fmt, ...) +#define _sdk_validate_args_void(expr) +#define _sdk_validate_args(expr, ret) #else @@ -32,11 +34,27 @@ void _assert_abort(const char *file, int line, const char *expr); ((expr) ? ((void) 0) : _assert_abort(__FILE__, __LINE__, #expr)) #ifdef SDK_LIBRARY_NAME -#define _sdk_log(fmt, ...) printf(SDK_LIBRARY_NAME ": " fmt, ##__VA_ARGS__) +#define _sdk_log(fmt, ...) \ + printf(SDK_LIBRARY_NAME ": " fmt __VA_OPT__(,) __VA_ARGS__) #else -#define _sdk_log(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define _sdk_log(fmt, ...) \ + printf(fmt __VA_OPT__(,) __VA_ARGS__) #endif -#endif +#define _sdk_assert(expr, ret, fmt, ...) \ + if (!(expr)) { \ + _sdk_log(fmt, __VA_ARGS__); \ + return ret; \ + } +#define _sdk_validate_args_void(expr) \ + if (!(expr)) { \ + _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \ + return; \ + } +#define _sdk_validate_args(expr, ret) \ + if (!(expr)) { \ + _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \ + return ret; \ + } #endif diff --git a/libpsn00b/include/cassert b/libpsn00b/include/cassert new file mode 100644 index 0000000..0923486 --- /dev/null +++ b/libpsn00b/include/cassert @@ -0,0 +1,8 @@ +/* + * PSn00bSDK assert macro and internal logging + * (C) 2022-2023 spicyjpeg - MPL licensed + */ + +#pragma once + +#include <assert.h> diff --git a/libpsn00b/include/cctype b/libpsn00b/include/cctype new file mode 100644 index 0000000..b73ad34 --- /dev/null +++ b/libpsn00b/include/cctype @@ -0,0 +1,22 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +namespace std { +extern "C" { + +int isprint(int ch); +int isgraph(int ch); +int isspace(int ch); +int isblank(int ch); +int isalpha(int ch); +int isdigit(int ch); + +int tolower(int ch); +int toupper(int ch); + +} +} diff --git a/libpsn00b/include/cstdint b/libpsn00b/include/cstdint new file mode 100644 index 0000000..3b1bc4a --- /dev/null +++ b/libpsn00b/include/cstdint 
@@ -0,0 +1,34 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + * + * This is a replacement for the <cstdint> header included with GCC, which seems + * to be broken (at least in GCC 12.2.0) as it requires some macros to be set. + */ + +#pragma once + +#include <stdint.h> + +namespace std { + +#define _DEF_TYPE(bits, prefix) \ + using ::prefix##bits##_t; \ + using ::prefix##_fast##bits##_t; \ + using ::prefix##_least##bits##_t; + +_DEF_TYPE( 8, int) +_DEF_TYPE( 8, uint) +_DEF_TYPE(16, int) +_DEF_TYPE(16, uint) +_DEF_TYPE(32, int) +_DEF_TYPE(32, uint) + +#undef _DEF_TYPE + +using ::intmax_t; +using ::uintmax_t; +using ::intptr_t; +using ::uintptr_t; + +} diff --git a/libpsn00b/include/cstdio b/libpsn00b/include/cstdio new file mode 100644 index 0000000..800d1a2 --- /dev/null +++ b/libpsn00b/include/cstdio @@ -0,0 +1,32 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstdarg> + +namespace std { +extern "C" { + +/* String I/O API (provided by BIOS) */ + +int printf(const char *fmt, ...); +char *gets(char *str); +void puts(const char *str); +int getchar(void); +void putchar(int ch); + +/* String formatting API (built-in) */ + +int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap); +int vsprintf(char *string, const char *fmt, va_list ap); +int sprintf(char *string, const char *fmt, ...); +int snprintf(char *string, unsigned int size, const char *fmt, ...); + +int vsscanf(const char *str, const char *format, va_list ap); +int sscanf(const char *str, const char *fmt, ...); + +} +} diff --git a/libpsn00b/include/cstdlib b/libpsn00b/include/cstdlib new file mode 100644 index 0000000..4fa859d --- /dev/null +++ b/libpsn00b/include/cstdlib @@ -0,0 +1,59 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstddef> + +namespace std { + +/* 
Definitions */ + +static constexpr int RAND_MAX = 0x7fff; + +/* Structure definitions */ + +struct HeapUsage { + size_t total; // Total size of heap + stack + size_t heap; // Amount of memory currently reserved for heap + size_t stack; // Amount of memory currently reserved for stack + size_t alloc; // Amount of memory currently allocated + size_t alloc_max; // Maximum amount of memory ever allocated +}; + +/* API */ + +extern "C" { + +extern int __argc; +extern const char **__argv; + +void abort(void); + +int rand(void); +void srand(int seed); + +int abs(int j); +long labs(long i); + +long strtol(const char *str, char **str_end, int base); +long long strtoll(const char *str, char **str_end, int base); +//float strtof(const char *str, char **str_end); +//double strtod(const char *str, char **str_end); +//long double strtold(const char *str, char **str_end); + +void InitHeap(void *addr, size_t size); +void *sbrk(ptrdiff_t incr); + +void TrackHeapUsage(ptrdiff_t alloc_incr); +void GetHeapUsage(HeapUsage *usage); + +void *malloc(size_t size); +void *calloc(size_t num, size_t size); +void *realloc(void *ptr, size_t size); +void free(void *ptr); + +} +} diff --git a/libpsn00b/include/cstring b/libpsn00b/include/cstring new file mode 100644 index 0000000..1ce7246 --- /dev/null +++ b/libpsn00b/include/cstring @@ -0,0 +1,38 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstddef> + +namespace std { +extern "C" { + +void *memset(void *dest, int ch, size_t count); +void *memcpy(void *dest, const void *src, size_t count); +void *memccpy(void *dest, const void *src, int ch, size_t count); +void *memmove(void *dest, const void *src, size_t count); +int memcmp(const void *lhs, const void *rhs, size_t count); +void *memchr(const void *ptr, int ch, size_t count); + +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t count); +int strcmp(const char 
*lhs, const char *rhs); +int strncmp(const char *lhs, const char *rhs, size_t count); +char *strchr(const char *str, int ch); +char *strrchr(const char *str, int ch); +char *strpbrk(const char *str, const char *breakset); +char *strstr(const char *str, const char *substr); + +size_t strlen(const char *str); +char *strcat(char *dest, const char *src); +char *strncat(char *dest, const char *src, size_t count); +char *strdup(const char *str); +char *strndup(const char *str, size_t count); + +char *strtok(char *str, const char *delim); + +} +} diff --git a/libpsn00b/include/ctype.h b/libpsn00b/include/ctype.h index 24ee9d9..2fe0a42 100644 --- a/libpsn00b/include/ctype.h +++ b/libpsn00b/include/ctype.h @@ -1,20 +1,24 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __CTYPE_H -#define __CTYPE_H +#pragma once #ifdef __cplusplus extern "C" { #endif -int tolower(int chr); -int toupper(int chr); +int isprint(int ch); +int isgraph(int ch); +int isspace(int ch); +int isblank(int ch); +int isalpha(int ch); +int isdigit(int ch); + +int tolower(int ch); +int toupper(int ch); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/dlfcn.h b/libpsn00b/include/dlfcn.h index 6192430..5e1e3b6 100644 --- a/libpsn00b/include/dlfcn.h +++ b/libpsn00b/include/dlfcn.h @@ -3,8 +3,7 @@ * (C) 2021-2022 spicyjpeg - MPL licensed */ -#ifndef __DLFCN_H -#define __DLFCN_H +#pragma once #include <stdint.h> #include <stddef.h> @@ -215,5 +214,3 @@ void *DL_GetDLLSymbol(const DLL *dll, const char *name); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/elf.h b/libpsn00b/include/elf.h index abfb3d5..b0ddf71 100644 --- a/libpsn00b/include/elf.h +++ b/libpsn00b/include/elf.h @@ -9,8 +9,7 @@ * converted to enums. 
*/ -#ifndef __ELF_H -#define __ELF_H +#pragma once #include <stdint.h> @@ -121,18 +120,3 @@ typedef enum { STT_LOPROC = 13, /* Start of processor-specific */ STT_HIPROC = 15 /* End of processor-specific */ } Elf32_st_type; - -// If you need to add more constants, you may use the following Python snippet -// to quickly convert #defines to enums: -/* -import re -t = """<paste #defines here>""" -t = re.sub( - r"(0x[0-9a-f]+|0b[01]+|[0-9]+)", - lambda m: f"= {m.group(1)},", - t.replace("#define ", "\t").replace("#define\t", "\t") -) -print("typedef enum {\n" + t + "\n} NAME;") -*/ - -#endif diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h index 7015101..2152986 100644 --- a/libpsn00b/include/hwregs_c.h +++ b/libpsn00b/include/hwregs_c.h @@ -3,8 +3,7 @@ * (C) 2022 spicyjpeg - MPL licensed */ -#ifndef __HWREGS_C_H -#define __HWREGS_C_H +#pragma once #include <stdint.h> @@ -35,7 +34,7 @@ #define CD_DATA _MMIO8(IOBASE | 0x1802) #define CD_IRQ _MMIO8(IOBASE | 0x1803) -#define CD_REG(N) _MMIO8(IOBASE | 0x1800 + (N)) +#define CD_REG(N) _MMIO8((IOBASE | 0x1800) + (N)) /* SPU */ @@ -74,13 +73,13 @@ // These are not named SPU_VOICE_* to avoid name clashes with SPU attribute // flags defined in psxspu.h. 
-#define SPU_CH_VOL_L(N) _MMIO16(IOBASE | 0x1c00 + 16 * (N)) -#define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N)) -#define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N)) -#define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N)) -#define SPU_CH_ADSR1(N) _MMIO16(IOBASE | 0x1c08 + 16 * (N)) -#define SPU_CH_ADSR2(N) _MMIO16(IOBASE | 0x1c0a + 16 * (N)) -#define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N)) +#define SPU_CH_VOL_L(N) _MMIO16((IOBASE | 0x1c00) + (16 * (N))) +#define SPU_CH_VOL_R(N) _MMIO16((IOBASE | 0x1c02) + (16 * (N))) +#define SPU_CH_FREQ(N) _MMIO16((IOBASE | 0x1c04) + (16 * (N))) +#define SPU_CH_ADDR(N) _MMIO16((IOBASE | 0x1c06) + (16 * (N))) +#define SPU_CH_ADSR1(N) _MMIO16((IOBASE | 0x1c08) + (16 * (N))) +#define SPU_CH_ADSR2(N) _MMIO16((IOBASE | 0x1c0a) + (16 * (N))) +#define SPU_CH_LOOP_ADDR(N) _MMIO16((IOBASE | 0x1c0e) + (16 * (N))) /* MDEC */ @@ -92,11 +91,11 @@ // IMPORTANT: even though SIO_DATA is a 32-bit register, it should only be // accessed as 8-bit. Reading it as 16 or 32-bit works fine on real hardware, // but leads to problems in some emulators. 
-#define SIO_DATA(N) _MMIO8 (IOBASE | 0x1040 + 16 * (N)) -#define SIO_STAT(N) _MMIO16(IOBASE | 0x1044 + 16 * (N)) -#define SIO_MODE(N) _MMIO16(IOBASE | 0x1048 + 16 * (N)) -#define SIO_CTRL(N) _MMIO16(IOBASE | 0x104a + 16 * (N)) -#define SIO_BAUD(N) _MMIO16(IOBASE | 0x104e + 16 * (N)) +#define SIO_DATA(N) _MMIO8 ((IOBASE | 0x1040) + (16 * (N))) +#define SIO_STAT(N) _MMIO16((IOBASE | 0x1044) + (16 * (N))) +#define SIO_MODE(N) _MMIO16((IOBASE | 0x1048) + (16 * (N))) +#define SIO_CTRL(N) _MMIO16((IOBASE | 0x104a) + (16 * (N))) +#define SIO_BAUD(N) _MMIO16((IOBASE | 0x104e) + (16 * (N))) /* IRQ controller */ @@ -108,15 +107,15 @@ #define DMA_DPCR _MMIO32(IOBASE | 0x10f0) #define DMA_DICR _MMIO32(IOBASE | 0x10f4) -#define DMA_MADR(N) _MMIO32(IOBASE | 0x1080 + 16 * (N)) -#define DMA_BCR(N) _MMIO32(IOBASE | 0x1084 + 16 * (N)) -#define DMA_CHCR(N) _MMIO32(IOBASE | 0x1088 + 16 * (N)) +#define DMA_MADR(N) _MMIO32((IOBASE | 0x1080) + (16 * (N))) +#define DMA_BCR(N) _MMIO32((IOBASE | 0x1084) + (16 * (N))) +#define DMA_CHCR(N) _MMIO32((IOBASE | 0x1088) + (16 * (N))) /* Timers */ -#define TIMER_VALUE(N) _MMIO32(IOBASE | 0x1100 + 16 * (N)) -#define TIMER_CTRL(N) _MMIO32(IOBASE | 0x1104 + 16 * (N)) -#define TIMER_RELOAD(N) _MMIO32(IOBASE | 0x1108 + 16 * (N)) +#define TIMER_VALUE(N) _MMIO32((IOBASE | 0x1100) + (16 * (N))) +#define TIMER_CTRL(N) _MMIO32((IOBASE | 0x1104) + (16 * (N))) +#define TIMER_RELOAD(N) _MMIO32((IOBASE | 0x1108) + (16 * (N))) /* Memory/bus control */ @@ -130,5 +129,3 @@ #define BUS_EXP2_CFG _MMIO32(IOBASE | 0x101c) #define BUS_COM_DELAY _MMIO32(IOBASE | 0x1020) #define BUS_RAM_SIZE _MMIO32(IOBASE | 0x1060) - -#endif diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h index 5facc1c..cb550b9 100644 --- a/libpsn00b/include/inline_c.h +++ b/libpsn00b/include/inline_c.h @@ -16,8 +16,7 @@ * compiled object files. 
*/ -#ifndef _INLINE_C_H -#define _INLINE_C_H +#pragma once /* GTE load macros */ @@ -1612,5 +1611,3 @@ : "g"( r0 ) ) #define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) - -#endif // _INLINE_C_H diff --git a/libpsn00b/include/ioctl.h b/libpsn00b/include/ioctl.h deleted file mode 100644 index 5c56422..0000000 --- a/libpsn00b/include/ioctl.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _IOCTL_H -#define _IOCTL_H - -#ifndef NULL -#define NULL 0 -#endif - -#ifndef EOF -#define EOF -1 -#endif - -// General -#define FIONBLOCK (('f'<<8)|1) -#define FIOCSCAN (('f'<<8)|2) - -// disk -#define DIO_FORMAT (('d'<<8)|1) - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/lzp/lzp.h b/libpsn00b/include/lzp/lzp.h index 456de02..1aeea30 100644 --- a/libpsn00b/include/lzp/lzp.h +++ b/libpsn00b/include/lzp/lzp.h @@ -1,20 +1,29 @@ -/*! \file lzp.h - * \brief Main library header +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed */ -/*! \mainpage - * \version 0.20b - * \author John Wilbert 'Lameguy64' Villamor +/** + * @file lzp.h + * @brief Utility library for file bundling and compression * - * \section creditsSection Credits - * - LZ77 data compression/decompression routines based from Ilya Muravyov's - * crush.cpp released under public domain. Refined and ported to C by Lameguy64. - * - CRC calculation routines based from Lammert Bies' lib_crc routines. + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). * + * This header provides the LZ77 compression API and functions to parse and + * decompress .LZP archives after they have been loaded into memory. + * + * @section creditsSection Credits + * - LZ77 data compression/decompression routines based from Ilya Muravyov's + * crush.cpp released under public domain. Refined and ported to C by + * Lameguy64. + * - CRC calculation routines based from Lammert Bies' lib_crc routines. 
*/ -#ifndef _LZPACK_H -#define _LZPACK_H +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum); #ifdef __cplusplus } #endif - - -#endif // _LZPACK_H diff --git a/libpsn00b/include/lzp/lzqlp.h b/libpsn00b/include/lzp/lzqlp.h index 32ce0d7..127f263 100644 --- a/libpsn00b/include/lzp/lzqlp.h +++ b/libpsn00b/include/lzp/lzqlp.h @@ -1,5 +1,23 @@ -#ifndef _QLP_H -#define _QLP_H +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed + */ + +/** + * @file lzqlp.h + * @brief Utility library for file bundling + * + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). + * + * This header provides functions to parse .QLP archives and retrieve pointers + * to their contents after they have been loaded into memory. + */ + +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -23,9 +41,17 @@ typedef struct { uint32_t offs; } QLP_FILE; + +// Function prototypes +#ifdef __cplusplus +extern "C" { +#endif + int qlpFileCount(const QLP_HEAD* qlpfile); const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile); const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile); int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile); -#endif // _QLP_H
\ No newline at end of file +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/include/malloc.h b/libpsn00b/include/malloc.h deleted file mode 100644 index 75c3711..0000000 --- a/libpsn00b/include/malloc.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _MALLOC_H -#define _MALLOC_H - -#warning "<malloc.h> is deprecated, include <stdlib.h> instead" - -#include <stdlib.h> - -#endif // _MALLOC_H
\ No newline at end of file diff --git a/libpsn00b/include/psxapi.h b/libpsn00b/include/psxapi.h index 7353ed2..35ee040 100644 --- a/libpsn00b/include/psxapi.h +++ b/libpsn00b/include/psxapi.h @@ -1,10 +1,21 @@ /* * PSn00bSDK kernel API library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXAPI_H -#define __PSXAPI_H +/** + * @file psxapi.h + * @brief Kernel API library header + * + * @details This header provides access to most of the APIs made available by + * the system's BIOS, including basic file I/O, TTY output, controller and + * memory card drivers, threads, events as well as kernel memory allocation. + * + * For more information and up-to-date documentation on kernel APIs, see: + * https://psx-spx.consoledev.net/kernelbios/ + */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -12,13 +23,38 @@ /* Definitions */ -#define DescHW 0xf0000000 -#define DescSW 0xf4000000 - -#define HwCARD (DescHW|0x11) -#define HwCARD_1 (DescHW|0x12) -#define HwCARD_0 (DescHW|0x13) -#define SwCARD (DescHW|0x02) +// TODO: these desperately need to be cleaned up + +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +#define DescMask 0xff000000 // Event descriptor mask +#define DescTH DescMask +#define DescHW 0xf0000000 // Hardware event (IRQ) +#define DescEV 0xf1000000 // Event event +#define DescRC 0xf2000000 // Root counter event +#define DescUEV 0xf3000000 // User event +#define DescSW 0xf4000000 // BIOS event + +#define HwVBLANK (DescHW|0x01) // VBlank +#define HwGPU (DescHW|0x02) // GPU +#define HwCdRom (DescHW|0x03) // CDROM +#define HwDMAC (DescHW|0x04) // DMA +#define HwRTC0 (DescHW|0x05) // Timer 0 +#define HwRTC1 (DescHW|0x06) // Timer 1 +#define HwRTC2 (DescHW|0x07) // Timer 2 +#define HwCNTL (DescHW|0x08) // Controller +#define HwSPU (DescHW|0x09) // SPU +#define HwPIO (DescHW|0x0a) // PIO & lightgun +#define HwSIO (DescHW|0x0b) // Serial + +#define HwCPU (DescHW|0x10) // 
Processor exception +#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions) +#define HwCard_0 (DescHW|0x12) +#define HwCard_1 (DescHW|0x13) +#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions) +#define SwMATH (DescSW|0x02) #define EvSpIOE 0x0004 #define EvSpERROR 0x8000 @@ -135,8 +171,6 @@ struct JMP_BUF { uint32_t gp; }; -// Not recommended to use these functions to install IRQ handlers - typedef struct { uint32_t *next; uint32_t *func2; @@ -158,7 +192,8 @@ typedef struct { #define FastExitCriticalSection() \ (IRQ_MASK = __saved_irq_mask) -/*#define FastEnterCriticalSection() { \ +#if 0 +#define FastEnterCriticalSection() { \ uint32_t r0, r1; \ __asm__ volatile( \ "mfc0 %0, $12;" \ @@ -179,9 +214,10 @@ typedef struct { "nop;" \ : "=r"(r0) :: \ ); \ -}*/ +} +#endif -/* API */ +/* BIOS API */ #ifdef __cplusplus extern "C" { @@ -199,23 +235,28 @@ int DisableEvent(int event); void DeliverEvent(uint32_t cl, uint32_t spec); void UnDeliverEvent(uint32_t cl, uint32_t spec); -int open(const char *name, int mode); +int open(const char *path, int mode); int close(int fd); -int seek(int fd, uint32_t offset, int mode); -int read(int fd, uint8_t *buff, size_t len); -int write(int fd, const uint8_t *buff, size_t len); +int lseek(int fd, uint32_t offset, int mode); +int read(int fd, void *buff, size_t len); +int write(int fd, const void *buff, size_t len); +int getc(int fd); +int putc(int ch, int fd); int ioctl(int fd, int cmd, int arg); +int isatty(int fd); struct DIRENTRY *firstfile(const char *wildcard, struct DIRENTRY *entry); struct DIRENTRY *nextfile(struct DIRENTRY *entry); -int erase(const char *name); -int chdir(const char *path); +int erase(const char *path); +int undelete(const char *path); +int cd(const char *path); -//#define cd(p) chdir(p) +int _get_errno(void); +int _get_error(int fd); -int AddDev(DCB *dcb); -int DelDev(const char *name); -void ListDev(void); -void AddDummyTty(void); +int AddDrv(DCB *dcb); +int DelDrv(const 
char *name); +void ListDrv(void); +void add_nullcon_driver(void); int EnterCriticalSection(void); void ExitCriticalSection(void); @@ -254,30 +295,33 @@ int ResetRCnt(int spec); void ChangeClearPAD(int mode); void ChangeClearRCnt(int t, int m); -uint32_t OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp); -int CloseTh(uint32_t thread); -int ChangeTh(uint32_t thread); +int OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp); +int CloseTh(int thread); +int ChangeTh(int thread); -int Exec(struct EXEC *exec, int argc, char **argv); +int Exec(struct EXEC *exec, int argc, const char **argv); +int LoadExec(const char *path, int argc, const char **argv); void FlushCache(void); void b_setjmp(struct JMP_BUF *buf); void b_longjmp(const struct JMP_BUF *buf, int param); -void SetDefaultExitFromException(void); -void SetCustomExitFromException(const struct JMP_BUF *buf); +void ResetEntryInt(void); +void HookEntryInt(const struct JMP_BUF *buf); void ReturnFromException(void); +int SetConf(int evcb, int tcb, uint32_t sp); +void GetConf(int *evcb, int *tcb, uint32_t *sp); +void SetMem(int size); + int GetSystemInfo(int index); void *GetB0Table(void); void *GetC0Table(void); -void *_kernel_malloc(int size); -void _kernel_free(void *ptr); +void *alloc_kernel_memory(int size); +void free_kernel_memory(void *ptr); void _boot(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h index 503bc83..fc9c391 100644 --- a/libpsn00b/include/psxcd.h +++ b/libpsn00b/include/psxcd.h @@ -21,8 +21,7 @@ * library extension is considered for future development. */ -#ifndef __PSXCD_H -#define __PSXCD_H +#pragma once #include <stdint.h> @@ -811,6 +810,48 @@ int CdMode(void); int CdStatus(void); /** + * @brief Returns the CD-ROM controller's region code. + * + * @details Reads region information from the drive using a CdlTest command. 
+ * This can be used to reliably determine the system's region without having to + * resort to workarounds like probing the BIOS ROM. + * + * This function may return incorrect results and trigger error callbacks on + * emulators or consoles equipped with CD-ROM drive emulation devices such as + * the PSIO. It is not affected by modchips. + * + * @return Region code or 0 if the region cannot be determined + */ +CdlRegionCode CdGetRegion(void); + +/** + * @brief Attempts to disable the CD-ROM controller's region check. + * + * @details Sends undocumented commands to the drive in an attempt to disable + * the region string check, in order to allow reading data from non-PS1 discs + * as well as CD-Rs without needing a modchip. As unlocking commands are region + * specific, the drive's region must be obtained beforehand using CdGetRegion() + * and passed to this function. The unlock persists even if the lid is opened, + * but not if a CdlReset command is issued. + * + * Unlocking is only supported on US, European and Net Yaroze consoles (not on + * Japanese models, devkits and most emulators). This function will return 1 + * without doing anything if CdlRegionDebug is passed as region, as debug + * consoles can already read unlicensed discs. + * + * NOTE: if any callbacks were set using CdReadyCallback() or CdSyncCallback() + * prior to calling CdUnlock(), they will be called with an error code as part + * of the unlocking sequence, even if the unlock was successful. It is thus + * recommended to call this function before setting any callbacks. + * + * @param region + * @return 1 if the drive was successfully unlocked, 0 otherwise + * + * @see CdGetRegion() + */ +int CdUnlock(CdlRegionCode region); + +/** * @brief Retrieves the disc's table of contents. * * @details Retrieves the track entries from a CD's table of contents (TOC). The @@ -832,21 +873,6 @@ int CdStatus(void); int CdGetToc(CdlLOC *toc); /** - * @brief Returns the CD-ROM controller's region code. 
- * - * @details Attempts to fetch region information from the drive using a CdlTest - * command. This can be used to reliably determine the system's region without - * having to resort to workarounds like probing the BIOS ROM. - * - * This function may return incorrect results on emulators or consoles equipped - * with CD-ROM drive emulation devices such as the PSIO. It is not affected by - * modchips. - * - * @return Region code or 0 if the region cannot be determined - */ -CdlRegionCode CdGetRegion(void); - -/** * @brief Sets the CD-ROM volume mixing matrix. * * @details Sets the volume levels of the CD-ROM drive's audio output (used for @@ -1046,5 +1072,3 @@ int CdLoadSession(int session); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h index ae4611e..8dd1dd5 100644 --- a/libpsn00b/include/psxetc.h +++ b/libpsn00b/include/psxetc.h @@ -1,6 +1,6 @@ /* * PSn00bSDK interrupt management library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ /** @@ -13,8 +13,7 @@ * separate header). */ -#ifndef __PSXETC_H -#define __PSXETC_H +#pragma once /* IRQ and DMA channel definitions */ @@ -68,7 +67,7 @@ extern "C" { * | ID | Channel | Used by | * | --: | :--------------- | :-------------------------------------- | * | 0 | IRQ_VBLANK | psxgpu (use VSyncCallback() instead) | - * | 1 | IRQ_GPU | | + * | 1 | IRQ_GPU | psxgpu (use DrawSyncCallback() instead) | * | 2 | IRQ_CD | psxcd (use CdReadyCallback() instead) | * | 3 | IRQ_DMA | psxetc (use DMACallback() instead) | * | 4 | IRQ_TIMER0 | | @@ -156,19 +155,50 @@ void *DMACallback(DMA_Channel dma, void (*func)(void)); void *GetDMACallback(DMA_Channel dma); /** - * @brief Initializes the interrupt dispatcher. + * @brief Enables, disables or sets the priority of a DMA channel. + * + * @details Enables the specified DMA channel and configures its priority (if + * priority >= 0) or disables it (if priority = -1). 
The priority value must be + * in 0-7 range, with 0 being the highest priority and 7 the lowest. + * + * All channels are disabled upon calling ResetCallback(); most libraries will + * re-enable them as needed. By default the priority is set to 3 for all + * channels. + * + * @param dma + * @param priority Priority in 0-7 range or -1 to disable the channel + * @return Previously set priority in 0-7 range, -1 if the channel was disabled + */ +int SetDMAPriority(DMA_Channel dma, int priority); + +/** + * @brief Gets the priority of a DMA channel. + * + * @details Returns the currently set priority value for the specified DMA + * channel in 0-7 range, with 0 being the highest priority and 7 the lowest. + * Returns -1 if the channel is not enabled. + * + * @param dma + * @return Priority in 0-7 range, -1 if the channel is disabled + * + * @see SetDMAPriority() + */ +int GetDMAPriority(DMA_Channel dma); + +/** + * @brief Initializes the interrupt dispatcher and DMA controller. * * @details Sets up the interrupt handling system, hooks the BIOS to dispatch - * interrupts to the library and clears all registered callbacks. This function - * must be called once at the beginning of the program, prior to registering - * any IRQ or DMA callbacks. + * interrupts to the library, clears all registered callbacks and disables all + * DMA channels. This function must be called once at the beginning of the + * program, prior to registering any IRQ or DMA callbacks. * * ResetCallback() is called by psxgpu's ResetGraph(), so invoking it manually * is usually not required. Calling ResetCallback() after ResetGraph() will * actually result in improper initialization, as ResetGraph() registers * several callbacks used internally by psxgpu. 
* - * @return 0 or -1 if the was already initialized + * @return 0 or -1 if the dispatcher was already initialized */ int ResetCallback(void); @@ -196,6 +226,11 @@ void RestartCallback(void); * Note that interrupts are (obviously) disabled until RestartCallback() is * called. * + * WARNING: any ongoing background processing or DMA transfer must be stopped + * before calling StopCallback(), otherwise crashes may occur. This includes + * flushing psxgpu's command queue using DrawSync(), stopping CD-ROM reading + * and calling StopPAD() to disable the BIOS controller driver if used. + * * @see RestartCallback() */ void StopCallback(void); @@ -203,5 +238,3 @@ void StopCallback(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 26e560f..d7f1ad5 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -1,10 +1,26 @@ /* * PSn00bSDK GPU library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXGPU_H -#define __PSXGPU_H +/** + * @file psxgpu.h + * @brief GPU library header + * + * @details This library provides access to the PS1's GPU through a fully + * asynchronous command queue, which allows GPU commands to be batched and sent + * efficiently in the background without stalling the CPU. Helper structures + * and macros to initialize, generate and link GPU display lists in memory are + * also provided, in addition to support for asynchronous VRAM data transfers + * and a debug font API that can be used to easily draw text overlays for + * debugging purposes. + * + * This library is for the most part a drop-in replacement for the official + * SDK's GPU library and is only missing a handful of functions, mainly related + * to Kanji debug fonts and command queue pausing. 
+ */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -28,6 +44,11 @@ typedef enum _GPU_VideoMode { MODE_PAL = 1 } GPU_VideoMode; +typedef enum _GPU_DrawOpType { + DRAWOP_TYPE_DMA = 1, + DRAWOP_TYPE_GPU_IRQ = 2 +} GPU_DrawOpType; + /* Structure macros */ #define setVector(v, _x, _y, _z) \ @@ -83,7 +104,7 @@ typedef enum _GPU_VideoMode { (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ (p)->u2 = (_u2), (p)->v2 = (_v2) - + #define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \ (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ @@ -101,9 +122,12 @@ typedef enum _GPU_VideoMode { #define setlen(p, _len) (((P_TAG *) (p))->len = (uint8_t) (_len)) #define setaddr(p, _addr) (((P_TAG *) (p))->addr = (uint32_t) (_addr)) #define setcode(p, _code) (((P_TAG *) (p))->code = (uint8_t) (_code)) +#define setcode_T(p, _code) (((P_TAG_T *) (p))->code = (uint8_t) (_code)) + #define getlen(p) (((P_TAG *) (p))->len) #define getaddr(p) (((P_TAG *) (p))->addr) #define getcode(p) (((P_TAG *) (p))->code) +#define getcode_T(p) (((P_TAG_T *) (p))->code) #define nextPrim(p) (void *) (0x80000000 | (((P_TAG *) (p))->addr)) #define isendprim(p) ((((P_TAG *) (p))->addr) == 0xffffff) @@ -114,16 +138,20 @@ typedef enum _GPU_VideoMode { #define setSemiTrans(p, abe) \ ((abe) ? (getcode(p) |= 2) : (getcode(p) &= ~2)) +#define setSemiTrans_T(p, abe) \ + ((abe) ? (getcode_T(p) |= 2) : (getcode_T(p) &= ~2)) #define setShadeTex(p, tge) \ ((tge) ? (getcode(p) |= 1) : (getcode(p) &= ~1)) +#define setShadeTex_T(p, tge) \ + ((tge) ? 
(getcode_T(p) |= 1) : (getcode_T(p) &= ~1)) #define getTPage(tp, abr, x, y) ( \ - (((x) / 64) & 15) | \ - ((((y) / 256) & 1) << 4) | \ - (((abr) & 3) << 5) | \ - (((tp) & 3) << 7) | \ - ((((y) / 512) & 1) << 11) \ + (((x) & 0x3c0) >> 6) | \ + (((y) & 0x100) >> 4) | \ + (((y) & 0x200) << 2) | \ + (((abr) & 3) << 5) | \ + (((tp) & 3) << 7) \ ) #define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f)) @@ -147,59 +175,109 @@ typedef enum _GPU_VideoMode { #define setTile(p) setlen(p, 3), setcode(p, 0x60) #define setLineF2(p) setlen(p, 3), setcode(p, 0x40) #define setLineG2(p) setlen(p, 4), setcode(p, 0x50) -#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), \ - (p)->pad = 0x55555555 -#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), \ - (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0 -#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), \ - (p)->pad = 0x55555555 -#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), \ - (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 -#define setFill(p) setlen(p, 3), setcode(p, 0x02) -#define setVram2Vram(p) setlen(p, 8), setcode(p, 0x80), \ +#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), (p)->pad = 0x55555555 +#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0 +#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), (p)->pad = 0x55555555 +#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 +#define setFill(p) setlen(p, 3), setcode(p, 0x02) +#define setBlit(p) setlen(p, 8), setcode(p, 0x80), \ (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0 -#define setDrawTPage(p, dfe, dtd, tpage) \ - setlen(p, 1), \ +#define setPolyF3_T(p) setcode_T(p, 0x20) +#define setPolyFT3_T(p) setcode_T(p, 0x24) +#define setPolyG3_T(p) setcode_T(p, 0x30) +#define setPolyGT3_T(p) setcode_T(p, 0x34) +#define setPolyF4_T(p) setcode_T(p, 0x28) +#define setPolyFT4_T(p) setcode_T(p, 0x2c) +#define setPolyG4_T(p) 
setcode_T(p, 0x38) +#define setPolyGT4_T(p) setcode_T(p, 0x3c) +#define setSprt8_T(p) setcode_T(p, 0x74) +#define setSprt16_T(p) setcode_T(p, 0x7c) +#define setSprt_T(p) setcode_T(p, 0x64) +#define setTile1_T(p) setcode_T(p, 0x68) +#define setTile8_T(p) setcode_T(p, 0x70) +#define setTile16_T(p) setcode_T(p, 0x78) +#define setTile_T(p) setcode_T(p, 0x60) +#define setLineF2_T(p) setcode_T(p, 0x40) +#define setLineG2_T(p) setcode_T(p, 0x50) +#define setLineF3_T(p) setcode_T(p, 0x48), (p)->pad = 0x55555555 +#define setLineG3_T(p) setcode_T(p, 0x58), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0 +#define setLineF4_T(p) setcode_T(p, 0x4c), (p)->pad = 0x55555555 +#define setLineG4_T(p) setcode_T(p, 0x5c), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 +#define setFill_T(p) setcode_T(p, 0x02) +#define setBlit_T(p) setcode_T(p, 0x80), \ + (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0 + +#define setDrawTPage_T(p, dfe, dtd, tpage) \ (p)->code[0] = (0xe1000000 | \ (tpage) | \ - ((dtd) << 9) | \ - ((dfe) << 10) \ + (((dtd) & 1) << 9) | \ + (((dfe) & 1) << 10) \ ) +#define setDrawTPage(p, dfe, dtd, tpage) \ + setlen(p, 1), setDrawTPage_T(p, dfe, dtd, tpage) -#define setDrawOffset(p, _x, _y) \ - setlen(p, 1), \ - (p)->code[0] = (0xe5000000 | \ - ((_x) % 1024) | \ - (((_y) % 1024) << 11) \ +#define setTexWindow_T(p, r) \ + (p)->code[0] = (0xe2000000 | \ + ((r)->w & 0x1f) | \ + (((r)->h & 0x1f) << 5) | \ + (((r)->x & 0x1f) << 10) | \ + (((r)->y & 0x1f) << 15) \ ) +#define setTexWindow(p, r) \ + setlen(p, 1), setTexWindow_T(p, r) -#define setDrawMask(p, sb, mt) \ - setlen(p, 1), \ - (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1)) - -#define setDrawArea(p, r) \ - setlen(p, 2), \ +#define setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) \ (p)->code[0] = (0xe3000000 | \ - ((r)->x % 1024) | \ - (((r)->y % 1024) << 10) \ + ((_x0) & 0x3ff) | \ + (((_y0) & 0x3ff) << 10) \ ), \ (p)->code[1] = (0xe4000000 | \ - (((r)->x + (r)->w - 1) % 1024) | \ - 
((((r)->y + (r)->h - 1) % 1024) << 10) \ + ((_x1) & 0x3ff) | \ + (((_y1) & 0x3ff) << 10) \ ) +#define setDrawAreaXY(p, _x0, _y0, _x1, _y1) \ + setlen(p, 2), setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) + +#define setDrawArea_T(p, r) \ + setDrawAreaXY_T(p, \ + (r)->x, \ + (r)->y, \ + (r)->x + (r)->w - 1, \ + (r)->y + (r)->h - 1 \ + ) +#define setDrawArea(p, r) \ + setlen(p, 2), setDrawArea_T(p, r) -#define setTexWindow(p, r) \ - setlen(p, 1), \ - (p)->code[0] = (0xe2000000 | \ - ((r)->w % 32) | \ - (((r)->h % 32) << 5) | \ - (((r)->x % 32) << 10) | \ - (((r)->y % 32) << 15) \ +#define setDrawOffset_T(p, _x, _y) \ + (p)->code[0] = (0xe5000000 | \ + ((_x) & 0x7ff) | \ + (((_y) & 0x7ff) << 11) \ ) +#define setDrawOffset(p, _x, _y) \ + setlen(p, 1), setDrawOffset_T(p, _x, _y) + +#define setDrawStp_T(p, pbw, mt) \ + (p)->code[0] = (0xe6000000 | (pbw) | ((mt) << 1)) +#define setDrawStp(p, pbw, mt) \ + setlen(p, 1), setDrawStp_T(p, pbw, mt) + +#define setDrawIRQ_T(p) \ + (p)->code[0] = 0x1f000000 +#define setDrawIRQ(p) \ + setlen(p, 1), setDrawIRQ_T(p) /* Primitive structure definitions */ +typedef struct _P_TAG_T { + uint32_t color:24; + uint32_t code:8; +} P_TAG_T; + typedef struct _P_TAG { uint32_t addr:24; uint32_t len:8; @@ -212,25 +290,31 @@ typedef struct _P_COLOR { uint32_t pad:8; } P_COLOR; -typedef struct _POLY_F3 { - uint32_t tag; +// These macros are used to define two variants of each primitive, a regular one +// and a "tagless" one (_T suffix) without the OT/display list header. +#define _DEF_PRIM(name, ...) 
\ + typedef struct _##name##_T { __VA_ARGS__ } name##_T; \ + typedef struct _##name { uint32_t tag; __VA_ARGS__ } name; +#define _DEF_ALIAS(name, target) \ + typedef struct _##target##_T name##_T; \ + typedef struct _##target name; + +_DEF_PRIM(POLY_F3, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; -} POLY_F3; +) -typedef struct _POLY_F4 { - uint32_t tag; +_DEF_PRIM(POLY_F4, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; int16_t x3, y3; -} POLY_F4; +) -typedef struct _POLY_FT3 { - uint32_t tag; +_DEF_PRIM(POLY_FT3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -241,10 +325,9 @@ typedef struct _POLY_FT3 { int16_t x2, y2; uint8_t u2, v2; uint16_t pad; -} POLY_FT3; +) -typedef struct _POLY_FT4 { - uint32_t tag; +_DEF_PRIM(POLY_FT4, uint8_t r0, g0, b0, code; uint16_t x0, y0; uint8_t u0, v0; @@ -258,20 +341,18 @@ typedef struct _POLY_FT4 { int16_t x3, y3; uint8_t u3, v3; uint16_t pad1; -} POLY_FT4; +) -typedef struct _POLY_G3 { - uint32_t tag; +_DEF_PRIM(POLY_G3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, pad0; int16_t x1, y1; uint8_t r2, g2, b2, pad1; int16_t x2, y2; -} POLY_G3; +) -typedef struct _POLY_G4 { - uint32_t tag; +_DEF_PRIM(POLY_G4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, pad0; @@ -280,10 +361,9 @@ typedef struct _POLY_G4 { int16_t x2, y2; uint8_t r3, g3, b3, pad2; int16_t x3, y3; -} POLY_G4; +) -typedef struct _POLY_GT3 { - uint32_t tag; +_DEF_PRIM(POLY_GT3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -296,10 +376,9 @@ typedef struct _POLY_GT3 { int16_t x2, y2; uint8_t u2, v2; uint16_t pad2; -} POLY_GT3; +) -typedef struct _POLY_GT4 { - uint32_t tag; +_DEF_PRIM(POLY_GT4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -316,34 +395,30 @@ typedef struct _POLY_GT4 { int16_t x3, y3; uint8_t u3, v3; uint16_t pad4; -} POLY_GT4; +) -typedef struct _LINE_F2 { - uint32_t tag; +_DEF_PRIM(LINE_F2, uint8_t r0, g0, b0, code; 
int16_t x0, y0; int16_t x1, y1; -} LINE_F2; +) -typedef struct _LINE_G2 { - uint32_t tag; +_DEF_PRIM(LINE_G2, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; int16_t x1, y1; -} LINE_G2; +) -typedef struct _LINE_F3 { - uint32_t tag; +_DEF_PRIM(LINE_F3, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; uint32_t pad; -} LINE_F3; +) -typedef struct _LINE_G3 { - uint32_t tag; +_DEF_PRIM(LINE_G3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; @@ -351,20 +426,18 @@ typedef struct _LINE_G3 { uint8_t r2, g2, b2, p2; int16_t x2, y2; uint32_t pad; -} LINE_G3; +) -typedef struct _LINE_F4 { - uint32_t tag; +_DEF_PRIM(LINE_F4, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; int16_t x3, y3; uint32_t pad; -} LINE_F4; +) -typedef struct _LINE_G4 { - uint32_t tag; +_DEF_PRIM(LINE_G4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; @@ -374,88 +447,80 @@ typedef struct _LINE_G4 { uint8_t r3, g3, b3, p3; int16_t x3, y3; uint32_t pad; -} LINE_G4; +) -typedef struct _TILE { - uint32_t tag; +_DEF_PRIM(TILE, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t w, h; -} TILE; +) -struct _TILE_FIXED { - uint32_t tag; +_DEF_PRIM(TILE_1, uint8_t r0, g0, b0, code; int16_t x0, y0; -}; -typedef struct _TILE_FIXED TILE_1; -typedef struct _TILE_FIXED TILE_8; -typedef struct _TILE_FIXED TILE_16; +) +_DEF_ALIAS(TILE_8, TILE_1) +_DEF_ALIAS(TILE_16, TILE_1) -typedef struct _SPRT { - uint32_t tag; +_DEF_PRIM(SPRT, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; uint16_t clut; uint16_t w, h; -} SPRT; +) -struct _SPRT_FIXED { - uint32_t tag; +_DEF_PRIM(SPRT_1, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; uint16_t clut; -}; -typedef struct _SPRT_FIXED SPRT_8; -typedef struct _SPRT_FIXED SPRT_16; - -typedef struct _DR_ENV { - uint32_t tag; - uint32_t code[8]; -} DR_ENV; - -typedef struct _DR_AREA { - uint32_t tag; - uint32_t code[2]; -} DR_AREA; - -typedef struct _DR_OFFSET { - 
uint32_t tag; - uint32_t code[1]; -} DR_OFFSET; - -typedef struct _DR_TWIN { - uint32_t tag; - uint32_t code[2]; -} DR_TWIN; - -typedef struct _DR_TPAGE { - uint32_t tag; - uint32_t code[1]; -} DR_TPAGE; - -typedef struct _DR_MASK { - uint32_t tag; - uint32_t code[1]; -} DR_MASK; +) +_DEF_ALIAS(SPRT_8, SPRT_1) +_DEF_ALIAS(SPRT_16, SPRT_1) -typedef struct _FILL { - uint32_t tag; +_DEF_PRIM(FILL, uint8_t r0, g0, b0, code; - uint16_t x0, y0; // Note: coordinates must be in 16 pixel steps + uint16_t x0, y0; uint16_t w, h; -} FILL; +) -typedef struct _VRAM2VRAM { - uint32_t tag; +_DEF_PRIM(DR_MOVE, uint8_t p0, p1, p2, code; uint16_t x0, y0; uint16_t x1, y1; uint16_t w, h; - uint32_t pad[4]; -} VRAM2VRAM; +) + +_DEF_PRIM(DR_AREA, + uint32_t code[2]; +) +_DEF_PRIM(DR_OFFSET, + uint32_t code[1]; +) +_DEF_PRIM(DR_TWIN, + uint32_t code[1]; +) +_DEF_PRIM(DR_TPAGE, + uint32_t code[1]; +) +_DEF_PRIM(DR_STP, + uint32_t code[1]; +) +_DEF_PRIM(DR_IRQ, + uint32_t code[1]; +) + +_DEF_PRIM(DR_ENV, + DR_TPAGE_T tpage; + DR_TWIN_T twin; + DR_AREA_T area; + DR_OFFSET_T offset; + FILL_T fill; +) + +#undef _DEF_PRIM +#undef _DEF_ALIAS /* Structure definitions */ @@ -478,13 +543,13 @@ typedef struct _DISPENV { typedef struct _DRAWENV { RECT clip; // Drawing area int16_t ofs[2]; // GPU draw offset (relative to draw area) - RECT tw; // Texture window (doesn't do anything atm) + RECT tw; // Texture window uint16_t tpage; // Initial tpage value uint8_t dtd; // Dither processing flag (simply OR'ed to tpage) uint8_t dfe; // Drawing to display area blocked/allowed (simply OR'ed to tpage) uint8_t isbg; // Clear draw area if non-zero uint8_t r0, g0, b0; // Draw area clear color (if isbg is nonzero) - DR_ENV dr_env; // Draw mode packet area (used by PutDrawEnv) + DR_ENV dr_env; // GPU primitive cache area (used internally) } DRAWENV; typedef struct _TIM_IMAGE { @@ -521,31 +586,35 @@ void PutDrawEnv(DRAWENV *env); void PutDrawEnvFast(DRAWENV *env); int GetODE(void); +int IsIdleGPU(int timeout); int 
VSync(int mode); void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); -int EnqueueDrawOp( - void (*func)(uint32_t, uint32_t, uint32_t), - uint32_t arg1, - uint32_t arg2, - uint32_t arg3 -); +void SetDrawOpType(GPU_DrawOpType type); +int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3); int DrawSync(int mode); void *DrawSyncCallback(void (*func)(void)); int LoadImage(const RECT *rect, const uint32_t *data); int StoreImage(const RECT *rect, uint32_t *data); -//int MoveImage(const RECT *rect, int x, int y); +int MoveImage(const RECT *rect, int x, int y); void LoadImage2(const RECT *rect, const uint32_t *data); void StoreImage2(const RECT *rect, uint32_t *data); -//void MoveImage2(const RECT *rect, int x, int y); +void MoveImage2(const RECT *rect, int x, int y); void ClearOTagR(uint32_t *ot, size_t length); void ClearOTag(uint32_t *ot, size_t length); int DrawOTag(const uint32_t *ot); +int DrawOTagIRQ(const uint32_t *ot); int DrawOTagEnv(const uint32_t *ot, DRAWENV *env); +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env); void DrawOTag2(const uint32_t *ot); +void DrawOTagIRQ2(const uint32_t *ot); +int DrawBuffer(const uint32_t *buf, size_t length); +int DrawBufferIRQ(const uint32_t *buf, size_t length); +void DrawBuffer2(const uint32_t *buf, size_t length); +void DrawBufferIRQ2(const uint32_t *buf, size_t length); void DrawPrim(const uint32_t *pri); void AddPrim(uint32_t *ot, const void *pri); @@ -565,5 +634,3 @@ char *FntFlush(int id); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h index 91dfd6a..2200a55 100644 --- a/libpsn00b/include/psxgte.h +++ b/libpsn00b/include/psxgte.h @@ -14,8 +14,7 @@ * registers and issue commands to the GTE. 
*/ -#ifndef __PSXGTE_H -#define __PSXGTE_H +#pragma once #include <stdint.h> @@ -259,5 +258,3 @@ void Square0(VECTOR *v0, VECTOR *v1); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxkernel.h b/libpsn00b/include/psxkernel.h deleted file mode 100644 index 0c55bcb..0000000 --- a/libpsn00b/include/psxkernel.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _PSXKERNEL_H -#define _PSXKERNEL_H - -// Event descriptors -#define DescMask 0xff000000 // Event descriptor mask -#define DescTH DescMask -#define DescHW 0xf0000000 // Hardware event (IRQ) -#define DescEV 0xf1000000 // Event event -#define DescRC 0xf2000000 // Root counter event -#define DescUEV 0xf3000000 // User event -#define DescSW 0xf4000000 // BIOS event - -// Hardware events -#define HwVBLANK (DescHW|0x01) // VBlank -#define HwGPU (DescHW|0x02) // GPU -#define HwCdRom (DescHW|0x03) // CDROM -#define HwDMAC (DescHW|0x04) // DMA -#define HwRTC0 (DescHW|0x05) // Timer 0 -#define HwRTC1 (DescHW|0x06) // Timer 1 -#define HwRTC2 (DescHW|0x07) // Timer 2 -#define HwCNTL (DescHW|0x08) // Controller -#define HwSPU (DescHW|0x09) // SPU -#define HwPIO (DescHW|0x0a) // PIO & lightgun -#define HwSIO (DescHW|0x0b) // Serial - -#define HwCPU (DescHW|0x10) // Processor exception -#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions) -#define HwCard_0 (DescHW|0x12) // Memory card (unused) -#define HwCard_1 (DescHW|0x13) // Memory card (unused) -#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions) -#define SwMATH (DescSW|0x02) // Libmath related apparently, unknown purpose - -#define RCntCNT0 (DescRC|0x00) // Root counter 0 (dot clock) -#define RCntCNT1 (DescRC|0x01) // Horizontal sync -#define RCntCNT2 (DescRC|0x02) // 1/8 of system clock -#define RCntCNT3 (DescRC|0x03) // Vertical blank - -#define RCntMdINTR 0x1000 // General interrupt -#define RCntMdNOINTR 0x2000 // New device -#define RCntMdSC 0x0001 // Counter becomes zero -#define RCntMdSP 0x0000 // Unknown purpose 
-#define RCntMdFR 0x0000 -#define RCntMdGATE 0x0010 // Command acknowledged - -#endif // _PSXKERNEL_H
\ No newline at end of file diff --git a/libpsn00b/include/psxpad.h b/libpsn00b/include/psxpad.h index 32f7f8a..09f28c4 100644 --- a/libpsn00b/include/psxpad.h +++ b/libpsn00b/include/psxpad.h @@ -11,8 +11,7 @@ * Reference: https://gist.github.com/scanlime/5042071 */ -#ifndef _PSXPAD_H -#define _PSXPAD_H +#pragma once #include <stdint.h> @@ -234,5 +233,3 @@ typedef struct __attribute__((packed)) _MemCardRequest { uint8_t checksum; // = lba_h ^ lba_l ^ data (CMD_WRITE only) uint8_t dummy2[3]; } MemCardRequest; - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index dc1d52c..f26e030 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -1,6 +1,6 @@ /* * PSn00bSDK MDEC library - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ /** @@ -17,11 +17,12 @@ * FMV playback is not part of this library per se, but can be implemented using * the APIs defined here alongside some code to stream data from the CD drive. * - * Currently only version 1 and 2 .BS files are supported. + * Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI + * bitstreams are not supported, but no encoder is publicly available for those + * anyway. */ -#ifndef __PSXPRESS_H -#define __PSXPRESS_H +#pragma once #include <stdint.h> #include <stddef.h> @@ -34,28 +35,26 @@ typedef struct _DECDCTENV { int16_t dct[64]; // Inverse DCT matrix (2.14 fixed-point) } DECDCTENV; -// This is the "small" lookup table used by DecDCTvlc(). It can be copied to -// the scratchpad. +typedef struct _VLC_TableV2 { + uint16_t ac0[2]; + uint32_t ac2[8], ac3[64]; + uint16_t ac4[8], ac5[8], ac7[16], ac8[32]; + uint16_t ac9[32], ac10[32], ac11[32], ac12[32]; +} VLC_TableV2; + +typedef struct _VLC_TableV3 { + uint16_t ac0[2]; + uint32_t ac2[8], ac3[64]; + uint16_t ac4[8], ac5[8], ac7[16], ac8[32]; + uint16_t ac9[32], ac10[32], ac11[32], ac12[32]; + uint8_t dc[128], dc_len[9]; + uint8_t _reserved[3]; +} VLC_TableV3; + typedef struct _DECDCTTAB { - uint16_t lut0[2]; - uint32_t lut2[8]; - uint32_t lut3[64]; - uint16_t lut4[8]; - uint16_t lut5[8]; - uint16_t lut7[16]; - uint16_t lut8[32]; - uint16_t lut9[32]; - uint16_t lut10[32]; - uint16_t lut11[32]; - uint16_t lut12[32]; + uint32_t ac[8192], ac00[512]; } DECDCTTAB; -// This is the "large" table used by DecDCTvlc2(). 
-typedef struct _DECDCTTAB2 { - uint32_t lut[8192]; - uint32_t lut00[512]; -} DECDCTTAB2; - typedef enum _DECDCTMODE { DECDCT_MODE_24BPP = 1, DECDCT_MODE_16BPP = 0, @@ -66,8 +65,9 @@ typedef enum _DECDCTMODE { typedef struct _VLC_Context { const uint32_t *input; uint32_t window, next_window, remaining; - uint16_t quant_scale; int8_t is_v3, bit_offset, block_index, coeff_index; + uint16_t quant_scale; + int16_t last_y, last_cr, last_cb; } VLC_Context; // Despite what some docs claim, the "number of 32-byte blocks" and "always @@ -233,8 +233,9 @@ int DecDCToutSync(int mode); * frame) into a buffer that can be passed to DecDCTin(). This function uses a * small (<1 KB) lookup table combined with the GTE to accelerate the process; * performance is roughly on par with DecDCTvlcStart2() if the lookup table - * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTable(). The - * contents of the GTE's LZCR register, if any, will be destroyed. + * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTableV2() or + * DecDCTvlcCopyTableV3(). The contents of the GTE's LZCS and LZCR registers, + * if any, will be destroyed. * * A VLC_Context object must be created and passed to this function, which will * then proceed to initialize its fields. The max_size argument sets the @@ -244,8 +245,6 @@ int DecDCToutSync(int mode); * can be different). If max_size = 0, the entire frame will always be decoded * in one shot. * - * Only bitstream version 2 is currently supported. - * * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the * first time. Attempting to call this function with the GTE disabled will * result in a crash. 
@@ -256,7 +255,7 @@ int DecDCToutSync(int mode); * @param bs * @return 0, 1 if more data needs to be output or -1 in case of failure * - * @see DecDCTvlcContinue(), DecDCTvlcCopyTable() + * @see DecDCTvlcContinue(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3() */ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); @@ -275,7 +274,8 @@ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint3 * context returned 0; in that case the context shall be discarded or reused to * decode another bitstream. * - * The contents of the GTE's LZCR register, if any, will be destroyed. + * The contents of the GTE's LZCS and LZCR registers, if any, will be + * destroyed. * * See DecDCTvlcStart() for more details. * @@ -309,7 +309,7 @@ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); * @param buf * @return 0, 1 if more data needs to be output or -1 in case of failure * - * @see DecDCTvlcSize(), DecDCTvlcCopyTable() + * @see DecDCTvlcSize(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3() */ int DecDCTvlc(const uint32_t *bs, uint32_t *buf); @@ -332,23 +332,60 @@ int DecDCTvlc(const uint32_t *bs, uint32_t *buf); size_t DecDCTvlcSize(size_t size); /** - * @brief Moves the lookup table used by the .BS decompressor to the scratchpad - * region. + * @brief Copies the lookup tables used by the .BS decompressor (v1/v2) to the + * scratchpad region. * - * @details Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), - * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified - * address. A copy of this table is always present in main RAM, however this - * function can be used to copy it to the scratchpad region to boost - * decompression performance. + * @details Copies the lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() to the specified address. 
A copy of this + * table is always present in main RAM, however this function can be used to + * copy it to the scratchpad region to boost decompression performance. + * + * This function copies a 676-byte table (VLC_TableV2 structure) containing + * only the data necessary for decoding version 1 and 2 bitstreams, to help + * save scratchpad space. If support for version 3 is required, + * DecDCTvlcCopyTableV3() can be used instead to copy the full 816-byte table. * * The address passed to this function is saved. Calls to DecDCTvlcStart(), * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table - * copied. Call DecDCTvlcCopyTable(0) to revert to using the library's internal - * table in main RAM. + * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to + * using the library's internal table in main RAM. + * + * WARNING: attempting to decode a version 3 .BS file or .STR frame after + * calling this function will result in undefined behavior and potentially a + * crash. To re-enable version 3 decoding, use DecDCTvlcCopyTableV3() to copy + * the full table to the scratchpad or revert to using the built-in table in + * main RAM. + * + * @param addr Pointer to free 676-byte area in scratchpad region or 0 to reset * - * @param addr Pointer to free area in scratchpad region or 0 to reset + * @see DecDCTvlcCopyTableV3() */ -void DecDCTvlcCopyTable(DECDCTTAB *addr); +void DecDCTvlcCopyTableV2(VLC_TableV2 *addr); + +/** + * @brief Copies the lookup tables used by the .BS decompressor (v1/v2/v3) to + * the scratchpad region. + * + * @details Copies the lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this + * table is always present in main RAM, however this function can be used to + * copy it to the scratchpad region to boost decompression performance. 
+ * + * This function copies the full 816-byte table (VLC_TableV3 structure), + * including the data used to decode version 3 bitstreams. If support for + * version 3 is not required, DecDCTvlcCopyTableV2() can be used instead to + * save scratchpad space by only copying the first 676 bytes of the table. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart(), + * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table + * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to + * using the library's internal table in main RAM. + * + * @param addr Pointer to free 816-byte area in scratchpad region or 0 to reset + * + * @see DecDCTvlcCopyTableV2() + */ +void DecDCTvlcCopyTableV3(VLC_TableV3 *addr); /** * @brief Decompresses or begins decompressing a .BS file into MDEC codes @@ -360,8 +397,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr); * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad. * Depending on the specific bitstream being decoded DecDCTvlcStart2() might be * slightly faster or slower than DecDCTvlcStart() with its lookup table copied - * to the scratchpad (see DecDCTvlcCopyTable()). DecDCTvlcStart() with the - * table in main RAM tends to be much slower. + * to the scratchpad (see DecDCTvlcCopyTableV2() and DecDCTvlcCopyTableV3()). + * DecDCTvlcStart() with the table in main RAM tends to be much slower. * * A VLC_Context object must be created and passed to this function, which will * then proceed to initialize its fields. The max_size argument sets the @@ -371,7 +408,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr); * buffer can be different). If max_size = 0, the entire frame will always be * decoded in one shot. * - * Only bitstream version 2 is currently supported. + * This function only supports decoding version 1 and 2 bitstreams. Use + * DecDCTvlcStart() to decode a version 3 bitstream. 
* * @param ctx Pointer to VLC_Context structure (which will be initialized) * @param buf @@ -432,7 +470,7 @@ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); * * @see DecDCTvlcSize2(), DecDCTvlcBuild() */ -int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table); +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table); /** * @brief Sets the maximum amount of data to be decompressed (alternate @@ -458,7 +496,7 @@ size_t DecDCTvlcSize2(size_t size); * the .BS decompressor. * * @details Generates the lookup table required by DecDCTvlcStart2(), - * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the + * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB structure) into the * specified buffer. Since the table is relatively large (34 KB), it is * recommended to only generate it in a dynamically-allocated buffer when * needed and deallocate the buffer afterwards. @@ -468,10 +506,8 @@ size_t DecDCTvlcSize2(size_t size); * * @param table */ -void DecDCTvlcBuild(DECDCTTAB2 *table); +void DecDCTvlcBuild(DECDCTTAB *table); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxsio.h b/libpsn00b/include/psxsio.h index 449e43a..8932830 100644 --- a/libpsn00b/include/psxsio.h +++ b/libpsn00b/include/psxsio.h @@ -18,8 +18,7 @@ * debugging purposes. 
*/ -#ifndef __PSXSIO_H -#define __PSXSIO_H +#pragma once #include <stdint.h> @@ -280,5 +279,3 @@ void DelSIO(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxsn.h b/libpsn00b/include/psxsn.h new file mode 100644 index 0000000..1acbc18 --- /dev/null +++ b/libpsn00b/include/psxsn.h @@ -0,0 +1,51 @@ +/* + * PSn00bSDK kernel API library (host file access) + * (C) 2023 spicyjpeg - MPL licensed + */ + +/** + * @file psxsn.h + * @brief Host file access API header + * + * @details This header provides stubs for the PCDRV API, which grants read and + * write access to a directory on the host's filesystem when the executable is + * running on an emulator or through a debugger that supports the PCDRV + * protocol, such as Unirom or pcsx-redux. These functions are completely + * separate and independent from the BIOS file API and do not register any + * device drivers. + * + * Note that in the official SDK these functions are provided by libsn, while + * in PSn00bSDK they are part of libpsxapi. 
+ */ + +#pragma once + +#include <stddef.h> + +typedef enum _PCDRV_OpenMode { + PCDRV_MODE_READ = 0, + PCDRV_MODE_WRITE = 1, + PCDRV_MODE_READ_WRITE = 2 +} PCDRV_OpenMode; + +typedef enum _PCDRV_SeekMode { + PCDRV_SEEK_SET = 0, + PCDRV_SEEK_CUR = 1, + PCDRV_SEEK_END = 2 +} PCDRV_SeekMode; + +#ifdef __cplusplus +extern "C" { +#endif + +int PCinit(void); +int PCcreat(const char *path); +int PCopen(const char *path, PCDRV_OpenMode mode); +int PCclose(int fd); +int PCread(int fd, void *data, size_t length); +int PCwrite(int fd, const void *data, size_t length); +int PClseek(int fd, int offset, PCDRV_SeekMode mode); + +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h index cdc3ac7..b544952 100644 --- a/libpsn00b/include/psxspu.h +++ b/libpsn00b/include/psxspu.h @@ -1,10 +1,25 @@ /* * PSn00bSDK SPU library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXSPU_H -#define __PSXSPU_H +/** + * @file psxspu.h + * @brief SPU library header + * + * @details The PSn00bSDK SPU library allows for SPU initialization, DMA + * transfers (both sample data uploads and capture buffer reads) and provides + * helper macros for accessing SPU control registers, which can be used to + * control sample playback on each channel, configure reverb and enable more + * advanced features such as interrupts. + * + * This library currently has fewer functions than its Sony SDK counterpart, in + * part because it is not yet complete but also since the vast majority of the + * Sony library's functions are redundant, inefficient and can be replaced with + * simple SPU register writes. 
+ */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -12,6 +27,7 @@ /* Definitions */ +#if 0 typedef enum _SPU_AttrMask { SPU_VOICE_VOLL = 1 << 0, // Left volume SPU_VOICE_VOLR = 1 << 1, // Right volume @@ -33,6 +49,7 @@ typedef enum _SPU_AttrMask { SPU_VOICE_ADSR_ADSR1 = 1 << 17, SPU_VOICE_ADSR_ADSR2 = 1 << 18 } SPU_AttrMask; +#endif typedef enum _SPU_TransferMode { SPU_TRANSFER_BY_DMA = 0, @@ -46,6 +63,7 @@ typedef enum _SPU_WaitMode { /* Structure definitions */ +#if 0 typedef struct _SpuVolume { int16_t left, right; } SpuVolume; @@ -72,6 +90,7 @@ typedef struct _SpuCommonAttr { SpuVolume mvol, mvolmode, mvolx; SpuExtAttr cd, ext; } SpuCommonAttr; +#endif /* Macros */ @@ -137,11 +156,11 @@ size_t SpuRead(uint32_t *data, size_t size); size_t SpuWrite(const uint32_t *data, size_t size); size_t SpuWritePartly(const uint32_t *data, size_t size); SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode); +SPU_TransferMode SpuGetTransferMode(void); uint32_t SpuSetTransferStartAddr(uint32_t addr); +uint32_t SpuGetTransferStartAddr(void); int SpuIsTransferCompleted(int mode); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/stdio.h b/libpsn00b/include/stdio.h index 8aaf4c7..1bb5b74 100644 --- a/libpsn00b/include/stdio.h +++ b/libpsn00b/include/stdio.h @@ -1,39 +1,26 @@ -#ifndef _STDIO_H -#define _STDIO_H +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ -#include <stdarg.h> +#pragma once -// BIOS seek modes -#ifndef SEEK_SET -#define SEEK_SET 0 -#endif -#ifndef SEEK_CUR -#define SEEK_CUR 1 -#endif -#ifndef SEEK_END -#define SEEK_END 2 /* warning: reportedly buggy */ -#endif +#include <stdarg.h> #ifdef __cplusplus extern "C" { #endif -// The following functions use the BIOS -extern void printf (const char *__format, ...); - -extern int getc(int __fd); -extern int putc(int __char, int __fd); +/* String I/O API (provided by BIOS) */ -#define fputc(__char, __fd) putc(__char, __fd) -#define 
fgetc(__char, __fd) getc(__char, __fd) +int printf(const char *fmt, ...); +char *gets(char *str); +void puts(const char *str); +int getchar(void); +void putchar(int ch); -// Console TTY -extern void gets(char *__s); -extern void puts(const char *__s); -extern int getchar(void); -extern void putchar(int __c); +/* String formatting API (built-in) */ -// The following functions do not use the BIOS int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap); int vsprintf(char *string, const char *fmt, va_list ap); int sprintf(char *string, const char *fmt, ...); @@ -45,5 +32,3 @@ int sscanf(const char *str, const char *fmt, ...); #ifdef __cplusplus } #endif - -#endif // _STDIO_H
\ No newline at end of file diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h index 049d067..19761df 100644 --- a/libpsn00b/include/stdlib.h +++ b/libpsn00b/include/stdlib.h @@ -1,10 +1,9 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STDLIB_H -#define __STDLIB_H +#pragma once #include <stddef.h> @@ -39,11 +38,11 @@ void srand(int seed); int abs(int j); long labs(long i); -long strtol(const char *nptr, char **endptr, int base); -long long strtoll(const char *nptr, char **endptr, int base); -float strtof(const char *nptr, char **endptr); -double strtod(const char *nptr, char **endptr); -long double strtold(const char *nptr, char **endptr); +long strtol(const char *str, char **str_end, int base); +long long strtoll(const char *str, char **str_end, int base); +//float strtof(const char *str, char **str_end); +//double strtod(const char *str, char **str_end); +//long double strtold(const char *str, char **str_end); void InitHeap(void *addr, size_t size); void *sbrk(ptrdiff_t incr); @@ -59,5 +58,3 @@ void free(void *ptr); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/string.h b/libpsn00b/include/string.h index ceee066..6310b1a 100644 --- a/libpsn00b/include/string.h +++ b/libpsn00b/include/string.h @@ -1,37 +1,40 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STRING_H -#define __STRING_H +#pragma once + +#include <stddef.h> #ifdef __cplusplus extern "C" { #endif -int strcmp(const char *dst , const char *src); -int strncmp(const char *dst , const char *src , int len); -char *strpbrk(const char *dst , const char *src); -char *strtok(char *s , char *set); -char *strstr(const char *big , const char *little); - -char *strcat(char *s , const char *append); 
-char *strncat(char *s , const char *append, int n); -char *strcpy(char *dst , const char *src); -char *strncpy(char *dst , const char *src , int n); -int strlen(const char *s); -char *strchr(const char *s , int c); -char *strrchr(const char *s , int c); - -void *memmove(void *dst , const void *src , int n); -void *memchr(void *s , int c , int n); -void *memcpy(void *dst , const void *src , int n); -void *memset(void *dst , char c , int n); -int memcmp(const void *b1 , const void *b2 , int n); +void *memset(void *dest, int ch, size_t count); +void *memcpy(void *dest, const void *src, size_t count); +void *memccpy(void *dest, const void *src, int ch, size_t count); +void *memmove(void *dest, const void *src, size_t count); +int memcmp(const void *lhs, const void *rhs, size_t count); +void *memchr(const void *ptr, int ch, size_t count); + +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t count); +int strcmp(const char *lhs, const char *rhs); +int strncmp(const char *lhs, const char *rhs, size_t count); +char *strchr(const char *str, int ch); +char *strrchr(const char *str, int ch); +char *strpbrk(const char *str, const char *breakset); +char *strstr(const char *str, const char *substr); + +size_t strlen(const char *str); +char *strcat(char *dest, const char *src); +char *strncat(char *dest, const char *src, size_t count); +char *strdup(const char *str); +char *strndup(const char *str, size_t count); + +char *strtok(char *str, const char *delim); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/strings.h b/libpsn00b/include/strings.h index 7223ab9..0595637 100644 --- a/libpsn00b/include/strings.h +++ b/libpsn00b/include/strings.h @@ -3,8 +3,7 @@ * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STRINGS_H -#define __STRINGS_H +#pragma once #include <string.h> @@ -15,5 +14,3 @@ #define bcmp(b1, b2, len) memcmp(b1, b2, len) #define index(s, c) strchr(s, c) #define rindex(s, 
c) strrchr(s, c) - -#endif diff --git a/libpsn00b/include/sys/fcntl.h b/libpsn00b/include/sys/fcntl.h index dfbf5b2..54c2d05 100644 --- a/libpsn00b/include/sys/fcntl.h +++ b/libpsn00b/include/sys/fcntl.h @@ -1,8 +1,10 @@ -#ifndef _SYS_FCNTL_H -#define _SYS_FCNTL_H +/* + * PSn00bSDK kernel API library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once -// File control mode flags for BIOS file functions -// (many weren't documented in nocash docs) #define FREAD 0x1 // Read #define FWRITE 0x2 // Write #define FNBLOCK 0x4 // Non-blocking read access @@ -16,5 +18,3 @@ #define FNBUF 0x4000 // No ring buffer and terminal interrupt #define FASYNC 0x8000 // Asynchronous I/O #define FNBLOCKS(a) (a<<16) // Number of blocks? (from nocash docs) - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/sys/ioctl.h b/libpsn00b/include/sys/ioctl.h new file mode 100644 index 0000000..af65e5d --- /dev/null +++ b/libpsn00b/include/sys/ioctl.h @@ -0,0 +1,13 @@ +/* + * PSn00bSDK kernel API library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#define EOF -1 + +#define FIONBLOCK (('f'<<8)|1) +#define FIOCSCAN (('f'<<8)|2) + +#define DIOFORMAT (('d'<<8)|1) diff --git a/libpsn00b/include/sys/types.h b/libpsn00b/include/sys/types.h index da43590..9075b5e 100644 --- a/libpsn00b/include/sys/types.h +++ b/libpsn00b/include/sys/types.h @@ -1,13 +1,13 @@ -#ifndef _TYPES_H -#define _TYPES_H +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ -//#warning "<sys/types.h> and u_* types are deprecated, include <stdint.h> instead" +#pragma once -//#include <stdint.h> +//#warning "<sys/types.h> and u_* types are deprecated, use <stdint.h> instead" typedef unsigned char u_char; typedef unsigned short u_short; typedef unsigned int u_int; typedef unsigned long u_long; - -#endif // _TYPES_H
\ No newline at end of file diff --git a/libpsn00b/libc/memcmp.s b/libpsn00b/libc/memcmp.s deleted file mode 100644 index ec1e729..0000000 --- a/libpsn00b/libc/memcmp.s +++ /dev/null @@ -1,31 +0,0 @@ -# High speed ASM memcmp implementation by Lameguy64 -# -# Part of PSn00bSDK - -.set noreorder - -.section .text - -# Arguments: -# a0 - buffer 1 address -# a1 - buffer 2 address -# a2 - bytes to compare -.global memcmp -.type memcmp, @function -memcmp: - blez $a2, .Lexit - addi $a2, -1 - lbu $v0, 0($a0) - lbu $v1, 0($a1) - addiu $a0, 1 - bne $v0, $v1, .Lmismatch - addiu $a1, 1 - b memcmp - nop -.Lmismatch: - jr $ra - sub $v0, $v1 -.Lexit: - jr $ra - move $v0, $0 -
\ No newline at end of file diff --git a/libpsn00b/libc/memcpy.s b/libpsn00b/libc/memcpy.s deleted file mode 100644 index 26edb37..0000000 --- a/libpsn00b/libc/memcpy.s +++ /dev/null @@ -1,28 +0,0 @@ -# High speed ASM memcpy implementation by Lameguy64 -# -# Part of PSn00bSDK - -.set noreorder - -.section .text - -# Arguments: -# a0 - destination address -# a1 - source adress -# a2 - bytes to copy -.global memcpy -.type memcpy, @function -memcpy: - move $v0, $a0 -.Lloop: - blez $a2, .Lexit - addi $a2, -1 - lbu $a3, 0($a1) - addiu $a1, 1 - sb $a3, 0($a0) - b .Lloop - addiu $a0, 1 -.Lexit: - jr $ra - nop -
\ No newline at end of file diff --git a/libpsn00b/libc/memmove.s b/libpsn00b/libc/memmove.s deleted file mode 100644 index 843ece7..0000000 --- a/libpsn00b/libc/memmove.s +++ /dev/null @@ -1,42 +0,0 @@ -.set noreorder - -.section .text - -# Arguments -# a0 - destination address -# a1 - source address -# a2 - bytes to move -.global memmove -.type memmove, @function -memmove: - move $v0, $a0 - sltu $v1, $a0, $a1 - blez $v1, .Linit_backward -.Lloop_forward: - blez $a2, .Lexit - addi $a2, -1 - lbu $v1, 0($a1) - addiu $a1, 1 - sb $v1, 0($a0) - addiu $a0, 1 - b .Lloop_forward - nop -.Linit_backward: - addu $a0, $a2 - addu $a1, $a2 - addiu $a0, -1 - addiu $a1, -1 - b .Lloop_backward - nop -.Lloop_backward: - blez $a2, .Lexit - addi $a2, -1 - lbu $v1, 0($a1) - addiu $a1, -1 - sb $v1, 0($a0) - addiu $a0, -1 - b .Lloop_backward - nop -.Lexit: - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/libc/start.c b/libpsn00b/libc/start.c index 9ff09c8..dcbad2d 100644 --- a/libpsn00b/libc/start.c +++ b/libpsn00b/libc/start.c @@ -11,11 +11,13 @@ #define KERNEL_ARG_STRING ((const char *) 0x80000180) #define KERNEL_RETURN_VALUE ((volatile int *) 0x8000dffc) -/* Argument parsing */ +/* BIOS argv parser (unused, interferes with child executable argv passing) */ int __argc; const char **__argv; +#if 0 + #define ARGC_MAX 16 static const char *_argv_buffer[ARGC_MAX]; @@ -48,6 +50,8 @@ static void _parse_kernel_args(void) { } } +#endif + /* Main */ // These are defined by the linker script. Note that these are *NOT* pointers, @@ -66,11 +70,10 @@ extern int main(int argc, const char* argv[]); // Even though _start() usually takes no arguments, this implementation allows // parent executables to pass args directly to child executables without having // to overwrite the arg strings in kernel RAM. -void _start_inner(int32_t override_argc, const char **override_argv) { +void _start_inner(int argc, const char **argv) { //__asm__ volatile("la $gp, _gp;"); - // Clear BSS 4 bytes at a time. BSS is always aligned to 4 bytes by the - // linker script. + // BSS is always aligned to 4 bytes by the linker script. for (uint32_t *i = (uint32_t *) __bss_start; i < (uint32_t *) _end; i++) *i = 0; @@ -78,17 +81,14 @@ void _start_inner(int32_t override_argc, const char **override_argv) { // RAM. Note that InitHeap() can be called again in main(). InitHeap((void *) _end + 4, (void *) 0x801ffff8 - (void *) _end); - if (override_argv) { - __argc = override_argc; - __argv = override_argv; - } else { - _parse_kernel_args(); - } + //_parse_kernel_args(); + __argc = argc; + __argv = argv; // Call the global constructors (if any) to initialize global objects // before calling main(). Constructors are put by the linker script in a // length-prefixed array in reverse order. 
- for (uint32_t i = (uint32_t) __CTOR_LIST__[0]; i >= 1; i--) + for (int i = (int) __CTOR_LIST__[0]; i >= 1; i--) __CTOR_LIST__[i](); // Store main()'s return value into the kernel return value area (for child @@ -96,6 +96,6 @@ void _start_inner(int32_t override_argc, const char **override_argv) { *KERNEL_RETURN_VALUE = main(__argc, __argv); // Call global destructors (in forward order). - for (uint32_t i = 0; i < (uint32_t) __DTOR_LIST__[0]; i++) + for (int i = 0; i < (int) __DTOR_LIST__[0]; i++) __DTOR_LIST__[i + 1](); } diff --git a/libpsn00b/libc/string.c b/libpsn00b/libc/string.c index a1a9a05..dbc2621 100644 --- a/libpsn00b/libc/string.c +++ b/libpsn00b/libc/string.c @@ -1,295 +1,457 @@ /* - * string.c - * - * Inherited from PSXSDK C library + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#include <stdio.h> -#include <string.h> +#include <stdint.h> +#include <stddef.h> #include <stdlib.h> +#include <string.h> // Uncomment to enable strtod(), strtold() and strtof(). Note that these // functions use extremely slow software floats. //#define ALLOW_FLOAT -int tolower(int chr) -{ - return (chr >='A' && chr<='Z') ? (chr + 32) : (chr); +/* Character manipulation */ + +int isprint(int ch) { + return (ch >= ' ') && (ch <= '~'); } -int toupper(int chr) -{ - return (chr >='a' && chr<='z') ? 
(chr - 32) : (chr); +int isgraph(int ch) { + return (ch > ' ') && (ch <= '~'); } -// Need to be replaced with MIPS assembler equivalents +int isspace(int ch) { + return (ch == ' ') || ((ch >= '\t') && (ch <= '\r')); +} -void *memchr(void *s , int c , int n) -{ - while(n--) - { - if(*((unsigned char*)s) == (unsigned char)c) - return s; - - s++; - } - - return NULL; +int isblank(int ch) { + return (ch == ' ') || (ch == '\t'); } -char *strncpy(char *dst, const char *src, int len) -{ - char *odst=dst; +int isalpha(int ch) { + return ((ch >= 'A') && (ch <= 'Z')) || ((ch >= 'a') && (ch <= 'z')); +} - while(*src && len) - { - *(dst++) = *(src++); - len--; - } - - *dst = 0; - - return odst; +int isdigit(int ch) { + return (ch >= '0') && (ch <= '9'); } -char *strcpy(char *dst, const char *src) -{ - char *odst = dst; +int tolower(int ch) { + if ((ch >= 'A') && (ch <= 'Z')) + ch += 'a' - 'A'; - while(*(dst++) = *(src++)); - return odst; + return ch; } -char *strcat(char *dst, const char *src) -{ - char *o=dst; - - while(*dst) - dst++; - - strcpy(dst, src); - - return o; +int toupper(int ch) { + if ((ch >= 'a') && (ch <= 'z')) + ch += 'A' - 'a'; + + return ch; } -char *strncat(char *s, const char *append, int len) -{ - char *o=s; - - while(*s) - s++; - - strncpy(s, append, len); - - return o; +/* Memory buffer manipulation */ + +// TODO: replace more of these with optimized assembly implementations + +/*void *memset(void *dest, int ch, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + + for (; count; count--) + *(_dest++) = (uint8_t) ch; + + return dest; +}*/ + +void *memcpy(void *restrict dest, const void *restrict src, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; + + for (; count; count--) + *(_dest++) = *(_src++); + + return dest; } -int strlen(const char *str) -{ - int i = 0; - while(*(str++))i++; - return i; +void *memccpy(void *restrict dest, const void *restrict src, int ch, size_t count) { + uint8_t *_dest = 
(uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; + + for (; count; count--) { + uint8_t a = *(_src++); + + *(_dest++) = a; + if (a == ch) + return (void *) _dest; + } + + return 0; } -char *strchr(const char *s, int c) -{ - int x; +void *memmove(void *dest, const void *src, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; - for(x = 0; x <= strlen(s); x++) - if(s[x] == c) return (char*)&s[x]; + if (_dest == _src) + return dest; + if ((_dest >= &_src[count]) || (&_dest[count] <= _src)) + return memcpy(dest, src, count); - return NULL; + if (_dest < _src) { // Copy forwards + for (; count; count--) + *(_dest++) = *(_src++); + } else { // Copy backwards + _src += count; + _dest += count; + + for (; count; count--) + *(--_dest) = *(--_src); + } + + return dest; } -char *strrchr(const char *s, int c) -{ - int x; +int memcmp(const void *lhs, const void *rhs, size_t count) { + const uint8_t *_lhs = (const uint8_t *) lhs; + const uint8_t *_rhs = (const uint8_t *) rhs; + + for (; count; count--) { + uint8_t a = *(_lhs++), b = *(_rhs++); - for(x = strlen(s); x>=0; x--) - if(s[x] == c) return (char*)&s[x]; + if (a != b) + return a - b; + } - return NULL; + return 0; } -char *strpbrk(const char *s, const char *charset) -{ - int x,y; +void *memchr(const void *ptr, int ch, size_t count) { + const uint8_t *_ptr = (const uint8_t *) ptr; - for(x = 0; x < strlen(s); x++) - for(y = 0; y < strlen(charset); y++) - if(s[x] == charset[y]) return (char*)&s[x]; + for (; count; count--, _ptr++) { + if (*_ptr == ch) + return (void *) _ptr; + } - return NULL; + return 0; } -char *strstr(const char *big, const char *little) -{ - int ls = strlen(little); - int bs = strlen(big); - int x; +/* String manipulation */ - if(ls == 0) - return (char*)big; - - if(ls > bs) - return NULL; +char *strcpy(char *restrict dest, const char *restrict src) { + char *_dest = dest; - for(x = 0; x <= bs-ls; x++) - if(memcmp(little, &big[x], ls) == 0) 
- return (char*)&big[x]; + while (*src) + *(_dest++) = *(src++); - return NULL; + *_dest = 0; + return dest; } -int strcmp(const char *s1, const char *s2) -{ - while((*s1) && (*s2) && (*s1 == *s2)) - { - s1++; - s2++; +char *strncpy(char *restrict dest, const char *restrict src, size_t count) { + char *_dest = dest; + + for (; count && *src; count--) + *(_dest++) = *(src++); + for (; count; count--) + *(_dest++) = 0; + + return dest; +} + +int strcmp(const char *lhs, const char *rhs) { + for (;;) { + char a = *(lhs++), b = *(rhs++); + + if (a != b) + return a - b; + if (!a && !b) + return 0; + } +} + +int strncmp(const char *lhs, const char *rhs, size_t count) { + for (; count && *lhs && *rhs; count--) { + char a = *(lhs++), b = *(rhs++); + + if (a != b) + return a - b; } - return(*s1-*s2); + return 0; } -int strncmp(const char *s1, const char *s2, int len) -{ - int p = 0; +char *strchr(const char *str, int ch) { + for (; *str; str++) { + if (*str == ch) + return (char *) str; + } - while(*s1 && *s2 && (*s1 == *s2) && p<len) - { - p++; + return 0; +} + +char *strrchr(const char *str, int ch) { + size_t length = strlen(str); + + for (str += length; length; length--) { + str--; + if (*str == ch) + return (char *) str; + } + + return 0; +} + +char *strpbrk(const char *str, const char *breakset) { + for (; *str; str++) { + char a = *str; - if(p<len) - { - s1++; - s2++; + for (const char *ch = breakset; *ch; ch++) { + if (a == *ch) + return (char *) str; } } - return *s1-*s2; + return 0; } -// Requires a malloc implementation -char *strdup(const char *str) -{ - char *ns = (void*)malloc(strlen(str) + 1); +char *strstr(const char *str, const char *substr) { + size_t length = strlen(substr); - if(ns == NULL) - return NULL; - - strcpy(ns, str); - return ns; + if (!length) + return (char *) str; + + for (; *str; str++) { + if (!memcmp(str, substr, length)) + return (char *) str; + } + + return 0; } -char *strndup(const char *str, int len) -{ - int n=strlen(str); - char *ns = 
(void*)malloc((n+1)>len?len:(n+1)); +size_t strlen(const char *str) { + size_t length = 0; - if(ns == NULL) - return NULL; - - strncpy(ns, str, (n+1)>len?len:(n+1)); - return ns; + for (; *str; str++) + length++; + + return length; } - -long long strtoll(const char *nptr, char **endptr, int base) -{ - int r = 0; - int t = 0; - int n = 0; - - if(*nptr == '-') - { - nptr++; - n = 1; + +// Non-standard, used internally +size_t strnlen(const char *str, size_t count) { + size_t length = 0; + + for (; *str && (length < count); str++) + length++; + + return length; +} + +char *strcat(char *restrict dest, const char *restrict src) { + char *_dest = &dest[strlen(dest)]; + + while (*src) + *(_dest++) = *(src++); + + *_dest = 0; + return dest; +} + +char *strncat(char *restrict dest, const char *restrict src, size_t count) { + char *_dest = &dest[strlen(dest)]; + + for (; count && *src; count--) + *(_dest++) = *(src++); + + *_dest = 0; + return dest; +} + +char *strdup(const char *str) { + size_t length = strlen(str) + 1; + char *copy = malloc(length); + + if (!copy) + return 0; + + memcpy(copy, str, length); + return copy; +} + +char *strndup(const char *str, size_t count) { + size_t length = strnlen(str, count) + 1; + char *copy = malloc(length); + + if (!copy) + return 0; + + memcpy(copy, str, length); + return copy; +} + +/* String tokenizer */ + +static char *_strtok_ptr = 0, *_strtok_end_ptr = 0; + +char *strtok(char *restrict str, const char *restrict delim) { + if (str) { + _strtok_ptr = str; + _strtok_end_ptr = &str[strlen(str)]; } - if(base == 0) - if(*nptr == '0') - base = 8; - else - base = 10; + if (_strtok_ptr >= _strtok_end_ptr) + return 0; + if (!(*_strtok_ptr)) + return 0; + + char *split = strstr(_strtok_ptr, delim); + char *token = _strtok_ptr; + + if (split) { + *(split++) = 0; + _strtok_ptr = split; + } else { + _strtok_ptr += strlen(token); + } - if(!(base >= 2 && base <= 36)) + return token; +} + +/* Number parsers */ + +long long strtoll(const char 
*restrict str, char **restrict str_end, int base) { + if (!str) return 0; - if(base == 16 && *nptr == '0') - { - if(*(nptr+1) == 'x' || *(nptr+1) == 'X') - nptr+=2; + while (isspace(*str)) + str++; + + int negative = (*str == '-'); + if (negative) + str++; + + while (isspace(*str)) + str++; + + // Parse any base prefix if present. If a base was specified make sure it + // matches, otherwise use it to determine which base the value is in. + long long value = 0; + + if (*str == '0') { + int _base; + + switch (str[1]) { + case 0: + goto _exit_loop; + + case 'X': + case 'x': + _base = 16; + str += 2; + break; + + case 'O': + case 'o': + _base = 8; + str += 2; + break; + + case 'B': + case 'b': + _base = 2; + str += 2; + break; + + default: + // Numbers starting with a zero are *not* interpreted as octal + // unless base = 8. + _base = 0; + str++; + } + + if (!base) + base = _base; + else if (base != _base) + return 0; } - while(*nptr) - { - switch(*nptr) - { - case '0'...'9': - t = *nptr - '0'; - break; - case 'a' ... 'z': - t = (*nptr - 'a') + 10; - break; + if (!base) + base = 10; + else if ((base < 2) || (base > 36)) + return 0; + + // Parse the actual value. + for (; *str; str++) { + char ch = *str; + int digit; + + switch (ch) { + case '0' ... '9': + digit = ch - '0'; + break; + case 'A' ... 'Z': - t = (*nptr - 'A') + 10; - break; + digit = (ch - 'A') + 10; + break; + + case 'a' ... 'z': + digit = (ch - 'a') + 10; + break; + default: - t = 1000; - break; + goto _exit_loop; } - if(t>=base) - break; - - r*=base; - r+=t; - nptr++; + value = (value * base) + digit; } - if(endptr)*endptr = (char*)nptr; - return n?-r:r; +_exit_loop: + if (str_end) + *str_end = (char *) str; + + return negative ? 
(-value) : value; } -long strtol(const char *nptr, char **endptr, int base) -{ - return (long)strtoll(nptr, endptr, base); +long strtol(const char *restrict str, char **restrict str_end, int base) { + return (long) strtoll(str, str_end, base); } #ifdef ALLOW_FLOAT -double strtod(const char *nptr, char **endptr) -{ +double strtod(const char *restrict str, char **restrict str_end) { char strbuf[64]; int x = 0; int y; double i=0, d=0; int s=1; - if(*nptr == '-') + if(*str == '-') { - nptr++; + str++; s=-1; } - while(*nptr >= '0' && *nptr <= '9' && x < 18) - strbuf[x++] = *(nptr++); + while(*str >= '0' && *str <= '9' && x < 18) + strbuf[x++] = *(str++); strbuf[x] = 0; i = (double)strtoll(strbuf, NULL, 10); - if(*nptr == '.') + if(*str == '.') { - nptr++; + str++; x = 0; - while(*nptr >= '0' && *nptr <= '9' && x < 7) - strbuf[x++] = *(nptr++); + while(*str >= '0' && *str <= '9' && x < 7) + strbuf[x++] = *(str++); strbuf[x] = 0; - if(endptr != NULL) *endptr = (char*)nptr; + if(str_end != NULL) *str_end = (char*)str; y=1; @@ -301,67 +463,19 @@ double strtod(const char *nptr, char **endptr) } else { - if(endptr != NULL) - *endptr = (char*)nptr; + if(str_end != NULL) + *str_end = (char*)str; } return (i + d)*s; } -#endif - -/* implementation by Lameguy64, behaves like OpenWatcom's strtok() */ -/* BIOS strtok seemed either bugged, or designed for wide chars */ - -static char *_strtok_curpos; -static char *_strtok_endpos; - -char *strtok( char *s1, char *s2 ) -{ - char *c,*t; - - if( s1 ) - { - _strtok_curpos = s1; - _strtok_endpos = s1+strlen( s1 ); - } - else - { - if( _strtok_curpos >= _strtok_endpos ) - return( NULL ); - } - - if( !*_strtok_curpos ) - return( NULL ); - - if( c = strstr( _strtok_curpos, s2 ) ) - { - *c = 0; - t = _strtok_curpos; - _strtok_curpos = c+1; - return( t ); - } - else - { - t = _strtok_curpos; - _strtok_curpos += strlen( t ); - return( t ); - } - - return( NULL ); - -} /* strtok */ - -#ifdef ALLOW_FLOAT - -long double strtold(const char *nptr, 
char **endptr) -{ - return (long double)strtod(nptr, endptr); +long double strtold(const char *restrict str, char **restrict str_end) { + return (long double) strtod(str, str_end); } -float strtof(const char *nptr, char **endptr) -{ - return (float)strtod(nptr, endptr); +float strtof(const char *restrict str, char **restrict str_end) { + return (float) strtod(str, str_end); } #endif diff --git a/libpsn00b/lzp/bit.h b/libpsn00b/lzp/bit.h index 321160a..5e7ed23 100644 --- a/libpsn00b/lzp/bit.h +++ b/libpsn00b/lzp/bit.h @@ -1,5 +1,5 @@ -#ifndef _LZP_BIT_H -#define _LZP_BIT_H + +#pragma once extern const unsigned char* inPtr; extern int inBytes; @@ -21,6 +21,3 @@ int get_bits(int n); #ifdef __cplusplus } #endif - - -#endif // _LZP_BIT_H diff --git a/libpsn00b/lzp/compress.c b/libpsn00b/lzp/compress.c index 9cfc64d..16cb606 100644 --- a/libpsn00b/lzp/compress.c +++ b/libpsn00b/lzp/compress.c @@ -1,7 +1,7 @@ // Based on ilia muraviev's CRUSH compressor program which falls under public domain #include <string.h> -#if LZP_USE_MALLOC == TRUE +#ifdef LZP_USE_MALLOC #include <stdlib.h> #endif @@ -11,7 +11,7 @@ // Internal structure for hash table allocation sizes -#if LZP_NO_COMPRESS == FALSE +#ifndef LZP_NO_COMPRESS struct { short WindowSize; // Window size (17 - 23) @@ -67,7 +67,7 @@ struct { // LZ77 // -#if LZP_NO_COMPRESS == FALSE +#ifndef LZP_NO_COMPRESS int update_hash1(int h, int c) { @@ -108,13 +108,13 @@ int get_penalty(int a, int b) { int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) { - #if LZP_USE_MALLOC == FALSE +#ifndef LZP_USE_MALLOC int head[HASH1_SIZE+HASH2_SIZE]; int prev[W_SIZE]; - #else +#else int* head = malloc(4*(HASH1_SIZE+HASH2_SIZE)); int* prev = malloc(4*W_SIZE); - #endif +#endif int max_chain[] = {4, 256, 1<<12}; @@ -319,10 +319,10 @@ int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) { flush_bits(); - #if LZP_USE_MALLOC == TRUE +#ifdef LZP_USE_MALLOC free(head); free(prev); - #endif +#endif 
return(outBytes); diff --git a/libpsn00b/include/lzconfig.h b/libpsn00b/lzp/lzconfig.h index cb8a830..83579a3 100644 --- a/libpsn00b/include/lzconfig.h +++ b/libpsn00b/lzp/lzconfig.h @@ -3,29 +3,9 @@ * \details Define settings will only take effect when you recompile the library. */ -#ifndef _LZP_CONFIG_H -#define _LZP_CONFIG_H +#pragma once - -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - - -/* Set to TRUE to compile without data compression routines useful if you - * plan to use this library on a program that does not require said routines - * especially on a platform with limited memory (such as the PlayStation). - * - * This define will rule out lzCompress(), lzSetHashSizes() and - * lzResetHashSizes() functions and their associated functions. - */ -#define LZP_NO_COMPRESS TRUE - - -/* Set to TRUE to make default compression table sizes to maximum and works best +/* Uncomment to make default compression table sizes to maximum and works best * when compressing large amounts of data. LZP_USE_MALLOC must be set to TRUE to * prevent stack overflow errors. * @@ -34,21 +14,16 @@ * * This define only affects lzCompress(). */ -#define LZP_MAX_COMPRESS FALSE - +//#define LZP_MAX_COMPRESS /* Uncomment to make the library use malloc() instead of array initializers to * allocate hash tables. Enabling this is a must if you plan to use large hash * and window table sizes. */ -#define LZP_USE_MALLOC FALSE +//#define LZP_USE_MALLOC -/* Hash table sizes (in power-of-two multiple units) - * - * These define only affect lzCompress(). 
- */ -#if LZP_MAX_COMPRESS == TRUE +#if defined(PSN00BSDK) && !defined(LZP_MAX_COMPRESS) // Minimal defaults #define LZP_WINDOW_SIZE 17 @@ -57,12 +32,11 @@ #else +#define LZP_USE_MALLOC + // Maximum defaults #define LZP_WINDOW_SIZE 17 #define LZP_HASH1_SIZE 22 #define LZP_HASH2_SIZE 24 #endif - - -#endif // _LZP_CONFIG_H diff --git a/libpsn00b/lzp/lzp.h b/libpsn00b/lzp/lzp.h index 456de02..1aeea30 100644 --- a/libpsn00b/lzp/lzp.h +++ b/libpsn00b/lzp/lzp.h @@ -1,20 +1,29 @@ -/*! \file lzp.h - * \brief Main library header +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed */ -/*! \mainpage - * \version 0.20b - * \author John Wilbert 'Lameguy64' Villamor +/** + * @file lzp.h + * @brief Utility library for file bundling and compression * - * \section creditsSection Credits - * - LZ77 data compression/decompression routines based from Ilya Muravyov's - * crush.cpp released under public domain. Refined and ported to C by Lameguy64. - * - CRC calculation routines based from Lammert Bies' lib_crc routines. + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). * + * This header provides the LZ77 compression API and functions to parse and + * decompress .LZP archives after they have been loaded into memory. + * + * @section creditsSection Credits + * - LZ77 data compression/decompression routines based from Ilya Muravyov's + * crush.cpp released under public domain. Refined and ported to C by + * Lameguy64. + * - CRC calculation routines based from Lammert Bies' lib_crc routines. 
*/ -#ifndef _LZPACK_H -#define _LZPACK_H +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum); #ifdef __cplusplus } #endif - - -#endif // _LZPACK_H diff --git a/libpsn00b/lzp/lzqlp.h b/libpsn00b/lzp/lzqlp.h index 32ce0d7..127f263 100644 --- a/libpsn00b/lzp/lzqlp.h +++ b/libpsn00b/lzp/lzqlp.h @@ -1,5 +1,23 @@ -#ifndef _QLP_H -#define _QLP_H +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed + */ + +/** + * @file lzqlp.h + * @brief Utility library for file bundling + * + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). + * + * This header provides functions to parse .QLP archives and retrieve pointers + * to their contents after they have been loaded into memory. + */ + +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -23,9 +41,17 @@ typedef struct { uint32_t offs; } QLP_FILE; + +// Function prototypes +#ifdef __cplusplus +extern "C" { +#endif + int qlpFileCount(const QLP_HEAD* qlpfile); const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile); const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile); int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile); -#endif // _QLP_H
\ No newline at end of file +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/psxapi/_syscalls.s b/libpsn00b/psxapi/_syscalls.s index 6eaed72..5062b15 100644 --- a/libpsn00b/psxapi/_syscalls.s +++ b/libpsn00b/psxapi/_syscalls.s @@ -1,26 +1,28 @@ # PSn00bSDK syscall wrappers -# (C) 2022 spicyjpeg - MPL licensed +# (C) 2022-2023 spicyjpeg - MPL licensed .set noreorder +## Interrupt enable/disable + .section .text.EnterCriticalSection .global EnterCriticalSection .type EnterCriticalSection, @function EnterCriticalSection: - li $a0, 0x01 + li $a0, 0x01 syscall 0 - jr $ra + jr $ra nop .section .text.ExitCriticalSection .global ExitCriticalSection .type ExitCriticalSection, @function ExitCriticalSection: - li $a0, 0x02 + li $a0, 0x02 syscall 0 - jr $ra + jr $ra nop .section .text.SwEnterCriticalSection @@ -31,7 +33,7 @@ SwEnterCriticalSection: li $a1, -1026 and $a1, $a0 mtc0 $a1, $12 - andi $a0, 0x0401 # return ((cop0r12_prev & 0x401) == 0x401) + andi $a0, 0x0401 # return !((cop0r12_prev & 0x401) < 0x401) sltiu $v0, $a0, 0x0401 jr $ra @@ -49,3 +51,100 @@ SwExitCriticalSection: jr $ra nop + +## PCDRV (host file access) API + +.section .text.PCinit +.global PCinit +.type PCinit, @function +PCinit: + break 0, 0x101 # () -> error + + jr $ra + nop + +.section .text.PCcreat +.global PCcreat +.type PCcreat, @function +PCcreat: + li $a2, 0 + move $a1, $a0 + break 0, 0x102 # (path, path, 0) -> error, fd + + bgez $v0, .Lcreate_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lcreate_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCopen +.global PCopen +.type PCopen, @function +PCopen: + move $a2, $a1 + move $a1, $a0 + break 0, 0x103 # (path, path, mode) -> error, fd + + bgez $v0, .Lopen_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lopen_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCclose +.global PCclose +.type PCclose, @function +PCclose: + move $a1, $a0 + break 0, 0x104 # (fd, fd) -> error + + jr $ra + nop + +.section .text.PCread 
+.global PCread +.type PCread, @function +PCread: + move $a3, $a1 + move $a1, $a0 + break 0, 0x105 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lread_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lread_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PCwrite +.global PCwrite +.type PCwrite, @function +PCwrite: + move $a3, $a1 + move $a1, $a0 + break 0, 0x106 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lwrite_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lwrite_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PClseek +.global PClseek +.type PClseek, @function +PClseek: + move $a3, $a2 + move $a2, $a1 + move $a1, $a0 + break 0, 0x107 # (fd, fd, offset, mode) -> error, offset + + bgez $v0, .Lseek_ok # if (error < 0) offset = error + nop + move $v1, $v0 +.Lseek_ok: + jr $ra # return offset + move $v0, $v1 diff --git a/libpsn00b/psxapi/drivers.s b/libpsn00b/psxapi/drivers.s index d991f90..c601201 100644 --- a/libpsn00b/psxapi/drivers.s +++ b/libpsn00b/psxapi/drivers.s @@ -32,10 +32,10 @@ _96_remove: jr $t2 li $t1, 0x72 -.section .text.AddDummyTty -.global AddDummyTty -.type AddDummyTty, @function -AddDummyTty: +.section .text.add_nullcon_driver +.global add_nullcon_driver +.type add_nullcon_driver, @function +add_nullcon_driver: li $t2, 0xa0 jr $t2 li $t1, 0x99 @@ -66,26 +66,26 @@ _card_clear: ## B0 table functions (12) -.section .text.AddDev -.global AddDev -.type AddDev, @function -AddDev: +.section .text.AddDrv +.global AddDrv +.type AddDrv, @function +AddDrv: li $t2, 0xb0 jr $t2 li $t1, 0x47 -.section .text.DelDev -.global DelDev -.type DelDev, @function -DelDev: +.section .text.DelDrv +.global DelDrv +.type DelDrv, @function +DelDrv: li $t2, 0xb0 jr $t2 li $t1, 0x48 -.section .text.ListDev -.global ListDev -.type ListDev, @function -ListDev: +.section .text.ListDrv +.global ListDrv +.type ListDrv, @function +ListDrv: li $t2, 0xb0 jr $t2 li $t1, 0x49 diff --git a/libpsn00b/psxapi/fs.s 
b/libpsn00b/psxapi/fs.s index f225d64..8b6d57a 100644 --- a/libpsn00b/psxapi/fs.s +++ b/libpsn00b/psxapi/fs.s @@ -6,12 +6,12 @@ .set noreorder -## B0 table functions (5) +## B0 table functions (6) -.section .text.chdir -.global chdir -.type chdir, @function -chdir: +.section .text.cd +.global cd +.type cd, @function +cd: li $t2, 0xb0 jr $t2 li $t1, 0x40 @@ -48,3 +48,11 @@ erase: jr $t2 li $t1, 0x45 +.section .text.undelete +.global undelete +.type undelete, @function +undelete: + li $t2, 0xb0 + jr $t2 + li $t1, 0x46 + diff --git a/libpsn00b/psxapi/stdio.s b/libpsn00b/psxapi/stdio.s index e65f871..14c6d03 100644 --- a/libpsn00b/psxapi/stdio.s +++ b/libpsn00b/psxapi/stdio.s @@ -6,7 +6,7 @@ .set noreorder -## A0 table functions (13) +## A0 table functions (14) .section .text.open .global open @@ -16,10 +16,10 @@ open: jr $t2 li $t1, 0x00 -.section .text.seek -.global seek -.type seek, @function -seek: +.section .text.lseek +.global lseek +.type lseek, @function +lseek: li $t2, 0xa0 jr $t2 li $t1, 0x01 @@ -56,6 +56,14 @@ ioctl: jr $t2 li $t1, 0x05 +.section .text.isatty +.global isatty +.type isatty, @function +isatty: + li $t2, 0xa0 + jr $t2 + li $t1, 0x07 + .section .text.getc .global getc .type getc, @function @@ -112,3 +120,21 @@ printf: jr $t2 li $t1, 0x3f +## B0 table functions (2) + +.section .text._get_errno +.global _get_errno +.type _get_errno, @function +_get_errno: + li $t2, 0xb0 + jr $t2 + li $t1, 0x54 + +.section .text._get_error +.global _get_error +.type _get_error, @function +_get_error: + li $t2, 0xb0 + jr $t2 + li $t1, 0x55 + diff --git a/libpsn00b/psxapi/stubs.json b/libpsn00b/psxapi/stubs.json index 50ffb55..afa83c6 100644 --- a/libpsn00b/psxapi/stubs.json +++ b/libpsn00b/psxapi/stubs.json @@ -8,7 +8,7 @@ { "type": "a", "id": 1, - "name": "seek", + "name": "lseek", "file": "stdio.s" }, { @@ -37,6 +37,12 @@ }, { "type": "a", + "id": 7, + "name": "isatty", + "file": "stdio.s" + }, + { + "type": "a", "id": 8, "name": "getc", "file": "stdio.s" @@ 
-109,6 +115,12 @@ }, { "type": "a", + "id": 81, + "name": "LoadExec", + "file": "sys.s" + }, + { + "type": "a", "id": 85, "name": "_bu_init", "file": "drivers.s" @@ -128,7 +140,7 @@ { "type": "a", "id": 153, - "name": "AddDummyTty", + "name": "add_nullcon_driver", "file": "drivers.s" }, { @@ -139,6 +151,18 @@ }, { "type": "a", + "id": 157, + "name": "GetConf", + "file": "sys.s" + }, + { + "type": "a", + "id": 159, + "name": "SetMem", + "file": "sys.s" + }, + { + "type": "a", "id": 160, "name": "_boot", "file": "sys.s" @@ -170,13 +194,13 @@ { "type": "b", "id": 0, - "name": "_kernel_malloc", + "name": "alloc_kernel_memory", "file": "sys.s" }, { "type": "b", "id": 1, - "name": "_kernel_free", + "name": "free_kernel_memory", "file": "sys.s" }, { @@ -296,13 +320,13 @@ { "type": "b", "id": 24, - "name": "SetDefaultExitFromException", + "name": "ResetEntryInt", "file": "sys.s" }, { "type": "b", "id": 25, - "name": "SetCustomExitFromException", + "name": "HookEntryInt", "file": "sys.s" }, { @@ -314,7 +338,7 @@ { "type": "b", "id": 64, - "name": "chdir", + "name": "cd", "file": "fs.s" }, { @@ -343,20 +367,26 @@ }, { "type": "b", + "id": 70, + "name": "undelete", + "file": "fs.s" + }, + { + "type": "b", "id": 71, - "name": "AddDev", + "name": "AddDrv", "file": "drivers.s" }, { "type": "b", "id": 72, - "name": "DelDev", + "name": "DelDrv", "file": "drivers.s" }, { "type": "b", "id": 73, - "name": "ListDev", + "name": "ListDrv", "file": "drivers.s" }, { @@ -397,6 +427,18 @@ }, { "type": "b", + "id": 84, + "name": "_get_errno", + "file": "stdio.s" + }, + { + "type": "b", + "id": 85, + "name": "_get_error", + "file": "stdio.s" + }, + { + "type": "b", "id": 86, "name": "GetC0Table", "file": "sys.s" diff --git a/libpsn00b/psxapi/sys.s b/libpsn00b/psxapi/sys.s index e2505e1..40dcdff 100644 --- a/libpsn00b/psxapi/sys.s +++ b/libpsn00b/psxapi/sys.s @@ -6,7 +6,7 @@ .set noreorder -## A0 table functions (8) +## A0 table functions (11) .section .text.b_setjmp .global b_setjmp @@ -48,6 
+48,14 @@ FlushCache: jr $t2 li $t1, 0x44 +.section .text.LoadExec +.global LoadExec +.type LoadExec, @function +LoadExec: + li $t2, 0xa0 + jr $t2 + li $t1, 0x51 + .section .text.SetConf .global SetConf .type SetConf, @function @@ -56,6 +64,22 @@ SetConf: jr $t2 li $t1, 0x9c +.section .text.GetConf +.global GetConf +.type GetConf, @function +GetConf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9d + +.section .text.SetMem +.global SetMem +.type SetMem, @function +SetMem: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9f + .section .text._boot .global _boot .type _boot, @function @@ -74,18 +98,18 @@ GetSystemInfo: ## B0 table functions (27) -.section .text._kernel_malloc -.global _kernel_malloc -.type _kernel_malloc, @function -_kernel_malloc: +.section .text.alloc_kernel_memory +.global alloc_kernel_memory +.type alloc_kernel_memory, @function +alloc_kernel_memory: li $t2, 0xb0 jr $t2 li $t1, 0x00 -.section .text._kernel_free -.global _kernel_free -.type _kernel_free, @function -_kernel_free: +.section .text.free_kernel_memory +.global free_kernel_memory +.type free_kernel_memory, @function +free_kernel_memory: li $t2, 0xb0 jr $t2 li $t1, 0x01 @@ -242,18 +266,18 @@ ReturnFromException: jr $t2 li $t1, 0x17 -.section .text.SetDefaultExitFromException -.global SetDefaultExitFromException -.type SetDefaultExitFromException, @function -SetDefaultExitFromException: +.section .text.ResetEntryInt +.global ResetEntryInt +.type ResetEntryInt, @function +ResetEntryInt: li $t2, 0xb0 jr $t2 li $t1, 0x18 -.section .text.SetCustomExitFromException -.global SetCustomExitFromException -.type SetCustomExitFromException, @function -SetCustomExitFromException: +.section .text.HookEntryInt +.global HookEntryInt +.type HookEntryInt, @function +HookEntryInt: li $t2, 0xb0 jr $t2 li $t1, 0x19 diff --git a/libpsn00b/psxcd/cdread.c b/libpsn00b/psxcd/cdread.c index d211a01..1adc255 100644 --- a/libpsn00b/psxcd/cdread.c +++ b/libpsn00b/psxcd/cdread.c @@ -89,6 +89,8 @@ static int _poll_retry(void) { /* Public API 
*/ int CdReadRetry(int sectors, uint32_t *buf, int mode, int attempts) { + _sdk_validate_args((sectors > 0) && buf && (attempts > 0), -1); + if (CdReadSync(1, 0) > 0) { _sdk_log("CdRead() failed, another read in progress (%d sectors pending)\n", _pending_sectors); return 0; diff --git a/libpsn00b/psxcd/common.c b/libpsn00b/psxcd/common.c index 8b8030b..461ab91 100644 --- a/libpsn00b/psxcd/common.c +++ b/libpsn00b/psxcd/common.c @@ -208,6 +208,9 @@ int CdInit(void) { BUS_CD_CFG = 0x00020943; + SetDMAPriority(DMA_CD, 3); + DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA + CD_REG(0) = 1; CD_REG(3) = 0x1f; // Acknowledge all IRQs CD_REG(2) = 0x1f; // Enable all IRQs @@ -217,9 +220,6 @@ int CdInit(void) { CdlATV mix = { 0x80, 0x00, 0x80, 0x00 }; CdMix(&mix); - DMA_DPCR |= 0x0000b000; // Enable DMA3 - DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA3 - _last_mode = 0; _ack_pending = 0; _sync_pending = 0; @@ -244,6 +244,8 @@ int CdInit(void) { /* Low-level command API */ int CdCommandF(CdlCommand cmd, const void *param, int length) { + _sdk_validate_args(param || (length <= 0), -1); + const uint8_t *_param = (const uint8_t *) param; _last_command = (uint8_t) cmd; @@ -283,7 +285,7 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) { __asm__ volatile(""); CD_REG(0) = 0; - for (; length; length--) + for (; length > 0; length--) CD_REG(2) = *(_param++); CD_REG(0) = 0; @@ -292,6 +294,8 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) { } int CdCommand(CdlCommand cmd, const void *param, int length, uint8_t *result) { + _sdk_validate_args(param || (length <= 0), -1); + /*if (_ack_pending) { _sdk_log("CdCommand(0x%02x) failed, drive busy\n", cmd); return 0; @@ -329,8 +333,10 @@ int CdControlF(CdlCommand cmd, const void *param) { } else { // The command takes a mandatory parameter or no parameter. 
length = flags & 3; - if (length && !param) + if (length && !param) { + _sdk_log("CdControl() param is required for command 0x%02x\n", cmd); return -1; + } } return CdCommandF(cmd, param, length); diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c index 0ac782b..31ed00c 100644 --- a/libpsn00b/psxcd/isofs.c +++ b/libpsn00b/psxcd/isofs.c @@ -92,7 +92,7 @@ static int _CdReadIsoDescriptor(int session_offs) // Verify if volume descriptor is present descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff; - if( strncmp("CD001", descriptor->header.id, 5) ) + if( memcmp("CD001", descriptor->header.id, 5) ) { _sdk_log("Disc does not contain a ISO9660 file system.\n"); @@ -211,7 +211,7 @@ static int _CdReadIsoDirectory(int lba) return 0; } -#ifndef NDEBUG +#if 0 static void dump_directory(void) { @@ -228,8 +228,12 @@ static void dump_directory(void) { dir_entry = (ISO_DIR_ENTRY*)(_cd_iso_directory_buff+dir_pos); - strncpy(namebuff, - _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), dir_entry->identifierLen); + memcpy( + namebuff, + _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), + dir_entry->identifierLen + ); + namebuff[dir_entry->identifierLen] = 0; _sdk_log("P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff); @@ -271,9 +275,12 @@ static void dump_pathtable(void) while( (int)(tbl_pos-_cd_iso_pathtable_buff) < descriptor->pathTableSize.lsb ) { - strncpy(namebuff, + memcpy( + namebuff, tbl_pos+sizeof(ISO_PATHTABLE_ENTRY), - tbl_entry->nameLength); + tbl_entry->nameLength + ); + namebuff[tbl_entry->nameLength] = 0; _sdk_log("%s\n", namebuff); @@ -308,9 +315,12 @@ static int get_pathtable_entry(int entry, ISO_PATHTABLE_ENTRY *tbl, char *namebu { if( namebuff ) { - strncpy(namebuff, + memcpy( + namebuff, tbl_pos+sizeof(ISO_PATHTABLE_ENTRY), - tbl_entry->nameLength); + tbl_entry->nameLength + ); + namebuff[tbl_entry->nameLength] = 0; } if( tbl ) @@ -381,9 +391,12 @@ static int find_dir_entry(const char *name, ISO_DIR_ENTRY *dirent) if( 
!(dir_entry->flags & 0x2) ) { - strncpy(namebuff, + memcpy( + namebuff, _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), - dir_entry->identifierLen); + dir_entry->identifierLen + ); + namebuff[dir_entry->identifierLen] = 0; if( strcmp(namebuff, name) == 0 ) { @@ -422,7 +435,8 @@ static char* get_pathname(char *path, const char *filename) return NULL; } - strncpy(path, filename, (int)(c-filename)); + memcpy(path, filename, c - filename); + path[c - filename] = 0; return path; } @@ -450,6 +464,8 @@ static char* get_filename(char *name, const char *filename) CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) { + _sdk_validate_args(fp && filename, NULL); + int i,j,found_dir,num_dirs; int dir_len; char tpath_rbuff[128]; @@ -553,6 +569,8 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) CdlDIR *CdOpenDir(const char* path) { + _sdk_validate_args(path, NULL); + CdlDIR_INT* dir; int num_dirs; int i,found_dir; @@ -605,7 +623,11 @@ CdlDIR *CdOpenDir(const char* path) _sdk_log( "Directory LBA = %d\n", tbl_entry.dirOffs ); _CdReadIsoDirectory( tbl_entry.dirOffs ); - + +#ifndef NDEBUG + //dump_directory(); +#endif + dir = (CdlDIR_INT*)malloc( sizeof(CdlDIR_INT) ); dir->_len = _cd_iso_directory_len; @@ -631,6 +653,8 @@ CdlDIR *CdOpenDir(const char* path) int CdReadDir(CdlDIR *dir, CdlFILE* file) { + _sdk_validate_args(dir && file, 0); + CdlDIR_INT* d_dir; ISO_DIR_ENTRY* dir_entry; @@ -658,9 +682,12 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file) } else { - strncpy( file->name, + memcpy( + file->name, d_dir->_dir+d_dir->_pos+sizeof(ISO_DIR_ENTRY), - dir_entry->identifierLen ); + dir_entry->identifierLen + ); + file->name[dir_entry->identifierLen] = 0; } CdIntToPos( dir_entry->entryOffs.lsb, &file->pos ); @@ -683,6 +710,9 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file) void CdCloseDir(CdlDIR *dir) { + if (!dir) + return; + CdlDIR_INT* d_dir; d_dir = (CdlDIR_INT*)dir; @@ -698,6 +728,8 @@ int CdIsoError() int CdGetVolumeLabel(char *label) { + _sdk_validate_args(label, 
-1); + int i, length = 31; ISO_DESCRIPTOR* descriptor; @@ -735,7 +767,7 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result) if( _ses_scanbuff[0] == 0x1 ) { - if( strncmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 ) + if( memcmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 ) { CdControlF(CdlPause, 0); _ses_scancomplete = 1; @@ -761,6 +793,8 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result) int CdLoadSession(int session) { + _sdk_validate_args(session >= 0, -1); + CdlLOC *loc; CdlCB ready_oldcb; char scanbuff[2048]; diff --git a/libpsn00b/psxcd/misc.c b/libpsn00b/psxcd/misc.c index 8fd2a4d..2f04821 100644 --- a/libpsn00b/psxcd/misc.c +++ b/libpsn00b/psxcd/misc.c @@ -12,15 +12,29 @@ #define DATA_SYNC_TIMEOUT 0x100000 -/* Private types */ - -typedef struct { - uint8_t status, first_track, last_track; -} TrackInfo; +/* Unlock command strings */ + +static const char *_unlock_strings[] = { + "", + "Licensed by", + "Sony", + "Computer", + "Entertainment", + "", + "" +}; + +static const char *const _unlock_regions[] = { + "of America", // CdlRegionSCEA + "(Europe)", // CdlRegionSCEE + "World wide" // CdlRegionSCEW +}; /* Sector DMA transfer functions */ int CdGetSector(void *madr, int size) { + _sdk_validate_args(madr && (size > 0), 0); + //while (!(CD_REG(0) & (1 << 6))) //__asm__ volatile(""); @@ -35,6 +49,8 @@ int CdGetSector(void *madr, int size) { } int CdGetSector2(void *madr, int size) { + _sdk_validate_args(madr && (size > 0), 0); + //while (!(CD_REG(0) & (1 << 6))) //__asm__ volatile(""); @@ -54,7 +70,7 @@ int CdDataSync(int mode) { return 0; } - _sdk_log("CdDataSync() timeout\n"); + _sdk_log("CdDataSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_CD)); return -1; } @@ -77,52 +93,40 @@ int CdPosToInt(const CdlLOC *p) { ) - 150; } -/* Misc. 
functions */ - -int CdGetToc(CdlLOC *toc) { - TrackInfo track_info; - - if (!CdCommand(CdlGetTN, 0, 0, (uint8_t *) &track_info)) - return 0; - if (CdSync(1, 0) != CdlComplete) - return 0; - - int first = btoi(track_info.first_track); - int tracks = btoi(track_info.last_track) + 1 - first; - //assert(first == 1); +/* Drive unlocking API */ - for (int i = 0; i < tracks; i++) { - uint8_t track = itob(first + i); +CdlRegionCode CdGetRegion(void) { + uint8_t param; + uint8_t result[16]; - if (!CdCommand(CdlGetTD, &track, 1, (uint8_t *) &toc[i])) - return 0; - if (CdSync(1, 0) != CdlComplete) - return 0; + // Firmware version C0 does not support test command 0x22 to retrieve the + // region, but it was only used in the SCPH-1000 Japanese model. Version D1 + // (and possibly others?) is used in debug consoles. + // https://psx-spx.consoledev.net/cdromdrive/#19h20h-int3yymmddver + // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe + param = 0x20; + memset(result, 0, 4); - toc[i].sector = 0; - toc[i].track = track; + if (!CdCommand(CdlTest, ¶m, 1, result)) { + _sdk_log("failed to probe drive firmware version\n"); + return CdlRegionUnknown; } - return tracks; -} - -CdlRegionCode CdGetRegion(void) { - uint8_t param = 0x22; - uint8_t result[16]; + _sdk_log("drive firmware version: 0x%02x\n", result[3]); + if (result[3] == 0xc0) + return CdlRegionSCEI; + if (result[3] >= 0xd0) + return CdlRegionDebug; - // Test command 0x22 is unsupported in firmware version C0, which was used - // exclusively in the SCPH-1000 Japanese model. It's thus safe to assume - // that the console is Japanese if the command returns a valid error. - // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe + param = 0x22; memset(result, 0, 16); if (!CdCommand(CdlTest, ¶m, 1, result)) { _sdk_log("failed to probe drive region\n"); - return (result[1] == 0x10) ? 
CdlRegionSCEI : CdlRegionUnknown; + return CdlRegionUnknown; } _sdk_log("drive region: %s\n", result); - if (!strcmp(result, "for Japan")) return CdlRegionSCEI; if (!strcmp(result, "for U/C")) @@ -137,7 +141,72 @@ CdlRegionCode CdGetRegion(void) { return CdlRegionUnknown; } +int CdUnlock(CdlRegionCode region) { + if (region <= CdlRegionSCEI) + return 0; + if (region >= CdlRegionDebug) + return 1; + + // This is by far the most efficient way to do it. + _unlock_strings[5] = _unlock_regions[region - CdlRegionSCEA]; + + for (int i = 0; i < 7; i++) { + uint8_t result[4]; + + if (!CdCommand( + 0x50 + i, + _unlock_strings[i], + strlen(_unlock_strings[i]), + result + )) + return 0; + + if (!(result[0] & CdlStatError) || (result[1] != 0x40)) { + _sdk_log("unlock failed, status=0x%02x, code=0x%02x\n", result[0], result[1]); + return 0; + } + } + + _sdk_log("unlock successful\n"); + return CdCommand(CdlNop, 0, 0, 0); +} + +/* Misc. functions */ + +int CdGetToc(CdlLOC *toc) { + _sdk_validate_args(toc, 0); + + uint8_t result[4]; + + if (!CdCommand(CdlGetTN, 0, 0, result)) + return 0; + if (CdSync(1, 0) != CdlComplete) + return 0; + + int first = btoi(result[1]); + int tracks = btoi(result[2]) + 1 - first; + //assert(first == 1); + + for (int i = 0; i < tracks; i++) { + uint8_t track = itob(first + i); + + if (!CdCommand(CdlGetTD, &track, 1, result)) + return 0; + if (CdSync(1, 0) != CdlComplete) + return 0; + + toc[i].minute = result[1]; + toc[i].second = result[2]; + toc[i].sector = 0; + toc[i].track = track; + } + + return tracks; +} + int CdMix(const CdlATV *vol) { + _sdk_validate_args(vol, 0); + CD_REG(0) = 2; CD_REG(2) = vol->val0; CD_REG(3) = vol->val1; diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c index ff712eb..06302e2 100644 --- a/libpsn00b/psxetc/dl.c +++ b/libpsn00b/psxetc/dl.c @@ -112,6 +112,8 @@ static uint32_t _elf_hash(const char *str) { /* Symbol map loading/introspection API */ int DL_InitSymbolMap(int num_entries) { + 
_sdk_validate_args(num_entries, -1); + if (_symbol_map.entries) DL_UnloadSymbolMap(); @@ -151,6 +153,8 @@ void DL_UnloadSymbolMap(void) { } void DL_AddMapSymbol(const char *name, void *ptr) { + _sdk_validate_args_void(name); + uint32_t hash = _elf_hash(name); int index = _symbol_map.index; _symbol_map.index = index + 1; @@ -168,6 +172,8 @@ void DL_AddMapSymbol(const char *name, void *ptr) { } int DL_ParseSymbolMap(const char *ptr, size_t size) { + _sdk_validate_args(ptr && size, 0); + int entries = 0; // Perform a quick scan over the entire map text and count the number of @@ -232,6 +238,8 @@ int DL_ParseSymbolMap(const char *ptr, size_t size) { } void *DL_GetMapSymbol(const char *name) { + _sdk_validate_args(name, 0); + if (!_symbol_map.entries) { _sdk_log("DL_GetMapSymbol() with no map loaded\n"); return 0; @@ -275,8 +283,7 @@ void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) { /* Library loading and linking API */ DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) { - if (!dll || !ptr) - return 0; + _sdk_validate_args(dll && ptr && size, 0); dll->ptr = ptr; dll->malloc_ptr = (mode & DL_FREE_ON_DESTROY) ? 
ptr : 0; @@ -463,6 +470,8 @@ void DL_DestroyDLL(DLL *dll) { } void *DL_GetDLLSymbol(const DLL *dll, const char *name) { + _sdk_validate_args(name, 0); + if (!dll) return DL_GetMapSymbol(name); //return _dl_resolve_callback(0, name); diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c index f2a273c..8bd11fc 100644 --- a/libpsn00b/psxetc/interrupts.c +++ b/libpsn00b/psxetc/interrupts.c @@ -4,6 +4,7 @@ */ #include <stdint.h> +#include <assert.h> #include <psxapi.h> #include <psxetc.h> #include <hwregs_c.h> @@ -99,8 +100,7 @@ static void _global_dma_handler(void) { /* IRQ and DMA handler API */ void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) { - if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) - return 0; + _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0); void *old_callback = _irq_handlers[irq]; _irq_handlers[irq] = func; @@ -116,15 +116,13 @@ void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) { } void *GetInterruptCallback(IRQ_Channel irq) { - if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) - return 0; + _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0); return _irq_handlers[irq]; } void *DMACallback(DMA_Channel dma, void (*func)(void)) { - if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) - return 0; + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0); void *old_callback = _dma_handlers[dma]; _dma_handlers[dma] = func; @@ -151,12 +149,34 @@ void *DMACallback(DMA_Channel dma, void (*func)(void)) { } void *GetDMACallback(DMA_Channel dma) { - if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) - return 0; + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0); return _dma_handlers[dma]; } +/* DMA channel priority API */ + +int SetDMAPriority(DMA_Channel dma, int priority) { + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); + + uint32_t dpcr = DMA_DPCR; + uint32_t channel = dpcr >> (dma * 4); + + dpcr &= ~(0xf << (dma * 4)); + if (priority >= 0) + dpcr |= ((priority & 7) | 8) << (dma * 
4); + + DMA_DPCR = dpcr; + return (channel & 8) ? (channel & 7) : -1; +} + +int GetDMAPriority(DMA_Channel dma) { + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); + + uint32_t channel = DMA_DPCR >> (dma * 4); + return (channel & 8) ? (channel & 7) : -1; +} + /* Hook installation/removal API */ int ResetCallback(void) { @@ -190,7 +210,7 @@ void RestartCallback(void) { // Install the ISR hook and prevent the kernel's internal handlers from // automatically acknowledging SPI and timer IRQs. - SetCustomExitFromException(&_isr_jmp_buf); + HookEntryInt(&_isr_jmp_buf); ChangeClearPAD(0); ChangeClearRCnt(0, 0); ChangeClearRCnt(1, 0); @@ -217,7 +237,7 @@ void StopCallback(void) { DMA_DPCR = _saved_dma_dpcr & 0x07777777; DMA_DICR = 0; - SetDefaultExitFromException(); + ResetEntryInt(); ChangeClearPAD(1); ChangeClearRCnt(0, 1); ChangeClearRCnt(1, 1); diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index e41bd31..7e0758b 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -1,6 +1,6 @@ /* * PSn00bSDK GPU library (common functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> @@ -10,9 +10,8 @@ #include <psxgpu.h> #include <hwregs_c.h> -#define QUEUE_LENGTH 16 -#define DMA_CHUNK_LENGTH 8 -#define VSYNC_TIMEOUT 0x100000 +#define QUEUE_LENGTH 16 +#define VSYNC_TIMEOUT 0x100000 static void _default_vsync_halt(void); @@ -21,7 +20,7 @@ static void _default_vsync_halt(void); typedef struct { void (*func)(uint32_t, uint32_t, uint32_t); uint32_t arg1, arg2, arg3; -} QueueEntry; +} DrawOp; /* Internal globals */ @@ -31,10 +30,10 @@ static void (*_vsync_halt_func)(void) = &_default_vsync_halt; static void (*_vsync_callback)(void) = (void *) 0; static void (*_drawsync_callback)(void) = (void *) 0; -static volatile QueueEntry _draw_queue[QUEUE_LENGTH]; -static volatile uint8_t _queue_head, _queue_tail, _queue_length; -static volatile uint32_t _vblank_counter; -static volatile 
uint16_t _last_hblank; +static volatile DrawOp _draw_queue[QUEUE_LENGTH]; +static volatile uint8_t _queue_head, _queue_tail, _queue_length, _drawop_type; +static volatile uint32_t _vblank_counter, _last_vblank; +static volatile uint16_t _last_hblank; /* Private interrupt handlers */ @@ -45,16 +44,16 @@ static void _vblank_handler(void) { _vsync_callback(); } -static void _gpu_dma_handler(void) { - //while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) - while (!(GPU_GP1 & (1 << 26))) - __asm__ volatile(""); +static void _process_drawop(void) { + int length = _queue_length; + if (!length) + return; - if (--_queue_length) { + if (--length) { int head = _queue_head; _queue_head = (head + 1) % QUEUE_LENGTH; - volatile QueueEntry *entry = &_draw_queue[head]; + volatile DrawOp *entry = &_draw_queue[head]; entry->func(entry->arg1, entry->arg2, entry->arg3); } else { GPU_GP1 = 0x04000000; // Disable DMA request @@ -62,16 +61,36 @@ static void _gpu_dma_handler(void) { if (_drawsync_callback) _drawsync_callback(); } + + _queue_length = length; +} + +static void _gpu_irq_handler(void) { + GPU_GP1 = 0x02000000; // Reset IRQ + + if (_drawop_type == DRAWOP_TYPE_GPU_IRQ) + _process_drawop(); +} + +static void _gpu_dma_handler(void) { + if (_drawop_type == DRAWOP_TYPE_DMA) + _process_drawop(); } /* GPU reset and system initialization */ void ResetGraph(int mode) { + _queue_head = 0; + _queue_tail = 0; + _queue_length = 0; + _drawop_type = 0; + // Perform some basic system initialization when ResetGraph() is called for // the first time. if (!ResetCallback()) { EnterCriticalSection(); InterruptCallback(IRQ_VBLANK, &_vblank_handler); + InterruptCallback(IRQ_GPU, &_gpu_irq_handler); DMACallback(DMA_GPU, &_gpu_dma_handler); _gpu_video_mode = (GPU_GP1 >> 20) & 1; @@ -80,28 +99,27 @@ void ResetGraph(int mode) { _sdk_log("setup done, default mode is %s\n", _gpu_video_mode ? 
"PAL" : "NTSC"); } - if (mode == 3) { + if (mode) { GPU_GP1 = 0x01000000; // Reset command buffer - return; - } - - DMA_DPCR |= 0x0b000b00; // Enable DMA2 and DMA6 - DMA_CHCR(2) = 0x00000201; // Stop DMA2 - DMA_CHCR(6) = 0x00000200; // Stop DMA6 + GPU_GP1 = 0x02000000; // Reset IRQ + GPU_GP1 = 0x04000000; // Disable DMA request - if (mode == 1) { - GPU_GP1 = 0x01000000; // Reset command buffer - return; + if (mode == 1) + return; + } else { + GPU_GP1 = 0x00000000; // Reset GPU } - GPU_GP1 = 0x00000000; // Reset GPU + SetDMAPriority(DMA_GPU, 3); + SetDMAPriority(DMA_OTC, 3); + DMA_CHCR(DMA_GPU) = 0x00000201; // Stop DMA + DMA_CHCR(DMA_OTC) = 0x00000200; // Stop DMA + TIMER_CTRL(0) = 0x0500; TIMER_CTRL(1) = 0x0500; - _queue_head = 0; - _queue_tail = 0; - _queue_length = 0; _vblank_counter = 0; + _last_vblank = 0; _last_hblank = 0; } @@ -127,10 +145,13 @@ int VSync(int mode) { if (mode < 0) return _vblank_counter; - uint32_t status = GPU_GP1; + // Wait for the specified number of vertical blank events since the last + // call to VSync() to occur (if mode >= 2) or just for a single vertical + // blank (if mode = 0). + uint32_t target = mode ? (_last_vblank + mode) : (_vblank_counter + 1); - // Wait for at least one vertical blank event to occur. - do { + while (_vblank_counter < target) { + uint32_t status = GPU_GP1; _vsync_halt_func(); // If interlaced mode is enabled, wait until the GPU starts displaying @@ -139,9 +160,11 @@ int VSync(int mode) { while (!((GPU_GP1 ^ status) & (1 << 31))) __asm__ volatile(""); } - } while ((--mode) > 0); + } + _last_vblank = _vblank_counter; _last_hblank = TIMER_VALUE(1); + return delta; } @@ -167,14 +190,13 @@ void *VSyncCallback(void (*func)(void)) { /* Command queue API */ -// This function is normally only used internally, but it is exposed for -// advanced use cases. 
-int EnqueueDrawOp( - void (*func)(uint32_t, uint32_t, uint32_t), - uint32_t arg1, - uint32_t arg2, - uint32_t arg3 -) { +void SetDrawOpType(GPU_DrawOpType type) { + _drawop_type = type; +} + +int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3) { + _sdk_validate_args(func, -1); + // If GPU DMA is currently busy, append the command to the queue instead of // executing it immediately. Note that interrupts must be disabled *prior* // to checking if DMA is busy; disabling them afterwards would create a @@ -202,7 +224,7 @@ int EnqueueDrawOp( _queue_tail = (tail + 1) % QUEUE_LENGTH; _queue_length = length + 1; - volatile QueueEntry *entry = &_draw_queue[tail]; + volatile DrawOp *entry = &_draw_queue[tail]; entry->func = func; entry->arg1 = arg1; entry->arg2 = arg2; @@ -225,7 +247,7 @@ int DrawSync(int mode) { if (!_queue_length) { // Wait for any DMA transfer to finish if DMA is enabled. if (GPU_GP1 & (3 << 29)) { - while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24))) + while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(DMA_GPU) & (1 << 24))) __asm__ volatile(""); } @@ -248,88 +270,17 @@ void *DrawSyncCallback(void (*func)(void)) { return old_callback; } -/* OT and primitive drawing API */ - -void ClearOTagR(uint32_t *ot, size_t length) { - DMA_MADR(6) = (uint32_t) &ot[length - 1]; - DMA_BCR(6) = length & 0xffff; - DMA_CHCR(6) = 0x11000002; +/* Queue pause/resume API */ - while (DMA_CHCR(6) & (1 << 24)) - __asm__ volatile(""); -} +int IsIdleGPU(int timeout) { + if (timeout <= 0) + timeout = 1; -void ClearOTag(uint32_t *ot, size_t length) { - // DMA6 only supports writing to RAM in reverse order (last to first), so - // the OT has to be cleared in software here. This function is thus much - // slower than ClearOTagR(). 
- // https://problemkaputt.de/psx-spx.htm#dmachannels - for (int i = 0; i < (length - 1); i++) - ot[i] = (uint32_t) &ot[i + 1] & 0x00ffffff; - - ot[length - 1] = 0x00ffffff; -} - -void AddPrim(uint32_t *ot, const void *pri) { - addPrim(ot, pri); -} - -void DrawPrim(const uint32_t *pri) { - size_t length = getlen(pri); - - DrawSync(0); - GPU_GP1 = 0x04000002; - - // NOTE: if length >= DMA_CHUNK_LENGTH then it also has to be a multiple of - // DMA_CHUNK_LENGTH, otherwise the DMA channel will get stuck waiting for - // more data indefinitely. - DMA_MADR(2) = (uint32_t) &pri[1]; - if (length < DMA_CHUNK_LENGTH) - DMA_BCR(2) = 0x00010000 | length; - else - DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); - - DMA_CHCR(2) = 0x01000201; -} - -int DrawOTag(const uint32_t *ot) { - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) ot, 0, 0); -} - -void DrawOTag2(const uint32_t *ot) { - GPU_GP1 = 0x04000002; - - while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) - __asm__ volatile(""); - - DMA_MADR(2) = (uint32_t) ot; - DMA_BCR(2) = 0; - DMA_CHCR(2) = 0x01000401; -} - -/* Misc. functions */ - -GPU_VideoMode GetVideoMode(void) { - return _gpu_video_mode; -} - -void SetVideoMode(GPU_VideoMode mode) { - uint32_t _mode, stat = GPU_GP1; - - _gpu_video_mode = mode & 1; - - _mode = (mode & 1) << 3; - _mode |= (stat >> 17) & 0x37; // GPUSTAT 17-22 -> cmd bits 0-5 - _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> cmd bit 6 - _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> cmd bit 7 - - GPU_GP1 = 0x08000000 | mode; -} - -int GetODE(void) { - return (GPU_GP1 >> 31); -} + for (; timeout; timeout--) { + if (GPU_GP1 & (1 << 26)) + return 0; + } -void SetDispMask(int mask) { - GPU_GP1 = 0x03000000 | (mask ? 
0 : 1); + //_sdk_log("IsIdleGPU() timeout\n"); + return -1; } diff --git a/libpsn00b/psxgpu/drawing.c b/libpsn00b/psxgpu/drawing.c new file mode 100644 index 0000000..161b2f7 --- /dev/null +++ b/libpsn00b/psxgpu/drawing.c @@ -0,0 +1,148 @@ +/* + * PSn00bSDK GPU library (drawing/display list functions) + * (C) 2022-2023 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <assert.h> +#include <psxetc.h> +#include <psxgpu.h> +#include <hwregs_c.h> + +/* Private utilities */ + +// This function is actually referenced in env.c as well, so it can't be static. +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) ot; + DMA_BCR(DMA_GPU) = 0; + DMA_CHCR(DMA_GPU) = 0x01000401; +} + +static void _send_buffer( + GPU_DrawOpType type, const uint32_t *buf, size_t length +) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) buf; + DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16); + DMA_CHCR(DMA_GPU) = 0x01000201; +} + +/* Buffer and primitive drawing API */ + +int DrawOTag(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) ot, + 0 + ); +} + +int DrawOTagIRQ(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) ot, + 0 + ); +} + +int DrawBuffer(const uint32_t *buf, size_t length) { + _sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &DrawBuffer2, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) buf, + (uint32_t) length + ); +} + +int DrawBufferIRQ(const uint32_t *buf, size_t length) { + 
_sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &DrawBuffer2, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) buf, + (uint32_t) length + ); +} + +void DrawOTag2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_DMA, ot); +} + +void DrawOTagIRQ2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_GPU_IRQ, ot); +} + +void DrawBuffer2(const uint32_t *buf, size_t length) { + _sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_DMA, buf, length); +} + +void DrawBufferIRQ2(const uint32_t *buf, size_t length) { + _sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_GPU_IRQ, buf, length); +} + +void DrawPrim(const uint32_t *pri) { + _sdk_validate_args_void(pri); + + DrawSync(0); + DrawBuffer2(&pri[1], getlen(pri)); +} + +/* Helper functions */ + +void ClearOTagR(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1]; + DMA_BCR(DMA_OTC) = length & 0xffff; + DMA_CHCR(DMA_OTC) = 0x11000002; + + while (DMA_CHCR(DMA_OTC) & (1 << 24)) + __asm__ volatile(""); +} + +void ClearOTag(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + // DMA6 only supports writing to RAM in reverse order (last to first), so + // the OT has to be cleared in software here. This function is thus much + // slower than ClearOTagR(). 
+ // https://problemkaputt.de/psx-spx.htm#dmachannels + for (int i = 0; i < (length - 1); i++) + ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff; + + ot[length - 1] = 0xffffff; +} + +void AddPrim(uint32_t *ot, const void *pri) { + _sdk_validate_args_void(ot && pri); + + addPrim(ot, pri); +} diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c index 8784947..236ae4b 100644 --- a/libpsn00b/psxgpu/env.c +++ b/libpsn00b/psxgpu/env.c @@ -1,9 +1,10 @@ /* * PSn00bSDK GPU library (DRAWENV/DISPENV functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> +#include <assert.h> #include <psxgpu.h> #include <hwregs_c.h> @@ -33,9 +34,53 @@ static inline uint32_t _get_window_mask(int size) { return mask & 0x1f; } +static const uint32_t *_build_drawenv_ot(const uint32_t *ot, DRAWENV *env) { + // All commands are grouped into a single display list packet for + // performance reasons using tagless primitives (the GPU does not care + // about the grouping as the display list is parsed by the CPU). + DR_ENV *prim = &(env->dr_env); + setaddr(prim, ot); + setlen(prim, 5); + + // Texture page (reset active page and set dither/mask bits) + setDrawTPage_T(&(prim->tpage), env->dfe & 1, env->dtd & 1, env->tpage); + + // Texture window + //setTexWindow_T(&(prim->twin), &(env->tw)); + prim->twin.code[0] = 0xe2000000; + prim->twin.code[0] |= _get_window_mask(env->tw.w); + prim->twin.code[0] |= _get_window_mask(env->tw.h) << 5; + prim->twin.code[0] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10 + prim->twin.code[0] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15 + + // Set drawing area + setDrawArea_T(&(prim->area), &(env->clip)); + setDrawOffset_T( + &(prim->offset), env->clip.x + env->ofs[0], env->clip.y + env->ofs[1] + ); + + if (env->isbg) { + FILL_T *fill = &(prim->fill); + setlen(prim, 8); + + // Rectangle fill + // FIXME: reportedly this command doesn't accept height values >511... 
+ setFill_T(fill); + setColor0(fill, *((const uint32_t *) &(env->isbg)) >> 8); + setXY0(fill, env->clip.x, env->clip.y); + setWH(fill, env->clip.w, _min(env->clip.h, 0x1ff)); + } + + return (const uint32_t *) prim; +} + /* Drawing API */ +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot); + DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { + _sdk_validate_args(env && (w > 0) && (h > 0), 0); + env->clip.x = x; env->clip.y = y; env->clip.w = w; @@ -60,69 +105,41 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { } int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { - DR_ENV *prim = &(env->dr_env); - - // All commands are grouped into a single display list packet for - // performance reasons (the GPU does not care about the grouping as the - // display list is parsed by the DMA unit in the CPU). - setaddr(prim, ot); - setlen(prim, 5); - - // Texture page (reset active page and set dither/mask bits) - prim->code[0] = 0xe1000000 | env->tpage; - prim->code[0] |= (env->dtd & 1) << 9; - prim->code[0] |= (env->dfe & 1) << 10; + _sdk_validate_args(ot && env, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); +} - // Texture window - prim->code[1] = 0xe2000000; - prim->code[1] |= _get_window_mask(env->tw.w); - prim->code[1] |= _get_window_mask(env->tw.h) << 5; - prim->code[1] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10 - prim->code[1] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15 - - // Set drawing area top left - prim->code[2] = 0xe3000000; - prim->code[2] |= env->clip.x & 0x3ff; - prim->code[2] |= (env->clip.y & 0x3ff) << 10; - - // Set drawing area bottom right - prim->code[3] = 0xe4000000; - prim->code[3] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff; - prim->code[3] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10; - - // Set drawing offset - prim->code[4] = 0xe5000000; - prim->code[4] |= (env->clip.x + 
env->ofs[0]) & 0x7ff; - prim->code[4] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11; +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env) { + _sdk_validate_args(ot && env, -1); - if (env->isbg) { - setlen(prim, 8); - - // Rectangle fill - // FIXME: reportedly this command doesn't accept height values >511... - prim->code[5] = 0x02000000; - //prim->code[5] |= env->r0 | (env->g0 << 8) | (env->b0 << 16); - //prim->code[6] = env->clip.x; - //prim->code[6] |= env->clip.y << 16; - prim->code[5] |= *((const uint32_t *) &(env->isbg)) >> 8; - prim->code[6] = *((const uint32_t *) &(env->clip.x)); - prim->code[7] = env->clip.w; - prim->code[7] |= _min(env->clip.h, 0x1ff) << 16; - } - - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0); + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); } void PutDrawEnv(DRAWENV *env) { - DrawOTagEnv((const uint32_t *) 0x00ffffff, env); + _sdk_validate_args_void(env); + + DrawOTagEnv((const uint32_t *) 0xffffff, env); } // This function skips rebuilding the cached packet whenever possible and is // useful if the DRAWENV structure is never modified (which is the case most of // the time). 
void PutDrawEnvFast(DRAWENV *env) { + _sdk_validate_args_void(env); + if (!(env->dr_env.tag)) - DrawOTagEnv((const uint32_t *) 0x00ffffff, env); + DrawOTagEnv((const uint32_t *) 0xffffff, env); else DrawOTag((const uint32_t *) &(env->dr_env)); } @@ -130,6 +147,8 @@ void PutDrawEnvFast(DRAWENV *env) { /* Display API */ DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) { + _sdk_validate_args(env && (w > 0) && (h > 0), 0); + env->disp.x = x; env->disp.y = y; env->disp.w = w; @@ -148,6 +167,8 @@ DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) { } void PutDispEnv(const DISPENV *env) { + _sdk_validate_args_void(env); + uint32_t h_range, v_range, mode, fb_pos; mode = _gpu_video_mode << 3; @@ -216,6 +237,8 @@ void PutDispEnv(const DISPENV *env) { /* Deprecated "raw" display API */ void PutDispEnvRaw(const DISPENV_RAW *env) { + _sdk_validate_args_void(env); + uint32_t h_range, v_range, fb_pos; h_range = 608 + env->vid_xpos; @@ -233,3 +256,30 @@ void PutDispEnvRaw(const DISPENV_RAW *env) { GPU_GP1 = 0x08000000 | env->vid_mode; // Set video mode GPU_GP1 = 0x05000000 | fb_pos; // Set VRAM location to display } + +/* Misc. display functions */ + +GPU_VideoMode GetVideoMode(void) { + return _gpu_video_mode; +} + +void SetVideoMode(GPU_VideoMode mode) { + uint32_t _mode, stat = GPU_GP1; + + _gpu_video_mode = mode & 1; + + _mode = (mode & 1) << 3; + _mode |= (stat >> 17) & 0x37; // GPUSTAT bits 17-22 -> command bits 0-5 + _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> command bit 6 + _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> command bit 7 + + GPU_GP1 = 0x08000000 | _mode; +} + +int GetODE(void) { + return (GPU_GP1 >> 31); +} + +void SetDispMask(int mask) { + GPU_GP1 = 0x03000000 | (mask ? 
0 : 1); +} diff --git a/libpsn00b/psxgpu/font.c b/libpsn00b/psxgpu/font.c index b1c3c7a..c9d60f1 100644 --- a/libpsn00b/psxgpu/font.c +++ b/libpsn00b/psxgpu/font.c @@ -1,4 +1,5 @@ #include <stdint.h> +#include <assert.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -23,6 +24,7 @@ uint16_t _font_clut; extern uint8_t _gpu_debug_font[]; void FntLoad(int x, int y) { + _sdk_validate_args_void((x >= 0) && (y >= 0) && (x < 1024) && (y < 1024)); RECT pos; TIM_IMAGE tim; @@ -66,7 +68,8 @@ void FntLoad(int x, int y) { } int FntOpen(int x, int y, int w, int h, int isbg, int n) { - + _sdk_validate_args((w > 0) && (h > 0) && (n > 0), -1); + int i; // Initialize a text stream @@ -98,7 +101,8 @@ int FntOpen(int x, int y, int w, int h, int isbg, int n) { } int FntPrint(int id, const char *fmt, ...) { - + _sdk_validate_args((id < _nstreams) && fmt, -1); + int n; va_list ap; @@ -124,7 +128,8 @@ int FntPrint(int id, const char *fmt, ...) { } char *FntFlush(int id) { - + _sdk_validate_args(id < _nstreams, 0); + char *opri; SPRT_8 *sprt; DR_TPAGE *tpage; @@ -226,7 +231,8 @@ char *FntFlush(int id) { } char *FntSort(uint32_t *ot, char *pri, int x, int y, const char *text) { - + _sdk_validate_args(ot && pri, 0); + DR_TPAGE *tpage; SPRT_8 *sprt = (SPRT_8*)pri; int i; diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index fc018a4..e02c3c2 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -1,6 +1,6 @@ /* * PSn00bSDK GPU library (image and VRAM transfer functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * * TODO: MoveImage() is currently commented out as it won't trigger a DMA IRQ, * making it unusable as a draw queue command. 
A way around this (perhaps using @@ -9,11 +9,12 @@ #include <stdint.h> #include <assert.h> +#include <psxetc.h> #include <psxgpu.h> #include <hwregs_c.h> #define QUEUE_LENGTH 16 -#define DMA_CHUNK_LENGTH 8 +#define DMA_CHUNK_LENGTH 16 /* Internal globals */ @@ -37,6 +38,10 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { length += DMA_CHUNK_LENGTH - 1; } + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + + SetDrawOpType(DRAWOP_TYPE_DMA); GPU_GP1 = 0x04000000; // Disable DMA request GPU_GP0 = 0x01000000; // Flush cache @@ -49,18 +54,24 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { // Enable DMA request, route to GP0 (2) or from GPU_READ (3) GPU_GP1 = 0x04000002 | (write ^ 1); - DMA_MADR(2) = (uint32_t) data; + while ((DMA_CHCR(DMA_GPU) & (1 << 24)) || !(GPU_GP1 & (1 << 28))) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(2) = 0x00010000 | length; + DMA_BCR(DMA_GPU) = 0x00010000 | length; else - DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_GPU) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(2) = 0x01000200 | write; + DMA_CHCR(DMA_GPU) = 0x01000200 | write; } /* VRAM transfer API */ int LoadImage(const RECT *rect, const uint32_t *data) { + _sdk_validate_args(rect && data, -1); + int index = _next_saved_rect; _saved_rects[index] = *rect; @@ -75,6 +86,8 @@ int LoadImage(const RECT *rect, const uint32_t *data) { } int StoreImage(const RECT *rect, uint32_t *data) { + _sdk_validate_args(rect && data, -1); + int index = _next_saved_rect; _saved_rects[index] = *rect; @@ -88,22 +101,53 @@ int StoreImage(const RECT *rect, uint32_t *data) { ); } +int MoveImage(const RECT *rect, int x, int y) { + _sdk_validate_args(rect, -1); + + int index = _next_saved_rect; + + _saved_rects[index] = *rect; + _next_saved_rect = (index + 1) % QUEUE_LENGTH; + + return EnqueueDrawOp( + (void *) &MoveImage2, + 
(uint32_t) &_saved_rects[index], + (uint32_t) x, + (uint32_t) y + ); +} + void LoadImage2(const RECT *rect, const uint32_t *data) { + _sdk_validate_args_void(rect && data); + _dma_transfer(rect, (uint32_t *) data, 1); } void StoreImage2(const RECT *rect, uint32_t *data) { + _sdk_validate_args_void(rect && data); + _dma_transfer(rect, data, 0); } -/*void MoveImage2(const RECT *rect, int x, int y) { +void MoveImage2(const RECT *rect, int x, int y) { + _sdk_validate_args_void(rect); + + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + + SetDrawOpType(DRAWOP_TYPE_GPU_IRQ); + GPU_GP0 = 0x80000000; //GPU_GP0 = rect->x | (rect->y << 16); GPU_GP0 = *((const uint32_t *) &(rect->x)); GPU_GP0 = (x & 0xffff) | (y << 16); //GPU_GP0 = rect->w | (rect->h << 16); GPU_GP0 = *((const uint32_t *) &(rect->w)); -}*/ + + // As no DMA transfer is performed by this command, the GPU IRQ is used + // instead of the DMA IRQ to trigger the draw queue callback. + GPU_GP0 = 0x1f000000; +} /* .TIM image parsers */ @@ -112,6 +156,8 @@ void StoreImage2(const RECT *rect, uint32_t *data) { // returning pointers to them, which become useless once the .TIM file is // unloaded from main RAM. int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) { + _sdk_validate_args(tim && info, 1); + if ((*(tim++) & 0xffff) != 0x0010) return 1; @@ -138,6 +184,8 @@ int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) { } int GetTimInfo(const uint32_t *tim, TIM_IMAGE *info) { + _sdk_validate_args(tim && info, 1); + if ((*(tim++) & 0xffff) != 0x0010) return 1; diff --git a/libpsn00b/psxpress/README.md b/libpsn00b/psxpress/README.md index a894874..df18ec5 100644 --- a/libpsn00b/psxpress/README.md +++ b/libpsn00b/psxpress/README.md @@ -1,14 +1,19 @@ # PSn00bSDK MDEC library -This is a fully open source reimplementation of the official SDK's "data +This is a fully original reimplementation of the official SDK's "data compression" library. 
This library is made up of two parts, the MDEC API and functions to decompress Huffman-encoded bitstreams (.BS files, or frames in -.STR files) into data to be fed to the MDEC. FMV playback is not part of this -library (nor the official one) per se, but can implemented by using these APIs -alongside some code to stream data from the CD drive. +.STR files) into data to be fed to the MDEC. Two different implementations of +the latter are provided, one using the GTE and scratchpad region and an older +one using a large lookup table in main RAM. -**Currently only version 1 and 2 bitstreams are supported**. +FMV playback is not part of this library per se, but can implemented using the +APIs defined here alongside some code to stream data from the CD drive. + +Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI +bitstreams are not supported, but no encoder is publicly available for those +anyway. ## MDEC API @@ -26,14 +31,16 @@ The following functions are currently provided: - `DecDCTvlcStart()`, `DecDCTvlcContinue()`: a decompressor implementation that uses a small (<1 KB) lookup table and leverages the GTE, written in assembly. - `DecDCTvlcCopyTable()` can optionally be called to temporarily move the table - to the scratchpad region to improve decompression speed. -- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: a different implementation using + `DecDCTvlcCopyTableV2()` or `DecDCTvlcCopyTableV3()` may optionally be called + to temporarily move the table to the scratchpad region in order to boost + decompression speed. +- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: an older implementation using a large (34 KB) lookup table in main RAM, written in C. The table must be - decompressed ahead of time using `DecDCTvlcBuild()`, but can be deallocated - when no longer needed. + decompressed ahead of time manually using `DecDCTvlcBuild()`, but can be + deallocated when no longer needed. **This implementation does not support** + **version 3 bitstreams**. 
- `DecDCTvlc()`, `DecDCTvlc2()`: wrappers around the functions listed above, - for compatibility with the Sony SDK. Using them is not recommended. + for compatibility with the Sony SDK. ## SPU ADPCM encoding API diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index d15a4db..b3aa837 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -1,11 +1,11 @@ /* * PSn00bSDK MDEC library (low-level MDEC/DMA API) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> #include <assert.h> -#include <psxapi.h> +#include <psxetc.h> #include <psxpress.h> #include <hwregs_c.h> @@ -14,14 +14,14 @@ /* Default IDCT matrix and quantization tables */ -#define S0 0x5a82 // 0x4000 * cos(0/16 * pi) * sqrt(2) -#define S1 0x7d8a // 0x4000 * cos(1/16 * pi) * 2 -#define S2 0x7641 // 0x4000 * cos(2/16 * pi) * 2 -#define S3 0x6a6d // 0x4000 * cos(3/16 * pi) * 2 -#define S4 0x5a82 // 0x4000 * cos(4/16 * pi) * 2 -#define S5 0x471c // 0x4000 * cos(5/16 * pi) * 2 -#define S6 0x30fb // 0x4000 * cos(6/16 * pi) * 2 -#define S7 0x18f8 // 0x4000 * cos(7/16 * pi) * 2 +#define S0 0x5a82 // (1 << 14) * cos(0/16 * pi) * sqrt(2) +#define S1 0x7d8a // (1 << 14) * cos(1/16 * pi) * 2 +#define S2 0x7641 // (1 << 14) * cos(2/16 * pi) * 2 +#define S3 0x6a6d // (1 << 14) * cos(3/16 * pi) * 2 +#define S4 0x5a82 // (1 << 14) * cos(4/16 * pi) * 2 +#define S5 0x471c // (1 << 14) * cos(5/16 * pi) * 2 +#define S6 0x30fb // (1 << 14) * cos(6/16 * pi) * 2 +#define S7 0x18f8 // (1 << 14) * cos(7/16 * pi) * 2 static const DECDCTENV _default_mdec_env = { // The default luma and chroma quantization table is based on the MPEG-1 @@ -84,34 +84,38 @@ static const DECDCTENV _default_mdec_env = { /* Public API */ void DecDCTReset(int mode) { - FastEnterCriticalSection(); + SetDMAPriority(DMA_MDEC_IN, 3); + SetDMAPriority(DMA_MDEC_OUT, 3); + DMA_CHCR(DMA_MDEC_IN) = 0x00000201; // Stop DMA + DMA_CHCR(DMA_MDEC_OUT) = 0x00000200; // Stop DMA - DMA_DPCR 
|= 0x000000bb; // Enable DMA0 and DMA1 - DMA_CHCR(0) = 0x00000201; // Stop DMA0 - DMA_CHCR(1) = 0x00000200; // Stop DMA1 - MDEC1 = 0x80000000; // Reset MDEC - MDEC1 = 0x60000000; // Enable DMA in/out requests + MDEC1 = 0x80000000; // Reset MDEC + MDEC1 = 0x60000000; // Enable DMA in/out requests - FastExitCriticalSection(); if (!mode) DecDCTPutEnv(0, 0); } void DecDCTPutEnv(const DECDCTENV *env, int mono) { - const DECDCTENV *_env = env ? env : &_default_mdec_env; DecDCTinSync(0); + if (!env) + env = &_default_mdec_env; MDEC0 = 0x60000000; // Set IDCT matrix - DecDCTinRaw((const uint32_t *) _env->dct, 32); + DecDCTinRaw((const uint32_t *) env->dct, 32); DecDCTinSync(0); - MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s) - DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32); + MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set quantization table(s) + DecDCTinRaw((const uint32_t *) env->iq_y, mono ? 16 : 32); DecDCTinSync(0); } void DecDCTin(const uint32_t *data, int mode) { + _sdk_validate_args_void(data); + uint32_t header = *data; + DecDCTinSync(0); + if (mode == DECDCT_MODE_RAW) MDEC0 = header; else if (mode & DECDCT_MODE_24BPP) @@ -126,18 +130,21 @@ void DecDCTin(const uint32_t *data, int mode) { // data length as an argument rather than parsing it from the first 4 bytes of // the stream. 
void DecDCTinRaw(const uint32_t *data, size_t length) { + _sdk_validate_args_void(data && length); + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { _sdk_log("input data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } - DMA_MADR(0) = (uint32_t) data; + DMA_MADR(DMA_MDEC_IN) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(0) = 0x00010000 | length; + DMA_BCR(DMA_MDEC_IN) = 0x00010000 | length; else - DMA_BCR(0) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_MDEC_IN) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(0) = 0x01000201; + DMA_CHCR(DMA_MDEC_IN) = 0x01000201; } int DecDCTinSync(int mode) { @@ -149,11 +156,13 @@ int DecDCTinSync(int mode) { return 0; } - _sdk_log("DecDCTinSync() timeout\n"); + _sdk_log("DecDCTinSync() timeout, MDEC1=0x%08x\n", MDEC1); return -1; } void DecDCTout(uint32_t *data, size_t length) { + _sdk_validate_args_void(data && length); + DecDCToutSync(0); if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { @@ -161,24 +170,25 @@ void DecDCTout(uint32_t *data, size_t length) { length += DMA_CHUNK_LENGTH - 1; } - DMA_MADR(1) = (uint32_t) data; + DMA_MADR(DMA_MDEC_OUT) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(1) = 0x00010000 | length; + DMA_BCR(DMA_MDEC_OUT) = 0x00010000 | length; else - DMA_BCR(1) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_MDEC_OUT) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(1) = 0x01000200; + DMA_CHCR(DMA_MDEC_OUT) = 0x01000200; } int DecDCToutSync(int mode) { if (mode) - return (DMA_CHCR(1) >> 24) & 1; + return (DMA_CHCR(DMA_MDEC_OUT) >> 24) & 1; for (int i = MDEC_SYNC_TIMEOUT; i; i--) { - if (!(DMA_CHCR(1) & (1 << 24))) + if (!(DMA_CHCR(DMA_MDEC_OUT) & (1 << 24))) return 0; } - _sdk_log("DecDCToutSync() timeout\n"); + _sdk_log("DecDCToutSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_MDEC_OUT)); 
return -1; } diff --git a/libpsn00b/psxpress/vlc.c b/libpsn00b/psxpress/vlc.c index 4e3e283..36cfbe2 100644 --- a/libpsn00b/psxpress/vlc.c +++ b/libpsn00b/psxpress/vlc.c @@ -1,6 +1,6 @@ /* * PSn00bSDK MDEC library (support code for the main VLC decompressor) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> @@ -10,87 +10,120 @@ /* Huffman code lookup table */ -#define _val1(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff)) -#define _val2(rl, dc, len) (_val1(rl, dc) | (len << 16)) +#define _DC(y, c) (((y) << 4) | (c)) +#define _AC(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff)) +#define _ACL(rl, dc, len) (_AC(rl, dc) | ((len) << 16)) -#define _pair(rl, dc) _val1(rl, dc), _val1(rl, -(dc)) -#define _pair2(rl, dc, len) _val2(rl, dc, len), _val2(rl, -(dc), len) -#define _pair3(rl, dc, len) \ - _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len) -#define _pair4(rl, dc, len) \ - _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len) +#define _DC2(y, c) _DC(y, c), _DC(y, c) +#define _DC3(y, c) _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c) +#define _DC4(y, c) \ + _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c), \ + _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c) +#define _AC2(rl, dc) _AC(rl, dc), _AC(rl, -(dc)) +#define _ACL2(rl, dc, len) _ACL(rl, dc, len), _ACL(rl, -(dc), len) +#define _ACL3(rl, dc, len) \ + _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, -(dc), len), _ACL(rl, -(dc), len) +#define _ACL4(rl, dc, len) \ + _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, -(dc), 
len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), \ + _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len) // This table isn't compressed since it makes no sense to compress less than a // kilobyte's worth of data. -static const DECDCTTAB _default_huffman_table = { - .lut0 = { +static const VLC_TableV3 _default_huffman_table = { + .ac0 = { // 11 x - _pair( 0, 1) + _AC2( 0, 1) }, - .lut2 = { + .ac2 = { // 01 0xx - _pair2( 0, 2, 5), _pair2( 2, 1, 5), + _ACL2( 0, 2, 5), _ACL2( 2, 1, 5), // 01 1x- - _pair3( 1, 1, 4) + _ACL3( 1, 1, 4) }, - .lut3 = { + .ac3 = { // 001 00xxxx - _pair2(13, 1, 9), _pair2( 0, 6, 9), _pair2(12, 1, 9), _pair2(11, 1, 9), - _pair2( 3, 2, 9), _pair2( 1, 3, 9), _pair2( 0, 5, 9), _pair2(10, 1, 9), + _ACL2(13, 1, 9), _ACL2( 0, 6, 9), _ACL2(12, 1, 9), _ACL2(11, 1, 9), + _ACL2( 3, 2, 9), _ACL2( 1, 3, 9), _ACL2( 0, 5, 9), _ACL2(10, 1, 9), // 001 xxx--- - _pair4( 0, 3, 6), _pair4( 4, 1, 6), _pair4( 3, 1, 6) + _ACL4( 0, 3, 6), _ACL4( 4, 1, 6), _ACL4( 3, 1, 6) }, - .lut4 = { + .ac4 = { // 0001 xxx - _pair( 7, 1), _pair( 6, 1), _pair( 1, 2), _pair( 5, 1) + _AC2( 7, 1), _AC2( 6, 1), _AC2( 1, 2), _AC2( 5, 1) }, - .lut5 = { + .ac5 = { // 00001 xxx - _pair( 2, 2), _pair( 9, 1), _pair( 0, 4), _pair( 8, 1) + _AC2( 2, 2), _AC2( 9, 1), _AC2( 0, 4), _AC2( 8, 1) }, - .lut7 = { + .ac7 = { // 0000001 xxxx - _pair(16, 1), _pair( 5, 2), _pair( 0, 7), _pair( 2, 3), - _pair( 1, 4), _pair(15, 1), _pair(14, 1), _pair( 4, 2) + _AC2(16, 1), _AC2( 5, 2), _AC2( 0, 7), _AC2( 2, 3), + _AC2( 1, 4), _AC2(15, 1), _AC2(14, 1), _AC2( 4, 2) }, - .lut8 = { + .ac8 = { // 00000001 xxxxx - _pair( 0, 11), _pair( 8, 2), _pair( 4, 3), _pair( 0, 10), - _pair( 2, 4), _pair( 7, 2), _pair(21, 1), _pair(20, 1), - _pair( 0, 9), _pair(19, 1), _pair(18, 1), _pair( 1, 5), - _pair( 3, 3), _pair( 0, 8), _pair( 6, 2), _pair(17, 1) + _AC2( 0, 11), _AC2( 8, 2), _AC2( 4, 3), _AC2( 0, 10), + _AC2( 2, 4), _AC2( 7, 2), _AC2(21, 1), _AC2(20, 1), + _AC2( 0, 
9), _AC2(19, 1), _AC2(18, 1), _AC2( 1, 5), + _AC2( 3, 3), _AC2( 0, 8), _AC2( 6, 2), _AC2(17, 1) }, - .lut9 = { + .ac9 = { // 000000001 xxxxx - _pair(10, 2), _pair( 9, 2), _pair( 5, 3), _pair( 3, 4), - _pair( 2, 5), _pair( 1, 7), _pair( 1, 6), _pair( 0, 15), - _pair( 0, 14), _pair( 0, 13), _pair( 0, 12), _pair(26, 1), - _pair(25, 1), _pair(24, 1), _pair(23, 1), _pair(22, 1) + _AC2(10, 2), _AC2( 9, 2), _AC2( 5, 3), _AC2( 3, 4), + _AC2( 2, 5), _AC2( 1, 7), _AC2( 1, 6), _AC2( 0, 15), + _AC2( 0, 14), _AC2( 0, 13), _AC2( 0, 12), _AC2(26, 1), + _AC2(25, 1), _AC2(24, 1), _AC2(23, 1), _AC2(22, 1) }, - .lut10 = { + .ac10 = { // 0000000001 xxxxx - _pair( 0, 31), _pair( 0, 30), _pair( 0, 29), _pair( 0, 28), - _pair( 0, 27), _pair( 0, 26), _pair( 0, 25), _pair( 0, 24), - _pair( 0, 23), _pair( 0, 22), _pair( 0, 21), _pair( 0, 20), - _pair( 0, 19), _pair( 0, 18), _pair( 0, 17), _pair( 0, 16) + _AC2( 0, 31), _AC2( 0, 30), _AC2( 0, 29), _AC2( 0, 28), + _AC2( 0, 27), _AC2( 0, 26), _AC2( 0, 25), _AC2( 0, 24), + _AC2( 0, 23), _AC2( 0, 22), _AC2( 0, 21), _AC2( 0, 20), + _AC2( 0, 19), _AC2( 0, 18), _AC2( 0, 17), _AC2( 0, 16) }, - .lut11 = { + .ac11 = { // 00000000001 xxxxx - _pair( 0, 40), _pair( 0, 39), _pair( 0, 38), _pair( 0, 37), - _pair( 0, 36), _pair( 0, 35), _pair( 0, 34), _pair( 0, 33), - _pair( 0, 32), _pair( 1, 14), _pair( 1, 13), _pair( 1, 12), - _pair( 1, 11), _pair( 1, 10), _pair( 1, 9), _pair( 1, 8) + _AC2( 0, 40), _AC2( 0, 39), _AC2( 0, 38), _AC2( 0, 37), + _AC2( 0, 36), _AC2( 0, 35), _AC2( 0, 34), _AC2( 0, 33), + _AC2( 0, 32), _AC2( 1, 14), _AC2( 1, 13), _AC2( 1, 12), + _AC2( 1, 11), _AC2( 1, 10), _AC2( 1, 9), _AC2( 1, 8) }, - .lut12 = { + .ac12 = { // 000000000001 xxxxx - _pair( 1, 18), _pair( 1, 17), _pair( 1, 16), _pair( 1, 15), - _pair( 6, 3), _pair(16, 2), _pair(15, 2), _pair(14, 2), - _pair(13, 2), _pair(12, 2), _pair(11, 2), _pair(31, 1), - _pair(30, 1), _pair(29, 1), _pair(28, 1), _pair(27, 1) + _AC2( 1, 18), _AC2( 1, 17), _AC2( 1, 16), _AC2( 1, 15), + _AC2( 6, 
3), _AC2(16, 2), _AC2(15, 2), _AC2(14, 2), + _AC2(13, 2), _AC2(12, 2), _AC2(11, 2), _AC2(31, 1), + _AC2(30, 1), _AC2(29, 1), _AC2(28, 1), _AC2(27, 1) + }, + .dc = { + // 00----- + _DC4(1, 0), _DC4(1, 0), _DC4(1, 0), _DC4(1, 0), + // 01----- + _DC4(2, 1), _DC4(2, 1), _DC4(2, 1), _DC4(2, 1), + // 100---- + _DC4(0, 2), _DC4(0, 2), + // 101---- + _DC4(3, 2), _DC4(3, 2), + // 110---- + _DC4(4, 3), _DC4(4, 3), + // 1110--- + _DC4(5, 4), + // 11110-- + _DC3(6, 5), + // 111110- + _DC2(7, 6), + // 1111110 + _DC(8, 7), + // 1111111(0) + _DC(0, 8) + }, + .dc_len = { + _DC(3, 2), _DC(2, 2), _DC(2, 2), _DC(3, 3), + _DC(3, 4), _DC(4, 5), _DC(5, 6), _DC(6, 7), + _DC(7, 8) } }; @@ -100,7 +133,7 @@ static const DECDCTTAB _default_huffman_table = { static VLC_Context _default_context; static size_t _max_buffer_size = 0; -const DECDCTTAB *_vlc_huffman_table = &_default_huffman_table; +const VLC_TableV3 *_vlc_huffman_table = &_default_huffman_table; /* Stateful VLC decoder API (for Sony SDK compatibility) */ @@ -120,10 +153,19 @@ size_t DecDCTvlcSize(size_t size) { /* Lookup table relocation API */ -void DecDCTvlcCopyTable(DECDCTTAB *addr) { +void DecDCTvlcCopyTableV2(VLC_TableV2 *addr) { + if (addr) { + _vlc_huffman_table = (const VLC_TableV3 *) addr; + memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV2)); + } else { + _vlc_huffman_table = &_default_huffman_table; + } +} + +void DecDCTvlcCopyTableV3(VLC_TableV3 *addr) { if (addr) { - _vlc_huffman_table = addr; - memcpy(addr, &_default_huffman_table, sizeof(DECDCTTAB)); + _vlc_huffman_table = (const VLC_TableV3 *) addr; + memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV3)); } else { _vlc_huffman_table = &_default_huffman_table; } diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s index f3a1c67..2de22f7 100644 --- a/libpsn00b/psxpress/vlc.s +++ b/libpsn00b/psxpress/vlc.s @@ -1,375 +1,576 @@ # PSn00bSDK MDEC library (GTE-accelerated VLC decompressor) -# (C) 2022 spicyjpeg - MPL licensed +# (C) 2022-2023 
spicyjpeg - MPL licensed # -# Register map: -# - $a0 = ctx -# - $a1 = output -# - $a2 = max_size -# - $a3 = input -# - $t0 = window -# - $t1 = next_window -# - $t2 = remaining -# - $t3 = quant_scale -# - $t4 = is_v3 -# - $t5 = bit_offset -# - $t6 = block_index -# - $t7 = coeff_index -# - $t8 = _vlc_huffman_table -# - $t9 = &ac_jump_area +# TODO: reduce the size of the v3 DC coefficient decoder; currently the code is +# duplicated for each block type, but it can probably be shortened with no +# performance impact... -.set noreorder +.include "gtereg.inc" -.set VLC_Context_input, 0 -.set VLC_Context_window, 4 -.set VLC_Context_next_window, 8 -.set VLC_Context_remaining, 12 -.set VLC_Context_quant_scale, 16 -.set VLC_Context_is_v3, 18 -.set VLC_Context_bit_offset, 19 -.set VLC_Context_block_index, 20 -.set VLC_Context_coeff_index, 21 - -.set DECDCTTAB_lut0, 0 -.set DECDCTTAB_lut2, 4 -.set DECDCTTAB_lut3, 36 -.set DECDCTTAB_lut4, 292 -.set DECDCTTAB_lut5, 308 -.set DECDCTTAB_lut7, 324 -.set DECDCTTAB_lut8, 356 -.set DECDCTTAB_lut9, 420 -.set DECDCTTAB_lut10, 484 -.set DECDCTTAB_lut11, 548 -.set DECDCTTAB_lut12, 612 +.set noreorder +.set noat + +.set value, $v0 +.set length, $v1 +.set ctx, $a0 +.set output, $a1 +.set max_size, $a2 +.set input, $a3 +.set temp, $t0 +.set window, $t1 +.set next_window, $t2 +.set remaining, $t3 +.set is_v3, $t4 +.set bit_offset, $t5 +.set block_index, $t6 +.set coeff_index, $t7 +.set quant_scale, $s0 +.set last_y, $s1 +.set last_cr, $s2 +.set last_cb, $s3 +.set huffman_table, $t8 +.set ac_jump_area, $t9 + +.set VLC_Context_input, 0x0 +.set VLC_Context_window, 0x4 +.set VLC_Context_next_window, 0x8 +.set VLC_Context_remaining, 0xc +.set VLC_Context_is_v3, 0x10 +.set VLC_Context_bit_offset, 0x11 +.set VLC_Context_block_index, 0x12 +.set VLC_Context_coeff_index, 0x13 +.set VLC_Context_quant_scale, 0x14 +.set VLC_Context_last_y, 0x16 +.set VLC_Context_last_cr, 0x18 +.set VLC_Context_last_cb, 0x1a + +.set VLC_Table_ac0, 0x0 +.set VLC_Table_ac2, 
0x4 +.set VLC_Table_ac3, 0x24 +.set VLC_Table_ac4, 0x124 +.set VLC_Table_ac5, 0x134 +.set VLC_Table_ac7, 0x144 +.set VLC_Table_ac8, 0x164 +.set VLC_Table_ac9, 0x1a4 +.set VLC_Table_ac10, 0x1e4 +.set VLC_Table_ac11, 0x224 +.set VLC_Table_ac12, 0x264 +.set VLC_Table_dc, 0x2a4 +.set VLC_Table_dc_len, 0x324 .section .text.DecDCTvlcStart .global DecDCTvlcStart .type DecDCTvlcStart, @function DecDCTvlcStart: + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + # Create a new context on-the-fly without writing it to memory then jump # into DecDCTvlcContinue(), skipping context loading. - lw $t0, 8($a3) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) - nop - srl $v0, $t0, 16 - sll $t0, 16 - - lw $t1, 12($a3) # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) - or $t0, $v0 - srl $v0, $t1, 16 - sll $t1, 16 - - lhu $t2, 0($a3) # remaining = bs->uncomp_length * 2 - or $t1, $v0 - - lhu $t3, 4($a3) # quant_scale = (bs->quant_scale & 63) << 10 - sll $t2, 1 - andi $t3, 63 - - lhu $t4, 6($a3) # is_v3 = !(bs->version < 3) - sll $t3, 10 - sltiu $t4, $t4, 3 - xori $t4, 1 - - li $t5, 32 # bit_offset = 32 - li $t6, 5 # block_index = 5 - li $t7, 0 # coeff_index = 0 + lw window, 8(input) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) + li last_y, 0 + srl temp, window, 16 + sll window, 16 + or window, temp + + # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) + lw next_window, 12(input) + li last_cr, 0 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + + lhu remaining, 0(input) # remaining = bs->uncomp_length * 2 + li last_cb, 0 + sll remaining, 1 + + lw temp, 4(input) # quant_scale = (bs->quant_scale & 63) << 10 + li bit_offset, 32 + andi quant_scale, temp, 63 + sll quant_scale, 10 + + srl temp, 16 # is_v3 = !(bs->version < 3) + sltiu is_v3, temp, 3 + xori is_v3, 1 + + li block_index, 5 + li coeff_index, 0 j _vlc_skip_context_load - addiu $a3, 16 # input = &(bs->data[2]) + addiu input, 16 # input = 
&(bs->data[2]) .section .text.DecDCTvlcContinue .global DecDCTvlcContinue .type DecDCTvlcContinue, @function DecDCTvlcContinue: - lw $a3, VLC_Context_input($a0) - lw $t0, VLC_Context_window($a0) - lw $t1, VLC_Context_next_window($a0) - lw $t2, VLC_Context_remaining($a0) - lhu $t3, VLC_Context_quant_scale($a0) - lb $t4, VLC_Context_is_v3($a0) - lb $t5, VLC_Context_bit_offset($a0) - lb $t6, VLC_Context_block_index($a0) - lb $t7, VLC_Context_coeff_index($a0) + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + + lw input, VLC_Context_input(ctx) + lw window, VLC_Context_window(ctx) + lw next_window, VLC_Context_next_window(ctx) + lw remaining, VLC_Context_remaining(ctx) + lb is_v3, VLC_Context_is_v3(ctx) + lb bit_offset, VLC_Context_bit_offset(ctx) + lb block_index, VLC_Context_block_index(ctx) + lb coeff_index, VLC_Context_coeff_index(ctx) + lhu quant_scale, VLC_Context_quant_scale(ctx) + lh last_y, VLC_Context_last_y(ctx) + lh last_cr, VLC_Context_last_cr(ctx) + lh last_cb, VLC_Context_last_cb(ctx) _vlc_skip_context_load: - # Determine how many bytes to output. This whole block of code basically - # does this: + # Determine how many bytes to output. 
+ # if (max_size <= 0) max_size = 0x3fff0000 # max_size = min((max_size - 1) * 2, remaining) # remaining -= max_size - bgtz $a2, .Lmax_size_valid # if (max_size <= 0) max_size = 0x7ffe0000 - addiu $a2, -1 # else max_size = (max_size - 1) * 2 - lui $a2, 0x3fff + bgtz max_size, .Lmax_size_valid + addiu max_size, -1 + lui max_size, 0x3fff .Lmax_size_valid: - sll $a2, 1 + sll max_size, 1 - blt $a2, $t2, .Lmax_size_ok # if (max_size > remaining) max_size = remaining - lui $v1, 0x3800 - move $a2, $t2 -.Lmax_size_ok: - subu $t2, $a2 # remaining -= max_size + subu remaining, max_size + bgez remaining, .Lmax_size_ok + lui temp, 0x3800 + addu max_size, remaining + li remaining, 0 + +.Lmax_size_ok: # Write the length of the data that will be decoded to first 4 bytes of the # output buffer, which will be then parsed by DecDCTin(). - srl $v0, $a2, 1 # output[0] = 0x38000000 | (max_size / 2) - or $v0, $v1 - sw $v0, 0($a1) + srl value, max_size, 1 # output[0] = 0x38000000 | (max_size / 2) + or value, temp + sw value, 0(output) # Obtain the addresses of the lookup table and jump area in advance so that # they don't have to be retrieved for each coefficient decoded. - lw $t8, _vlc_huffman_table - la $t9, .Lac_prefix_10 + lw huffman_table, _vlc_huffman_table + la ac_jump_area, .Lac_prefix_01 - 32 - beqz $a2, .Lstop_processing - addiu $a1, 4 # output = (uint16_t *) &output[1] + beqz max_size, .Lstop_processing + addiu output, 4 .Lprocess_next_code_loop: # while (max_size) # This is the "hot" part of the decoder, executed for each code in the # bitstream. The first step is to determine if the next code is a DC or AC - # coefficient. - bnez $t7, .Lprocess_ac_coefficient - addiu $t7, 1 # coeff_index++ - bnez $t4, .Lprocess_dc_v3_coefficient - li $v1, 0x01ff + # coefficient; at the same time the GTE is given the task of counting the + # number of leading zeroes/ones in the code (which takes 2 more cycles). 
+ mtc2 window, C2_LZCS + + bnez coeff_index, .Lprocess_ac_coefficient + addiu coeff_index, 1 + bnez is_v3, .Lprocess_dc_v3_coefficient + li temp, 0x1ff .Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) # The DC coefficient in version 2 frames is not compressed. Value 0x1ff is # used to signal the end of the bitstream. - srl $v0, $t0, 22 # prefix = (window >> (32 - 10)) - beq $v0, $v1, .Lstop_processing # if (prefix == 0x1ff) break - or $v0, $t3 # *output = prefix | quant_scale - sll $t0, 10 # window <<= 10 - b .Lwrite_value - addiu $t5, -10 # bit_offset -= 10 + # prefix = window >> (32 - 10) + # if (prefix == 0x1ff) break + # *output = prefix | quant_scale + srl value, window, 22 + beq value, temp, .Lstop_processing + or value, quant_scale + sll window, 10 + addiu bit_offset, -10 + + b .Lfeed_bitstream + sh value, 0(output) .Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) - # TODO: version 3 is currently not supported. - jr $ra - li $v0, -1 - -.Lprocess_ac_coefficient: # if (coeff_index) - # Check whether the prefix code is one of the shorter, more common ones, - # and start counting the number of leading zeroes/ones using the GTE (which - # takes 2 more cycles). - srl $v0, $t0, 30 - li $v1, 3 - beq $v0, $v1, .Lac_prefix_11 - li $v1, 2 - beq $v0, $v1, .Lac_prefix_10 - li $v1, 1 - mtc2 $t0, $30 - beq $v0, $v1, .Lac_prefix_01 + # Version 3 DC coefficients are variable-length deltas, prefixed with a + # Huffman code indicating their length. Since the prefix code is up to 7 + # bits long, it makes sense to decode it with a simple 128-byte lookup + # table rather than using the GTE. The codes are different for luma and + # chroma blocks, so each table entry contains the decoded length for both + # block types (packed as two nibbles). Prefix 111111111 is used to signal + # the end of the bitstream. 
+ # prefix = window >> (32 - 9) + # if (prefix == 0x1ff) break + # lengths = huffman_table->dc[prefix >> 2] + srl length, window, 23 + beq length, temp, .Lstop_processing + srl length, 2 + addu length, huffman_table + + addiu $at, block_index, -4 + bltz $at, .Ldc_block_y + lbu length, VLC_Table_dc(length) + beqz $at, .Ldc_block_cb + andi length, 15 # if (block_index >= Cb) dc_length = lengths & 15 + +.Ldc_block_cr: # if (block_index > Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cr_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. + # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cr_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cr_positive: + addu last_cr, value + andi last_cr, 0x3ff + +.Ldc_cr_zero: + sll temp, last_cr, 2 # *output = (last_cr << 2) | quant_scale + or temp, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) + +.Ldc_block_cb: # if (block_index == Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cb_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. 
+ # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cb_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cb_positive: + addu last_cb, value + andi last_cb, 0x3ff + +.Ldc_cb_zero: + sll value, last_cb, 2 # *output = (last_cb << 2) | quant_scale + or value, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh value, 0(output) + +.Ldc_block_y: # if (block_index < Cb) nop + srl length, 4 # dc_length = lengths >> 4 + + # prefix_length = huffman_table->dc_len[dc_length] >> 4 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + srl temp, 4 + + sllv window, window, temp + beqz length, .Ldc_y_zero # if (dc_length) + subu bit_offset, temp + + sll temp, last_y, 2 + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. + # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_y_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_y_positive: + addu last_y, value + andi last_y, 0x3ff + +.Ldc_y_zero: + sll temp, last_y, 2 # *output = (last_y << 2) | quant_scale + or temp, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) - # If the code is longer, retrieve the number of leading zeroes from the GTE - # and use it as an index into the jump area. Each block in the area is 8 - # instructions long and handles decoding a specific prefix. - mfc2 $v0, $31 - li $v1, 11 - bgt $v0, $v1, .Lreturn_error # if (prefix > 11) return -1 - sll $v0, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(u32)] - addu $v0, $t9 - jr $v0 +.Lprocess_ac_coefficient: # if (coeff_index) + # Check whether the prefix code is 10 or 11 (i.e. if it starts with 1). If + # not, retrieve the number of leading zeroes from the GTE and use it as an + # index into the jump area. Each block in the area is 8 instructions long + # and handles decoding a specific prefix. 
+ mfc2 temp, C2_LZCR + + bltz window, .Lac_prefix_1 # if (!(window >> 31)) + addiu $at, temp, -11 # if (prefix > 11) return -1 + bgtz $at, .Lreturn_error + sll temp, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(uint32_t)] + addu temp, ac_jump_area + jr temp nop .Lreturn_error: - jr $ra + b .Lreturn li $v0, -1 -.Lac_prefix_11: - # Prefix 11 is followed by a single bit. - srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16) - andi $v0, 2 - addu $v0, $t8 # value = table->lut0[index] - lhu $v0, DECDCTTAB_lut0($v0) - sll $t0, 3 # window <<= 3 - b .Lwrite_value - addiu $t5, -3 # bit_offset -= 3 - #.word 0 +.Lac_prefix_1: # if (window >> 31) + sll window, 1 + bltz window, .Lac_prefix_11 + li temp, 0xfe00 .Lac_prefix_10: # Prefix 10 marks the end of a block. - li $v0, 0xfe00 # value = 0xfe00 - sll $t0, 2 # window <<= 2 - addiu $t5, -2 # bit_offset -= 2 - addiu $t6, -1 # block_index-- - bgez $t6, .Lwrite_value - li $t7, 0 # coeff_index = 0 - b .Lwrite_value - li $t6, 5 # if (block_index < 0) block_index = 5 + # *output = 0xfe00 + # coeff_index = 0 + # if (--block_index < Y3) block_index = Cr + sll window, 1 + addiu bit_offset, -2 + sh temp, 0(output) + + addiu block_index, -1 + bgez block_index, .Lfeed_bitstream + li coeff_index, 0 + b .Lfeed_bitstream + li block_index, 5 + +.Lac_prefix_11: + # Prefix 11 is followed by a single bit. Note that the 10/11 prefix check + # already shifts the window by one bit (without updating the bit offset). + # index = ((window >> (32 - 1 - 1)) & 1) * sizeof(uint16_t) + # *output = huffman_table->ac0[index] + srl value, window, 29 + andi value, 2 + addu value, huffman_table + lhu value, VLC_Table_ac0(value) + sll window, 2 + addiu bit_offset, -3 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_01: # Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a # 3-bit lookup index starting with 0. A 32-bit lookup table is used, # containing both MDEC codes and lengths. 
- srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32) - andi $v0, 28 - addu $v0, $t8 # value = table->lut2[index] - lw $v0, DECDCTTAB_lut2($v0) - b .Lupdate_window_and_write - srl $v1, $v0, 16 # length = value >> 16 + # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(uint32_t) + # *output = huffman_table->ac2[index] & 0xffff + # length = huffman_table->ac2[index] >> 16 + srl value, window, 25 + andi value, 28 + addu value, huffman_table + lw value, VLC_Table_ac2(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) .word 0, 0 .Lac_prefix_001: # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a # 3-bit lookup index starting with 01/10/11. - srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32) - andi $v0, 252 - addu $v0, $t8 # value = table->lut3[index] - lw $v0, DECDCTTAB_lut3($v0) - b .Lupdate_window_and_write - srl $v1, $v0, 16 # length = value >> 16 + # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(uint32_t) + # *output = huffman_table->ac3[index] & 0xffff + # length = huffman_table->ac3[index] >> 16 + srl value, window, 21 + andi value, 252 + addu value, huffman_table + lw value, VLC_Table_ac3(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) .word 0, 0 .Lac_prefix_0001: # Prefix 0001 is followed by a 3-bit lookup index. - srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16) - andi $v0, 14 - addu $v0, $t8 # value = table->lut4[index] - lhu $v0, DECDCTTAB_lut4($v0) - sll $t0, 7 # window <<= 4 + 3 - b .Lwrite_value - addiu $t5, -7 # bit_offset -= 4 + 3 - .word 0 + # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac4[index] + srl value, window, 24 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac4(value) + sll window, 7 + addiu bit_offset, -7 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00001: # Prefix 00001 is followed by a 3-bit lookup index. 
- srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16) - andi $v0, 14 - addu $v0, $t8 # value = table->lut5[index] - lhu $v0, DECDCTTAB_lut5($v0) - sll $t0, 8 # window <<= 5 + 3 - b .Lwrite_value - addiu $t5, -8 # bit_offset -= 5 + 3 - .word 0 + # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac5[index] + srl value, window, 23 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac5(value) + sll window, 8 + addiu bit_offset, -8 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000001: # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. - srl $v0, $t0, 10 # value = window >> (32 - 6 - 16) - sll $t0, 22 # window <<= 6 + 16 - b .Lwrite_value - addiu $t5, -22 # bit_offset -= 6 + 16 - .word 0, 0, 0, 0 + # *output = window >> (32 - 6 - 16) + srl value, window, 10 + sll window, 22 + addiu bit_offset, -22 + + b .Lfeed_bitstream + sh value, 0(output) + .word 0, 0, 0 .Lac_prefix_0000001: # Prefix 0000001 is followed by a 4-bit lookup index. - srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16) - andi $v0, 30 - addu $v0, $t8 # value = table->lut7[index] - lhu $v0, DECDCTTAB_lut7($v0) - sll $t0, 11 # window <<= 7 + 4 - b .Lwrite_value - addiu $t5, -11 # bit_offset -= 7 + 4 - .word 0 + # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(uint16_t) + # *output = huffman_table->ac7[index] + srl value, window, 20 + andi value, 30 + addu value, huffman_table + lhu value, VLC_Table_ac7(value) + sll window, 11 + addiu bit_offset, -11 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00000001: # Prefix 00000001 is followed by a 5-bit lookup index. 
- srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut8[index] - lhu $v0, DECDCTTAB_lut8($v0) - sll $t0, 13 # window <<= 8 + 5 - b .Lwrite_value - addiu $t5, -13 # bit_offset -= 8 + 5 - .word 0 + # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac8[index] + srl value, window, 18 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac8(value) + sll window, 13 + addiu bit_offset, -13 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000000001: # Prefix 000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut9[index] - lhu $v0, DECDCTTAB_lut9($v0) - sll $t0, 14 # window <<= 9 + 5 - b .Lwrite_value - addiu $t5, -14 # bit_offset -= 9 + 5 - .word 0 + # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac9[index] + srl value, window, 17 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac9(value) + sll window, 14 + addiu bit_offset, -14 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_0000000001: # Prefix 0000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut10[index] - lhu $v0, DECDCTTAB_lut10($v0) - sll $t0, 15 # window <<= 10 + 5 - b .Lwrite_value - addiu $t5, -15 # bit_offset -= 10 + 5 - .word 0 + # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac10[index] + srl value, window, 16 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac10(value) + sll window, 15 + addiu bit_offset, -15 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00000000001: # Prefix 00000000001 is followed by a 5-bit lookup index. 
- srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut11[index] - lhu $v0, DECDCTTAB_lut11($v0) - sll $t0, 16 # window <<= 11 + 5 - b .Lwrite_value - addiu $t5, -16 # bit_offset -= 11 + 5 - .word 0 + # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac11[index] + srl value, window, 15 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac11(value) + sll window, 16 + addiu bit_offset, -16 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000000000001: # Prefix 000000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut12[index] - lhu $v0, DECDCTTAB_lut12($v0) - sll $t0, 17 # window <<= 12 + 5 - b .Lwrite_value - addiu $t5, -17 # bit_offset -= 12 + 5 - .word 0 - -.Lupdate_window_and_write: - sllv $t0, $t0, $v1 # window <<= length - subu $t5, $v1 # bit_offset -= length -.Lwrite_value: - sh $v0, 0($a1) + # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac12[index] + srl value, window, 14 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac12(value) + sll window, 17 + addiu bit_offset, -17 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lupdate_window_ac: + srl length, value, 16 +.Lupdate_window_dc: + sllv window, window, length + subu bit_offset, length + .Lfeed_bitstream: # Update the window. This makes sure the next iteration of the loop will be # able to read up to 32 bits from the bitstream. 
- bgez $t5, .Lskip_feeding # if (bit_offset < 0) - addiu $a2, -1 # max_size-- - - subu $v0, $0, $t5 # window = next_window << (-bit_offset) - sllv $t0, $t1, $v0 - lw $t1, 0($a3) # next_window = (*input << 16) | (*input >> 16) - addiu $t5, 32 # bit_offset += 32 - srl $v0, $t1, 16 - sll $t1, 16 - or $t1, $v0 - addiu $a3, 4 # input++ + bgez bit_offset, .Lskip_feeding # if (bit_offset < 0) + addiu max_size, -1 + + subu temp, $0, bit_offset # window = next_window << (-bit_offset) + sllv window, next_window, temp + lw next_window, 0(input) # next_window = (*input << 16) | (*input >> 16) + addiu bit_offset, 32 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + addiu input, 4 .Lskip_feeding: - srlv $v0, $t1, $t5 # window |= next_window >> bit_offset - or $t0, $v0 + srlv temp, next_window, bit_offset # window |= next_window >> bit_offset + or window, temp - bnez $a2, .Lprocess_next_code_loop - addiu $a1, 2 # output++ + bnez max_size, .Lprocess_next_code_loop + addiu output, 2 .Lstop_processing: # If remaining = 0, skip flushing the context, pad the output buffer with # end-of-block codes if necessary and return 0. Otherwise flush the context # and return 1. 
- beqz $t2, .Lpad_output_buffer - nop - - sw $a3, VLC_Context_input($a0) - sw $t0, VLC_Context_window($a0) - sw $t1, VLC_Context_next_window($a0) - sw $t2, VLC_Context_remaining($a0) - sh $t3, VLC_Context_quant_scale($a0) - sb $t4, VLC_Context_is_v3($a0) - sb $t5, VLC_Context_bit_offset($a0) - sb $t6, VLC_Context_block_index($a0) - sb $t7, VLC_Context_coeff_index($a0) - - jr $ra + beqz remaining, .Lpad_output_buffer + li temp, 0xfe00 + + sw input, VLC_Context_input(ctx) + sw window, VLC_Context_window(ctx) + sw next_window, VLC_Context_next_window(ctx) + sw remaining, VLC_Context_remaining(ctx) + sb bit_offset, VLC_Context_bit_offset(ctx) + sb block_index, VLC_Context_block_index(ctx) + sb coeff_index, VLC_Context_coeff_index(ctx) + sh last_y, VLC_Context_last_y(ctx) + sh last_cr, VLC_Context_last_cr(ctx) + sh last_cb, VLC_Context_last_cb(ctx) + + b .Lreturn li $v0, 1 .Lpad_output_buffer: - beqz $a2, .Lreturn_zero - li $v0, 0xfe00 -.Lpad_output_buffer_loop: # while (max_size) - sh $v0, 0($a1) # *output = 0xfe00 - addiu $a2, -1 # max_size-- - bnez $a2, .Lpad_output_buffer_loop - addiu $a1, 2 # output++ + beqz max_size, .Lreturn + li $v0, 0 -.Lreturn_zero: +.Lpad_output_buffer_loop: # while (max_size) + sh temp, 0(output) + addiu max_size, -1 + bnez max_size, .Lpad_output_buffer_loop + addiu output, 2 + +.Lreturn: + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) jr $ra - li $v0, 0 + addiu $sp, 16 diff --git a/libpsn00b/psxpress/vlc2.c b/libpsn00b/psxpress/vlc2.c index 9eb99bf..7d9d9f3 100644 --- a/libpsn00b/psxpress/vlc2.c +++ b/libpsn00b/psxpress/vlc2.c @@ -63,7 +63,7 @@ static const uint32_t _compressed_table[TABLE_LENGTH] = { static VLC_Context _default_context; static size_t _max_buffer_size = 0; -const DECDCTTAB2 *_vlc_huffman_table2 = 0; +const DECDCTTAB *_vlc_huffman_table2 = 0; /* VLC decoder */ @@ -77,14 +77,17 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( VLC_Context *ctx, uint32_t *buf, size_t max_size ) { const uint32_t 
*input = ctx->input; - uint32_t remaining = ctx->remaining; uint32_t window = ctx->window; uint32_t next_window = ctx->next_window; - uint16_t quant_scale = ctx->quant_scale; + uint32_t remaining = ctx->remaining; + int is_v3 = ctx->is_v3; + int bit_offset = ctx->bit_offset; int block_index = ctx->block_index; int coeff_index = ctx->coeff_index; - int bit_offset = ctx->bit_offset; - int is_v3 = ctx->is_v3; + uint16_t quant_scale = ctx->quant_scale; + int16_t last_y = ctx->last_y; + int16_t last_cr = ctx->last_cr; + int16_t last_cb = ctx->last_cb; //if (!_vlc_huffman_table2) //return -1; @@ -120,25 +123,22 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( *output = (uint16_t) _get_bits_unsigned(22); _advance_window(22); } else if (window >> 24) { - // The first lookup table is for codes that not start with + // The first lookup table is for codes that do not start with // 00000000. - value = _vlc_huffman_table2->lut[_get_bits_unsigned(13)]; + value = _vlc_huffman_table2->ac[_get_bits_unsigned(13)]; _advance_window(value >> 16); *output = (uint16_t) value; } else { // If the code starts with 00000000, use the second lookup // table. - value = _vlc_huffman_table2->lut00[_get_bits_unsigned(17)]; + value = _vlc_huffman_table2->ac00[_get_bits_unsigned(17)]; _advance_window(value >> 16); *output = (uint16_t) value; } } else { - // Parse the DC (first) coefficient for this block. Version 2 - // simply stores the signed 10-bit value as-is, while version 3 - // uses a delta encoding combined with a compression method similar - // to exp-Golomb. + // Parse the DC (first) coefficient for this block. if (is_v3) { - // TODO: version 3 is currently not supported. + // This implementation does not support version 3. 
return -1; } else { value = _get_bits_unsigned(10); @@ -158,7 +158,7 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( // time and processes each 16-bit word starting from the MSB, so an // endianness conversion is necessary to preserve bit order when // reading 32 bits at a time. Also note that the PS1 CPU is not capable - // of shifting by more than 31 bits - it will shift by 0 bits instead! + // of shifting by >=32 bits - it will shift by (N % 32) bits instead! if (bit_offset < 0) { window = next_window << (-bit_offset); bit_offset += 32; @@ -176,12 +176,15 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( return 0; ctx->input = input; - ctx->remaining = remaining; ctx->window = window; ctx->next_window = next_window; + ctx->remaining = remaining; + ctx->bit_offset = bit_offset; ctx->block_index = block_index; ctx->coeff_index = coeff_index; - ctx->bit_offset = bit_offset; + ctx->last_y = last_y; + ctx->last_cr = last_cr; + ctx->last_cb = last_cb; return 1; } @@ -197,21 +200,24 @@ int DecDCTvlcStart2( return -1; ctx->input = &input[2]; - ctx->remaining = (header->mdec0_header & 0xffff) * 2; ctx->window = (input[0] << 16) | (input[0] >> 16); ctx->next_window = (input[1] << 16) | (input[1] >> 16); - ctx->quant_scale = (header->quant_scale & 63) << 10; + ctx->remaining = (header->mdec0_header & 0xffff) * 2; + ctx->is_v3 = (header->version >= 3); + ctx->bit_offset = 32; ctx->block_index = 0; ctx->coeff_index = 0; - ctx->bit_offset = 32; - ctx->is_v3 = (header->version == 3); + ctx->quant_scale = (header->quant_scale & 63) << 10; + ctx->last_y = 0; + ctx->last_cr = 0; + ctx->last_cb = 0; return DecDCTvlcContinue2(ctx, buf, max_size); } /* Stateful VLC decoder API (for Sony SDK compatibility) */ -int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table) { +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table) { if (table) _vlc_huffman_table2 = table; @@ -230,7 +236,7 @@ size_t DecDCTvlcSize2(size_t size) { /* Lookup table 
decompressor */ -void DecDCTvlcBuild(DECDCTTAB2 *table) { +void DecDCTvlcBuild(DECDCTTAB *table) { uint32_t *output = (uint32_t *) table; _vlc_huffman_table2 = table; diff --git a/libpsn00b/psxsio/tty.c b/libpsn00b/psxsio/tty.c index a1b33c8..a88af85 100644 --- a/libpsn00b/psxsio/tty.c +++ b/libpsn00b/psxsio/tty.c @@ -8,7 +8,7 @@ * critical section or even from an interrupt handler. */ -#include <ioctl.h> +#include <sys/ioctl.h> #include <psxapi.h> #include <psxsio.h> #include <hwregs_c.h> @@ -93,8 +93,8 @@ void AddSIO(int baud) { close(0); close(1); - DelDev(_sio_dcb.name); - AddDev(&_sio_dcb); + DelDrv(_sio_dcb.name); + AddDrv(&_sio_dcb); open(_sio_dcb.name, 2); open(_sio_dcb.name, 1); } @@ -102,6 +102,6 @@ void AddSIO(int baud) { void DelSIO(void) { SIO_Quit(); - DelDev(_sio_dcb.name); - AddDummyTty(); + DelDrv(_sio_dcb.name); + add_nullcon_driver(); } diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c index 45654ad..64c6d1b 100644 --- a/libpsn00b/psxspu/common.c +++ b/libpsn00b/psxspu/common.c @@ -1,10 +1,11 @@ /* * PSn00bSDK SPU library (common functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> #include <assert.h> +#include <psxetc.h> #include <psxspu.h> #include <hwregs_c.h> @@ -31,7 +32,7 @@ static void _wait_status(uint16_t mask, uint16_t value) { return; } - _sdk_log("status register timeout (0x%04x)\n", SPU_STAT); + _sdk_log("timeout, status=0x%04x\n", SPU_STAT); } static size_t _dma_transfer(uint32_t *data, size_t length, int write) { @@ -59,13 +60,14 @@ static size_t _dma_transfer(uint32_t *data, size_t length, int write) { SPU_CTRL |= ctrl; _wait_status(0x0030, ctrl); - DMA_MADR(4) = (uint32_t) data; + DMA_MADR(DMA_SPU) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(4) = 0x00010000 | length; + DMA_BCR(DMA_SPU) = 0x00010000 | length; else - DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_SPU) = DMA_CHUNK_LENGTH | + ((length / 
DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(4) = 0x01000200 | write; + DMA_CHCR(DMA_SPU) = 0x01000200 | write; return length; } @@ -130,8 +132,8 @@ void SpuInit(void) { SPU_EXT_VOL_L = 0; SPU_EXT_VOL_R = 0; - DMA_DPCR |= 0x000b0000; // Enable DMA4 - DMA_CHCR(4) = 0x00000201; // Stop DMA4 + SetDMAPriority(DMA_SPU, 3); + DMA_CHCR(DMA_SPU) = 0x00000201; // Stop DMA SPU_DMA_CTRL = 0x0004; // Reset transfer mode SPU_CTRL = 0xc001; // Enable SPU, DAC, CD audio, disable DMA request @@ -162,12 +164,18 @@ void SpuInit(void) { } size_t SpuRead(uint32_t *data, size_t size) { + _sdk_validate_args(data && size, 0); + return _dma_transfer(data, size, 0) * 4; } size_t SpuWrite(const uint32_t *data, size_t size) { - if (_transfer_addr < WRITABLE_AREA_ADDR) + _sdk_validate_args(data && size, 0); + + if (_transfer_addr < WRITABLE_AREA_ADDR) { + _sdk_log("ignoring attempt to write to capture buffers at 0x%05x\n", _transfer_addr); return 0; + } // I/O transfer mode is not that useful, but whatever. if (_transfer_mode) @@ -177,6 +185,8 @@ size_t SpuWrite(const uint32_t *data, size_t size) { } size_t SpuWritePartly(const uint32_t *data, size_t size) { + //_sdk_validate_args(data && size, 0); + size_t _size = SpuWrite(data, size); _transfer_addr += (_size + 1) / 2; @@ -188,6 +198,10 @@ SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) { return mode; } +SPU_TransferMode SpuGetTransferMode(void) { + return _transfer_mode; +} + uint32_t SpuSetTransferStartAddr(uint32_t addr) { if (addr > 0x7ffff) return 0; @@ -196,6 +210,10 @@ uint32_t SpuSetTransferStartAddr(uint32_t addr) { return addr; } +uint32_t SpuGetTransferStartAddr(void) { + return _transfer_addr * 8; +} + int SpuIsTransferCompleted(int mode) { if (!mode) return ((SPU_STAT >> 10) & 1) ^ 1; |
