diff options
| author | John "Lameguy" Wilbert Villamor <lameguy64@gmail.com> | 2022-11-03 10:14:22 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-11-03 10:14:22 +0800 |
| commit | 4139331d233b7a962e747c5564fa68a285f81cc8 (patch) | |
| tree | d4d3374afd5e36e8580cc424ab2c63ee9e7d357c /libpsn00b | |
| parent | e08a3d9366f8ca14a76b3dd569dac1fb9f569748 (diff) | |
| parent | 37d963f724113e45d15aa9b8ee86baa9c4362b8f (diff) | |
| download | psn00bsdk-4139331d233b7a962e747c5564fa68a285f81cc8.tar.gz | |
Merge pull request #60 from spicyjpeg/bugfix
Bugfixes, new serial port API and sound examples
Diffstat (limited to 'libpsn00b')
49 files changed, 3389 insertions, 2120 deletions
diff --git a/libpsn00b/CMakeLists.txt b/libpsn00b/CMakeLists.txt index 602b3c8..a6b6df3 100644 --- a/libpsn00b/CMakeLists.txt +++ b/libpsn00b/CMakeLists.txt @@ -48,6 +48,8 @@ foreach(_library IN LISTS PSN00BSDK_LIBRARIES) ${_library} INTERFACE $<$<STREQUAL:$<UPPER_CASE:$<TARGET_PROPERTY:PSN00BSDK_TARGET_TYPE>>,${_type}>:${_name}> ) + + target_compile_definitions(${_name} PRIVATE SDK_LIBRARY_NAME="${_library}") endforeach() endforeach() @@ -102,6 +104,6 @@ install( # once the debug and release builds are merged into the same installation tree. install( EXPORT libpsn00b - DESTINATION ${CMAKE_INSTALL_LIBDIR}/libpsn00b + DESTINATION ${CMAKE_INSTALL_LIBDIR}/libpsn00b/cmake #EXPORT_LINK_INTERFACE_LIBRARIES ) diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake index e78355f..b21451e 100644 --- a/libpsn00b/cmake/internal_setup.cmake +++ b/libpsn00b/cmake/internal_setup.cmake @@ -34,7 +34,7 @@ if(NOT DEFINED PSN00BSDK_VERSION) string(JSON PSN00BSDK_GIT_COMMIT GET ${_json} git_commit) endif() -include(${CMAKE_CURRENT_LIST_DIR}/../libpsn00b.cmake OPTIONAL) +include(${CMAKE_CURRENT_LIST_DIR}/libpsn00b.cmake OPTIONAL) if(TARGET psn00bsdk) link_libraries(psn00bsdk) endif() diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h index 32301e2..12212af 100644 --- a/libpsn00b/include/assert.h +++ b/libpsn00b/include/assert.h @@ -1,20 +1,35 @@ /* - * PSn00bSDK assert macro + * PSn00bSDK assert macro and internal logging * (C) 2022 spicyjpeg - MPL licensed + * + * Note that the _sdk_log() macro is used internally by PSn00bSDK to output + * debug messages and warnings. */ #ifndef __ASSERT_H #define __ASSERT_H +#include <stdio.h> + void _assert_abort(const char *file, int line, const char *expr); #ifdef NDEBUG -#define assert(x) + +#define assert(expr) +#define _sdk_log(fmt, ...) 
+ #else + #define assert(expr) { \ - if (!(expr)) \ - _assert_abort(__FILE__, __LINE__, #expr); \ + if (!(expr)) _assert_abort(__FILE__, __LINE__, #expr); \ } + +#ifdef SDK_LIBRARY_NAME +#define _sdk_log(fmt, ...) printf(SDK_LIBRARY_NAME ": " fmt, ##__VA_ARGS__) +#else +#define _sdk_log(fmt, ...) printf(fmt, ##__VA_ARGS__) +#endif + #endif #endif diff --git a/libpsn00b/include/dlfcn.h b/libpsn00b/include/dlfcn.h index 5848a95..3c5260d 100644 --- a/libpsn00b/include/dlfcn.h +++ b/libpsn00b/include/dlfcn.h @@ -7,38 +7,31 @@ #define __DLFCN_H #include <stdint.h> +#include <stddef.h> #include <elf.h> -/* Helper macro for setting $t9 before calling a function */ +/* Macros */ -#define DL_PRE_CALL(func) { \ - __asm__ volatile("move $t9, %0;" :: "r"(func) : "$t9"); \ -} +/** + * @brief Prepares for a DLL function call. + * + * @details Sets the $t9 register to the specified value (which should be a + * pointer to a DLL function obtained using DL_GetDLLSymbol()). This must be + * done prior to calling a DLL function from the main executable to ensure the + * DLL can correctly invoke the symbol resolver if necessary. + * + * This macro is not required when calling a DLL function from another DLL, as + * GCC will generate code to set $t9 appropriately. 
+ */ +#define DL_PRE_CALL(func) \ + __asm__ volatile("move $t9, %0;" :: "r"(func) : "$t9"); -/* Types */ - -#define RTLD_DEFAULT ((DLL *) 0) - -typedef enum _DL_Error { - RTLD_E_NONE = 0, // No error - RTLD_E_FILE_OPEN = 1, // Unable to find or open file - RTLD_E_FILE_ALLOC = 2, // Unable to allocate buffer to load file into - RTLD_E_FILE_READ = 3, // Failed to read file - RTLD_E_NO_MAP = 4, // No symbol map has been loaded yet - RTLD_E_MAP_ALLOC = 5, // Unable to allocate symbol map structures - RTLD_E_NO_SYMBOLS = 6, // No symbols found in symbol map - RTLD_E_DLL_NULL = 7, // Unable to initialize DLL from null pointer - RTLD_E_DLL_ALLOC = 8, // Unable to allocate DLL metadata structures - RTLD_E_DLL_FORMAT = 9, // Unsupported DLL type or format - RTLD_E_MAP_SYMBOL = 10, // Symbol not found in symbol map - RTLD_E_DLL_SYMBOL = 11, // Symbol not found in DLL - RTLD_E_HASH_LOOKUP = 12 // Hash table lookup failed due to internal error -} DL_Error; +/* Structure and enum definitions */ typedef enum _DL_ResolveMode { - RTLD_LAZY = 1, // Resolve functions when they are first called (default) - RTLD_NOW = 2, // Resolve all symbols immediately on load - RTLD_FREE_ON_DESTROY = 4 // Automatically free DLL buffer when closing DLL + DL_LAZY = 1, // Resolve functions when they are first called (default) + DL_NOW = 2, // Resolve all symbols immediately on load + DL_FREE_ON_DESTROY = 4 // Automatically free DLL buffer when closing DLL } DL_ResolveMode; // Members of this struct should not be accessed directly in most cases, but @@ -55,151 +48,171 @@ typedef struct _DLL { uint16_t got_length; } DLL; -/* API */ +/* Public API */ #ifdef __cplusplus extern "C" { #endif /** - * @brief Reads the symbol table from the provided string buffer (which may or - * may not be null-terminated), parses it and stores the parsed entries into a - * private hash table; the buffer won't be further referenced and can be safely - * deallocated after parsing. 
Returns the number of entries successfully parsed - * or -1 if an error occurred. + * @brief Creates an empty symbol map in memory. * - * This function expects the string buffer to contain one more lines, each of - * which must follow this format: + * @details Initializes the internal symbol hash table to contain at most the + * given number of symbols. Once this function is called, symbols can be + * registered using DL_AddMapSymbol() and then looked up using + * DL_GetMapSymbol(). The default DLL resolver will search the hash table for + * external symbols required by DLLs. * - * <SYMBOL_NAME> <T|R|D|B> <HEX_ADDRESS> <HEX_SIZE> [DEBUG_INFO...] + * This function is normally not required when loading a map file through + * DL_ParseSymbolMap(), but it can be used alongside DL_AddMapSymbol() to + * implement a custom symbol map parser. * - * The "nm" tool included in the GCC toolchain can be used to generate a map - * file in the appropriate format after building the executable, by using this - * command: + * @param num_entries + * @return 0 or -1 in case of error * - * mipsel-none-elf-nm -f posix -l -n executable.elf >executable.map + * @see DL_AddMapSymbol(), DL_GetMapSymbol() + */ +int DL_InitSymbolMap(int num_entries); + +/** + * @brief Destroys the currently loaded symbol map. * - * @param ptr - * @param size - * @return -1 or number of entries parsed + * @details Frees the internal hash table allocated by DL_InitSymbolMap() or + * DL_ParseSymbolMap(), containing the currently loaded symbol map. Freeing the + * table manually before loading a new symbol map is normally unnecessary as it + * is done automatically, however this function can be useful to recover heap + * space once the map is no longer needed. */ -int32_t DL_ParseSymbolMap(const char *ptr, size_t size); +void DL_UnloadSymbolMap(void); /** - * @brief File wrapper around DL_ParseSymbolMap(). 
Allocates a temporary buffer - * then loads the specified map file into it (using BIOS APIs) and calls - * DL_ParseSymbolMap() to parse it. The buffer is deallocated immediately after - * parsing. + * @brief Adds a symbol to the currently loaded symbol map. * - * @param filename Must always contain device name, e.g. "cdrom:MODULE.DLL;1" - * @return -1 or number of entries parsed + * @details Registers a new symbol (function or variable) with the given name + * and address, and adds it to the internal hash table. The symbol can then be + * looked up using DL_GetMapSymbol(). The default DLL resolver will search the + * hash table for external symbols required by DLLs. + * + * This function shall only be called after DL_InitSymbolMap() or + * DL_ParseSymbolMap() is called. + * + * @param name + * @param ptr + * + * @see DL_GetMapSymbol() */ -//int32_t DL_LoadSymbolMapFromFile(const char *filename); +void DL_AddMapSymbol(const char *name, void *ptr); /** - * @brief Frees internal buffers containing the currently loaded symbol map. - * This is automatically done before loading a new symbol map so there is no - * need to call this function in most cases, however it can still be useful to - * free up space on the heap once the symbol map is no longer needed. + * @brief Creates a symbol map in memory from a map file in text format. + * + * @details Initializes the internal symbol hash table, then parses entries + * from the provided string buffer (which may or may not be null-terminated) + * and adds each one to the table. The string buffer won't be further + * referenced and can be safely deallocated after parsing. Returns the number + * of entries successfully parsed. + * + * The string buffer shall contain one or more lines, each of which must follow + * this format: + * + * <SYMBOL_NAME> <T|R|D|B> <HEX_ADDRESS> <HEX_SIZE> [...] 
+ * + * The "nm" tool included in the GCC toolchain can be used to generate a map + * file in the appropriate format after building the executable: + * + * mipsel-none-elf-nm -f posix -l -n executable.elf >executable.map + * + * @param ptr + * @param size + * @return Number of entries parsed, -1 in case of failure + * + * @see DL_UnloadSymbolMap(), DL_GetMapSymbol() */ -void DL_UnloadSymbolMap(void); +int DL_ParseSymbolMap(const char *ptr, size_t size); /** - * @brief Queries the currently loaded symbol map for the symbol with the given - * name and returns a pointer to it, which can then be used to directly access - * the symbol. If the symbol can't be found, null is returned instead. + * @brief Gets a pointer to a symbol in the currently loaded map by its name. + * + * @details Queries the currently loaded symbol map for the symbol with the + * given name and returns a pointer to it, which can then be used to directly + * access the symbol. If the symbol can't be found, a null pointer is returned. * * @param name - * @return NULL or pointer to symbol (any type) + * @return NULL or pointer to symbol */ -void *DL_GetSymbolByName(const char *name); +void *DL_GetMapSymbol(const char *name); /** - * @brief Sets a custom function to be called for resolving symbols in DLLs. + * @brief Sets a custom handler for resolving symbols in DLLs. + * + * @details Sets a custom function to be called for resolving symbols in DLLs. * The function will be given a pointer to the current DLL and the unresolved * symbol's name, and should return the address of the symbol in the executable * (the dynamic linker will lock up if it returns null). Passing null instead - * of a function resets the default behavior of calling DL_GetSymbolByName() to + * of a function resets the default behavior of calling DL_GetMapSymbol() to * find the symbol in the currently loaded symbol map. 
- * + * * @param callback NULL or pointer to callback function + * @return Previously set callback or NULL */ -void DL_SetResolveCallback(void *(*callback)(DLL *, const char *)); +void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)); /** - * @brief Initializes a buffer holding the contents of a dynamically-loaded + * @brief Initializes a DLL structure. + * + * @details Initializes a buffer holding the contents of a dynamically-loaded * library file (compiled with the dll.ld linker script and converted to a raw - * binary) *in-place*. A new DLL struct is allocated to store metadata but, + * binary) *in-place*. Metadata is written to the provided DLL struct but, * unlike DL_ParseSymbolMap(), the DLL's actual code, data and tables are * referenced directly from the provided buffer. The buffer must not be moved * or deallocated, at least not before calling DL_DestroyDLL() on the DLL * struct returned by this function. * * The third argument specifies when symbols in the DLL should be resolved. - * Setting it to RTLD_LAZY defers resolution of undefined functions to when - * they are first called, while RTLD_NOW forces all symbols to be resolved - * immediately. If a custom resolver has been set via DL_SetResolveCallback(), - * it will be called for each symbol to resolve. + * Setting it to DL_LAZY defers resolution of undefined functions to when they + * are first called, while DL_NOW forces all symbols to be resolved + * immediately. Either mode can be OR'd with DL_FREE_ON_DESTROY to + * automatically deallocate the provided buffer when DL_DestroyDLL() is called. * + * If a custom resolver has been set via DL_SetResolveCallback(), it will be + * called for each symbol to resolve. + * + * @param dll * @param ptr * @param size - * @param mode RTLD_LAZY or RTLD_NOW - * @return NULL or pointer to a new DLL struct - */ -DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode); - -/** - * @brief File wrapper around dlinit(). 
Allocates a new buffer, loads the - * specified file into it (using BIOS APIs) and calls dlinit() on that. When - * calling dlclose() on a DLL loaded from a file, the file buffer is - * automatically destroyed. - * - * @param filename Must always contain device name, e.g. "cdrom:MODULE.DLL;1" - * @param mode RTLD_LAZY or RTLD_NOW + optionally RTLD_FREE_ON_DESTROY - * @return NULL or pointer to a new DLL struct + * @param mode DL_LAZY or DL_NOW, optionally with DL_FREE_ON_DESTROY + * @return Pointer to DLL structure or NULL in case of failure + * + * @see DL_DestroyDLL(), DL_GetDLLSymbol() */ -//DLL *DL_LoadDLLFromFile(const char *filename, DL_ResolveMode mode); +DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode); /** - * @brief Destroys a loaded DLL by calling its global destructors and freeing - * the buffer it's loaded in. Any pointer passed to DL_DestroyDLL() should no - * longer be used after the call. If the DLL was initialized in-place using - * DL_CreateDLL(), DL_DestroyDLL() will only free the buffer initially passed - * to DL_CreateDLL() if RTLD_FREE_ON_DESTROY was used. + * @brief Destroys a DLL structure. + * + * @details Destroys a loaded DLL by calling its global destructors. If the DLL + * was initialized with the DL_FREE_ON_DESTROY flag, the buffer associated with + * the DLL is also deallocated. Note that the DLL structure itself is *not* + * deallocated. * * @param dll */ void DL_DestroyDLL(DLL *dll); /** - * @brief Returns a pointer to the DLL symbol with the given name, or null if - * it can't be found. If null or RTLD_DEFAULT is passed as first argument, the - * executable itself is searched instead using the symbol map (behaving the - * same as DL_GetSymbolByName()). + * @brief Gets a pointer to a symbol in a DLL by its name. + * + * @details Returns a pointer to the DLL symbol with the given name, or null if + * it can't be found. 
If a null pointer is passed as first argument, the + * executable itself is searched instead using the symbol map (behaving + * identically to DL_GetMapSymbol()). * - * @param dll DLL struct or RTLD_DEFAULT + * @param dll Pointer to DLL structure or NULL * @param name * @return NULL or pointer to symbol (any type) */ void *DL_GetDLLSymbol(const DLL *dll, const char *name); -/** - * @brief Returns a code describing the last error that occurred, or DL_E_NONE - * if no error has occurred since the last call to dlerror() (i.e. calling this - * also resets the internal error flags). - * - * @return NULL or member of DL_Error enum - */ -DL_Error DL_GetLastError(void); - -/* POSIX "compatibility" macros */ - -#define dlinit(ptr, size, mode) DL_CreateDLL(ptr, size, mode) -//#define dlopen(filename, mode) DL_LoadDLLFromFile(filename, mode) -#define dlsym(dll, name) DL_GetDLLSymbol(dll, name) -#define dlclose(dll) DL_DestroyDLL(dll) -#define dlerror() DL_GetLastError() - #ifdef __cplusplus } #endif diff --git a/libpsn00b/include/hwregs_a.inc b/libpsn00b/include/hwregs_a.inc index c78b41a..ca38542 100644 --- a/libpsn00b/include/hwregs_a.inc +++ b/libpsn00b/include/hwregs_a.inc @@ -32,12 +32,18 @@ .set SPU_MASTER_VOL_R, 0x1d82 .set SPU_REVERB_VOL_L, 0x1d84 .set SPU_REVERB_VOL_R, 0x1d86 -.set SPU_KEY_ON, 0x1d88 -.set SPU_KEY_OFF, 0x1d8c -.set SPU_FM_MODE, 0x1d90 -.set SPU_NOISE_MODE, 0x1d94 -.set SPU_REVERB_ON, 0x1d98 -.set SPU_CHAN_STATUS, 0x1d9c +.set SPU_KEY_ON1, 0x1d88 +.set SPU_KEY_ON2, 0x1d8a +.set SPU_KEY_OFF1, 0x1d8c +.set SPU_KEY_OFF2, 0x1d8e +.set SPU_FM_MODE1, 0x1d90 +.set SPU_FM_MODE2, 0x1d92 +.set SPU_NOISE_MODE1, 0x1d94 +.set SPU_NOISE_MODE2, 0x1d96 +.set SPU_REVERB_ON1, 0x1d98 +.set SPU_REVERB_ON2, 0x1d9a +.set SPU_CHAN_STATUS1, 0x1d9c +.set SPU_CHAN_STATUS2, 0x1d9e .set SPU_REVERB_ADDR, 0x1da2 .set SPU_IRQ_ADDR, 0x1da4 @@ -59,8 +65,8 @@ .set SPU_VOICE_VOL_R, 0x02 .set SPU_VOICE_FREQ, 0x04 .set SPU_VOICE_ADDR, 0x06 -.set SPU_VOICE_ADSR_L, 0x08 -.set 
SPU_VOICE_ADSR_H, 0x0a +.set SPU_VOICE_ADSR1, 0x08 +.set SPU_VOICE_ADSR2, 0x0a .set SPU_VOICE_LOOP, 0x0e ## MDEC diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h index b205b87..0e21922 100644 --- a/libpsn00b/include/hwregs_c.h +++ b/libpsn00b/include/hwregs_c.h @@ -38,12 +38,18 @@ #define SPU_MASTER_VOL_R _MMIO16(IOBASE | 0x1d82) #define SPU_REVERB_VOL_L _MMIO16(IOBASE | 0x1d84) #define SPU_REVERB_VOL_R _MMIO16(IOBASE | 0x1d86) -#define SPU_KEY_ON _MMIO32(IOBASE | 0x1d88) -#define SPU_KEY_OFF _MMIO32(IOBASE | 0x1d8c) -#define SPU_FM_MODE _MMIO32(IOBASE | 0x1d90) -#define SPU_NOISE_MODE _MMIO32(IOBASE | 0x1d94) -#define SPU_REVERB_ON _MMIO32(IOBASE | 0x1d98) -#define SPU_CHAN_STATUS _MMIO32(IOBASE | 0x1d9c) +#define SPU_KEY_ON1 _MMIO16(IOBASE | 0x1d88) +#define SPU_KEY_ON2 _MMIO16(IOBASE | 0x1d8a) +#define SPU_KEY_OFF1 _MMIO16(IOBASE | 0x1d8c) +#define SPU_KEY_OFF2 _MMIO16(IOBASE | 0x1d8e) +#define SPU_FM_MODE1 _MMIO16(IOBASE | 0x1d90) +#define SPU_FM_MODE2 _MMIO16(IOBASE | 0x1d92) +#define SPU_NOISE_MODE1 _MMIO16(IOBASE | 0x1d94) +#define SPU_NOISE_MODE2 _MMIO16(IOBASE | 0x1d96) +#define SPU_REVERB_ON1 _MMIO16(IOBASE | 0x1d98) +#define SPU_REVERB_ON2 _MMIO16(IOBASE | 0x1d9a) +#define SPU_CHAN_STATUS1 _MMIO16(IOBASE | 0x1d9c) +#define SPU_CHAN_STATUS2 _MMIO16(IOBASE | 0x1d9e) #define SPU_REVERB_ADDR _MMIO16(IOBASE | 0x1da2) #define SPU_IRQ_ADDR _MMIO16(IOBASE | 0x1da4) @@ -67,7 +73,8 @@ #define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N)) #define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N)) #define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N)) -#define SPU_CH_ADSR(N) _MMIO32(IOBASE | 0x1c08 + 16 * (N)) +#define SPU_CH_ADSR1(N) _MMIO16(IOBASE | 0x1c08 + 16 * (N)) +#define SPU_CH_ADSR2(N) _MMIO16(IOBASE | 0x1c0a + 16 * (N)) #define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N)) /* MDEC */ diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h index c5eaa59..5facc1c 100644 --- 
a/libpsn00b/include/inline_c.h +++ b/libpsn00b/include/inline_c.h @@ -3,9 +3,17 @@ * (C) 2019 Lameguy64 * (C) 2021-2022 Soapy (tweaked by spicyjpeg) * - * This header is basically identical to Nugget's inline_n.h. All GTE commands - * can be used right away without having to run DMPSX or any other tool on - * object files. + * This header is basically identical to Nugget's inline_n.h. + */ + +/** + * @file inline_c.h + * @brief Inline GTE macro header + * + * @details This header provides a set of macros for making use of GTE commands + * and registers from C or C++ code. Unlike the official SDK, all commands can + * be used right away without having to run any other post-processing tool on + * compiled object files. */ #ifndef _INLINE_C_H @@ -13,7 +21,11 @@ /* GTE load macros */ -/* Load a SVECTOR (passed as a pointer) to GTE V0 +/** + * @brief Loads a single SVECTOR to GTE vector register V0 + * + * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY0 + * and C2_VZ0. */ #define gte_ldv0( r0 ) __asm__ volatile ( \ "lwc2 $0, 0( %0 );" \ @@ -22,7 +34,11 @@ : "r"( r0 ) \ : "$t0" ) -/* Load a SVECTOR (passed as a pointer) to GTE V1 +/** + * @brief Loads a single SVECTOR to GTE vector register V1 + * + * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY1 + * and C2_VZ1. */ #define gte_ldv1( r0 ) __asm__ volatile ( \ "lwc2 $2, 0( %0 );" \ @@ -31,7 +47,11 @@ : "r"( r0 ) \ : "$t0" ) -/* Load a SVECTOR (passed as a pointer) to GTE V2 +/** + * @brief Loads a single SVECTOR to GTE vector register V2 + * + * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY2 + * and C2_VZ2. 
*/ #define gte_ldv2( r0 ) __asm__ volatile ( \ "lwc2 $4, 0( %0 );" \ @@ -40,7 +60,11 @@ : "r"( r0 ) \ : "$t0" ) -/* Load three SVECTORs (passed as a pointer) to the GTE at once +/** + * @brief Load three SVECTORs to GTE vector registers at once + * + * @details Loads values from three SVECTOR structs to GTE data registers + * C2_VXY0 and C2_VZ0, C2_VXY1 and C2_VZ1, C2_VXY2 and C2_VZ2 at once. */ #define gte_ldv3( r0, r1, r2 ) __asm__ volatile ( \ "lwc2 $0, 0( %0 );" \ @@ -88,6 +112,14 @@ : \ : "r"( r0 ) ) +/** + * @brief Load a CVECTOR to GTE register C2_RGBC + * + * @details Loads a CVECTOR value to GTE data register C2_RGBC. The primitive + * code (the last byte of a CVECTOR) is passed to the color FIFO registers when + * performing lighting compute operations, so it can be stored to the RGBC + * field of a primitive directly without any additional operation required. + */ #define gte_ldrgb( r0 ) __asm__ volatile ( \ "lwc2 $6 , 0( %0 );" \ : \ @@ -224,6 +256,12 @@ : "r"( r0 ) \ : "$12", "$13", "$14" ) +/** + * @brief Loads values to GTE registers C2_IR1-3 + * + * @details Loads three 32-bit values to GTE data registers C2_IR1, C2_IR2 and + * C2_IR3. + */ #define gte_ldopv2( r0 ) __asm__ volatile ( \ "lwc2 $11, 8( %0 );" \ "lwc2 $9 , 0( %0 );" \ @@ -253,6 +291,14 @@ : \ : "r"( r0 ), "r"( r1 ), "r"( r2 ) ) +/** + * @brief Sets an RGB color value to the GTE + * + * @details Sets the specified RGB value to GTE control registers C2_RBK, + * C2_GBK and C2_BBK. This specifies the color value to use when a normal faces + * away from the direction of the light source. This can be considered as the + * ambient light color. 
+ */ #define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \ "sll $t0, %0, 4;" \ "sll $t1, %1, 4;" \ @@ -282,6 +328,13 @@ : "r"( r0 ), "r"( r1 ), "r"( r2 ) \ : "$12", "$13", "$14" ) +/** + * @brief Sets the GTE screen offset + * + * @details Sets the values of the GTE screen offset which is applied to 2D + * projected coordinates when performing perspective transformation. The values + * are set to GTE control registers C2_OFX and C2_OFY. + */ #define gte_SetGeomOffset( r0, r1 ) __asm__ volatile ( \ "sll $t0, %0, 16;" \ "sll $t1, %1, 16;" \ @@ -291,6 +344,13 @@ : "r"( r0 ), "r"( r1 ) \ : "$t0", "$t1" ) +/** + * @brief Sets the distance of the projection plane + * + * @details Sets the specified value to GTE control register C2_H which + * determines the projection plane distance, otherwise known as the field of + * view. + */ #define gte_SetGeomScreen( r0 ) __asm__ volatile ( \ "ctc2 %0, $26;" \ : \ @@ -305,6 +365,12 @@ : "r"( r0 ) \ : "$12", "$13" ) +/** + * @brief Sets a 3x3 rotation matrix portion from a MATRIX to the GTE + * + * @details Sets the 3x3 rotation matrix coordinates from a MATRIX struct to + * GTE control registers C2_R11R12, C2_R13R21, C2_R22R23, C2_R31R32 and C2_R33. + */ #define gte_SetRotMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -329,6 +395,17 @@ : "r"( r0 ) \ : "$12", "$13" ) +/** + * @brief Sets a 3x3 lighting matrix from a MATRIX to the GTE + * + * @details Sets the 3x3 lighting matrix coordinates from a MATRIX struct to + * GTE control registers C2_L11L12, C2_L13L21, C2_L22L23, C2_L31L32 and C2_L33. + * + * The lighting matrix is essentially a set of three light direction + * vectors. L11, L12 and L13 represent the X, Y and Z coordinates of light + * source 0, for example. Coordinates must be normalized to ensure correct + * results. 
+ */ #define gte_SetLightMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -353,6 +430,17 @@ : "r"( r0 ) \ : "$12", "$13" ) +/** + * @brief Sets a 3x3 color matrix from a MATRIX to the GTE + * + * @details Sets the 3x3 color matrix values from a MATRIX struct to GTE + * control registers C2_LR1LR2, C2_LR3LG1, C2_LG2LG3, C2_LB1LB2 and C2_LB3. + * + * The light color matrix is essentially a set of three RGB colors, one for each + * of the three light sources. LR1, LG1 and LB1 represent the R, G and B color + * values for light source 0, for example. Values range from 0 to 4095; higher + * values will be saturated. + */ #define gte_SetColorMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 0( %0 );" \ "lw $t1, 4( %0 );" \ @@ -368,6 +456,12 @@ : "r"( r0 ) \ : "$t2" ) +/** + * @brief Sets the translation portion of a MATRIX to the GTE + * + * @details Sets the translation coordinates from a MATRIX struct to GTE + * control registers C2_TRX, C2_TRY and C2_TRZ respectively. + */ #define gte_SetTransMatrix( r0 ) __asm__ volatile ( \ "lw $t0, 20( %0 );" \ "lw $t1, 24( %0 );" \ @@ -1044,11 +1138,39 @@ /* GTE operation macros */ +/** + * @brief Rotate, Translate and Perspective Single (15 cycles) + * + * @details Performs rotation, translation and perspective calculation of a + * single vertex. Divide overflows are simply saturated allowing for crude Z + * clipping. Check C2_FLAG to determine which overflow error has occurred + * during calculation. 
+ * + * The following equation is performed when executing this GTE command: + * + * IR1 = MAC1 = (TRX*4096 + R11*VX0 + R12*VY0 + R13*VZ0) / 4096 + * IR2 = MAC2 = (TRY*4096 + R21*VX0 + R22*VY0 + R23*VZ0) / 4096 + * IR3 = MAC3 = (TRZ*4096 + R31*VX0 + R32*VY0 + R33*VZ0) / 4096 + * SZ3 = MAC3 + * + * MAC0 = (((H*131072/SZ3)+1)/2) * IR1 + OFX, SX2 = MAC0 / 65536 + * MAC0 = (((H*131072/SZ3)+1)/2) * IR2 + OFY, SY2 = MAC0 / 65536 + * MAC0 = (((H*131072/SZ3)+1)/2) * DQA + DQB, IR0 = MAC0 / 4096 + */ #define gte_rtps() __asm__ volatile ( \ "nop;" \ "nop;" \ "cop2 0x0180001;" ) +/** + * @brief Rotate, Translate and Perspective Triple (23 cycles) + * + * @details Performs rotation, translation and perspective calculation of three + * vertices at once. The equation performed is the same as gte_rtps(), only + * repeated once for each of the three vertices. The result of the first vertex + * is stored in GTE data register C2_SXY0, the second in C2_SXY1 and the third in + * C2_SXY2. + */ #define gte_rtpt() __asm__ volatile ( \ "nop;" \ "nop;" \ @@ -1325,16 +1447,53 @@ "nop;" \ "cop2 0x0138041C;" ) +/** + * @brief Normal clipping (8 cycles) + * + * @details Computes the sign of three screen coordinates (C2_SXY0-2) used for + * backface culling. If the value of C2_MAC0 is negative, the coordinates are + * inverted and thus the triangle is back facing. + * + * The following equation is performed when executing this GTE command: + * + * MAC0 = SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1 + */ #define gte_nclip() __asm__ volatile ( \ "nop;" \ "nop;" \ "cop2 0x01400006;" ) +/** + * @brief Average screen Z result (5 cycles) + * + * @details Averages the values of GTE registers C2_SZ1, C2_SZ2 and C2_SZ3, + * multiplies it by C2_ZSF3 and divides the result by 0x1000 before storing to + * C2_OTZ. Used to compute the ordering table depth level for a three-vertex + * primitive. 
+ * + * The following equation is performed when executing this GTE command: + * + * MAC0 = ZSF3 * (SZ1+SZ2+SZ3) + * OTZ = MAC0 / 4096 + */ #define gte_avsz3() __asm__ volatile ( \ "nop;" \ "nop;" \ "cop2 0x0158002D;" ) +/** + * @brief Average screen Z result (6 cycles) + * + * @details Averages the values of GTE registers C2_SZ1, C2_SZ2, C2_SZ3 and + * C2_SZ4, multiplies it by C2_ZSF4 and divides the result by 0x1000 before + * storing to C2_OTZ. Used to compute the ordering table depth level for a + * four-vertex primitive. + * + * The following equation is performed when executing this GTE command: + * + * MAC0 = ZSF4 * (SZ1+SZ2+SZ3+SZ4) + * OTZ = MAC0 / 4096 + */ #define gte_avsz4() __asm__ volatile ( \ "nop;" \ "nop;" \ diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h index 0460f20..8150703 100644 --- a/libpsn00b/include/psxcd.h +++ b/libpsn00b/include/psxcd.h @@ -3,174 +3,792 @@ * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed */ +/** + * @file psxcd.h + * @brief CD-ROM library header + * + * @details The PSn00bSDK CD-ROM library provides facilities for using the + * CD-ROM hardware of the PS1. Unlike the CD-ROM library of the official SDK, + * psxcd is immune to the 30 file and directory limit and is capable of parsing + * directories containing as many files as the ISO9660 file system can support, + * unless the records are too large to be loaded into memory. However, to + * maintain compatibility with the PS1 BIOS, the root directory must not exceed + * the 30 file limit and the entire disc should contain no more than 45 + * directories total. + * + * Whilst psxcd is not constrained by the 30 file per directory limit, it does + * not support Joliet CD-ROM extensions to support long file names. However, a + * library extension is considered for future development. + */ + #ifndef __PSXCD_H #define __PSXCD_H #include <stdint.h> -/* - * CD-ROM control commands - */ -#define CdlNop 0x01 /* a.k.a. 
Getstat */ -#define CdlSetloc 0x02 -#define CdlPlay 0x03 -#define CdlForward 0x04 -#define CdlBackward 0x05 -#define CdlReadN 0x06 -#define CdlStandby 0x07 /* a.k.a. MotorOn */ -#define CdlStop 0x08 -#define CdlPause 0x09 -#define CdlInit 0x0A -#define CdlMute 0x0B -#define CdlDemute 0x0C -#define CdlSetfilter 0x0D -#define CdlSetmode 0x0E -#define CdlGetparam 0x0F -#define CdlGetlocL 0x10 -#define CdlGetlocP 0x11 -#define CdlSetsession 0x12 /* ORIGINAL CODE */ -#define CdlGetTN 0x13 -#define CdlGetTD 0x14 -#define CdlSeekL 0x15 -#define CdlSeekP 0x16 -#define CdlTest 0x19 /* ORIGINAL CODE */ -#define CdlReadS 0x1B +/* Enum definitions */ -/* - * CD-ROM status bits - */ -#define CdlStatError 0x01 -#define CdlStatStandby 0x02 -#define CdlStatSeekError 0x04 -#define CdlStatIdError 0x08 /* ORIGINAL CODE */ -#define CdlStatShellOpen 0x10 -#define CdlStatRead 0x20 -#define CdlStatSeek 0x40 -#define CdlStatPlay 0x80 +typedef enum _CdlCommand { + CdlNop = 0x01, + CdlSetloc = 0x02, + CdlPlay = 0x03, + CdlForward = 0x04, + CdlBackward = 0x05, + CdlReadN = 0x06, + CdlStandby = 0x07, + CdlStop = 0x08, + CdlPause = 0x09, + CdlInit = 0x0a, + CdlMute = 0x0b, + CdlDemute = 0x0c, + CdlSetfilter = 0x0d, + CdlSetmode = 0x0e, + CdlGetparam = 0x0f, + CdlGetlocL = 0x10, + CdlGetlocP = 0x11, + CdlSetsession = 0x12, + CdlGetTN = 0x13, + CdlGetTD = 0x14, + CdlSeekL = 0x15, + CdlSeekP = 0x16, + CdlTest = 0x19, + CdlGetID = 0x1a, + CdlReadS = 0x1b, + CdlReset = 0x1c +} CdlCommand; -/* - * CD-ROM mode bits - */ -#define CdlModeDA 0x01 -#define CdlModeAP 0x02 -#define CdlModeRept 0x04 -#define CdlModeSF 0x08 -//#define CdlModeSize0 0x10 -//#define CdlModeSize1 0x20 -#define CdlModeIgnore 0x10 -#define CdlModeSize 0x20 -#define CdlModeRT 0x40 -#define CdlModeSpeed 0x80 +typedef enum _CdlStatFlag { + CdlStatError = 1 << 0, + CdlStatStandby = 1 << 1, + CdlStatSeekError = 1 << 2, + CdlStatIdError = 1 << 3, + CdlStatShellOpen = 1 << 4, + CdlStatRead = 1 << 5, + CdlStatSeek = 1 << 6, + CdlStatPlay 
= 1 << 7 +} CdlStatFlag; -/* - * CD-ROM interrupt result values +typedef enum _CdlModeFlag { + CdlModeDA = 1 << 0, + CdlModeAP = 1 << 1, + CdlModeRept = 1 << 2, + CdlModeSF = 1 << 3, + //CdlModeSize0 = 1 << 4, + //CdlModeSize1 = 1 << 5, + CdlModeIgnore = 1 << 4, + CdlModeSize = 1 << 5, + CdlModeRT = 1 << 6, + CdlModeSpeed = 1 << 7 +} CdlModeFlag; + +typedef enum _CdlIntrResult { + CdlNoIntr = 0, + CdlDataReady = 1, + CdlComplete = 2, + CdlAcknowledge = 3, + CdlDataEnd = 4, + CdlDiskError = 5 +} CdlIntrResult; + +typedef enum _CdlIsoError { + CdlIsoOkay = 0, + CdlIsoSeekError = 1, + CdlIsoReadError = 2, + CdlIsoInvalidFs = 3, + CdlIsoLidOpen = 4 +} CdlIsoError; + +/** + * @brief Translates a BCD format value to decimal + * + * @details Translates a specified value in BCD format (ie. 32/0x20 = 20) into + * a decimal integer, as the CD-ROM controller returns integer values only in + * BCD format. */ -#define CdlNoIntr 0x00 -#define CdlDataReady 0x01 -#define CdlComplete 0x02 -#define CdlAcknowledge 0x03 -#define CdlDataEnd 0x04 -#define CdlDiskError 0x05 +#define btoi(b) ((b)/16*10+(b)%16) -/* - * CD-ROM file system error codes (original) +/** + * @brief Translates a decimal value to BCD + * + * @details Translates a decimal integer into a BCD format value (ie. + * 20 = 32/0x20), as the CD-ROM controller only accepts values in BCD format. 
*/ -#define CdlIsoOkay 0x00 -#define CdlIsoSeekError 0x01 -#define CdlIsoReadError 0x02 -#define CdlIsoInvalidFs 0x03 -#define CdlIsoLidOpen 0x04 +#define itob(i) ((i)/10*16+(i)%10) -#define btoi(b) ((b)/16*10+(b)%16) /* Convert BCD value to integer */ -#define itob(i) ((i)/10*16+(i)%10) /* Convert integer to BCD value */ +/* Structure and type definitions */ -/* - * CD-ROM disc location struct - */ -typedef struct _CdlLOC -{ - uint8_t minute; - uint8_t second; - uint8_t sector; - uint8_t track; +/** + * @brief CD-ROM positional coordinates + * + * @details This structure is used to specify CD-ROM positional coordinates for + * CdlSetloc, CdlReadN and CdlReadS CD-ROM commands. Use CdIntToPos() to set + * parameters from a logical sector number. + * + * @see CdIntToPos(), CdControl() + */ +typedef struct _CdlLOC { + uint8_t minute; // Minutes (BCD) + uint8_t second; // Seconds (BCD) + uint8_t sector; // Sector or frame (BCD) + uint8_t track; // Track number (not used) } CdlLOC; -/* - * CD-ROM audio attenuation struct (volume) - */ -typedef struct _CdlATV -{ - uint8_t val0; /* L -> SPU L */ - uint8_t val1; /* L -> SPU R */ - uint8_t val2; /* R -> SPU R */ - uint8_t val3; /* R -> SPU L */ +/** + * @brief CD-ROM attenuation parameters + * + * @details This structure specifies parameters for the CD-ROM attenuation. + * Values must be of range 0 to 127. + * + * The CD-ROM attenuation can be used to set the CD-ROM audio output to mono + * (0x40, 0x40, 0x40, 0x40) or reversed stereo (0x00, 0x80, 0x00, 0x80). It can + * also be used to play one of two stereo channels to both speakers. + * + * The CD-ROM attenuation affects CD-DA and CD-XA audio. 
+ * + * @see CdMix() + */ +typedef struct _CdlATV { + uint8_t val0; // CD to SPU L-to-L volume + uint8_t val1; // CD to SPU L-to-R volume + uint8_t val2; // CD to SPU R-to-R volume + uint8_t val3; // CD to SPU R-to-L volume } CdlATV; -/* - * CD-ROM file information struct +/** + * @brief File entry structure + * + * @details Used to store basic information of a file such as logical block + * location and size. Currently, CdSearchFile() is the only function that uses + * this struct but it will be used in directory listing functions that may be + * implemented in the future. + * + * @see CdSearchFile() */ -typedef struct _CdlFILE -{ - CdlLOC pos; - uint32_t size; - char name[16]; +typedef struct _CdlFILE { + CdlLOC pos; // CD-ROM position coordinates of file + uint32_t size; // Size of file in bytes + char name[16]; // File name } CdlFILE; -typedef struct _CdlFILTER -{ - uint8_t file; - uint8_t chan; - uint16_t pad; +/** + * @brief Structure used to set CD-ROM XA filter + * + * @details This structure is used to specify stream filter parameters for + * CD-ROM XA audio streaming using the CdlSetfilter command. This only affects + * CD-ROM XA audio streaming. + * + * CD-ROM XA audio is normally comprised of up to 8 or more ADPCM compressed + * audio streams interleaved into one continuous stream of data. The data + * stream is normally read at 2x speed but only one of eight XA audio streams + * can be played at a time. The XA stream to play is specified by the + * CdlSetfilter command and this struct. + * + * The CD-ROM XA filter can be changed during CD-ROM XA audio playback with + * zero audio interruption. This can be used to achieve dynamic music effects + * by switching to alternate versions of a theme to fit specific scenes + * seamlessly. 
+ * + * @see CdControl() + */ +typedef struct _CdlFILTER { + uint8_t file; // File number to fetch (usually 1) + uint8_t chan; // Channel number (0 through 7) + uint16_t pad; // Padding } CdlFILTER; -/* Directory query context */ -typedef void* CdlDIR; +/** + * @brief CD-ROM directory query context handle + * + * @details Used to store a directory context created by CdOpenDir(). An open + * context can then be used with CdReadDir() and closed with CdCloseDir(). + * + * @see CdOpenDir() + */ +typedef void *CdlDIR; -/* Data callback */ typedef void (*CdlCB)(int, uint8_t *); +/* Public API */ + #ifdef __cplusplus extern "C" { #endif +/** + * @brief Initializes the CD-ROM library + * + * @details Initializes the CD-ROM subsystem which includes hooking the + * required IRQ handler, sets up internal variables of the CD-ROM library and + * attempts to initialize the CD-ROM controller. The mode parameter does + * nothing but may be used in future updates of this library. + * + * This function must be called after ResetGraph and before any other CD-ROM + * library function that interfaces with the CD-ROM controller. This function + * may not be called twice as it may cause instability or would just crash. + * + * @return Always 1. May change in the future. + */ int CdInit(void); +/** + * @brief Translates a logical sector number to CD-ROM positional coordinates + * + * @details This function translates the logical sector number from i to CD-ROM + * positional coordinates stored to a CdlLOC struct specified by p. The + * translation takes the lead-in offset into account so the first logical + * sector begins at 0 and the result will be offset by 150 sectors. + * + * @param i Logical sector number + * @param p Pointer to a CdlLOC structure + * @return Pointer to the specified CdlLOC struct plus 150 sectors. 
+ */ CdlLOC* CdIntToPos(int i, CdlLOC *p); -int CdPosToInt(CdlLOC *p); + +/** + * @brief Translates CD-ROM positional coordinates to a logical sector number + * + * @details Translates the CD-ROM position parameters from a CdlLOC struct + * specified by p to a logical sector number. The translation takes the lead-in + * offset of 150 sectors into account so the logical sector number returned + * would begin at zero. + * + * @param p Pointer to a CdlLOC struct + * @return Logical sector number minus the 150 sector lead-in. + */ +int CdPosToInt(const CdlLOC *p); + +/** + * @brief Gets CD-ROM TOC information + * + * @details Retrieves the track entries from a CD's table of contents (TOC). The + * function can return up to 99 track entries, which is the maximum number of + * audio tracks the CD standard supports. + * + * This function only retrieves the minutes and seconds of an audio track's + * position as the CD-ROM controller only returns the minutes and seconds of a + * track, which may result in the end of the previous track being played + * instead of the intended track to be played. This can be remedied by having a + * 2 second pregap on each audio track on your disc. + * + * @param toc Pointer to an array of CdlLOC entries + * @return Number of tracks on the disc, zero on error. + * + * @see CdControl() + */ int CdGetToc(CdlLOC *toc); +/** + * @brief Issues a control command to the CD-ROM controller + * + * @details Sends a CD-ROM command specified by com to the CD-ROM controller, + * waits for an acknowledge interrupt (very fast) then returns. It will also + * issue parameters from param to the CD-ROM controller if the command accepts + * parameters. Response data from the CD-ROM controller is stored to result on + * commands that produce response data. + * + * Because this function waits for an acknowledge interrupt from the CD-ROM + * controller, this function should not be used in a callback. Instead, use + * CdControlF().
+ * + * Commands that are blocking require the use of CdSync() to wait for the + * command to fully complete. + * + * CD-ROM control commands: + * + * | Command | Value | Parameter | Blocking | Description | + * | :------------ | ----: | :-------- | :------- | :----------------------------------------------------------------------------------------------------------------- | + * | CdlNop | 0x01 | | No | Also known as Getstat. Normally used to query the CD-ROM status, which is retrieved using CdStatus(). | + * | CdlSetloc | 0x02 | CdlLOC | No | Sets the seek target location, but does not seek. Actual seeking begins upon issuing a seek or read command. | + * | CdlPlay | 0x03 | uint8_t | No | Begins CD Audio playback. Parameter specifies an optional track number to play (some emulators do not support it). | + * | CdlForward | 0x04 | | No | Fast forward (CD Audio only), issue CdlPlay to stop fast forward. | + * | CdlBackward | 0x05 | | No | Rewind (CD Audio only), issue CdlPlay to stop rewind. | + * | CdlReadN | 0x06 | CdlLOC | No | Begin reading data sectors. Used in conjunction with CdReadCallback(). | + * | CdlStandby | 0x07 | | Yes | Also known as MotorOn, starts CD motor and remains idle. | + * | CdlStop | 0x08 | | Yes | Stops playback and the disc itself. | + * | CdlPause | 0x09 | | Yes | Stops playback or data reading, but leaves the disc on standby. | + * | CdlInit | 0x0A | | Yes | Initialize the CD-ROM controller. | + * | CdlMute | 0x0B | | No | Mutes CD audio (both DA and XA). | + * | CdlDemute | 0x0C | | No | Unmutes CD audio (both DA and XA). | + * | CdlSetfilter | 0x0D | CdlFILTER | No | Set XA audio filter. | + * | CdlSetmode | 0x0E | uint8_t | No | Set CD-ROM mode. | + * | CdlGetparam | 0x0F | | No | Returns current CD-ROM mode and file/channel filter settings. | + * | CdlGetlocL | 0x10 | | No | Returns current logical CD position, mode and XA filter parameters. 
| + * | CdlGetlocP | 0x11 | | No | Returns current physical CD position (using SubQ location data). | + * | CdlSetsession | 0x12 | uint8_t | Yes | Seek to specified session on a multi-session disc. | + * | CdlGetTN | 0x13 | | No | Get CD-ROM track count. | + * | CdlGetTD | 0x14 | uint8_t | No | Get specified track position. | + * | CdlSeekL | 0x15 | | Yes | Logical seek to target position, set by last CdlSetloc command. | + * | CdlSeekP | 0x16 | | Yes | Physical seek to target position, set by last CdlSetloc command. | + * | CdlTest | 0x19 | (varies) | Yes | Special test command not disclosed to official developers (see nocash documents for more info). | + * | CdlReadS | 0x1B | CdlLOC | No | Begin reading data sectors without pausing for error correction. | + * + * CD-ROM return values: + * + * | Command | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | :---------- | :---- | :---- | :----- | :--- | :------ | :------ | :--- | :----- | + * | CdlGetparam | stat | mode | 0 | file | channel | | | | + * | CdlGetlocL | amin | asec | aframe | mode | file | channel | sm | ci | + * | CdlGetlocP | track | index | min | sec | frame | amin | asec | aframe | + * | CdlGetTN | stat | first | last | | | | | | + * | CdlGetTD | stat | min | sec | | | | | | + * + * NOTE: Values are in BCD format. + * + * @param com Command value + * @param param Command parameters + * @param result Pointer of buffer to store result + * @return 1 if the command was issued successfully. Otherwise 0 if a + * previously issued command has not yet finished processing. + * + * @see CdSync(), CdControlF() + */ int CdControl(uint8_t com, const void *param, uint8_t *result); + +/** + * @brief Issues a CD-ROM command to the CD-ROM controller (blocking) + * + * @details This function works just like CdControl(), but blocks on blocking + * commands until said blocking command has completed. 
+ * + * Because this function waits for an acknowledge interrupt from the CD-ROM + * controller, this function should not be used in a callback. Use CdControlF() + * instead. + * + * @param com Command value + * @param param Command parameters + * @param result Pointer of buffer to store result + * @return 1 if the command was issued successfully. Otherwise 0 if a + * previously issued command has not yet finished processing. + * + * @see CdControl(), CdControlF() + */ int CdControlB(uint8_t com, const void *param, uint8_t *result); + +/** + * @brief Issues a CD-ROM command to the CD-ROM controller (does not block) + * + * @details This function works more or less the same as CdControl() but it + * does not block even for the acknowledge interrupt from the CD-ROM + * controller. Since this function is non-blocking it can be used in a callback + * function. + * + * When using this function in a callback, a maximum of two commands can be + * issued at once and only the first command can have parameters. This is + * because the CD-ROM controller can only queue up to two commands and the + * parameter FIFO is not cleared until the last command is acknowledged. But + * waiting for acknowledgment in a callback is not possible. + * + * @param com Command value + * @param param Command parameters + * @return 1 if the command was issued successfully. Otherwise 0 if a + * previously issued command has not yet finished processing. + * + * @see CdControl() + */ int CdControlF(uint8_t com, const void *param); + +/** + * @brief Waits for blocking command or blocking status + * + * @details If mode is zero the function blocks if a blocking command was + * issued earlier until the command has finished. If mode is non-zero the + * function returns a command status value. + * + * A buffer specified by result will be set with the most recent CD-ROM status + * value from the last command issued. 
+ * + * @param mode Mode + * @param result Pointer to store most recent CD-ROM status + * @return Command status is returned as one of the following definitions: + * + * | Definition | Description | + * | :----------- | :-------------------------- | + * | CdlComplete | Command completed. | + * | CdlNoIntr | No interrupt, command busy. | + * | CdlDiskError | CD-ROM error occurred. | + * + * @see CdControl() + */ int CdSync(int mode, uint8_t *result); + +/** + * @brief Sets a callback function + * + * @details Sets a callback with the specified function func. The callback is + * executed whenever a blocking command has completed. + * + * status is the CD-ROM status from the command that has completed processing. + * *result corresponds to the *result parameter on CdControl()/CdControlB() and + * returns the pointer to the buffer last set with that function. + * + * @param func Callback function + * @return Pointer to last callback function set, or NULL if none was set. + * + * @see CdControl, CdControlB, CdSync + */ uint32_t CdSyncCallback(CdlCB func); +/** + * @brief Sets a callback function + * + * @details Sets a callback with the specified function func. The callback is + * executed whenever there's an incoming data sector from the CD-ROM controller + * during CdlReadN or CdlReadS. The pending sector data can be retrieved using + * CdGetSector(). + * + * status is the CD-ROM status code from the last CD command that has finished + * processing. *result corresponds to the result pointer that was passed by the + * last CdControl()/CdControlB() call. + * + * This callback cannot be used in conjunction with CdRead() because it also + * uses this callback hook for its own internal use. The previously set + * callback is restored after read completion however. + * + * @param func Callback function + * @return Pointer to last callback function set, or NULL if none was set. 
+ * + * @see CdControl(), CdControlB(), CdGetSector() + */ int CdReadyCallback(CdlCB func); + +/** + * @brief Gets data from the CD-ROM sector buffer + * + * @details Reads sector data that is pending in the CD-ROM sector buffer and + * stores it to *madr. Uses DMA to transfer the sector data and blocks very + * briefly until said transfer completes. + * + * This function is intended to be called within a callback routine set using + * CdReadyCallback() to fetch read data sectors from the CD-ROM sector buffer. + * + * @param madr Pointer to memory buffer to store sector data + * @param size Number of 32-bit words to retrieve + * @return Always 1. + * + * @see CdReadyCallback() + */ int CdGetSector(void *madr, int size); + +/** + * @brief Gets data from the CD-ROM sector buffer (non-blocking) + * + * @details Reads sector data that is pending in the CD-ROM sector buffer and + * stores it to *madr. Uses DMA to transfer the sector data in the background + * while keeping the CPU running (one word is transferred every 16 CPU cycles). + * Note this is much slower than the blocking transfer performed by + * CdGetSector(). + * + * This function is intended to be called within a callback routine set using + * CdReadyCallback() to fetch read data sectors from the CD-ROM sector buffer. + * Since the transfer is asynchronous, CdDataSync() should be used to wait + * until the whole sector has been read. + * + * @param madr Pointer to memory buffer to store sector data + * @param size Number of 32-bit words to retrieve + * @return Always 1. + * + * @see CdReadyCallback(), CdDataSync() + */ int CdGetSector2(void *madr, int size); + +/** + * @brief Waits for sector transfer to finish + * + * @details If mode is zero the function blocks until any sector DMA transfer + * initiated by calling CdGetSector2() has finished. If mode is non-zero the + * function returns a boolean value representing whether a transfer is + * currently in progress.
+ * + * @param mode Mode + * @return 0 if the transfer has finished, 1 if it is still in progress or -1 + * in case of a timeout. + * + * @see CdGetSector2() + */ int CdDataSync(int mode); +/** + * @brief Locates a file in the CD-ROM file system + * + * @details Searches a file specified by filename by path and name in the + * CD-ROM file system and returns information of the file if found. The file + * information acquired will be stored to loc. + * + * Directories can be separated with slashes (/) or backslashes (\), a leading + * slash or backslash is optional but paths must be absolute. File version + * identifier (;1) at the end of the file name is also optional. File and + * directory names are case insensitive. + * + * The ISO9660 file system routines of libpsxcd do not support long file names + * currently. Only MS-DOS style 8.3 file names are supported; extensions such + * as Joliet and Rock Ridge are ignored. + * + * Upon calling this function for the first time, the ISO descriptor of the + * disc is read and the whole path table is cached into memory. Next the + * directory descriptor of the particular directory specified is loaded and + * cached to locate the file specified. The directory descriptor is kept in + * memory as long as the consecutive files to be searched are stored in the + * same directory until a file in another directory is to be searched. On which + * the directory descriptor is unloaded and a new directory descriptor is read + * from the disc and cached. Therefore, locating files in the same directory is + * faster as the relevant directory descriptor is already in memory and no disc + * reads are issued. + * + * As of Revision 66 of PSn00bSDK, media change is detected by checking the + * CD-ROM lid open status bit and attempting to acknowledge it with a CdlNop + * command, to discriminate the status from an open lid or changed disc. 
+ * + * @param loc Pointer to a CdlFILE struct to store file information + * @param filename Path and name of file to locate + * @return Pointer to the specified CdlFILE struct. Otherwise NULL is returned + * when the file is not found. + */ CdlFILE* CdSearchFile(CdlFILE *loc, const char *filename); +/** + * @brief Reads sectors from the CD-ROM + * + * @details Reads a number of sectors specified by sectors from the location set + * by the last CdlSetloc command, the read sectors are then stored to a buffer + * specified by buf. mode specifies the CD-ROM mode to use for the read + * operation. + * + * The size of the sector varies depending on the sector read mode specified by + * mode. For standard data sectors it is multiples of 2048 bytes. If + * CdlModeSize0 is specified the sector size is 2328 bytes which includes the + * whole sector minus sync, address, mode and sub header bytes. CdlModeSize1 + * makes the sector size 2340 which is the entire sector minus sync bytes. + * Ideally, CdlModeSpeed must be specified to read data sectors at double + * CD-ROM speed. + * + * This function blocks very briefly to issue the necessary commands to start + * CD-ROM reading. To determine if reading has completed use CdReadSync or + * CdReadCallback. + * + * @param sectors Number of sectors to read + * @param buf Pointer to buffer to store sectors read + * @param mode CD-ROM mode for reading + * @return Always returns 0 even on errors. This may change in future versions. + * + * @see CdReadSync(), CdReadCallback() + */ int CdRead(int sectors, uint32_t *buf, int mode); + +/** + * @brief Waits for CD-ROM read completion or returns read status + * + * @details This function works more or less like CdSync() but for CdRead(). If + * mode is zero the function blocks if CdRead() was issued earlier until + * reading has completed. If mode is non-zero the function completes + * immediately and returns number of sectors remaining.
+ * + * A buffer specified by result will be set with the most recent CD-ROM status + * value from the last read issued. + * + * @param mode Mode + * @param result Pointer to store most recent CD-ROM status + * @return Number of sectors remaining. If reading is completed, 0 is returned. + * On error, -1 is returned. + * + * @see CdRead() + */ int CdReadSync(int mode, uint8_t *result); + +/** + * @brief Sets a callback function for read completion + * + * @details Works much the same as CdSyncCallback() but for CdRead(). Sets a + * callback with the specified function func. The callback is executed whenever + * a read operation initiated by CdRead() has completed. + * + * status is the CD-ROM status from the command that has completed processing. + * *result points to a read result buffer. + * + * @param func Callback function + * @return Pointer to last callback function set, or NULL if none was set. + * + * @see CdRead() + */ uint32_t CdReadCallback(CdlCB func); +/** + * @brief Gets the most recent CD-ROM status + * + * @details Returns the CD-ROM status since the last command issued. The status + * value is updated by most CD-ROM commands. + * + * To get the current CD-ROM status you can issue CdlNop commands at regular + * intervals to update the CD-ROM status this function returns. + * + * @return CD-ROM status from last command issued. + * + * @see CdControl() + */ int CdStatus(void); + +/** + * @brief Gets the last CD-ROM mode + * + * @details Returns the CD-ROM mode last set when issuing a CdlSetmode command. + * The function returns instantly as it merely returns a value stored in an + * internal variable. + * + * Since the value is simply a copy of what was specified from the last + * CdlSetmode command, the mode value may become inaccurate if CdlInit or other + * commands that affect the CD-ROM mode have been issued previously. + * + * @return Last CD-ROM mode value.
+ */ int CdMode(void); -int CdMix(CdlATV *vol); +/** + * @brief Sets CD-ROM mixer or attenuation + * + * @details Sets the CD-ROM attenuation parameters from a CdlATV struct + * specified by vol. The CD-ROM attenuation settings are different from the SPU + * CD-ROM volume. + * + * Normally used to configure CD and XA audio playback for mono or reverse + * stereo output, though this was rarely used in practice. + * + * @param vol CD-ROM attenuation parameters + * @return Always 1. + */ +int CdMix(const CdlATV *vol); -/* ORIGINAL CODE */ +/** + * @brief Opens a directory on the CD-ROM file system + * + * @details Opens a directory on the CD-ROM file system to read the contents of + * a directory. + * + * A path name can use a slash (/) or backslash character (\) as the directory + * name separator. The path must be absolute and should begin with a slash or + * backslash. It should also not be prefixed with a device name (ie. + * \MYDIR1\MYDIR2 will work but not cdrom:\MYDIR1\MYDIR2). The file system + * routines in libpsxcd can query directory paths of up to 128 characters. + * + * The ISO9660 file system routines of libpsxcd do not support long file names + * currently. Only MS-DOS style 8.3 file names are supported; extensions such + * as Joliet and Rock Ridge are ignored. + * + * @param path Directory path to open + * @return Pointer of a CdlDIR context, NULL if an error occurred. + * + * @see CdReadDir(), CdCloseDir() + */ CdlDIR* CdOpenDir(const char* path); + +/** + * @brief Reads a directory entry from an open directory context + * + * @details Retrieves a file entry from an open directory context and stores it + * to a CdlFILE struct specified by file. Repeated calls of this function + * retrieves the next directory entry available until there are no more + * directory entries that follow. 
+ * + * @param dir Open directory context (from CdOpenDir()) + * @param file Pointer to a CdlFILE struct + * @return 1 if there are proceeding directory entries that follow, otherwise 0. + * + * @see CdOpenDir() + */ int CdReadDir(CdlDIR* dir, CdlFILE* file); + +/** + * @brief Closes a directory context created by CdOpenDir() + * + * @details Closes a directory query context created by CdOpenDir(). Behavior + * is undefined when closing a previously closed directory context. + * + * @param dir Directory context + * + * @see CdOpenDir() + */ void CdCloseDir(CdlDIR* dir); int CdGetVolumeLabel(char* label); +/** + * @brief Sets a callback function for auto pause + * + * @details The callback function specified in *func is executed when an auto + * pause interrupt occurs when the current CD-ROM mode is set with CdlModeAP. + * Auto pause interrupt occurs when CD Audio playback reaches the end of the + * audio track. Specifying 0 disables the callback. + * + * This can be used to easily loop CD audio automatically without requiring any + * intervention in your software loop. + * + * @param func Callback function + * @return Pointer to the last callback function set. Zero if no callback was + * set previously. + * + * @see CdControl() + */ int* CdAutoPauseCallback(void(*func)()); -int CdIsoError(); +/** + * @brief Retrieves CD-ROM ISO9660 parser status + * + * @details Returns the status of the file system parser from the last call of + * a file system related function, such as CdSearchFile(), CdGetVolumeLabel() + * and CdOpenDir(). Use this function to retrieve the exact error occurred when + * either of those functions fail. + * + * @return CD-ROM ISO9660 parser error code, as listed below: + * + * | Value | Description | + * | :-------------- | :-------------------------------------------------------------------------------------------------- | + * | CdlIsoOkay | File system parser okay. | + * | CdlIsoSeekError | Logical seek error occurred. 
May occur when attempting to query the filesystem on an audio-only CD. | + * | CdlIsoReadError | Read error occurred while reading the CD-ROM file system descriptor. | + * | CdlIsoInvalidFs | Disc does not contain a standard ISO9660 file system. | + * | CdlIsoLidOpen | Lid is open when attempting to parse the CD-ROM file system. | + */ +int CdIsoError(void); + +/** + * @brief Locates and parses the specified disc session + * + * @details Loads a session specified by session on a multi-session disc. Uses + * CdlSetsession to seek to the specified disc session, then scans the + * following 512 sectors for an ISO volume descriptor. If a volume descriptor + * is found the file system of that session is parsed and files inside the new + * session can be accessed using regular CD-ROM file and directory querying + * functions (CdSearchFile(), CdOpenDir(), CdReadDir(), CdCloseDir()). No + * special consideration is required when reading files from a new session. + * + * Loading a session takes 5-10 seconds to complete depending on the distance + * between the beginning of the disc and the start of the specified session. If + * the session specified does not exist, the disc will stop and would take + * 15-20 seconds to restart. The function does not support loading the most + * recent session of a disc automatically due to limitations of the CD-ROM + * hardware, so the user must be prompted to specify which session to load and + * to keep a record of the number of sessions that have been written to the + * disc. + * + * This function can also be used to update the Table of Contents (TOC) and + * reparse the file system regardless of the media change status by simply + * loading the first session. 
This is most useful for accessing files or audio + * tracks on a disc that was inserted using the swap trick method (it is + * recommended to stop the disc using CdlStop then restart it with CdlStandby + * after a button prompt for convenience, if you wish to implement this + * capability). Seeking to sessions other than the first session does not work + * with the swap trick however, so a chipped or unlockable console is desired + * for reading multi-session discs. + * + * NOTE: When the lid has been opened, the current CD-ROM session is reset to + * the first session on the disc. The console may produce an audible click + * sound when executing this function. This is normal, and the click sound is + * no different to the click heard on disc spin-up in older models of the + * console. + * + * @param session Session number (1 = first session) + * @return 0 on success. On failure due to open lid, bad session number or no + * volume descriptor found in specified session, returns -1 and return value of + * CdIsoError() is updated. + */ int CdLoadSession(int session); #ifdef __cplusplus diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h index fcfec06..ae4611e 100644 --- a/libpsn00b/include/psxetc.h +++ b/libpsn00b/include/psxetc.h @@ -3,20 +3,45 @@ * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed */ +/** + * @file psxetc.h + * @brief Interrupt management library header + * + * @details This library provides basic facilities (such as interrupt handling) + * used by all other PSn00bSDK libraries, as well as some additional + * functionality including a dynamic linker (whose API is however defined in a + * separate header). + */ + #ifndef __PSXETC_H #define __PSXETC_H -/* Macros */ +/* IRQ and DMA channel definitions */ -// This macro is used internally by PSn00bSDK to log debug messages to a buffer -// which is then printed to stdout when calling VSync(). -#ifdef NDEBUG -#define _sdk_log(...) -#define _sdk_dump_log() -#else -#define _sdk_log(...) 
_sdk_log_inner(__VA_ARGS__) -#define _sdk_dump_log() _sdk_dump_log_inner() -#endif +typedef enum _IRQ_Channel { + IRQ_VBLANK = 0, + IRQ_GPU = 1, + IRQ_CD = 2, + IRQ_DMA = 3, + IRQ_TIMER0 = 4, + IRQ_TIMER1 = 5, + IRQ_TIMER2 = 6, + IRQ_SIO0 = 7, + IRQ_SIO1 = 8, + IRQ_SPU = 9, + IRQ_GUN = 10, + IRQ_PIO = 10 +} IRQ_Channel; + +typedef enum _DMA_Channel { + DMA_MDEC_IN = 0, + DMA_MDEC_OUT = 1, + DMA_GPU = 2, + DMA_CD = 3, + DMA_SPU = 4, + DMA_PIO = 5, + DMA_OTC = 6 +} DMA_Channel; /* Public API */ @@ -24,16 +49,155 @@ extern "C" { #endif -void _sdk_log_inner(const char *fmt, ...); -void _sdk_dump_log_inner(void); +/** + * @brief Sets a callback for an interrupt. + * + * @details Registers a function to be called whenever the specified interrupt + * is fired. A previously registered callback can be removed by passing a null + * pointer instead. The IRQ controller is automatically configured to only + * enable interrupts for which a callback is registered. + * + * The callback will run in the exception handler's context, so it should be as + * fast as possible and shall not call any function that relies on interrupts + * being enabled. Each interrupt is acknowledged automatically before the + * callback is invoked. 
+ * + * The following interrupt channels are available (the ones already used + * internally by libraries shall not be overridden to avoid conflicts): + * + * | ID | Channel | Used by | + * | --: | :--------------- | :-------------------------------------- | + * | 0 | IRQ_VBLANK | psxgpu (use VSyncCallback() instead) | + * | 1 | IRQ_GPU | | + * | 2 | IRQ_CD | psxcd (use CdReadyCallback() instead) | + * | 3 | IRQ_DMA | psxetc (use DMACallback() instead) | + * | 4 | IRQ_TIMER0 | | + * | 5 | IRQ_TIMER1 | | + * | 6 | IRQ_TIMER2 | | + * | 7 | IRQ_SIO0 | | + * | 8 | IRQ_SIO1 | psxsio (use SIO_ReadCallback() instead) | + * | 9 | IRQ_SPU | | + * | 10 | IRQ_GUN, IRQ_PIO | | + * + * WARNING: even though interrupts are acknowledged automatically at the IRQ + * controller side, most IRQ channels (1, 2, 3, 7, 8, 9) additionally require + * acknowledging at the device side, which must be done by the callback. The + * exact way to acknowledge interrupts varies for each device, however it + * usually involves setting or clearing a bit in a register. See the nocash + * documentation for more details. + * + * @param irq + * @param func + * @return Previously set callback for the channel or NULL + */ +void *InterruptCallback(IRQ_Channel irq, void (*func)(void)); + +/** + * @brief Gets the callback for an interrupt. + * + * @details Returns a pointer to the callback currently registered to handle + * the specified interrupt, or a null pointer if none is set. + * + * @param irq + * @return Currently set callback for the channel or NULL + * + * @see InterruptCallback() + */ +void *GetInterruptCallback(IRQ_Channel irq); -void *InterruptCallback(int irq, void (*func)(void)); -void *GetInterruptCallback(int irq); -void *DMACallback(int dma, void (*func)(void)); -void *GetDMACallback(int dma); +/** + * @brief Sets a callback for a DMA interrupt. + * + * @details Registers a function to be called whenever the specified DMA + * channel goes from busy to idle, i.e. 
when a transfer is completed. A + * previously registered callback can be removed by passing a null pointer + * instead. The DMA controller is automatically configured to only enable DMA + * interrupts for which a callback is registered. + * + * This function uses InterruptCallback() to register a "master handler" for + * DMA interrupts, which then dispatches the IRQ to the appropriate callback + * depending on the channel that triggered it. + * + * The callback will run in the exception handler's context, so it should be as + * fast as possible and shall not call any function that relies on interrupts + * being enabled. Each interrupt is acknowledged automatically before the + * callback is invoked. + * + * The following DMA channels are available (the ones already used internally + * by libraries shall not be overridden to avoid conflicts): + * + * | ID | Channel | Used by | + * | --: | :----------- | :-------------------------------------- | + * | 0 | DMA_MDEC_IN | | + * | 1 | DMA_MDEC_OUT | | + * | 2 | DMA_GPU | psxgpu (use DrawSyncCallback() instead) | + * | 3 | DMA_CD | | + * | 4 | DMA_SPU | | + * | 5 | DMA_PIO | | + * | 6 | DMA_OTC | | + * + * @param dma + * @param func + * @return Previously set callback for the channel or NULL + */ +void *DMACallback(DMA_Channel dma, void (*func)(void)); +/** + * @brief Gets the callback for a DMA interrupt. + * + * @details Returns a pointer to the callback currently registered to handle + * the specified DMA interrupt, or a null pointer if none is set. + * + * @param dma + * @return Currently set callback for the channel or NULL + * + * @see DMACallback() + */ +void *GetDMACallback(DMA_Channel dma); + +/** + * @brief Initializes the interrupt dispatcher. + * + * @details Sets up the interrupt handling system, hooks the BIOS to dispatch + * interrupts to the library and clears all registered callbacks. This function + * must be called once at the beginning of the program, prior to registering + * any IRQ or DMA callbacks.
+ * + * ResetCallback() is called by psxgpu's ResetGraph(), so invoking it manually + * is usually not required. Calling ResetCallback() after ResetGraph() will + * actually result in improper initialization, as ResetGraph() registers + * several callbacks used internally by psxgpu. + * + * @return 0 or -1 if the dispatcher was already initialized + */ int ResetCallback(void); + +/** + * @brief Restores the interrupt dispatcher. + * + * @details Restores the IRQ and DMA controller state saved by StopCallback() + * and reinstalls BIOS hooks for interrupt dispatching. All callbacks + * previously set before StopCallback() was called are preserved. + * + * @see StopCallback() + */ void RestartCallback(void); + +/** + * @brief Temporarily disables the interrupt dispatcher. + * + * @details Saves the state of the IRQ and DMA controllers, then disables them + * and removes BIOS hooks. This function must be called prior to launching a + * new executable or DLL that calls ResetCallback() or ResetGraph(), or an + * executable not built with PSn00bSDK that uses its own interrupt handling + * subsystem (such as a retail game). The saved state can be restored after the + * executable returns using RestartCallback(). + * + * Note that interrupts are (obviously) disabled until RestartCallback() is + * called.
+ * + * @see RestartCallback() + */ void StopCallback(void); #ifdef __cplusplus diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index f2568b0..68e3bff 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -31,65 +31,65 @@ typedef enum _GPU_VideoMode { /* Structure macros */ #define setVector(v, _x, _y, _z) \ - (v)->vx = _x, (v)->vy = _y, (v)->vz = _z + (v)->vx = (_x), (v)->vy = (_y), (v)->vz = (_z) #define setRECT(v, _x, _y, _w, _h) \ - (v)->x = _x, (v)->y = _y, (v)->w = _w, (v)->h = _h + (v)->x = (_x), (v)->y = (_y), (v)->w = (_w), (v)->h = (_h) #define setTPage(p, tp, abr, x, y) ((p)->tpage = getTPage(tp, abr, x, y)) #define setClut(p, x, y) ((p)->clut = getClut(x, y)) -#define setRGB0(p, r, g, b) ((p)->r0 = r, (p)->g0 = g, (p)->b0 = b) -#define setRGB1(p, r, g, b) ((p)->r1 = r, (p)->g1 = g, (p)->b1 = b) -#define setRGB2(p, r, g, b) ((p)->r2 = r, (p)->g2 = g, (p)->b2 = b) -#define setRGB3(p, r, g, b) ((p)->r3 = r, (p)->g3 = g, (p)->b3 = b) +#define setRGB0(p, r, g, b) ((p)->r0 = (r), (p)->g0 = (g), (p)->b0 = (b)) +#define setRGB1(p, r, g, b) ((p)->r1 = (r), (p)->g1 = (g), (p)->b1 = (b)) +#define setRGB2(p, r, g, b) ((p)->r2 = (r), (p)->g2 = (g), (p)->b2 = (b)) +#define setRGB3(p, r, g, b) ((p)->r3 = (r), (p)->g3 = (g), (p)->b3 = (b)) #define setXY0(p, _x0, _y0) \ - (p)->x0 = _x0, (p)->y0 = _y0 + (p)->x0 = (_x0), (p)->y0 = (_y0) #define setXY2(p, _x0, _y0, _x1, _y1) \ - (p)->x0 = _x0, (p)->y0 = _y0, \ - (p)->x1 = _x1, (p)->y1 = _y1 + (p)->x0 = (_x0), (p)->y0 = (_y0), \ + (p)->x1 = (_x1), (p)->y1 = (_y1) #define setXY3(p, _x0, _y0, _x1, _y1, _x2, _y2) \ - (p)->x0 = _x0, (p)->y0 = _y0, \ - (p)->x1 = _x1, (p)->y1 = _y1, \ - (p)->x2 = _x2, (p)->y2 = _y2 + (p)->x0 = (_x0), (p)->y0 = (_y0), \ + (p)->x1 = (_x1), (p)->y1 = (_y1), \ + (p)->x2 = (_x2), (p)->y2 = (_y2) #define setXY4(p, _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3) \ - (p)->x0 = _x0, (p)->y0 = _y0, \ - (p)->x1 = _x1, (p)->y1 = _y1, \ - (p)->x2 = _x2, (p)->y2 = _y2, \ - 
(p)->x3 = _x3, (p)->y3 = _y3 + (p)->x0 = (_x0), (p)->y0 = (_y0), \ + (p)->x1 = (_x1), (p)->y1 = (_y1), \ + (p)->x2 = (_x2), (p)->y2 = (_y2), \ + (p)->x3 = (_x3), (p)->y3 = (_y3) #define setWH(p, _w, _h) \ - (p)->w = _w, (p)->h = _h + (p)->w = (_w), (p)->h = (_h) #define setXYWH(p, _x0, _y0, _w, _h) \ - (p)->x0 = _x0, (p)->y0 = _y0, \ - (p)->x1 = (_x0 + (_w)), (p)->y1 = _y0, \ - (p)->x2 = _x0, (p)->y2 = (_y0 + (_h)), \ - (p)->x3 = (_x0 + (_w)), (p)->y3 = (_y0 + (_h)) + (p)->x0 = (_x0), (p)->y0 = (_y0), \ + (p)->x1 = ((_x0) + (_w)), (p)->y1 = (_y0), \ + (p)->x2 = (_x0), (p)->y2 = ((_y0) + (_h)), \ + (p)->x3 = ((_x0) + (_w)), (p)->y3 = ((_y0) + (_h)) #define setUV0(p, _u0, _v0) \ - (p)->u0 = _u0, (p)->v0 = _v0 + (p)->u0 = (_u0), (p)->v0 = (_v0) #define setUV3(p, _u0, _v0, _u1, _v1, _u2, _v2) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = _u1, (p)->v1 = _v1, \ - (p)->u2 = _u2, (p)->v2 = _v2 + (p)->u0 = (_u0), (p)->v0 = (_v0), \ + (p)->u1 = (_u1), (p)->v1 = (_v1), \ + (p)->u2 = (_u2), (p)->v2 = (_v2) #define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = _u1, (p)->v1 = _v1, \ - (p)->u2 = _u2, (p)->v2 = _v2, \ - (p)->u3 = _u3, (p)->v3 = _v3 + (p)->u0 = (_u0), (p)->v0 = (_v0), \ + (p)->u1 = (_u1), (p)->v1 = (_v1), \ + (p)->u2 = (_u2), (p)->v2 = (_v2), \ + (p)->u3 = (_u3), (p)->v3 = (_v3) #define setUVWH(p, _u0, _v0, _w, _h) \ - (p)->u0 = _u0, (p)->v0 = _v0, \ - (p)->u1 = (_u0 + (_w)), (p)->v1 = _v0, \ - (p)->u2 = _u0, (p)->v2 = (_v0 + (_h)), \ - (p)->u3 = (_u0 + (_w)), (p)->v3 = (_v0 + (_h)) + (p)->u0 = (_u0), (p)->v0 = (_v0), \ + (p)->u1 = ((_u0) + (_w)), (p)->v1 = (_v0), \ + (p)->u2 = (_u0), (p)->v2 = ((_v0) + (_h)), \ + (p)->u3 = ((_u0) + (_w)), (p)->v3 = ((_v0) + (_h)) /* Primitive handling macros */ @@ -113,8 +113,13 @@ typedef enum _GPU_VideoMode { #define setShadeTex(p, tge) \ ((tge) ? 
(getcode(p) |= 1) : (getcode(p) &= ~1)) -#define getTPage(tp, abr, x, y) \ - ((((x) & 0x3ff) >> 6) | (((y) >> 8) << 4) | (((abr) & 3) << 5) | (((tp) & 3) << 7)) +#define getTPage(tp, abr, x, y) ( \ + (((x) / 64) & 15) | \ + ((((y) / 256) & 1) << 4) | \ + (((abr) & 3) << 5) | \ + (((tp) & 3) << 7) | \ + ((((y) / 512) & 1) << 11) \ +) #define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f)) @@ -151,24 +156,42 @@ typedef enum _GPU_VideoMode { #define setDrawTPage(p, dfe, dtd, tpage) \ setlen(p, 1), \ - (p)->code[0] = 0xe1000000 | tpage | (dfe << 10) | (dtd << 9) + (p)->code[0] = (0xe1000000 | \ + (tpage) | \ + ((dtd) << 9) | \ + ((dfe) << 10) \ + ) #define setDrawOffset(p, _x, _y) \ setlen(p, 1), \ - (p)->code[0] = 0xe5000000 | (_x & 0x3ff) | ((_y & 0x3ff) << 11) + (p)->code[0] = (0xe5000000 | \ + ((_x) % 1024) | \ + (((_y) % 1024) << 11) \ + ) #define setDrawMask(p, sb, mt) \ setlen(p, 1), \ - (p)->code[0] = 0xe6000000 | sb | (mt << 1) + (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1)) #define setDrawArea(p, r) \ setlen(p, 2), \ - (p)->code[0] = 0xe3000000 | ((r)->x & 0x3ff) | (((r)->y & 0x1ff) << 10), \ - (p)->code[1] = 0xe4000000 | (((r)->x + (r)->w - 1) & 0x3ff) | ((((r)->y + (r)->h - 1) & 0x1ff) << 10) + (p)->code[0] = (0xe3000000 | \ + ((r)->x % 1024) | \ + (((r)->y % 1024) << 10) \ + ), \ + (p)->code[1] = (0xe4000000 | \ + (((r)->x + (r)->w - 1) % 1024) | \ + ((((r)->y + (r)->h - 1) % 1024) << 10) \ + ) #define setTexWindow(p, r) \ setlen(p, 1), \ - (p)->code[0] = 0xe2000000 | ((r)->w & 0x1f) | (((r)->h & 0x1f) << 5) | (((r)->x & 0x1f) << 10) | (((r)->y & 0x1f) << 15) + (p)->code[0] = (0xe2000000 | \ + ((r)->w % 32) | \ + (((r)->h % 32) << 5) | \ + (((r)->x % 32) << 10) | \ + (((r)->y % 32) << 15) \ + ) /* Primitive structure definitions */ diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h index ddc988d..3c1d5a4 100644 --- a/libpsn00b/include/psxgte.h +++ b/libpsn00b/include/psxgte.h @@ -3,6 +3,17 @@ * (C) 2019-2022 Lameguy64 - MPL licensed 
*/ +/** + * @file psxgte.h + * @brief GTE library header + * + * @details The Geometry Transformation Engine, often referred to as the GTE, + * is most responsible for providing 3D capabilities to the PS1. This is + * effectively an all-integer math co-processor connected directly to the CPU, + * as it is accessed using COP2 and related MIPS instructions to access + * registers and issue commands to the GTE. + */ + #ifndef __PSXGTE_H #define __PSXGTE_H @@ -35,50 +46,216 @@ typedef struct _DVECTOR { /* Public API */ +#define csin(a) isin(a) +#define ccos(a) icos(a) +#define rsin(a) isin(a) +#define rcos(a) icos(a) + #ifdef __cplusplus extern "C" { #endif -void InitGeom(void); - -// Integer SIN/COS functions (4096 = 360 degrees) -// Does not use tables! +/** + * @brief Gets sine of angle (fixed-point) + * + * @details Returns the sine of angle a. + * + * @param a Angle in fixed-point format (4096 = 360 degrees) + * @return Sine value in 20.12 fixed-point format (4096 = 1.0). + */ int isin(int a); + +/** + * @brief Gets cosine of angle (fixed-point) + * + * @details Returns the cosine of angle a. + * + * @param a Angle in fixed-point format (4096 = 360 degrees) + * @return Cosine value in 20.12 fixed-point format (4096 = 1.0). + */ int icos(int a); -// Higher precision integer sin/cos functions (131072 = 360 degrees) -// Does not use tables! +/** + * @brief Gets sine of angle (fixed-point, high precision version) + * + * @details Returns the sine of angle a. + * + * @param a Angle in fixed-point format (131072 = 360 degrees) + * @return Sine value in 20.12 fixed-point format (4096 = 1.0). + */ int hisin(int a); + +/** + * @brief Gets cosine of angle (fixed-point, high precision version) + * + * @details Returns the cosine of angle a. + * + * @param a Angle in fixed-point format (131072 = 360 degrees) + * @return Cosine value in 20.12 fixed-point format (4096 = 1.0).
+ */ int hicos(int a); +/** + * @brief Initializes the GTE + * + * @details Resets, enables and initializes the GTE. Must be called prior to + * using any GTE function or macro. + */ +void InitGeom(void); + +/** + * @brief Gets square root (fixed-point) + * + * @details Returns the square root of value v. + * + * @param v Value in 20.12 fixed-point format (4096 = 1.0) + * @return Square root in 20.12 fixed-point format (4096 = 1.0). + */ +int SquareRoot12(int v); + +/** + * @brief Gets square root (integer) + * + * @details Returns the square root of value v. + * + * @param v Value in integer format + * @return Square root in integer format. + */ +int SquareRoot0(int v); + +/** + * @brief Pushes the current GTE matrix to the matrix stack + * + * @details Pushes the current GTE rotation matrix and translation vector to + * the internal matrix stack. Only one matrix stack level is currently + * supported. + */ void PushMatrix(void); + +/** + * @brief Pops the last matrix pushed into the matrix stack back to the GTE + * + * @details Pops the last inserted matrix in the internal matrix stack back to + * the GTE. Only one matrix stack level is currently supported. + */ void PopMatrix(void); +/** + * @brief Defines the rotation matrix of a MATRIX + * + * @details Defines the rotation matrix of m from rotation coordinates of r. + * The matrix is computed as follows: + * + * [ 1 0 0 ] [ cy 0 sy] [ cz -sz 0 ] + * [ 0 cx -sx] * [ 0 1 0 ] * [ sz cz 0 ] + * [ 0 sx cx] [-sy 0 cy] [ 0 0 1 ] + * + * where: + * + * sx = sin(r.x) sy = sin(r.y) sz = sin(r.z) + * cx = cos(r.x) cy = cos(r.y) cz = cos(r.z) + * + * @param r Rotation vector (input) + * @param m Matrix (output) + * @return Pointer to m. + * + * @see TransMatrix(), CompMatrixLV() + */ MATRIX *RotMatrix(SVECTOR *r, MATRIX *m); + +/** + * @brief Defines the rotation matrix of a MATRIX (high precision version) + * + * @details Defines the rotation matrix of m from rotation coordinates of r. 
+ * This function is a variant of RotMatrix() that uses hisin()/hicos() instead + * of isin()/icos(). + * + * See RotMatrix() for more details. + * + * @param r Rotation vector (input) + * @param m Matrix (output) + * @return Pointer to m. + * + * @see RotMatrix() + */ MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m); +/** + * @brief Defines the translation vector of a MATRIX + * + * @details Simply sets the translation vector of MATRIX m. To perform + * accumulative translation operations, see CompMatrixLV(). + * + * @param m Matrix (output) + * @param r Translation vector (input) + * @return Pointer to m. + * + * @see RotMatrix(), CompMatrixLV() + */ MATRIX *TransMatrix(MATRIX *m, VECTOR *r); + MATRIX *ScaleMatrix(MATRIX *m, VECTOR *s); MATRIX *ScaleMatrixL(MATRIX *m, VECTOR *s); MATRIX *MulMatrix(MATRIX *m0, MATRIX *m1); MATRIX *MulMatrix0(MATRIX *m0, MATRIX *m1, MATRIX *m2); +/** + * @brief Composite coordinate matrix transform + * + * @details Performs vector multiply by matrix with vector addition from v0 to + * the translation vector of v1. Then, multiplies the rotation matrix of v0 by + * the rotation matrix of v1. The result of both operations is then stored in + * v2. Replaces the current GTE rotation matrix and translation vector with v0. + * + * Often used to adjust the matrix (includes rotation and translation) of an + * object relative to a world matrix, so the object would render relative to + * the world matrix. + * + * @param v0 Input matrix A + * @param v1 Input matrix B + * @param v2 Output matrix + * @return Pointer to v2. + */ MATRIX *CompMatrixLV(MATRIX *v0, MATRIX *v1, MATRIX *v2); + +/** + * @brief Multiplies a vector by a matrix + * + * @details Multiplies vector v0 with matrix m, result is stored to v1. + * Replaces the current GTE rotation matrix and translation vector with m. + * + * Often used to calculate a translation vector in relation to the rotation + * matrix for first person or vector camera perspectives.
+ * + * @param m Input matrix + * @param v0 Input vector + * @param v1 Output vector + * @return Pointer to v1. + */ VECTOR *ApplyMatrixLV(MATRIX *m, VECTOR *v0, VECTOR *v1); +/** + * @brief Normalizes a VECTOR into SVECTOR format + * + * Normalizes a 32-bit vector into a 16-bit vector in 4.12 fixed-point format + * (4096 = 1.0, 2048 = 0.5). + * + * @param v0 Input (raw) 32-bit vector + * @param v1 Output (normalized) 16-bit vector + */ void VectorNormalS(VECTOR *v0, SVECTOR *v1); +/** + * @brief Calculates the square of a VECTOR + * + * @details Calculates the square of vector v0 and stores the result to v1. + * + * @param v0 Input vector + * @param v1 Output vector + */ void Square0(VECTOR *v0, VECTOR *v1); -int SquareRoot12(int v); -int SquareRoot0(int v); - -#define csin(a) isin(a) -#define ccos(a) icos(a) -#define rsin(a) isin(a) -#define rcos(a) icos(a) - #ifdef __cplusplus } #endif diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index 2106a53..dc1d52c 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -3,6 +3,23 @@ * (C) 2022 spicyjpeg - MPL licensed */ +/** + * @file psxpress.h + * @brief MDEC library header + * + * @details This is a fully original reimplementation of the official SDK's + * "data compression" library. This library is made up of two parts, the MDEC + * API and functions to decompress Huffman-encoded bitstreams (.BS files, or + * frames in .STR files) into data to be fed to the MDEC. Two different + * implementations of the latter are provided, one using the GTE and scratchpad + * region and an older one using a large lookup table in main RAM. + * + * FMV playback is not part of this library per se, but can be implemented using + * the APIs defined here alongside some code to stream data from the CD drive. + * + * Currently only version 1 and 2 .BS files are supported.
+ */ + #ifndef __PSXPRESS_H #define __PSXPRESS_H @@ -70,7 +87,9 @@ extern "C" { #endif /** - * @brief Resets the MDEC and aborts any MDEC DMA transfers. If mode = 0, the + * @brief Resets and optionally initializes the MDEC. + * + * @details Resets the MDEC and aborts any MDEC DMA transfers. If mode = 0, the * default IDCT matrix and quantization tables are also loaded and the MDEC is * put into color output mode, discarding any custom environment previously set * with DecDCTPutEnv(). @@ -78,15 +97,19 @@ extern "C" { * DecDCTReset(0) must be called at least once prior to using the MDEC. * * @param mode + * + * @see DecDCTPutEnv() */ void DecDCTReset(int mode); /** - * @brief Uploads the specified decoding environment's quantization tables and - * IDCT matrix to the MDEC, or restores the default tables if a null pointer is - * passed. Calling this function is normally not required as DecDCTReset(0) - * initializes the MDEC with the default tables, but it may be useful for e.g. - * decoding JPEG or a format with custom quantization tables. + * @brief Loads default or custom quantization and IDCT tables into the MDEC. + * + * @details Uploads the specified decoding environment's quantization tables + * and IDCT matrix to the MDEC, or restores the default tables if a null + * pointer is passed. Calling this function is normally not required as + * DecDCTReset(0) initializes the MDEC with the default tables, but it may be + * useful for e.g. decoding JPEG or a format with custom quantization tables. * * The second argument, not present in the official SDK, specifies whether the * MDEC shall be put into color (0) or monochrome (1) output mode. In @@ -103,12 +126,16 @@ void DecDCTReset(int mode); void DecDCTPutEnv(const DECDCTENV *env, int mono); /** - * @brief Sets up the MDEC to start fetching and decoding the given buffer. 
- * This function is meant to be used with buffers generated by DecDCTvlc(): the - * first 32-bit word of the buffer is initially copied to the MDEC0 register, - * then all subsequent data is read in 128-byte (32-word) chunks. The length of - * the stream (in 32-bit units, minus the first word) is encoded by DecDCTvlc() - * in the lower 16 bits of the first word. + * @brief Feeds the MDEC with a run-length code buffer from the specified + * location. + * + * @details Sets up the MDEC to start fetching and decoding the given buffer. + * This function is meant to be used with buffers generated by DecDCTvlc(), + * DecDCTvlc2() or their variants: the first 32-bit word of the buffer is + * initially copied to the MDEC0 register, then all subsequent data is read in + * 128-byte (32-word) chunks. The length of the stream (in 32-bit units, minus + * the first word) is encoded by DecDCTvlc() in the lower 16 bits of the first + * word. * * The mode argument optionally specifies the output color depth (0 for 16bpp, * 1 for 24bpp) if not already set in the first word. Passing -1 will result in @@ -117,11 +144,15 @@ void DecDCTPutEnv(const DECDCTENV *env, int mono); * * @param data * @param mode DECDCT_MODE_* or -1 + * + * @see DecDCTinRaw(), DecDCTinSync() */ void DecDCTin(const uint32_t *data, int mode); /** - * @brief Configures the MDEC to automatically fetch data (the input stream, + * @brief Feeds the MDEC with raw data from the specified location. + * + * @details Configures the MDEC to automatically fetch data (the input stream, * IDCT matrix or quantization tables) in 128-byte (32-word) chunks from the * specified address in main RAM. 
The transfer is stopped, and any callback * registered with DMACallback(0) is fired, once a certain number of 32-bit @@ -135,13 +166,17 @@ void DecDCTin(const uint32_t *data, int mode); * * @param data * @param length Number of 32-bit words to read (must be multiple of 32) + * + * @see DecDCTin(), DecDCTinSync() */ void DecDCTinRaw(const uint32_t *data, size_t length); /** - * @brief Waits for the MDEC to finish decoding the input stream (if mode = 0) - * or returns whether it is busy (if mode = 1). MDEC commands can be issued - * only when the MDEC isn't busy. + * @brief Waits for an MDEC input transfer to finish or returns its status. + * + * @details Waits for the MDEC to finish decoding the input stream (if + * mode = 0) or returns whether it is busy (if mode = 1). MDEC commands can be + * issued only when the MDEC isn't busy. * * WARNING: DecDCTinSync(0) might time out and return -1 if the MDEC can't * output decoded data, e.g. if the length passed DecDCTout() was too small and @@ -155,7 +190,9 @@ void DecDCTinRaw(const uint32_t *data, size_t length); int DecDCTinSync(int mode); /** - * @brief Configures the MDEC to automatically transfer decoded image data in + * @brief Writes image data decoded by the MDEC to the specified location. + * + * @details Configures the MDEC to automatically transfer decoded image data in * 128-byte (32-word) chunks to the specified address in main RAM. MDEC * operation is paused once a certain number of 32-bit words have been output * and can be resumed by calling DecDCTout() again: the MDEC will continue @@ -168,12 +205,16 @@ int DecDCTinSync(int mode); * * @param data * @param length Number of 32-bit words to output (must be multiple of 32) + * + * @see DecDCToutSync() */ void DecDCTout(uint32_t *data, size_t length); /** - * @brief Waits until the transfer set up by DecDCTout() finishes (if mode = 0) - * or returns whether it is still in progress (if mode = 1). 
+ * @brief Waits for an MDEC output transfer to finish or returns its status. + * + * @details Waits until the transfer set up by DecDCTout() finishes (if + * mode = 0) or returns whether it is still in progress (if mode = 1). * * WARNING: DecDCToutSync(0) might time out and return -1 if the MDEC is unable * to consume enough input data in order to produce the desired amount of data. @@ -186,7 +227,9 @@ void DecDCTout(uint32_t *data, size_t length); int DecDCToutSync(int mode); /** - * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * @brief Decompresses or begins decompressing a .BS file into MDEC codes. + * + * @details Begins decompressing the contents of a .BS file (or of a single STR * frame) into a buffer that can be passed to DecDCTin(). This function uses a * small (<1 KB) lookup table combined with the GTE to accelerate the process; * performance is roughly on par with DecDCTvlcStart2() if the lookup table @@ -212,11 +255,15 @@ int DecDCToutSync(int mode); * @param max_size Maximum number of 32-bit words to output * @param bs * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcContinue(), DecDCTvlcCopyTable() */ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); /** - * @brief Resumes the decompression process started by DecDCTvlcStart(). The + * @brief Resumes or finishes decompressing a .BS file into MDEC codes. + * + * @details Resumes the decompression process started by DecDCTvlcStart(). 
The * state of the decompressor is contained entirely in the VLC_Context structure * so an arbitrary number of bitstreams can be decoded concurrently (although * the limited CPU power makes it impractical to do so) by keeping a separate @@ -236,14 +283,18 @@ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint3 * @param buf * @param max_size Maximum number of 32-bit words to output * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcStart() */ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); /** - * A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for compatibility - * with the official SDK. This function uses an internal context; additionally, - * the maximum output buffer size is not passed as an argument but is instead - * set by calling DecDCTvlcSize(). + * @brief Decompresses a .BS file into MDEC codes. + * + * @details A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for + * compatibility with the official SDK. This function uses an internal context; + * additionally, the maximum output buffer size is not passed as an argument + * but is instead set by calling DecDCTvlcSize(). * * This function behaves identically to DecDCTvlcContinue() if bs = 0 and * DecDCTvlcStart() otherwise. @@ -257,21 +308,34 @@ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); * @param bs Pointer to bitstream data or 0 to resume decoding * @param buf * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcSize(), DecDCTvlcCopyTable() */ int DecDCTvlc(const uint32_t *bs, uint32_t *buf); /** - * @brief Sets the maximum number of 32-bit words that a single call to + * @brief Sets the maximum amount of data to be decompressed. + * + * @details Sets the maximum number of 32-bit words that a single call to * DecDCTvlc() will output. If size = 0, the entire frame will always be * decoded in one shot. 
* + * Note that DecDCTvlcStart() and DecDCTvlcContinue() do not use the value set + * by this function and instead expect the maximum size to be passed as an + * argument. + * * @param size Maximum number of 32-bit words to output - * @return Previously set value + * @return Previously set value + * + * @see DecDCTvlc() */ size_t DecDCTvlcSize(size_t size); /** - * @brief Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), + * @brief Moves the lookup table used by the .BS decompressor to the scratchpad + * region. + * + * @details Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified * address. A copy of this table is always present in main RAM, however this * function can be used to copy it to the scratchpad region to boost @@ -287,7 +351,10 @@ size_t DecDCTvlcSize(size_t size); void DecDCTvlcCopyTable(DECDCTTAB *addr); /** - * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * @brief Decompresses or begins decompressing a .BS file into MDEC codes + * (alternate implementation). + * + * @details Begins decompressing the contents of a .BS file (or of a single STR * frame) into a buffer that can be passed to DecDCTin(). This function uses a * large (34 KB) lookup table that must be loaded into main RAM beforehand by * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad. @@ -311,11 +378,16 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr); * @param max_size Maximum number of 32-bit words to output * @param bs * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcContinue2(), DecDCTvlcBuild() */ int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); /** - * @brief Resumes the decompression process started by DecDCTvlcStart2(). The + * @brief Resumes or finishes decompressing a .BS file into MDEC codes + * (alternate implementation). 
+ * + * @details Resumes the decompression process started by DecDCTvlcStart2(). The * state of the decompressor is contained entirely in the VLC_Context structure * so an arbitrary number of bitstreams can be decoded concurrently (although * the limited CPU power makes it impractical to do so) by keeping a separate @@ -333,11 +405,15 @@ int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint * @param buf * @param max_size Maximum number of 32-bit words to output * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcStart2() */ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); /** - * A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for + * @brief Decompresses a .BS file into MDEC codes (alternate implementation). + * + * @details A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for * compatibility with the official SDK. This function uses an internal context; * additionally, the maximum output buffer size is not passed as an argument * but is instead set by calling DecDCTvlcSize2(). @@ -353,21 +429,35 @@ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); * @param buf * @param table Pointer to decompressed table or 0 to use last table used * @return 0, 1 if more data needs to be output or -1 in case of failure + * + * @see DecDCTvlcSize2(), DecDCTvlcBuild() */ int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table); /** - * @brief Sets the maximum number of 32-bit words that a single call to + * @brief Sets the maximum amount of data to be decompressed (alternate + * implementation). + * + * @details Sets the maximum number of 32-bit words that a single call to * DecDCTvlc2() will output. If size = 0, the entire frame will always be * decoded in one shot. 
* + * Note that DecDCTvlcStart2() and DecDCTvlcContinue2() do not use the value + * set by this function and instead expect the maximum size to be passed as an + * argument. + * * @param size Maximum number of 32-bit words to output - * @return Previously set value + * @return Previously set value + * + * @see DecDCTvlc2() */ size_t DecDCTvlcSize2(size_t size); /** - * @brief Generates the lookup table required by DecDCTvlcStart2(), + * @brief Generates the lookup table used by the alternate implementation of + * the .BS decompressor. + * + * @details Generates the lookup table required by DecDCTvlcStart2(), * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the * specified buffer. Since the table is relatively large (34 KB), it is * recommended to only generate it in a dynamically-allocated buffer when diff --git a/libpsn00b/include/psxsio.h b/libpsn00b/include/psxsio.h index 3f571d7..449e43a 100644 --- a/libpsn00b/include/psxsio.h +++ b/libpsn00b/include/psxsio.h @@ -1,64 +1,281 @@ +/* + * PSn00bSDK serial port library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + */ + +/** + * @file psxsio.h + * @brief Serial port library header + * + * @details This library provides a custom API to access the PS1's serial port. + * Sending and receiving data is done fully asynchronously using a pair of + * 128-byte FIFOs kept in main RAM, with optional hardware flow control. More + * advanced use cases such as custom callbacks for each byte received are also + * supported. + * + * A BIOS TTY driver to redirect stdin/stdout (including BIOS messages as well + * as PSn00bSDK's own debug logging) to the serial port is also provided for + * debugging purposes. 
+ */ + #ifndef __PSXSIO_H #define __PSXSIO_H -#define SR_TXRDY 0x1 -#define SR_RXRDY 0x2 -#define SR_TXU 0x4 -#define SR_PERROR 0x8 -#define SR_OE 0x10 -#define SR_FE 0x20 -#define SR_DSR 0x80 -#define SR_CTS 0x100 -#define SR_IRQ 0x200 - -#define SIO_TXRDY 0x1 -#define SIO_RXRDY 0x2 -#define SIO_TXU 0x4 -#define SIO_PERROR 0x8 -#define SIO_OE 0x10 -#define SIO_FE 0x20 -#define SIO_DSR 0x80 -#define SIO_CTS 0x100 -#define SIO_IRQ 0x200 - -#define MR_CHLEN_5 0x00 -#define MR_CHLEN_6 0x04 -#define MR_CHLEN_7 0x08 -#define MR_CHLEN_8 0x0C -#define MR_PEN 0x10 -#define MR_P_EVEN 0x30 -#define MR_SB_01 0x40 -#define MR_SB_10 0x80 -#define MR_SB_11 0xc0 - -#define CR_TXEN 0x1 -#define CR_DTR 0x2 -#define CR_RXEN 0x4 -#define CR_BRK 0x8 -#define CR_INTRST 0x10 -#define CR_RTS 0x20 -#define CR_ERRRST 0x40 -#define CR_BUFSZ_1 0x00 -#define CR_BUFSZ_2 0x100 -#define CR_BUFSZ_4 0x200 -#define CR_BUFSZ_8 0x300 -#define CR_TXIEN 0x400 -#define CR_RXIEN 0x800 -#define CR_DSRIEN 0x1000 +#include <stdint.h> + +/* Enum and register definitions */ + +typedef enum _SIO_StatusRegFlag { + SR_TXRDY = 1 << 0, + SR_RXRDY = 1 << 1, + SR_TXU = 1 << 2, + SR_PERROR = 1 << 3, + SR_OE = 1 << 4, + SR_FE = 1 << 5, + SR_DSR = 1 << 7, + SR_CTS = 1 << 8, + SR_IRQ = 1 << 9 +} SIO_StatusRegFlag; + +typedef enum _SIO_ModeRegFlag { + MR_BR_1 = 1 << 0, + MR_BR_16 = 2 << 0, + MR_BR_64 = 3 << 0, + MR_CHLEN_5 = 0 << 2, + MR_CHLEN_6 = 1 << 2, + MR_CHLEN_7 = 2 << 2, + MR_CHLEN_8 = 3 << 2, + MR_PEN = 1 << 4, + MR_P_EVEN = 1 << 5, + MR_SB_01 = 1 << 6, + MR_SB_10 = 2 << 6, + MR_SB_11 = 3 << 6 +} SIO_ModeRegFlag; + +typedef enum _SIO_ControlRegFlag { + CR_TXEN = 1 << 0, + CR_DTR = 1 << 1, + CR_RXEN = 1 << 2, + CR_BRK = 1 << 3, + CR_INTRST = 1 << 4, + CR_RTS = 1 << 5, + CR_ERRRST = 1 << 6, + CR_BUFSZ_1 = 0 << 8, + CR_BUFSZ_2 = 1 << 8, + CR_BUFSZ_4 = 2 << 8, + CR_BUFSZ_8 = 3 << 8, + CR_TXIEN = 1 << 10, + CR_RXIEN = 1 << 11, + CR_DSRIEN = 1 << 12 +} SIO_ControlRegFlag; + +typedef enum _SIO_FlowControl { + 
SIO_FC_NONE = 0, + SIO_FC_RTS_CTS = 1 + //SIO_FC_DTR_DSR = 2 +} SIO_FlowControl; + +/* Public API */ #ifdef __cplusplus extern "C" { #endif -int _sio_control(int cmd, int arg, int param); -void AddSIO(int baud); -void DelSIO(void); +/** + * @brief Initializes the serial port driver. + * + * @details Resets the serial port, initializes the library's internal ring + * buffers and installs a serial IRQ handler. The given mode value (normally + * MR_CHLEN_8|MR_SB_01 for 8 data bits, 1 stop bit and no parity) is copied to + * the SIO_MODE register. Flow control is disabled by default (see + * SIO_SetFlowControl() for more details). + * + * This function must be called prior to using SIO_ReadByte(), SIO_ReadSync(), + * SIO_WriteByte(), SIO_WriteSync() or SIO_SetFlowControl(), and must not be + * called from an IRQ callback. + * + * @param baud Baud rate in bits per second + * @param mode Binary OR of SIO_ModeRegFlag enum members + * + * @see SIO_Quit() + */ +void SIO_Init(int baud, uint16_t mode); + +/** + * @brief Uninstalls the serial port driver. + * + * @details Resets the serial port and removes the IRQ callback added by + * SIO_Init(), restoring any previously installed handler if any. If SIO_Init() + * was previously invoked, calling SIO_Quit() before accessing serial port + * registers manually is highly recommended. + * + * @see SIO_Init() + */ +void SIO_Quit(void); -void *Sio1Callback(void (*func)(void)); +/** + * @brief Sets the flow control mode. + * + * @details Changes the serial port's flow control mode. The following modes + * are available: + * + * - SIO_FC_NONE (default): do not assert RTS or DTR automatically and ignore + * DSR. Note that the hardware will still wait for CTS to be asserted before + * sending any data; there is no way to disable this behavior. + * - SIO_FC_RTS_CTS: assert RTS when the RX buffer is full and wait for CTS to + * be asserted before sending any data. 
+ * + * The flow control mode shall only be changed while the TX and RX buffers are + * empty. + * + * @param mode + */ +void SIO_SetFlowControl(SIO_FlowControl mode); -// ORIGINAL -void WaitSIO(void); -int kbhit(); +/** + * @brief Reads a byte from the RX buffer (blocking). + * + * @details Reads a byte from the RX buffer. If the buffer is empty, blocks + * indefinitely until a byte is received. + * + * WARNING: this function shall not be used in a critical section or IRQ + * callback as no data is sent or received while interrupts are disabled. It + * also lacks a timeout, so consider polling for new data using SIO_ReadByte2() + * or SIO_ReadSync(1) and implementing a timeout instead. + * + * @return Received byte + * + * @see SIO_ReadByte2(), SIO_ReadSync() + */ +int SIO_ReadByte(void); + +/** + * @brief Reads a byte from the RX buffer (non-blocking). + * + * @details Non-blocking variant of SIO_ReadByte(). Reads a byte from the RX + * buffer or returns -1 if the buffer is empty. Unlike SIO_ReadByte() this + * function is safe to use in a critical section (although no data will be + * received while interrupts are disabled). + * + * @return Received byte, -1 if no data is available + * + * @see SIO_ReadByte() + */ +int SIO_ReadByte2(void); + +/** + * @brief Waits for a byte to be received or returns the RX buffer's length. + * + * @details Waits for at least one byte to be available in the RX buffer (if + * mode = 0) or returns the length of the RX buffer (if mode = 1). + * + * WARNING: this function shall not be used in a critical section or IRQ + * callback as no data is sent or received while interrupts are disabled. Using + * SIO_ReadSync(0) is additionally discouraged as it lacks a timeout; consider + * polling for new data using SIO_ReadByte2() or SIO_ReadSync(1) and + * implementing a timeout instead. + * + * @param mode + * @return Number of RX bytes in the buffer + */ +int SIO_ReadSync(int mode); + +/** + * @brief Sets a callback for received bytes. 
+ * + * @details Registers a function to be called whenever a byte is received. The + * received byte is passed as an argument to the callback, which shall then + * return a zero value to also store the byte in the RX buffer or a non-zero + * value to drop it. This can be used to e.g. filter or validate incoming data, + * or to bypass the library's RX buffer for custom buffering purposes. + * + * The callback will run in the exception handler's context, so it should be as + * fast as possible and shall not call any function that relies on interrupts + * being enabled. + * + * @param func + * @return Previously set callback or NULL + */ +void *SIO_ReadCallback(int (*func)(uint8_t)); + +/** + * @brief Writes a byte to the TX buffer (blocking). + * + * @details Sends the given byte, or appends it to the TX buffer if the serial + * port is busy. If the buffer is full, blocks until the byte can be stored in + * the buffer (with a timeout). + * + * WARNING: this function shall not be used in a critical section or IRQ + * callback as no data is sent or received while interrupts are disabled. + * + * @param value + * @return Number of TX bytes previously pending, -1 in case of a timeout + * + * @see SIO_WriteByte2(), SIO_WriteSync() + */ +int SIO_WriteByte(uint8_t value); + +/** + * @brief Writes a byte to the TX buffer (non-blocking). + * + * @details Non-blocking variant of SIO_WriteByte(). Sends the given byte, or + * appends it to the TX buffer if the serial port is busy. If the buffer is + * full, returns -1 without actually sending the byte. Unlike SIO_WriteByte() + * this function is safe to use in a critical section (although no data will be + * sent while interrupts are disabled). + * + * @param value + * @return Number of TX bytes previously pending, -1 in case of failure + * + * @see SIO_WriteByte() + */ +int SIO_WriteByte2(uint8_t value); + +/** + * @brief Waits for all bytes to be sent or returns the TX buffer's length. 
+ * + * @details Waits for all bytes pending in the TX buffer to be sent (if + * mode = 0) or returns the length of the TX buffer (if mode = 1). + * + * WARNING: this function shall not be used in a critical section or IRQ + * callback as no data is sent or received while interrupts are disabled. + * + * @param mode + * @return Number of TX bytes pending, -1 in case of a timeout (mode = 0) + */ +int SIO_WriteSync(int mode); + +/** + * @brief Installs the serial port TTY driver. + * + * @details Installs a BIOS file driver to redirect TTY stdin/stdout to the + * serial port. Uses SIO_Init() internally. The port is configured for 8 data + * bits, 1 stop bit and no parity. + * + * This function shall only be used for debugging purposes. Picking a high baud + * rate is recommended as all TTY writes are blocking and bypass the TX buffer. + * + * NOTE: some executable loaders, such as Unirom and Caetla, already replace + * the BIOS TTY driver with a custom one. Calling AddSIO() will break the + * built-in TTY functionality of these loaders. + * + * @param baud Baud rate in bits per second + * + * @see DelSIO() + */ +void AddSIO(int baud); + +/** + * @brief Removes the serial port TTY driver. + * + * @details Uninstalls the BIOS driver installed by AddSIO() and attempts to + * restore the default "dummy" TTY driver. Uses SIO_Quit() internally. Calling + * this function is not recommended as any further TTY usage may crash the + * system. 
+ * + * @see AddSIO() + */ +void DelSIO(void); #ifdef __cplusplus } diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h index cf78e3d..cdc3ac7 100644 --- a/libpsn00b/include/psxspu.h +++ b/libpsn00b/include/psxspu.h @@ -73,6 +73,20 @@ typedef struct _SpuCommonAttr { SpuExtAttr cd, ext; } SpuCommonAttr; +/* Macros */ + +#define getSPUAddr(addr) ((uint16_t) (((addr) + 7) / 8)) +#define getSPUSampleRate(rate) ((uint16_t) (((rate) * (1 << 12)) / 44100)) + +#define getSPUADSR(ar, dr, sr, rr, sl) ( \ + (sl) | \ + ((dr) << 4) | \ + ((ar) << 8) | \ + ((rr) << 16) | \ + ((sr) << 22) | \ + (1 << 30) \ +) + /* "Useless" macros for official SDK compatibility */ #define SpuSetCommonMasterVolume(left, right) \ @@ -87,21 +101,29 @@ typedef struct _SpuCommonAttr { ((enable) ? (SPU_CTRL |= 0x0002) : (SPU_CTRL &= 0xfffd)) #define SpuSetReverbAddr(addr) \ - (SPU_REVERB_ADDR = ((addr) + 7) / 8) + (SPU_REVERB_ADDR = getSPUAddr(addr)) #define SpuSetIRQAddr(addr) \ - (SPU_IRQ_ADDR = ((addr) + 7) / 8) + (SPU_IRQ_ADDR = getSPUAddr(addr)) #define SpuSetVoiceVolume(ch, left, right) \ (SPU_CH_VOL_L(ch) = (left), SPU_CH_VOL_R(ch) = (right)) #define SpuSetVoicePitch(ch, pitch) \ (SPU_CH_FREQ(ch) = (pitch)) #define SpuSetVoiceStartAddr(ch, addr) \ - (SPU_CH_ADDR(ch) = ((addr) + 7) / 8) -#define SpuSetVoiceADSR(ch, ar, dr, sr, rr, sl) \ - (SPU_CH_ADSR(ch) = ((sl)) | ((dr) << 4) | ((ar) << 8) | ((rr) << 16) | ((sr) << 22) | (1 << 30)) + (SPU_CH_ADDR(ch) = getSPUAddr(addr)) +#define SpuSetVoiceADSR(ch, ar, dr, sr, rr, sl) ( \ + SPU_CH_ADSR1(ch) = (sl) | ((dr) << 4) | ((ar) << 8), \ + SPU_CH_ADSR2(ch) = (rr) | ((sr) << 6) | (1 << 14) \ +) #define SpuSetKey(enable, voice_bit) \ - ((enable) ? (SPU_KEY_ON = (voice_bit)) : (SPU_KEY_OFF = (voice_bit))) + ((enable) ? 
( \ + SPU_KEY_ON1 = (uint16_t) (voice_bit), \ + SPU_KEY_ON2 = (uint16_t) ((voice_bit) >> 16) \ + ) : ( \ + SPU_KEY_OFF1 = (uint16_t) (voice_bit), \ + SPU_KEY_OFF2 = (uint16_t) ((voice_bit) >> 16) \ + )) /* Public API */ @@ -111,8 +133,9 @@ extern "C" { void SpuInit(void); -void SpuRead(uint32_t *data, size_t size); -void SpuWrite(const uint32_t *data, size_t size); +size_t SpuRead(uint32_t *data, size_t size); +size_t SpuWrite(const uint32_t *data, size_t size); +size_t SpuWritePartly(const uint32_t *data, size_t size); SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode); uint32_t SpuSetTransferStartAddr(uint32_t addr); int SpuIsTransferCompleted(int mode); diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h index f0753c1..049d067 100644 --- a/libpsn00b/include/stdlib.h +++ b/libpsn00b/include/stdlib.h @@ -31,17 +31,19 @@ extern "C" { extern int __argc; extern const char **__argv; +void abort(void); + int rand(void); -void srand(unsigned long seed); +void srand(int seed); int abs(int j); long labs(long i); -long long strtoll(const char *nptr, char **endptr, int base); -long strtol(const char *nptr, char **endptr, int base); -long double strtold(const char *nptr, char **endptr); -double strtod(const char *nptr, char **endptr); +long strtol(const char *nptr, char **endptr, int base); +long long strtoll(const char *nptr, char **endptr, int base); float strtof(const char *nptr, char **endptr); +double strtod(const char *nptr, char **endptr); +long double strtold(const char *nptr, char **endptr); void InitHeap(void *addr, size_t size); void *sbrk(ptrdiff_t incr); diff --git a/libpsn00b/libc/_start.s b/libpsn00b/libc/_start.s index 56075c8..fcd4c4c 100644 --- a/libpsn00b/libc/_start.s +++ b/libpsn00b/libc/_start.s @@ -2,17 +2,16 @@ # (C) 2022 spicyjpeg - MPL licensed # # This file provides a weak function that can be easily overridden to e.g. 
set -# $sp or perform additional initialization before the "real" _start() +# $sp or perform additional initialization before the "real" _start() function # (_start_inner()) is called. .set noreorder -.section .text +.section .text._start .global _start .type _start, @function .weak _start _start: la $gp, _gp - j _start_inner nop diff --git a/libpsn00b/libc/abort.c b/libpsn00b/libc/abort.c index 2db5016..0a3c325 100644 --- a/libpsn00b/libc/abort.c +++ b/libpsn00b/libc/abort.c @@ -1,19 +1,26 @@ +/* + * PSn00bSDK assert macro and internal logging + * (C) 2022 spicyjpeg - MPL licensed + */ -#include <psxetc.h> +#undef SDK_LIBRARY_NAME -/* Standard abort */ +#include <assert.h> +#include <psxapi.h> -void abort(void) { - _sdk_log("abort()\n"); +/* Internal function used by assert() macro */ + +void _assert_abort(const char *file, int line, const char *expr) { + _sdk_log("%s:%d: assert(%s)\n", file, line, expr); for (;;) __asm__ volatile(""); } -/* Internal function used by assert() macro */ +/* Standard abort */ -void _assert_abort(const char *file, int line, const char *expr) { - _sdk_log("%s:%d: assert(%s)\n", file, line, expr); +void abort(void) { + _sdk_log("abort()\n"); for (;;) __asm__ volatile(""); diff --git a/libpsn00b/libc/memset.s b/libpsn00b/libc/memset.s index 5a1589d..6ef84ec 100644 --- a/libpsn00b/libc/memset.s +++ b/libpsn00b/libc/memset.s @@ -38,8 +38,9 @@ memset: sb $a1, 0xc($a0) sb $a1, 0xd($a0) sb $a1, 0xe($a0) - jr $ra sb $a1, 0xf($a0) + jr $ra + nop .Llarge_fill: # Initialize fast filling by repeating the fill byte 4 times, so it can be diff --git a/libpsn00b/psxcd/getsector.c b/libpsn00b/psxcd/getsector.c index 31d0ac7..a214d7a 100644 --- a/libpsn00b/psxcd/getsector.c +++ b/libpsn00b/psxcd/getsector.c @@ -4,7 +4,7 @@ */ #include <stdint.h> -#include <psxetc.h> +#include <assert.h> #include <psxcd.h> #include <hwregs_c.h> @@ -46,6 +46,6 @@ int CdDataSync(int mode) { return 0; } - _sdk_log("psxcd: CdDataSync() timeout\n"); + _sdk_log("CdDataSync() 
timeout\n"); return -1; } diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c index 0425c0d..e00ddeb 100644 --- a/libpsn00b/psxcd/isofs.c +++ b/libpsn00b/psxcd/isofs.c @@ -1,10 +1,14 @@ + +#undef SDK_LIBRARY_NAME +#define SDK_LIBRARY_NAME "psxcd/iso" + #include <stdint.h> #include <stdlib.h> #include <string.h> +#include <assert.h> #include <psxgpu.h> #include <psxapi.h> -#include <psxetc.h> -#include "psxcd.h" +#include <psxcd.h> #include "isofs.h" #define DEFAULT_PATH_SEP '\\' @@ -43,7 +47,7 @@ static int _CdReadIsoDescriptor(int session_offs) CdControl(CdlNop, 0, 0); if( (CdStatus()&0x10) ) { - _sdk_log("psxcd: Lid is still open.\n"); + _sdk_log("Lid is still open.\n"); _cd_iso_error = CdlIsoLidOpen; return -1; @@ -58,45 +62,45 @@ static int _CdReadIsoDescriptor(int session_offs) return 0; } - _sdk_log("psxcd: Parsing ISO file system.\n"); + _sdk_log("Parsing ISO file system.\n"); // Seek to volume descriptor CdIntToPos(16+session_offs, &loc); if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { - _sdk_log("psxcd: Could not set seek destination.\n"); + _sdk_log("Could not set seek destination.\n"); _cd_iso_error = CdlIsoSeekError; return -1; } - _sdk_log("psxcd: Read sectors.\n"); + _sdk_log("Read sectors.\n"); // Read volume descriptor CdRead(1, (uint32_t*)_cd_iso_descriptor_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { - _sdk_log("psxcd: Error reading ISO volume descriptor.\n"); + _sdk_log("Error reading ISO volume descriptor.\n"); _cd_iso_error = CdlIsoReadError; return -1; } - _sdk_log("psxcd: Read complete.\n"); + _sdk_log("Read complete.\n"); // Verify if volume descriptor is present descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff; if( strncmp("CD001", descriptor->header.id, 5) ) { - _sdk_log("psxcd: Disc does not contain a ISO9660 file system.\n"); + _sdk_log("Disc does not contain a ISO9660 file system.\n"); _cd_iso_error = CdlIsoInvalidFs; return -1; } - _sdk_log("psxcd: Path table LBA = %d\n", descriptor->pathTable1Offs); - 
_sdk_log("psxcd: Path table len = %d\n", descriptor->pathTableSize.lsb); + _sdk_log("Path table LBA = %d\n", descriptor->pathTable1Offs); + _sdk_log("Path table len = %d\n", descriptor->pathTableSize.lsb); // Allocate path table buffer i = ((2047+descriptor->pathTableSize.lsb)>>11)<<11; @@ -106,7 +110,7 @@ static int _CdReadIsoDescriptor(int session_offs) } _cd_iso_pathtable_buff = (uint8_t*)malloc(i); - _sdk_log("psxcd: Allocated %d bytes for path table.\n", i); + _sdk_log("Allocated %d bytes for path table.\n", i); // Read path table CdIntToPos(descriptor->pathTable1Offs, &loc); @@ -114,7 +118,7 @@ static int _CdReadIsoDescriptor(int session_offs) CdRead(i>>11, (uint32_t*)_cd_iso_pathtable_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { - _sdk_log("psxcd: Error reading ISO path table.\n"); + _sdk_log("Error reading ISO path table.\n"); _cd_iso_error = CdlIsoReadError; return -1; @@ -142,11 +146,11 @@ static int _CdReadIsoDirectory(int lba) CdIntToPos(lba, &loc); i = CdPosToInt(&loc); - _sdk_log("psxcd: Seek to sector %d\n", i); + _sdk_log("Seek to sector %d\n", i); if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { - _sdk_log("psxcd: Could not set seek destination.\n"); + _sdk_log("Could not set seek destination.\n"); _cd_iso_error = CdlIsoSeekError; return -1; @@ -162,7 +166,7 @@ static int _CdReadIsoDirectory(int lba) CdRead(1, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { - _sdk_log("psxcd: Error reading initial directory record.\n"); + _sdk_log("Error reading initial directory record.\n"); _cd_iso_error = CdlIsoReadError; return -1; @@ -171,14 +175,14 @@ static int _CdReadIsoDirectory(int lba) direntry = (ISO_DIR_ENTRY*)_cd_iso_directory_buff; _cd_iso_directory_len = direntry->entrySize.lsb; - _sdk_log("psxcd: Location of directory record = %d\n", direntry->entryOffs.lsb); - _sdk_log("psxcd: Size of directory record = %d\n", _cd_iso_directory_len); + _sdk_log("Location of directory record = %d\n", direntry->entryOffs.lsb); + 
_sdk_log("Size of directory record = %d\n", _cd_iso_directory_len); if( _cd_iso_directory_len > 2048 ) { if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) ) { - _sdk_log("psxcd: Could not set seek destination.\n"); + _sdk_log("Could not set seek destination.\n"); _cd_iso_error = CdlIsoSeekError; return -1; @@ -188,12 +192,12 @@ static int _CdReadIsoDirectory(int lba) i = ((2047+_cd_iso_directory_len)>>11)<<11; _cd_iso_directory_buff = (uint8_t*)malloc(i); - _sdk_log("psxcd: Allocated %d bytes for directory record.\n", i); + _sdk_log("Allocated %d bytes for directory record.\n", i); CdRead(i>>11, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed); if( CdReadSync(0, 0) ) { - _sdk_log("psxcd: Error reading remaining directory record.\n"); + _sdk_log("Error reading remaining directory record.\n"); _cd_iso_error = CdlIsoReadError; return -1; @@ -215,7 +219,7 @@ static void dump_directory(void) ISO_DIR_ENTRY *dir_entry; char namebuff[16]; - _sdk_log("psxcd: Cached directory record contents:\n"); + _sdk_log("Cached directory record contents:\n"); i = 0; dir_pos = 0; @@ -226,7 +230,7 @@ static void dump_directory(void) strncpy(namebuff, _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), dir_entry->identifierLen); - _sdk_log("psxcd: P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff); + _sdk_log("P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff); dir_pos += dir_entry->entryLength; i++; @@ -245,7 +249,7 @@ static void dump_directory(void) } } - _sdk_log("psxcd: --\n"); + _sdk_log("--\n"); } @@ -256,7 +260,7 @@ static void dump_pathtable(void) ISO_DESCRIPTOR *descriptor; char namebuff[16]; - _sdk_log("psxcd: Path table entries:\n"); + _sdk_log("Path table entries:\n"); descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff; @@ -270,7 +274,7 @@ static void dump_pathtable(void) tbl_pos+sizeof(ISO_PATHTABLE_ENTRY), tbl_entry->nameLength); - _sdk_log("psxcd: %s\n", namebuff); + _sdk_log("%s\n", namebuff); // Advance to next entry tbl_pos += 
sizeof(ISO_PATHTABLE_ENTRY) @@ -366,7 +370,7 @@ static int find_dir_entry(const char *name, ISO_DIR_ENTRY *dirent) ISO_DIR_ENTRY *dir_entry; char namebuff[16]; - _sdk_log("psxcd: Locating file %s.\n", name); + _sdk_log("Locating file %s.\n", name); i = 0; dir_pos = 0; @@ -459,11 +463,11 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) // Read ISO descriptor and path table if( _CdReadIsoDescriptor(0) ) { - _sdk_log("psxcd: Could not read ISO file system.\n"); + _sdk_log("Could not read ISO file system.\n"); return NULL; } - // _sdk_log("psxcd: ISO file system cache updated.\n"); + // _sdk_log("ISO file system cache updated.\n"); // _cd_media_changed = 0; //} @@ -471,23 +475,23 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) num_dirs = get_pathtable_entry(0, NULL, NULL); #ifndef NDEBUG - _sdk_log("psxcd: Directories in path table: %d\n", num_dirs); + _sdk_log("Directories in path table: %d\n", num_dirs); rbuff = resolve_pathtable_path(num_dirs-1, tpath_rbuff+127); if( !rbuff ) { - _sdk_log("psxcd: Could not resolve path.\n"); + _sdk_log("Could not resolve path.\n"); } else { - _sdk_log("psxcd: Longest path: %s|\n", rbuff); + _sdk_log("Longest path: %s|\n", rbuff); } #endif if( get_pathname(search_path, filename) ) { - _sdk_log("psxcd: Search path = %s|\n", search_path); + _sdk_log("Search path = %s|\n", search_path); } // Search the pathtable for a matching path @@ -495,7 +499,7 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) for(i=1; i<num_dirs; i++) { rbuff = resolve_pathtable_path(i, tpath_rbuff+127); - _sdk_log("psxcd: Found = %s|\n", rbuff); + _sdk_log("Found = %s|\n", rbuff); if( rbuff ) { @@ -509,14 +513,14 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) if( !found_dir ) { - _sdk_log("psxcd: Directory path not found.\n"); + _sdk_log("Directory path not found.\n"); return NULL; } - _sdk_log("psxcd: Found directory at record %d!\n", found_dir); + _sdk_log("Found directory at record %d!\n", found_dir); 
get_pathtable_entry(found_dir, &tbl_entry, NULL); - _sdk_log("psxcd: Directory LBA = %d\n", tbl_entry.dirOffs); + _sdk_log("Directory LBA = %d\n", tbl_entry.dirOffs); _CdReadIsoDirectory(tbl_entry.dirOffs); get_filename(fp->name, filename); @@ -533,12 +537,12 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) if( find_dir_entry(fp->name, &dir_entry) ) { - _sdk_log("psxcd: Could not find file.\n"); + _sdk_log("Could not find file.\n"); return NULL; } - _sdk_log("psxcd: Located file at LBA %d.\n", dir_entry.entryOffs.lsb); + _sdk_log("Located file at LBA %d.\n", dir_entry.entryOffs.lsb); CdIntToPos(dir_entry.entryOffs.lsb, &fp->pos); fp->size = dir_entry.entrySize.lsb; @@ -562,11 +566,11 @@ CdlDIR *CdOpenDir(const char* path) // Read ISO descriptor and path table if( _CdReadIsoDescriptor( 0 ) ) { - _sdk_log( "psxcd: Could not read ISO file system.\n" ); + _sdk_log( "Could not read ISO file system.\n" ); return NULL; } -// _sdk_log( "psxcd: ISO file system cache updated.\n" ); +// _sdk_log( "ISO file system cache updated.\n" ); // _cd_media_changed = 0; // } @@ -576,7 +580,7 @@ CdlDIR *CdOpenDir(const char* path) for( i=1; i<num_dirs; i++ ) { rbuff = resolve_pathtable_path( i, tpath_rbuff+127 ); - _sdk_log( "psxcd: Found = %s|\n", rbuff ); + _sdk_log( "Found = %s|\n", rbuff ); if( rbuff ) { @@ -590,14 +594,14 @@ CdlDIR *CdOpenDir(const char* path) if( !found_dir ) { - _sdk_log( "psxcd: Directory path not found.\n" ); + _sdk_log( "Directory path not found.\n" ); return NULL; } - _sdk_log( "psxcd: Found directory at record %d!\n", found_dir ); + _sdk_log( "Found directory at record %d!\n", found_dir ); get_pathtable_entry( found_dir, &tbl_entry, NULL ); - _sdk_log( "psxcd: Directory LBA = %d\n", tbl_entry.dirOffs ); + _sdk_log( "Directory LBA = %d\n", tbl_entry.dirOffs ); _CdReadIsoDirectory( tbl_entry.dirOffs ); @@ -662,11 +666,11 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file) file->size = dir_entry->entrySize.lsb; - _sdk_log("psxcd: dir_entry->entryLength = %d, 
", dir_entry->entryLength); + _sdk_log("dir_entry->entryLength = %d, ", dir_entry->entryLength); d_dir->_pos += dir_entry->entryLength; - _sdk_log("psxcd: d_dir->_pos = %d\n", d_dir->_pos); + _sdk_log("d_dir->_pos = %d\n", d_dir->_pos); // Check if padding is reached (end of record sector) if( d_dir->_dir[d_dir->_pos] == 0 ) @@ -770,13 +774,13 @@ int CdLoadSession(int session) int i; // Seek to specified session - _sdk_log("psxcd: CdLoadSession(): Seeking to session %d...\n", session); + _sdk_log("CdLoadSession(): Seeking to session %d...\n", session); CdControl(CdlSetsession, (unsigned char*)&session, (unsigned char*)&resultbuff); if( CdSync(0, 0) == CdlDiskError ) { - _sdk_log("psxcd: CdLoadSession(): Session seek failed, session does not exist. Restarting CD-ROM...\n"); + _sdk_log("CdLoadSession(): Session seek failed, session does not exist. Restarting CD-ROM...\n"); // Restart CD-ROM on session seek failure CdControl(CdlNop, 0, 0); @@ -797,7 +801,7 @@ int CdLoadSession(int session) _ses_scanbuff = scanbuff; // Begin scan for an ISO volume descriptor - _sdk_log("psxcd: CdLoadSession(): Scanning for ISO9660 volume descriptor.\n"); + _sdk_log("CdLoadSession(): Scanning for ISO9660 volume descriptor.\n"); i = CdlModeSpeed; CdControl(CdlSetmode, (unsigned char*)&i, 0); @@ -812,7 +816,7 @@ int CdLoadSession(int session) if( !_ses_scanfound ) { - _sdk_log("psxcd: CdLoadSession(): Did not find volume descriptor.\n"); + _sdk_log("CdLoadSession(): Did not find volume descriptor.\n"); _cd_iso_error = CdlIsoInvalidFs; EnterCriticalSection(); @@ -841,11 +845,11 @@ int CdLoadSession(int session) loc = (CdlLOC*)resultbuff; - _sdk_log("psxcd: CdLoadSession(): Session found in %02d:%02d:%02d (LBA=%d)\n", + _sdk_log("CdLoadSession(): Session found in %02d:%02d:%02d (LBA=%d)\n", btoi(loc->minute), btoi(loc->second), btoi(loc->sector), CdPosToInt(loc)); i = CdPosToInt(loc)-17; - _sdk_log("psxcd: CdLoadSession(): Session starting at LBA=%d\n", i); + _sdk_log("CdLoadSession(): 
Session starting at LBA=%d\n", i); _cd_media_changed = 1; diff --git a/libpsn00b/psxcd/psxcd.c b/libpsn00b/psxcd/psxcd.c index b914b5e..9392d30 100644 --- a/libpsn00b/psxcd/psxcd.c +++ b/libpsn00b/psxcd/psxcd.c @@ -1,8 +1,8 @@ #include <stdint.h> +#include <assert.h> #include <psxgpu.h> -#include <psxetc.h> #include <psxapi.h> -#include "psxcd.h" +#include <psxcd.h> #define READ_TIMEOUT 600 // 10 seconds for NTSC @@ -39,9 +39,9 @@ int CdInit(void) { if(CdSync(0, 0) != CdlDiskError) { CdControl(CdlDemute, 0, 0); - _sdk_log("psxcd: setup done\n"); + _sdk_log("setup done\n"); } else { - _sdk_log("psxcd: setup error, bad disc/drive or no disc inserted\n"); + _sdk_log("setup error, bad disc/drive or no disc inserted\n"); } return 1; @@ -208,7 +208,7 @@ CdlLOC *CdIntToPos(int i, CdlLOC *p) { } -int CdPosToInt(CdlLOC *p) +int CdPosToInt(const CdlLOC *p) { return ((75*(btoi(p->minute)*60))+(75*btoi(p->second))+btoi(p->sector))-150; } @@ -305,7 +305,7 @@ static void CdDoRetry() { int cb; - _sdk_log("psxcd: retrying read...\n"); + _sdk_log("retrying read...\n"); // Stop reading CdControl(CdlPause, 0, 0); diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c index b85a7df..ccf7a7c 100644 --- a/libpsn00b/psxetc/dl.c +++ b/libpsn00b/psxetc/dl.c @@ -1,6 +1,6 @@ /* * PSn00bSDK dynamic linker - * (C) 2021 spicyjpeg - MPL licensed + * (C) 2021-2022 spicyjpeg - MPL licensed * * The bulk of this code is MIPS-specific but not PS1-specific, so the whole * dynamic linker could be ported to other MIPS platforms that do not have one @@ -23,23 +23,21 @@ * of entries */ +#undef SDK_LIBRARY_NAME +#define SDK_LIBRARY_NAME "psxetc/dl" + #include <stdint.h> +#include <stddef.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <ctype.h> +#include <assert.h> #include <elf.h> #include <dlfcn.h> #include <string.h> -#include <psxetc.h> #include <psxapi.h> -/* Compile options */ - -// Comment before building to disable functions that rely on BIOS file APIs, -// i.e. 
DL_LoadSymbolMapFromFile() and DL_LoadDLLFromFile(). -// FIXME: those seem to be broken currently, and shouldn't be used anyway -//#define USE_FILE_API - /* Private types */ typedef struct { @@ -48,17 +46,15 @@ typedef struct { } MapEntry; typedef struct { - uint32_t nbucket; - uint32_t nchain; + int nbucket, nchain, index; MapEntry *entries; uint32_t *bucket; uint32_t *chain; } SymbolMap; -/* Data */ +/* Internal globals */ -static DL_Error _error_code = RTLD_E_NONE; static SymbolMap _symbol_map; // Accessed by _dl_resolve_helper, stores the pointer to the current resolver @@ -67,11 +63,6 @@ void *(*_dl_resolve_callback)(DLL *, const char *) = 0; /* Private utilities */ -#define _ERROR(code, ret) { \ - _error_code = code; \ - return ret; \ -} - void _dl_resolve_wrapper(void); // Called by _dl_resolve_wrapper() (which is in turn called by GCC stubs) to @@ -79,29 +70,28 @@ void _dl_resolve_wrapper(void); void *_dl_resolve_helper(DLL *dll, uint32_t index) { Elf32_Sym *sym = &(dll->symtab[index]); const char *_name = &(dll->strtab[sym->st_name]); - void *address; + void *addr; if (_dl_resolve_callback) - address = _dl_resolve_callback(dll, _name); + addr = _dl_resolve_callback(dll, _name); else - address = DL_GetSymbolByName(_name); + addr = DL_GetMapSymbol(_name); - if (!address) { - _sdk_log("psxetc: FATAL! can't resolve %s, locking up\n", _name); - while (1) - __asm__ volatile("nop"); + if (!addr) { + _sdk_log("FATAL! can't resolve %s, aborting\n", _name); + abort(); } // Patch the GOT entry to "cache" the resolved address. This can probably // be implemented in a faster way, but this thing is already too complex. 
- for (uint32_t i = 0; i < dll->got_length; i++) { + for (int i = 0; i < dll->got_length; i++) { if (dll->got[2 + i] == (uint32_t) sym->st_value) { - dll->got[2 + i] = (uint32_t) address; + dll->got[2 + i] = (uint32_t) addr; break; } } - return address; + return addr; } // Implementation of the weird obscure hashing function used in the ELF .hash @@ -124,142 +114,121 @@ static uint32_t _elf_hash(const char *str) { return value; } -#ifdef USE_FILE_API -static uint8_t *_dl_load_file(const char *filename, size_t *size_output) { - int32_t fd = open(filename, 1); - if (fd < 0) { - _sdk_log("psxetc: can't open %s, error = %d\n", filename, fd); - _ERROR(RTLD_E_FILE_OPEN, 0); - } +/* Symbol map loading/introspection API */ - // Extract file size from the file's associated control block. - // https://problemkaputt.de/psx-spx.htm#biosmemorymap - FCB *fcb = (FCB *) *((FCB **) 0x80000140); - size_t size = fcb[fd].filesize; +int DL_InitSymbolMap(int num_entries) { + if (_symbol_map.entries) + DL_UnloadSymbolMap(); - uint8_t *buffer = malloc(size); - if (!buffer) { - _sdk_log("psxetc: unable to allocate %d bytes for %s\n", size, filename); - _ERROR(RTLD_E_FILE_ALLOC, 0); - } + // TODO: find a way to calculate the optimal number of hash table "buckets" + // in order to minimize hash table size + _symbol_map.nbucket = num_entries; + _symbol_map.nchain = num_entries; + _symbol_map.index = 0; + _sdk_log( + "allocating nbucket = %d, nchain = %d\n", + _symbol_map.nbucket, num_entries + ); - //_sdk_log("psxetc: loading %s (%d bytes)..", filename, size); + _symbol_map.entries = malloc(sizeof(MapEntry) * num_entries); + _symbol_map.bucket = malloc(sizeof(uint32_t) * num_entries); + _symbol_map.chain = malloc(sizeof(uint32_t) * num_entries); - for (uint32_t offset = 0; offset < size; ) { - int32_t length = read(fd, &(buffer[offset]), 0x800); + if (!_symbol_map.entries || !_symbol_map.bucket || !_symbol_map.chain) { + _sdk_log("unable to allocate symbol map table\n"); + return -1; + } - if 
(length <= 0) { - close(fd); - free(buffer); + memset(_symbol_map.bucket, 0xff, sizeof(uint32_t) * num_entries); + memset(_symbol_map.chain, 0xff, sizeof(uint32_t) * num_entries); - _sdk_log("failed, error = %d\n", length); - _ERROR(RTLD_E_FILE_READ, 0); - } + return 0; +} - //_sdk_log("."); - offset += length; - } +void DL_UnloadSymbolMap(void) { + if (!_symbol_map.entries) + return; - close(fd); - _sdk_log(" done\n"); + free(_symbol_map.entries); + free(_symbol_map.bucket); + free(_symbol_map.chain); - if (size_output) - *size_output = size; - return buffer; + _symbol_map.entries = 0; + _symbol_map.bucket = 0; + _symbol_map.chain = 0; } -#endif -/* Symbol map loading/parsing API */ +void DL_AddMapSymbol(const char *name, void *ptr) { + uint32_t hash = _elf_hash(name); + int index = _symbol_map.index; + _symbol_map.index = index + 1; + + MapEntry *entry = &(_symbol_map.entries[index]); + entry->hash = hash; + entry->ptr = ptr; -int32_t DL_ParseSymbolMap(const char *ptr, size_t size) { - DL_UnloadSymbolMap(); + // Append a reference to the entry to the hash table's chain. + uint32_t *hash_entry = &(_symbol_map.bucket[hash % _symbol_map.nbucket]); + while (*hash_entry != 0xffffffff) + hash_entry = &(_symbol_map.chain[*hash_entry]); + + *hash_entry = index; +} + +int DL_ParseSymbolMap(const char *ptr, size_t size) { + int entries = 0; // Perform a quick scan over the entire map text and count the number of // newlines. This allows us to (over)estimate the number of entries and - // allocate a sufficiently large hash/entry table. - uint32_t entries = 0; - for (uint32_t pos = 0; pos < size; pos++) { + // allocate a sufficiently large hash table. 
+ for (int pos = 0; pos < size; pos++) { if (ptr[pos] == '\n') entries++; } - // TODO: find a way to calculate the optimal number of hash table "buckets" - // in order to minimize hash table size - _symbol_map.nbucket = entries; - _symbol_map.nchain = entries; - _sdk_log( - "psxetc: allocating nbucket = %d, nchain = %d\n", - _symbol_map.nbucket, - entries - ); + int err = DL_InitSymbolMap(entries); + if (err) + return err; - // Allocate an entry table to store parsed symbols in, and an associated - // hash table (same format as .hash section, with 8-byte header). - _symbol_map.entries = malloc(sizeof(MapEntry) * entries); - _symbol_map.bucket = malloc(sizeof(uint32_t) * _symbol_map.nbucket); - _symbol_map.chain = malloc(sizeof(uint32_t) * entries); + // Go again through the symbol map and fill in the hash table by calling + // DL_AddMapSymbol() for each valid entry. + entries = 0; - if (!_symbol_map.entries || !_symbol_map.bucket || !_symbol_map.chain) { - _sdk_log("psxetc: unable to allocate symbol map table\n"); - _ERROR(RTLD_E_MAP_ALLOC, -1); - } - - for (uint32_t i = 0; i < _symbol_map.nbucket; i++) - _symbol_map.bucket[i] = 0xffffffff; - for (uint32_t i = 0; i < entries; i++) - _symbol_map.chain[i] = 0xffffffff; - - // Go again through the symbol map and fill in the hash table. - uint32_t index = 0; - for (uint32_t pos = 0; (pos < size) && ptr[pos]; pos++) { - char name[64]; - char type_string[2]; - uint64_t address64; + for (int pos = 0; (pos < size) && ptr[pos]; pos++) { + uint64_t full_addr; + char name[64], type_string[4]; size_t _size; // e.g. "main T ffffffff80000000 100 ...\n" - int32_t parsed = sscanf( + int parsed = sscanf( &(ptr[pos]), "%63s %1s %Lx %x", name, type_string, - &address64, + &full_addr, &_size // Optional, unused (yet) ); if (parsed >= 3) { // Drop the upper 32 bits of the address (for some reason MIPS nm - // insists on printing 64-bit addresses... 
wtf) and normalize the - // type letter to upper case, then check if the entry is valid and - // non-null. - void *address = (void *) address64; - char _type = toupper(type_string[0]); - uint32_t hash = _elf_hash(name); - uint32_t hash_mod = hash % _symbol_map.nbucket; - - if (address && ( + // insists on printing 64-bit addresses... wtf) and check if the + // entry is valid and non-null. + void *addr = (void *) ((uint32_t) full_addr); + char _type = toupper(type_string[0]); + + if (addr && ( (_type == 'T') || // .text (_type == 'R') || // .rodata (_type == 'D') || // .data (_type == 'B') // .bss )) { //_sdk_log( - //"psxetc: map sym: %08x,%08x [%c %s]\n", - //address, _size, _type, name + //"map sym: %08x,%08x [%c %s]\n", + //addr, _size, _type, name //); - MapEntry *entry = &(_symbol_map.entries[index]); - entry->hash = hash; - entry->ptr = address; - - // Append a reference to the entry to the hash table's chain - // for the current hash_mod. I can't explain this properly. - uint32_t *hash_entry = &(_symbol_map.bucket[hash_mod]); - while (*hash_entry != 0xffffffff) - hash_entry = &(_symbol_map.chain[*hash_entry]); - - *hash_entry = index; - index++; + DL_AddMapSymbol(name, addr); + entries++; } } @@ -269,92 +238,61 @@ int32_t DL_ParseSymbolMap(const char *ptr, size_t size) { pos++; } - _sdk_log("psxetc: parsed %d symbols\n", entries); - if (!entries) - _ERROR(RTLD_E_NO_SYMBOLS, -1); - - return entries; -} - -#ifdef USE_FILE_API -int32_t DL_LoadSymbolMapFromFile(const char *filename) { - size_t size; - char *ptr = _dl_load_file(filename, &size); - if (!ptr) - return -1; - - int32_t entries = DL_ParseSymbolMap(ptr, size); - free(ptr); - + _sdk_log("parsed %d symbols\n", entries); return entries; } -#endif - -void DL_UnloadSymbolMap(void) { - if (!_symbol_map.entries) - return; - free(_symbol_map.entries); - free(_symbol_map.bucket); - free(_symbol_map.chain); - _symbol_map.entries = 0; -} - -void *DL_GetSymbolByName(const char *name) { +void *DL_GetMapSymbol(const 
char *name) { if (!_symbol_map.entries) { - _sdk_log("psxetc: attempted lookup with no map loaded\n"); - _ERROR(RTLD_E_NO_MAP, 0); + _sdk_log("DL_GetMapSymbol() with no map loaded\n"); + return 0; } - // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html - uint32_t hash = _elf_hash(name); - uint32_t hash_mod = hash % _symbol_map.nbucket; - // Go through the hash table's chain until the symbol hash matches the one // calculated. - for (uint32_t i = _symbol_map.bucket[hash_mod]; i != 0xffffffff;) { + // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html + uint32_t hash = _elf_hash(name); + + for (int i = _symbol_map.bucket[hash % _symbol_map.nbucket]; i != 0xffffffff;) { if (i >= _symbol_map.nchain) { _sdk_log( - "psxetc: GetSymbolByName() index out of bounds (%d >= %d)\n", + "DL_GetMapSymbol() index out of bounds (%d >= %d)\n", i, _symbol_map.nchain ); - _ERROR(RTLD_E_HASH_LOOKUP, 0); + return 0; } MapEntry *entry = &(_symbol_map.entries[i]); if (hash == entry->hash) { - //_sdk_log("psxetc: map lookup [%s = %08x]\n", name, entry->ptr); + //_sdk_log("map lookup [%s = %08x]\n", name, entry->ptr); return entry->ptr; } i = _symbol_map.chain[i]; } - _sdk_log("psxetc: map lookup [%s not found]\n", name); - _ERROR(RTLD_E_MAP_SYMBOL, 0); + _sdk_log("map lookup [%s not found]\n", name); + return 0; } -void DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) { +void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) { + void *old_callback = _dl_resolve_callback; _dl_resolve_callback = callback; + + return old_callback; } /* Library loading and linking API */ -DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { - if (!ptr) - _ERROR(RTLD_E_DLL_NULL, 0); - - DLL *dll = malloc(sizeof(DLL)); - if (!dll) { - _sdk_log("psxetc: unable to allocate DLL struct\n"); - _ERROR(RTLD_E_DLL_ALLOC, 0); - } +DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) { + if (!dll || !ptr) + return 0; 
dll->ptr = ptr; - dll->malloc_ptr = (mode & RTLD_FREE_ON_DESTROY) ? ptr : 0; + dll->malloc_ptr = (mode & DL_FREE_ON_DESTROY) ? ptr : 0; dll->size = size; - _sdk_log("psxetc: initializing DLL at %08x\n", ptr); + _sdk_log("initializing DLL at %08x\n", ptr); // Interpret the key-value pairs in the .dynamic section to obtain info // about all the other sections. The pairs are null-terminated, which makes @@ -363,52 +301,35 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { uint32_t first_got_sym = 0; for (Elf32_Dyn *dyn = (Elf32_Dyn *) ptr; dyn->d_tag; dyn++) { - //_sdk_log("psxetc: .dynamic %08x=%08x ", dyn->d_tag, dyn->d_un.d_val); + //_sdk_log(".dynamic %08x=%08x ", dyn->d_tag, dyn->d_un.d_val); switch (dyn->d_tag) { // Offset of .got section case DT_PLTGOT: - //_sdk_log("[PLTGOT]\n"); - dll->got = (void *) (ptr + dyn->d_un.d_val); break; // Offset of .hash section case DT_HASH: - //_sdk_log("[HASH]\n"); - dll->hash = (void *) (ptr + dyn->d_un.d_val); break; // Offset of .dynstr (NOT .strtab) section case DT_STRTAB: - //_sdk_log("[STRTAB]\n"); - dll->strtab = (void *) (ptr + dyn->d_un.d_val); break; // Offset of .dynsym (NOT .symtab) section case DT_SYMTAB: - //_sdk_log("[SYMTAB]\n"); - dll->symtab = (void *) (ptr + dyn->d_un.d_val); break; - // Length of .dynstr section - //case DT_STRSZ: - //_sdk_log("[STRSZ]\n"); - //break; - // Length of each .dynsym entry case DT_SYMENT: - //_sdk_log("[SYMENT]\n"); - // Only 16-byte symbol table entries are supported. if (dyn->d_un.d_val != sizeof(Elf32_Sym)) { - free(dll); - - _sdk_log("psxetc: invalid DLL symtab entry size %d\n", dyn->d_un.d_val); - _ERROR(RTLD_E_DLL_FORMAT, 0); + _sdk_log("invalid DLL symtab entry size %d\n", dyn->d_un.d_val); + return 0; } break; @@ -418,73 +339,44 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { // Versions other than 1 are unsupported (do they even exist?). 
if (dyn->d_un.d_val != 1) { - free(dll); - - _sdk_log("psxetc: invalid DLL version %d\n", dyn->d_un.d_val); - _ERROR(RTLD_E_DLL_FORMAT, 0); + _sdk_log("invalid DLL version %d\n", dyn->d_un.d_val); + return 0; } break; // DLL/ABI flags case DT_MIPS_FLAGS: - //_sdk_log("[MIPS_FLAGS]\n"); - // Shortcut pointers (whatever they are) are not supported. if (dyn->d_un.d_val & RHF_QUICKSTART) { - free(dll); - - _sdk_log("psxetc: invalid DLL flags\n"); - _ERROR(RTLD_E_DLL_FORMAT, 0); + _sdk_log("invalid DLL flags\n"); + return 0; } break; // Number of local (not to resolve) GOT entries case DT_MIPS_LOCAL_GOTNO: - //_sdk_log("[MIPS_LOCAL_GOTNO]\n"); - local_got_len = dyn->d_un.d_val; break; // Base address DLL was compiled for case DT_MIPS_BASE_ADDRESS: - //_sdk_log("[MIPS_BASE_ADDRESS]\n"); - // Base addresses other than zero are not supported. It would // be easy enough to support them, but why? if (dyn->d_un.d_val) { - free(dll); - - _sdk_log("psxetc: invalid DLL base address %08x\n", dyn->d_un.d_val); - _ERROR(RTLD_E_DLL_FORMAT, 0); + _sdk_log("invalid DLL base address %08x\n", dyn->d_un.d_val); + return 0; } break; // Number of symbol table entries case DT_MIPS_SYMTABNO: - //_sdk_log("[MIPS_SYMTABNO]\n"); - dll->symbol_count = dyn->d_un.d_val; break; - // Index of first unresolved symbol table entry - //case DT_MIPS_UNREFEXTNO: - //_sdk_log("[MIPS_UNREFEXTNO]\n"); - //break; - // Index of first symbol table entry which has a matching GOT entry case DT_MIPS_GOTSYM: - //_sdk_log("[MIPS_GOTSYM]\n"); - first_got_sym = dyn->d_un.d_val; break; - - // Number of pages the GOT is split into (does not apply to PS1) - //case DT_MIPS_HIPAGENO: - //_sdk_log("[MIPS_HIPAGENO]\n"); - //break; - - //default: - //_sdk_log("[ignored]\n"); } } @@ -497,7 +389,7 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { dll->got_length = local_got_len + (dll->symbol_count - first_got_sym) - 2; _sdk_log( - "psxetc: %d symbols, %d GOT entries\n", + "%d symbols, %d GOT entries\n", 
dll->symbol_count, dll->got_length ); @@ -510,14 +402,14 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { dll->got[0] = (uint32_t) &_dl_resolve_wrapper; dll->got[1] = (uint32_t) dll; - for (uint32_t i = 0; i < dll->got_length; i++) + for (int i = 0; i < dll->got_length; i++) dll->got[2 + i] += (uint32_t) ptr; // Fix addresses in the symbol table. // TODO: clean this shit up uint32_t got_offset = first_got_sym; - for (uint32_t i = 0; i < dll->symbol_count; i++) { + for (int i = 0; i < dll->symbol_count; i++) { Elf32_Sym *sym = &(dll->symtab[i]); const char *_name = &(dll->strtab[sym->st_name]); @@ -526,16 +418,16 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { sym->st_value += (uint32_t) ptr; //_sdk_log( - //"psxetc: DLL sym: %08x,%08x [%s]\n", + //"DLL sym: %08x,%08x [%s]\n", //sym->st_value, sym->st_size, _name //); - // If RTLD_NOW was passed, resolve GOT entries ahead of time by + // If DL_NOW was passed, resolve GOT entries ahead of time by // cross-referencing them with the symbol table. - if (!(mode & RTLD_NOW)) + if (!(mode & DL_NOW)) continue; - for (uint32_t j = got_offset; j < dll->got_length; j++) { + for (int j = got_offset; j < dll->got_length; j++) { if (dll->got[2 + j] != (uint32_t) sym->st_value) continue; @@ -550,10 +442,8 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { )) { dll->got[2 + j] = (uint32_t) _dl_resolve_callback(dll, _name); - if (!dll->got[2 + j]) { - free(dll); - _ERROR(RTLD_E_MAP_SYMBOL, 0); - } + if (!dll->got[2 + j]) + return 0; } break; @@ -570,7 +460,7 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { // DLL itself. 
const uint32_t *ctor_list = DL_GetDLLSymbol(dll, "__CTOR_LIST__"); if (ctor_list) { - for (uint32_t i = ((uint32_t) ctor_list[0]); i >= 1; i--) { + for (int i = ((int) ctor_list[0]); i >= 1; i--) { void (*ctor)(void) = (void (*)(void)) ctor_list[i]; DL_PRE_CALL(ctor); ctor(); @@ -580,84 +470,60 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) { return dll; } -#ifdef USE_FILE_API -DLL *DL_LoadDLLFromFile(const char *filename, DL_ResolveMode mode) { - size_t size; - char *ptr = _dl_load_file(filename, &size); - if (!ptr) - return 0; - - DLL *dll = DL_CreateDLL(ptr, size, mode | RTLD_FREE_ON_DESTROY); - if (!dll) - free(ptr); - - return dll; -} -#endif - void DL_DestroyDLL(DLL *dll) { - if (dll == RTLD_DEFAULT) + if (!dll) return; if (dll->ptr) { // Call the DLL's global destructors. const uint32_t *dtor_list = DL_GetDLLSymbol(dll, "__DTOR_LIST__"); if (dtor_list) { - for (uint32_t i = 0; i < ((uint32_t) dtor_list[0]); i++) { + for (int i = 0; i < ((int) dtor_list[0]); i++) { void (*dtor)(void) = (void (*)(void)) dtor_list[i + 1]; DL_PRE_CALL(dtor); dtor(); } } + + dll->ptr = 0; } - // If the DLL is associated to a buffer allocated by DL_LoadDLLFromFile(), - // free that buffer. - if (dll->malloc_ptr) + // If the DLL is associated to a buffer, free that buffer. 
+ if (dll->malloc_ptr) { free(dll->malloc_ptr); - - free(dll); + dll->malloc_ptr = 0; + } } void *DL_GetDLLSymbol(const DLL *dll, const char *name) { - if (dll == RTLD_DEFAULT) - return DL_GetSymbolByName(name); - //return _dl_resolve_callback(RTLD_DEFAULT, name); + if (!dll) + return DL_GetMapSymbol(name); + //return _dl_resolve_callback(0, name); - // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html uint32_t nbucket = dll->hash[0]; uint32_t nchain = dll->hash[1]; const uint32_t *bucket = &(dll->hash[2]); const uint32_t *chain = &(dll->hash[2 + nbucket]); - uint32_t hash_mod = _elf_hash(name) % nbucket; - // Go through the hash table's chain until the symbol name matches the one // provided. - for (uint32_t i = bucket[hash_mod]; i != 0xffffffff;) { + for (int i = bucket[_elf_hash(name) % nbucket]; i != 0xffffffff;) { if (i >= nchain) { - _sdk_log("psxetc: DL_GetDLLSymbol() index out of bounds (%d >= %d)\n", i, nchain); - _ERROR(RTLD_E_HASH_LOOKUP, 0); + _sdk_log("DL_GetDLLSymbol() index out of bounds (%d >= %d)\n", i, nchain); + return 0; } Elf32_Sym *sym = &(dll->symtab[i]); const char *_name = &(dll->strtab[sym->st_name]); if (!strcmp(name, _name)) { - //_sdk_log("psxetc: DLL lookup [%s = %08x]\n", name, sym->st_value); + //_sdk_log("DLL lookup [%s = %08x]\n", name, sym->st_value); return sym->st_value; } i = chain[i]; } - _sdk_log("psxetc: DLL lookup [%s not found]\n", name); - _ERROR(RTLD_E_DLL_SYMBOL, 0); -} - -DL_Error DL_GetLastError(void) { - DL_Error last = _error_code; - _error_code = RTLD_E_NONE; - - return last; + _sdk_log("DLL lookup [%s not found]\n", name); + return 0; } diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c index cc9d12c..0d926c4 100644 --- a/libpsn00b/psxetc/interrupts.c +++ b/libpsn00b/psxetc/interrupts.c @@ -98,7 +98,7 @@ static void _global_dma_handler(void) { /* IRQ and DMA handler API */ -void *InterruptCallback(int irq, void (*func)(void)) { +void *InterruptCallback(IRQ_Channel irq, 
void (*func)(void)) { if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) return 0; @@ -115,14 +115,14 @@ void *InterruptCallback(int irq, void (*func)(void)) { return old_callback; } -void *GetInterruptCallback(int irq) { +void *GetInterruptCallback(IRQ_Channel irq) { if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) return 0; return _irq_handlers[irq]; } -void *DMACallback(int dma, void (*func)(void)) { +void *DMACallback(DMA_Channel dma, void (*func)(void)) { if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) return 0; @@ -150,7 +150,7 @@ void *DMACallback(int dma, void (*func)(void)) { return old_callback; } -void *GetDMACallback(int dma) { +void *GetDMACallback(DMA_Channel dma) { if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) return 0; diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index a262472..9f45f10 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -4,6 +4,7 @@ */ #include <stdint.h> +#include <assert.h> #include <psxetc.h> #include <psxapi.h> #include <psxgpu.h> @@ -37,6 +38,9 @@ static volatile uint16_t _last_hblank; /* Private interrupt handlers */ +#define _ENTER_CRITICAL() uint16_t mask = IRQ_MASK; IRQ_MASK = 0; +#define _EXIT_CRITICAL() IRQ_MASK = mask; + static void _vblank_handler(void) { _vblank_counter++; @@ -50,9 +54,10 @@ static void _gpu_dma_handler(void) { __asm__ volatile(""); if (--_queue_length) { - volatile QueueEntry *entry = &_draw_queue[_queue_head++]; - _queue_head %= QUEUE_LENGTH; + int head = _queue_head; + _queue_head = (head + 1) % QUEUE_LENGTH; + volatile QueueEntry *entry = &_draw_queue[head]; entry->func(entry->arg1, entry->arg2, entry->arg3); } else { GPU_GP1 = 0x04000000; // Disable DMA request @@ -75,7 +80,7 @@ void ResetGraph(int mode) { _gpu_video_mode = (GPU_GP1 >> 20) & 1; ExitCriticalSection(); - _sdk_log("psxgpu: setup done, default mode is %s\n", _gpu_video_mode ? "PAL" : "NTSC"); + _sdk_log("setup done, default mode is %s\n", _gpu_video_mode ? 
"PAL" : "NTSC"); } if (mode == 3) { @@ -113,8 +118,7 @@ static void _default_vsync_halt(void) { return; } - _sdk_log("psxgpu: VSync() timeout\n"); - _sdk_dump_log(); + _sdk_log("VSync() timeout\n"); ChangeClearPAD(0); ChangeClearRCnt(3, 0); } @@ -130,7 +134,6 @@ int VSync(int mode) { // Wait for at least one vertical blank event to occur. do { - _sdk_dump_log(); _vsync_halt_func(); // If interlaced mode is enabled, wait until the GPU starts displaying @@ -146,19 +149,22 @@ int VSync(int mode) { } void *VSyncHaltFunction(void (*func)(void)) { + //_ENTER_CRITICAL(); + void *old_callback = _vsync_halt_func; _vsync_halt_func = func; + //_EXIT_CRITICAL(); return old_callback; } void *VSyncCallback(void (*func)(void)) { - EnterCriticalSection(); + _ENTER_CRITICAL(); void *old_callback = _vsync_callback; _vsync_callback = func; - ExitCriticalSection(); + _EXIT_CRITICAL(); return old_callback; } @@ -177,37 +183,36 @@ int EnqueueDrawOp( // to checking if DMA is busy; disabling them afterwards would create a // race condition where the DMA transfer could end while interrupts are // being disabled. Interrupts are disabled through the IRQ_MASK register - // rather than by calling EnterCriticalSection() for performance reasons. - uint16_t mask = IRQ_MASK; - IRQ_MASK = 0; - - if (_queue_length) { - if (_queue_length >= QUEUE_LENGTH) { - IRQ_MASK = mask; - _sdk_log("psxgpu: draw queue overflow, dropping commands\n"); - return -1; - } + // rather than via syscalls for performance reasons. 
+ _ENTER_CRITICAL(); + int length = _queue_length; - int length = _queue_length; - _queue_length = length + 1; + if (!length) { + _queue_length = 1; + _EXIT_CRITICAL(); - volatile QueueEntry *entry = &_draw_queue[_queue_tail++]; - _queue_tail %= QUEUE_LENGTH; - - entry->func = func; - entry->arg1 = arg1; - entry->arg2 = arg2; - entry->arg3 = arg3; + func(arg1, arg2, arg3); + return 0; + } + if (length >= QUEUE_LENGTH) { + _EXIT_CRITICAL(); - IRQ_MASK = mask; - return length; + _sdk_log("draw queue overflow, dropping commands\n"); + return -1; } - _queue_length = 1; + int tail = _queue_tail; + _queue_tail = (tail + 1) % QUEUE_LENGTH; + _queue_length = length + 1; + + volatile QueueEntry *entry = &_draw_queue[tail]; + entry->func = func; + entry->arg1 = arg1; + entry->arg2 = arg2; + entry->arg3 = arg3; - IRQ_MASK = mask; - func(arg1, arg2, arg3); - return 0; + _EXIT_CRITICAL(); + return length; } int DrawSync(int mode) { @@ -230,20 +235,19 @@ int DrawSync(int mode) { while (!(GPU_GP1 & (1 << 26))) __asm__ volatile(""); } else { - _sdk_log("psxgpu: DrawSync() timeout\n"); - _sdk_dump_log(); + _sdk_log("DrawSync() timeout\n"); } return _queue_length; } void *DrawSyncCallback(void (*func)(void)) { - EnterCriticalSection(); + _ENTER_CRITICAL(); void *old_callback = _drawsync_callback; _drawsync_callback = func; - ExitCriticalSection(); + _EXIT_CRITICAL(); return old_callback; } diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index 968dde5..bbdb7c8 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -4,7 +4,7 @@ */ #include <stdint.h> -#include <psxetc.h> +#include <assert.h> #include <psxgpu.h> #include <hwregs_c.h> @@ -15,11 +15,11 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { size_t length = rect->w * rect->h; if (length % 2) - _sdk_log("psxgpu: can't transfer an odd number of pixels\n"); + _sdk_log("can't transfer an odd number of pixels\n"); length /= 2; if ((length >= DMA_CHUNK_LENGTH) && (length % 
DMA_CHUNK_LENGTH)) { - _sdk_log("psxgpu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } diff --git a/libpsn00b/psxgte/applymatrixlv.s b/libpsn00b/psxgte/applymatrixlv.s deleted file mode 100644 index 3180d0f..0000000 --- a/libpsn00b/psxgte/applymatrixlv.s +++ /dev/null @@ -1,40 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global ApplyMatrixLV -.type ApplyMatrixLV, @function -ApplyMatrixLV: - - # Load matrix to GTE - lw $t0, 0($a0) - lw $t1, 4($a0) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a0) - lw $t1, 12($a0) - lhu $t2, 16($a0) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lw $t0, 0($a1) - lw $t1, 4($a1) - mtc2 $t0, C2_IR1 - lw $t0, 8($a1) - mtc2 $t1, C2_IR2 - mtc2 $t0, C2_IR3 - - nMVMVA(1, 0, 3, 3, 0) - - swc2 C2_IR1, 0($a2) - swc2 C2_IR2, 4($a2) - swc2 C2_IR3, 8($a2) - - jr $ra - move $v0, $a2 -
\ No newline at end of file diff --git a/libpsn00b/psxgte/compmatrixlv.s b/libpsn00b/psxgte/compmatrixlv.s deleted file mode 100644 index 2908eb9..0000000 --- a/libpsn00b/psxgte/compmatrixlv.s +++ /dev/null @@ -1,100 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.set MATRIX_r11r12, 0 -.set MATRIX_r13r21, 4 -.set MATRIX_r22r23, 8 -.set MATRIX_r31r32, 12 -.set MATRIX_r33, 16 -.set MATRIX_trx, 20 -.set MATRIX_try, 24 -.set MATRIX_trz, 28 - - -.global CompMatrixLV -.type CompMatrixLV, @function -CompMatrixLV: - - # Load matrix v0 to GTE - lw $t0, MATRIX_r11r12($a0) - lw $t1, MATRIX_r13r21($a0) - ctc2 $t0, C2_R11R12 - ctc2 $t1, C2_R13R21 - lw $t0, MATRIX_r22r23($a0) - lw $t1, MATRIX_r31r32($a0) - lhu $t2, MATRIX_r33($a0) - ctc2 $t0, C2_R22R23 - lw $t0, MATRIX_trx($a0) - ctc2 $t1, C2_R31R32 - lw $t1, MATRIX_try($a0) - ctc2 $t2, C2_R33 - lw $t2, MATRIX_trz($a0) - ctc2 $t0, C2_TRX - ctc2 $t1, C2_TRY - ctc2 $t2, C2_TRZ - - lw $t0, MATRIX_trx($a1) - lw $t1, MATRIX_try($a1) - mtc2 $t0, C2_IR1 - lw $t0, MATRIX_trz($a1) - mtc2 $t1, C2_IR2 - mtc2 $t0, C2_IR3 - - nMVMVA(1, 0, 3, 0, 0) - - swc2 C2_IR1, MATRIX_trx($a2) - swc2 C2_IR2, MATRIX_try($a2) - swc2 C2_IR3, MATRIX_trz($a2) - - lhu $t1, 2*(0+(3*1))($a1) # Load values for first - lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a1) # Load values for second - lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a2) - sh $t0, 2*(0+(3*2))($a2) - - lhu $t1, 2*(2+(3*1))($a1) # Load values for third - lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a1) - 
mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 2*(1+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a2) - sh $t0, 2*(1+(3*2))($a2) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 2*(2+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a2) - sh $t0, 2*(2+(3*2))($a2) - - jr $ra - move $v0, $a2
\ No newline at end of file diff --git a/libpsn00b/psxgte/hirotmatrix.c b/libpsn00b/psxgte/hirotmatrix.c deleted file mode 100644 index 56516b0..0000000 --- a/libpsn00b/psxgte/hirotmatrix.c +++ /dev/null @@ -1,35 +0,0 @@ -#include <psxgte.h> - -MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) { - - short s[3],c[3]; - MATRIX tm[3]; - - s[0] = hisin(r->vx); s[1] = hisin(r->vy); s[2] = hisin(r->vz); - c[0] = hicos(r->vx); c[1] = hicos(r->vy); c[2] = hicos(r->vz); - - // mX - m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0; - m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0]; - m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0]; - - // mY - tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1]; - tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0; - tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1]; - - // mZ - tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0; - tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0; - tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE; - - PushMatrix(); - - MulMatrix0( m, &tm[0], &tm[2] ); - MulMatrix0( &tm[2], &tm[1], m ); - - PopMatrix(); - - return m; - -} diff --git a/libpsn00b/psxgte/hisin.c b/libpsn00b/psxgte/hisin.c deleted file mode 100644 index 68d5d28..0000000 --- a/libpsn00b/psxgte/hisin.c +++ /dev/null @@ -1,33 +0,0 @@ -/* Based on isin_S4 implementation from coranac: - * http://www.coranac.com/2009/07/sines/ - * - */ - -#define qN 15 -#define qA 12 -#define B 19900 -#define C 3516 - -int hisin(int x) { - - int c, x2, y; - - c= x<<(30-qN); // Semi-circle info into carry. - x -= 1<<qN; // sine -> cosine calc - - x= x<<(31-qN); // Mask with PI - x= x>>(31-qN); // Note: SIGNED shift! (to qN) - x= x*x>>(2*qN-14); // x=x^2 To Q14 - - y= B - (x*C>>14); // B - x^2*C - y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C) - - return c>=0 ? 
y : -y; - -} - -int hicos(int x) { - - return hisin( x+32768 ); - -} diff --git a/libpsn00b/psxgte/initgeom.s b/libpsn00b/psxgte/initgeom.s index d004ecc..6d1e36a 100644 --- a/libpsn00b/psxgte/initgeom.s +++ b/libpsn00b/psxgte/initgeom.s @@ -2,9 +2,7 @@ .include "gtereg.inc" -.section .text - - +.section .text.InitGeom .global InitGeom .type InitGeom, @function InitGeom: diff --git a/libpsn00b/psxgte/isin.c b/libpsn00b/psxgte/isin.c index 79e2970..a0397bd 100644 --- a/libpsn00b/psxgte/isin.c +++ b/libpsn00b/psxgte/isin.c @@ -1,34 +1,46 @@ -/* Based on isin_S4 implementation from coranac: - * http://www.coranac.com/2009/07/sines/ +/* + * PSn00bSDK (incomplete) trigonometry library + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed * + * Based on isin_S4 implementation from coranac: + * https://www.coranac.com/2009/07/sines */ -#define qN 10 -#define qA 12 -#define B 19900 -#define C 3516 +#define qN_l 10 +#define qN_h 15 +#define qA 12 +#define B 19900 +#define C 3516 -int isin(int x) { - - int c, x2, y; +static inline int _isin(int qN, int x) { + int c, x2, y; - c= x<<(30-qN); // Semi-circle info into carry. - x -= 1<<qN; // sine -> cosine calc + c = x << (30 - qN); // Semi-circle info into carry. + x -= 1 << qN; // sine -> cosine calc - x= x<<(31-qN); // Mask with PI - x= x>>(31-qN); // Note: SIGNED shift! (to qN) + x <<= (31 - qN); // Mask with PI + x >>= (31 - qN); // Note: SIGNED shift! (to qN) + x *= x; + x >>= (2 * qN - 14); // x=x^2 To Q14 - x= x*x>>(2*qN-14); // x=x^2 To Q14 + y = B - (x * C >> 14); // B - x^2*C + y = (1 << qA) - (x * y >> 16); // A - x^2*(B-x^2*C) - y= B - (x*C>>14); // B - x^2*C - y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C) - - return c>=0 ? y : -y; + return (c >= 0) ? 
y : (-y); +} +int isin(int x) { + return _isin(qN_l, x); } int icos(int x) { + return _isin(qN_l, x + (1 << qN_l)); +} - return isin( x+1024 ); +int hisin(int x) { + return _isin(qN_h, x); +} +int hicos(int x) { + return _isin(qN_h, x + (1 << qN_h)); } diff --git a/libpsn00b/psxgte/matrix.c b/libpsn00b/psxgte/matrix.c index b4dea12..805fd1b 100644 --- a/libpsn00b/psxgte/matrix.c +++ b/libpsn00b/psxgte/matrix.c @@ -1,7 +1,6 @@ #include <psxgte.h> MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) { - short s[3],c[3]; MATRIX tm[3]; @@ -24,22 +23,47 @@ MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) { tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE; PushMatrix(); - MulMatrix0( m, &tm[0], &tm[2] ); MulMatrix0( &tm[2], &tm[1], m ); - PopMatrix(); return m; +} + +MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) { + short s[3],c[3]; + MATRIX tm[3]; + + s[0] = hisin(r->vx); s[1] = hisin(r->vy); s[2] = hisin(r->vz); + c[0] = hicos(r->vx); c[1] = hicos(r->vy); c[2] = hicos(r->vz); + // mX + m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0; + m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0]; + m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0]; + + // mY + tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1]; + tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0; + tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1]; + + // mZ + tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0; + tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0; + tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE; + + PushMatrix(); + MulMatrix0( m, &tm[0], &tm[2] ); + MulMatrix0( &tm[2], &tm[1], m ); + PopMatrix(); + + return m; } MATRIX *TransMatrix(MATRIX *m, VECTOR *r) { - m->t[0] = r->vx; m->t[1] = r->vy; m->t[2] = r->vz; return m; - } diff --git a/libpsn00b/psxgte/matrix.s b/libpsn00b/psxgte/matrix.s new file mode 100644 index 0000000..9de0ccd --- /dev/null +++ b/libpsn00b/psxgte/matrix.s @@ -0,0 +1,439 @@ +.set noreorder + +.include "gtereg.inc" 
+.include "inline_s.inc" + +.set MATRIX_r11r12, 0 +.set MATRIX_r13r21, 4 +.set MATRIX_r22r23, 8 +.set MATRIX_r31r32, 12 +.set MATRIX_r33, 16 +.set MATRIX_trx, 20 +.set MATRIX_try, 24 +.set MATRIX_trz, 28 + +.section .text.ApplyMatrixLV +.global ApplyMatrixLV +.type ApplyMatrixLV, @function +ApplyMatrixLV: + # Load matrix to GTE + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lw $t0, 0($a1) + lw $t1, 4($a1) + mtc2 $t0, C2_IR1 + lw $t0, 8($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 3, 0) + + swc2 C2_IR1, 0($a2) + swc2 C2_IR2, 4($a2) + swc2 C2_IR3, 8($a2) + + jr $ra + move $v0, $a2 + +.section .text.CompMatrixLV +.global CompMatrixLV +.type CompMatrixLV, @function +CompMatrixLV: + # Load matrix v0 to GTE + lw $t0, MATRIX_r11r12($a0) + lw $t1, MATRIX_r13r21($a0) + ctc2 $t0, C2_R11R12 + ctc2 $t1, C2_R13R21 + lw $t0, MATRIX_r22r23($a0) + lw $t1, MATRIX_r31r32($a0) + lhu $t2, MATRIX_r33($a0) + ctc2 $t0, C2_R22R23 + lw $t0, MATRIX_trx($a0) + ctc2 $t1, C2_R31R32 + lw $t1, MATRIX_try($a0) + ctc2 $t2, C2_R33 + lw $t2, MATRIX_trz($a0) + ctc2 $t0, C2_TRX + ctc2 $t1, C2_TRY + ctc2 $t2, C2_TRZ + + lw $t0, MATRIX_trx($a1) + lw $t1, MATRIX_try($a1) + mtc2 $t0, C2_IR1 + lw $t0, MATRIX_trz($a1) + mtc2 $t1, C2_IR2 + mtc2 $t0, C2_IR3 + + nMVMVA(1, 0, 3, 0, 0) + + swc2 C2_IR1, MATRIX_trx($a2) + swc2 C2_IR2, MATRIX_try($a2) + swc2 C2_IR3, MATRIX_trz($a2) + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 
2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 + +.section .text.MulMatrix +.global MulMatrix +.type MulMatrix, @function +MulMatrix: + # Load m1 to GTE + lw $t0, 0($a1) + lw $t1, 4($a1) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a1) + lw $t1, 12($a1) + lhu $t2, 16($a1) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a0) # Load values for first + lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a0) # Load values for second + lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a0) + sh $t0, 2*(0+(3*2))($a0) + + lhu $t1, 2*(2+(3*1))($a0) # Load values for third + lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a0) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a0) + sh $t0, 2*(1+(3*2))($a0) + MVMVA(1, 0, 0, 3, 0) # 
Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a0) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a0) + sh $t0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 + +.section .text.MulMatrix0 +.global MulMatrix0 +.type MulMatrix0, @function +MulMatrix0: + # Load m1 to GTE + lw $t0, 0($a0) + lw $t1, 4($a0) + ctc2 $t0, $0 + ctc2 $t1, $1 + lw $t0, 8($a0) + lw $t1, 12($a0) + lhu $t2, 16($a0) + ctc2 $t0, $2 + ctc2 $t1, $3 + ctc2 $t2, $4 + + lhu $t1, 2*(0+(3*1))($a1) # Load values for first + lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(0+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + lhu $t1, 2*(1+(3*1))($a1) # Load values for second + lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 + MVMVA(1, 0, 0, 3, 0) # First multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(1+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of first + mfc2 $t1, C2_IR2 + sh $t0, 2*(0+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(0+(3*1))($a2) + sh $t0, 2*(0+(3*2))($a2) + + lhu $t1, 2*(2+(3*1))($a1) # Load values for third + lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 + MVMVA(1, 0, 0, 3, 0) # Second multiply + sll $t1, 16 + or $t0, $t1 + lhu $t1, 2*(2+(3*2))($a1) + mtc2 $t0, C2_VXY0 + mtc2 $t1, C2_VZ0 + + mfc2 $t0, C2_IR1 # Store results of second + mfc2 $t1, C2_IR2 + sh $t0, 2*(1+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(1+(3*1))($a2) + sh $t0, 2*(1+(3*2))($a2) + MVMVA(1, 0, 0, 3, 0) # Third multiply + + mfc2 $t0, C2_IR1 # Store results of third + mfc2 $t1, C2_IR2 + sh $t0, 2*(2+(3*0))($a2) + mfc2 $t0, C2_IR3 + sh $t1, 2*(2+(3*1))($a2) + sh $t0, 2*(2+(3*2))($a2) + + jr $ra + move $v0, $a2 + +.section .text.ScaleMatrix +.global ScaleMatrix +.type ScaleMatrix, @function +ScaleMatrix: + lwc2 C2_IR0, 0($a1) # X + + lh $v0, 2*(0+(3*0))($a0) + lh $v1, 2*(0+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(0+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 
$v1, C2_IR2 + sh $v0, 2*(0+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(0+(3*1))($a0) + sh $v0, 2*(0+(3*2))($a0) + + lwc2 C2_IR0, 4($a1) # Y + + lh $v0, 2*(1+(3*0))($a0) + lh $v1, 2*(1+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(1+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(1+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*1))($a0) + sh $v0, 2*(1+(3*2))($a0) + + lwc2 C2_IR0, 8($a1) # Z + + lh $v0, 2*(2+(3*0))($a0) + lh $v1, 2*(2+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(2+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(2+(3*1))($a0) + sh $v0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 + +.section .text.ScaleMatrixL +.global ScaleMatrixL +.type ScaleMatrixL, @function +ScaleMatrixL: + lwc2 C2_IR0, 0($a1) # X + + lh $v0, 2*(0+(3*0))($a0) + lh $v1, 2*(1+(3*0))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*0))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*0))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*0))($a0) + sh $v0, 2*(2+(3*0))($a0) + + lwc2 C2_IR0, 4($a1) # Y + + lh $v0, 2*(0+(3*1))($a0) + lh $v1, 2*(1+(3*1))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*1))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*1))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*1))($a0) + sh $v0, 2*(2+(3*1))($a0) + + lwc2 C2_IR0, 8($a1) # Z + + lh $v0, 2*(0+(3*2))($a0) + lh $v1, 2*(1+(3*2))($a0) + mtc2 $v0, C2_IR1 + lh $v0, 2*(2+(3*2))($a0) + mtc2 $v1, C2_IR2 + mtc2 $v0, C2_IR3 + + nGPF(1) + + mfc2 $v0, C2_IR1 + mfc2 $v1, C2_IR2 + sh $v0, 2*(0+(3*2))($a0) + mfc2 $v0, C2_IR3 + sh $v1, 2*(1+(3*2))($a0) + sh $v0, 2*(2+(3*2))($a0) + + jr $ra + move $v0, $a0 + +.section .text.PushMatrix +.global PushMatrix +.type PushMatrix, @function +PushMatrix: + la $a0, _matrix_stack + cfc2 $v0, C2_R11R12 
+ cfc2 $v1, C2_R13R21 + sw $v0, 0($a0) + cfc2 $v0, C2_R22R23 + sw $v1, 4($a0) + sw $v0, 8($a0) + cfc2 $v0, C2_R31R32 + cfc2 $v1, C2_R33 + sw $v0, 12($a0) + sw $v1, 16($a0) + cfc2 $v0, C2_TRX + cfc2 $v1, C2_TRY + sw $v0, 20($a0) + cfc2 $v0, C2_TRZ + sw $v1, 24($a0) + jr $ra + sw $v0, 28($a0) + +.section .text.PopMatrix +.global PopMatrix +.type PopMatrix, @function +PopMatrix: + la $a0, _matrix_stack + lw $v0, 0($a0) + lw $v1, 4($a0) + ctc2 $v0, C2_R11R12 + ctc2 $v1, C2_R13R21 + lw $v0, 8($a0) + lw $v1, 12($a0) + ctc2 $v0, C2_R22R23 + lw $v0, 16($a0) + ctc2 $v1, C2_R31R32 + ctc2 $v0, C2_R33 + lw $v0, 20($a0) + lw $v1, 24($a0) + ctc2 $v0, C2_TRX + lw $v0, 28($a0) + ctc2 $v1, C2_TRY + ctc2 $v0, C2_TRZ + jr $ra + nop + +.section .data._matrix_stack +.type _matrix_stack, @object +_matrix_stack: + .word 0, 0, 0, 0, 0, 0, 0, 0 diff --git a/libpsn00b/psxgte/mulmatrix.s b/libpsn00b/psxgte/mulmatrix.s deleted file mode 100644 index 08c79c2..0000000 --- a/libpsn00b/psxgte/mulmatrix.s +++ /dev/null @@ -1,74 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global MulMatrix -.type MulMatrix, @function -MulMatrix: - - # Load m1 to GTE - lw $t0, 0($a1) - lw $t1, 4($a1) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a1) - lw $t1, 12($a1) - lhu $t2, 16($a1) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lhu $t1, 2*(0+(3*1))($a0) # Load values for first - lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a0) # Load values for second - lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a0) - sh $t0, 2*(0+(3*2))($a0) - - lhu $t1, 2*(2+(3*1))($a0) # Load values for third - lhu 
$t0, 2*(2+(3*0))($a0) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a0) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 2*(1+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a0) - sh $t0, 2*(1+(3*2))($a0) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 2*(2+(3*0))($a0) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a0) - sh $t0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 diff --git a/libpsn00b/psxgte/mulmatrix0.s b/libpsn00b/psxgte/mulmatrix0.s deleted file mode 100644 index c2fd859..0000000 --- a/libpsn00b/psxgte/mulmatrix0.s +++ /dev/null @@ -1,74 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global MulMatrix0 -.type MulMatrix0, @function -MulMatrix0: - - # Load m1 to GTE - lw $t0, 0($a0) - lw $t1, 4($a0) - ctc2 $t0, $0 - ctc2 $t1, $1 - lw $t0, 8($a0) - lw $t1, 12($a0) - lhu $t2, 16($a0) - ctc2 $t0, $2 - ctc2 $t1, $3 - ctc2 $t2, $4 - - lhu $t1, 2*(0+(3*1))($a1) # Load values for first - lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31 - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(0+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - lhu $t1, 2*(1+(3*1))($a1) # Load values for second - lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32 - MVMVA(1, 0, 0, 3, 0) # First multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(1+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of first - mfc2 $t1, C2_IR2 - sh $t0, 2*(0+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(0+(3*1))($a2) - sh $t0, 2*(0+(3*2))($a2) - - lhu $t1, 2*(2+(3*1))($a1) # Load values for third - lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33 - MVMVA(1, 0, 0, 3, 0) # Second multiply - sll $t1, 16 - or $t0, $t1 - lhu $t1, 2*(2+(3*2))($a1) - mtc2 $t0, C2_VXY0 - mtc2 $t1, C2_VZ0 - - mfc2 $t0, C2_IR1 # Store results of second - mfc2 $t1, C2_IR2 - sh $t0, 
2*(1+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(1+(3*1))($a2) - sh $t0, 2*(1+(3*2))($a2) - MVMVA(1, 0, 0, 3, 0) # Third multiply - - mfc2 $t0, C2_IR1 # Store results of third - mfc2 $t1, C2_IR2 - sh $t0, 2*(2+(3*0))($a2) - mfc2 $t0, C2_IR3 - sh $t1, 2*(2+(3*1))($a2) - sh $t0, 2*(2+(3*2))($a2) - - jr $ra - move $v0, $a2 diff --git a/libpsn00b/psxgte/pushpopmatrix.s b/libpsn00b/psxgte/pushpopmatrix.s deleted file mode 100644 index ca6b992..0000000 --- a/libpsn00b/psxgte/pushpopmatrix.s +++ /dev/null @@ -1,68 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global PushMatrix -.type PushMatrix, @function -PushMatrix: - la $a0, _matrix_stack - cfc2 $v0, C2_R11R12 - cfc2 $v1, C2_R13R21 - sw $v0, 0($a0) - cfc2 $v0, C2_R22R23 - sw $v1, 4($a0) - sw $v0, 8($a0) - cfc2 $v0, C2_R31R32 - cfc2 $v1, C2_R33 - sw $v0, 12($a0) - sw $v1, 16($a0) - cfc2 $v0, C2_TRX - cfc2 $v1, C2_TRY - sw $v0, 20($a0) - cfc2 $v0, C2_TRZ - sw $v1, 24($a0) - jr $ra - sw $v0, 28($a0) - -.global PopMatrix -.type PopMatrix, @function -PopMatrix: - la $a0, _matrix_stack - lw $v0, 0($a0) - lw $v1, 4($a0) - ctc2 $v0, C2_R11R12 - ctc2 $v1, C2_R13R21 - lw $v0, 8($a0) - lw $v1, 12($a0) - ctc2 $v0, C2_R22R23 - lw $v0, 16($a0) - ctc2 $v1, C2_R31R32 - ctc2 $v0, C2_R33 - lw $v0, 20($a0) - lw $v1, 24($a0) - ctc2 $v0, C2_TRX - lw $v0, 28($a0) - ctc2 $v1, C2_TRY - ctc2 $v0, C2_TRZ - jr $ra - nop - - -.section .data - - -.type matrix_stack, @object -_matrix_stack: - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - .word 0 - diff --git a/libpsn00b/psxgte/scalematrix.s b/libpsn00b/psxgte/scalematrix.s deleted file mode 100644 index 1b2b6dd..0000000 --- a/libpsn00b/psxgte/scalematrix.s +++ /dev/null @@ -1,68 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global ScaleMatrix -.type ScaleMatrix, @function -ScaleMatrix: - - lwc2 C2_IR0, 0($a1) # X - - lh $v0, 2*(0+(3*0))($a0) - lh $v1, 2*(0+(3*1))($a0) - 
mtc2 $v0, C2_IR1 - lh $v0, 2*(0+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(0+(3*1))($a0) - sh $v0, 2*(0+(3*2))($a0) - - lwc2 C2_IR0, 4($a1) # Y - - lh $v0, 2*(1+(3*0))($a0) - lh $v1, 2*(1+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(1+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(1+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*1))($a0) - sh $v0, 2*(1+(3*2))($a0) - - lwc2 C2_IR0, 8($a1) # Z - - lh $v0, 2*(2+(3*0))($a0) - lh $v1, 2*(2+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(2+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(2+(3*1))($a0) - sh $v0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 diff --git a/libpsn00b/psxgte/scalematrixl.s b/libpsn00b/psxgte/scalematrixl.s deleted file mode 100644 index 53c2d14..0000000 --- a/libpsn00b/psxgte/scalematrixl.s +++ /dev/null @@ -1,68 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global ScaleMatrixL -.type ScaleMatrixL, @function -ScaleMatrixL: - - lwc2 C2_IR0, 0($a1) # X - - lh $v0, 2*(0+(3*0))($a0) - lh $v1, 2*(1+(3*0))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*0))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*0))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*0))($a0) - sh $v0, 2*(2+(3*0))($a0) - - lwc2 C2_IR0, 4($a1) # Y - - lh $v0, 2*(0+(3*1))($a0) - lh $v1, 2*(1+(3*1))($a0) - mtc2 $v0, C2_IR1 - lh $v0, 2*(2+(3*1))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*1))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*1))($a0) - sh $v0, 2*(2+(3*1))($a0) - - lwc2 C2_IR0, 8($a1) # Z - - lh $v0, 2*(0+(3*2))($a0) - lh $v1, 2*(1+(3*2))($a0) - mtc2 $v0, C2_IR1 - 
lh $v0, 2*(2+(3*2))($a0) - mtc2 $v1, C2_IR2 - mtc2 $v0, C2_IR3 - - nGPF(1) - - mfc2 $v0, C2_IR1 - mfc2 $v1, C2_IR2 - sh $v0, 2*(0+(3*2))($a0) - mfc2 $v0, C2_IR3 - sh $v1, 2*(1+(3*2))($a0) - sh $v0, 2*(2+(3*2))($a0) - - jr $ra - move $v0, $a0 diff --git a/libpsn00b/psxgte/square0.s b/libpsn00b/psxgte/square0.s deleted file mode 100644 index a8ca107..0000000 --- a/libpsn00b/psxgte/square0.s +++ /dev/null @@ -1,27 +0,0 @@ -.set noreorder - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - - -.global Square0 -.type Square0, @function -Square0: - - # a0 - Pointer to input vector (v0) - # a1 - Pointer to output vector (v1) - - lwc2 C2_IR1, 0($a0) - lwc2 C2_IR2, 4($a0) - lwc2 C2_IR3, 8($a0) - - nSQR(0) - - swc2 C2_IR1, 0($a1) - swc2 C2_IR2, 4($a1) - swc2 C2_IR3, 8($a1) - - jr $ra - nop diff --git a/libpsn00b/psxgte/squareroot.s b/libpsn00b/psxgte/squareroot.s index 72198b4..4377ee4 100644 --- a/libpsn00b/psxgte/squareroot.s +++ b/libpsn00b/psxgte/squareroot.s @@ -3,10 +3,7 @@ .include "gtereg.inc" .include "inline_s.inc" -.section .text - -# Implementation based from Sony libs - +.section .text.SquareRoot12 .global SquareRoot12 .type SquareRoot12, @function SquareRoot12: @@ -34,27 +31,26 @@ SquareRoot12: .Lvalue_greater12: addi $t4, -64 sll $t4, 1 - la $t5, sqrt_table + la $t5, _sqrt_table addu $t5, $t4 lh $t5, 0($t5) nop - + bltz $t1, .L1594c nop jr $ra sllv $v0, $t5, $t1 - -.L1594c: +.L1594c: sub $t1, $0 , $t1 jr $ra srl $v0, $t5, $t1 - + .Lbad_sqr12: jr $ra move $v0, $0 - - + +.section .text.SquareRoot0 .global SquareRoot0 .type SquareRoot0, @function SquareRoot0: @@ -82,7 +78,7 @@ SquareRoot0: .Lvalue_greater: addi $t4, -64 sll $t4, 1 - la $t5, sqrt_table + la $t5, _sqrt_table addu $t5, $t4 lh $t5, 0($t5) nop @@ -92,32 +88,31 @@ SquareRoot0: .Lbad_sqr: jr $ra move $v0, $0 - -.section .data - -sqrt_table: - .hword 0x1000,0x101f,0x103f,0x105e,0x107e,0x109c,0x10bb,0x10da - .hword 0x10f8,0x1116,0x1134,0x1152,0x116f,0x118c,0x11a9,0x11c6 - .hword 
0x11e3,0x1200,0x121c,0x1238,0x1254,0x1270,0x128c,0x12a7 - .hword 0x12c2,0x12de,0x12f9,0x1314,0x132e,0x1349,0x1364,0x137e - .hword 0x1398,0x13b2,0x13cc,0x13e6,0x1400,0x1419,0x1432,0x144c - .hword 0x1465,0x147e,0x1497,0x14b0,0x14c8,0x14e1,0x14f9,0x1512 - .hword 0x152a,0x1542,0x155a,0x1572,0x158a,0x15a2,0x15b9,0x15d1 - .hword 0x15e8,0x1600,0x1617,0x162e,0x1645,0x165c,0x1673,0x1689 - .hword 0x16a0,0x16b7,0x16cd,0x16e4,0x16fa,0x1710,0x1726,0x173c - .hword 0x1752,0x1768,0x177e,0x1794,0x17aa,0x17bf,0x17d5,0x17ea - .hword 0x1800,0x1815,0x182a,0x183f,0x1854,0x1869,0x187e,0x1893 - .hword 0x18a8,0x18bd,0x18d1,0x18e6,0x18fa,0x190f,0x1923,0x1938 - .hword 0x194c,0x1960,0x1974,0x1988,0x199c,0x19b0,0x19c4,0x19d8 - .hword 0x19ec,0x1a00,0x1a13,0x1a27,0x1a3a,0x1a4e,0x1a61,0x1a75 - .hword 0x1a88,0x1a9b,0x1aae,0x1ac2,0x1ad5,0x1ae8,0x1afb,0x1b0e - .hword 0x1b21,0x1b33,0x1b46,0x1b59,0x1b6c,0x1b7e,0x1b91,0x1ba3 - .hword 0x1bb6,0x1bc8,0x1bdb,0x1bed,0x1c00,0x1c12,0x1c24,0x1c36 - .hword 0x1c48,0x1c5a,0x1c6c,0x1c7e,0x1c90,0x1ca2,0x1cb4,0x1cc6 - .hword 0x1cd8,0x1ce9,0x1cfb,0x1d0d,0x1d1e,0x1d30,0x1d41,0x1d53 - .hword 0x1d64,0x1d76,0x1d87,0x1d98,0x1daa,0x1dbb,0x1dcc,0x1ddd - .hword 0x1dee,0x1e00,0x1e11,0x1e22,0x1e33,0x1e43,0x1e54,0x1e65 - .hword 0x1e76,0x1e87,0x1e98,0x1ea8,0x1eb9,0x1eca,0x1eda,0x1eeb - .hword 0x1efb,0x1f0c,0x1f1c,0x1f2d,0x1f3d,0x1f4e,0x1f5e,0x1f6e - .hword 0x1f7e,0x1f8f,0x1f9f,0x1faf,0x1fbf,0x1fcf,0x1fdf,0x1fef
\ No newline at end of file +.section .data._sqrt_table +.type _sqrt_table, @object +_sqrt_table: + .hword 0x1000, 0x101f, 0x103f, 0x105e, 0x107e, 0x109c, 0x10bb, 0x10da + .hword 0x10f8, 0x1116, 0x1134, 0x1152, 0x116f, 0x118c, 0x11a9, 0x11c6 + .hword 0x11e3, 0x1200, 0x121c, 0x1238, 0x1254, 0x1270, 0x128c, 0x12a7 + .hword 0x12c2, 0x12de, 0x12f9, 0x1314, 0x132e, 0x1349, 0x1364, 0x137e + .hword 0x1398, 0x13b2, 0x13cc, 0x13e6, 0x1400, 0x1419, 0x1432, 0x144c + .hword 0x1465, 0x147e, 0x1497, 0x14b0, 0x14c8, 0x14e1, 0x14f9, 0x1512 + .hword 0x152a, 0x1542, 0x155a, 0x1572, 0x158a, 0x15a2, 0x15b9, 0x15d1 + .hword 0x15e8, 0x1600, 0x1617, 0x162e, 0x1645, 0x165c, 0x1673, 0x1689 + .hword 0x16a0, 0x16b7, 0x16cd, 0x16e4, 0x16fa, 0x1710, 0x1726, 0x173c + .hword 0x1752, 0x1768, 0x177e, 0x1794, 0x17aa, 0x17bf, 0x17d5, 0x17ea + .hword 0x1800, 0x1815, 0x182a, 0x183f, 0x1854, 0x1869, 0x187e, 0x1893 + .hword 0x18a8, 0x18bd, 0x18d1, 0x18e6, 0x18fa, 0x190f, 0x1923, 0x1938 + .hword 0x194c, 0x1960, 0x1974, 0x1988, 0x199c, 0x19b0, 0x19c4, 0x19d8 + .hword 0x19ec, 0x1a00, 0x1a13, 0x1a27, 0x1a3a, 0x1a4e, 0x1a61, 0x1a75 + .hword 0x1a88, 0x1a9b, 0x1aae, 0x1ac2, 0x1ad5, 0x1ae8, 0x1afb, 0x1b0e + .hword 0x1b21, 0x1b33, 0x1b46, 0x1b59, 0x1b6c, 0x1b7e, 0x1b91, 0x1ba3 + .hword 0x1bb6, 0x1bc8, 0x1bdb, 0x1bed, 0x1c00, 0x1c12, 0x1c24, 0x1c36 + .hword 0x1c48, 0x1c5a, 0x1c6c, 0x1c7e, 0x1c90, 0x1ca2, 0x1cb4, 0x1cc6 + .hword 0x1cd8, 0x1ce9, 0x1cfb, 0x1d0d, 0x1d1e, 0x1d30, 0x1d41, 0x1d53 + .hword 0x1d64, 0x1d76, 0x1d87, 0x1d98, 0x1daa, 0x1dbb, 0x1dcc, 0x1ddd + .hword 0x1dee, 0x1e00, 0x1e11, 0x1e22, 0x1e33, 0x1e43, 0x1e54, 0x1e65 + .hword 0x1e76, 0x1e87, 0x1e98, 0x1ea8, 0x1eb9, 0x1eca, 0x1eda, 0x1eeb + .hword 0x1efb, 0x1f0c, 0x1f1c, 0x1f2d, 0x1f3d, 0x1f4e, 0x1f5e, 0x1f6e + .hword 0x1f7e, 0x1f8f, 0x1f9f, 0x1faf, 0x1fbf, 0x1fcf, 0x1fdf, 0x1fef diff --git a/libpsn00b/psxgte/vector.s b/libpsn00b/psxgte/vector.s new file mode 100644 index 0000000..2f2f8d6 --- /dev/null +++ b/libpsn00b/psxgte/vector.s @@ -0,0 +1,123 
@@ +.set noreorder +.set noat + +.include "gtereg.inc" +.include "inline_s.inc" + +.section .text.Square0 +.global Square0 +.type Square0, @function +Square0: + # a0 - Pointer to input vector (v0) + # a1 - Pointer to output vector (v1) + + lwc2 C2_IR1, 0($a0) + lwc2 C2_IR2, 4($a0) + lwc2 C2_IR3, 8($a0) + + nSQR(0) + + swc2 C2_IR1, 0($a1) + swc2 C2_IR2, 4($a1) + swc2 C2_IR3, 8($a1) + + jr $ra + nop + +.section .text.VectorNormalS +.global VectorNormalS +.type VectorNormalS, @function +VectorNormalS: + + lw $t0, 0($a0) + lw $t1, 4($a0) + lw $t2, 8($a0) + + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + + nSQR(0) + + mfc2 $t3, C2_MAC1 + mfc2 $t4, C2_MAC2 + mfc2 $t5, C2_MAC3 + + add $t3, $t4 + add $v0, $t3, $t5 + mtc2 $v0, C2_LZCS + nop + nop + mfc2 $v1, C2_LZCR + + addiu $at, $0 , -2 + and $v1, $at + + addiu $t6, $0 , 0x1f + sub $t6, $v1 + sra $t6, 1 + addiu $t3, $v1, -24 + + bltz $t3, .Lvalue_neg + nop + b .Lvalue_pos + sllv $t4, $v0, $t3 +.Lvalue_neg: + addiu $t3, $0 , 24 + sub $t3, $v1 + srav $t4, $v0, $t3 +.Lvalue_pos: + addi $t4, -64 + sll $t4, 1 + + la $t5, _norm_table + addu $t5, $t4 + lh $t5, 0($t5) + + mtc2 $t0, C2_IR1 + mtc2 $t1, C2_IR2 + mtc2 $t2, C2_IR3 + mtc2 $t5, C2_IR0 + + nGPF(0) + + mfc2 $t0, C2_MAC1 + mfc2 $t1, C2_MAC2 + mfc2 $t2, C2_MAC3 + + sra $t0, $t6 + sra $t1, $t6 + sra $t2, $t6 + + sh $t0, 0($a1) + sh $t1, 2($a1) + jr $ra + sh $t2, 4($a1) + +.section .data._norm_table +.type _norm_table, @object +_norm_table: + .hword 0x1000, 0x0fe0, 0x0fc1, 0x0fa3, 0x0f85, 0x0f68, 0x0f4c, 0x0f30 + .hword 0x0f15, 0x0efb, 0x0ee1, 0x0ec7, 0x0eae, 0x0e96, 0x0e7e, 0x0e66 + .hword 0x0e4f, 0x0e38, 0x0e22, 0x0e0c, 0x0df7, 0x0de2, 0x0dcd, 0x0db9 + .hword 0x0da5, 0x0d91, 0x0d7e, 0x0d6b, 0x0d58, 0x0d45, 0x0d33, 0x0d21 + .hword 0x0d10, 0x0cff, 0x0cee, 0x0cdd, 0x0ccc, 0x0cbc, 0x0cac, 0x0c9c + .hword 0x0c8d, 0x0c7d, 0x0c6e, 0x0c5f, 0x0c51, 0x0c42, 0x0c34, 0x0c26 + .hword 0x0c18, 0x0c0a, 0x0bfd, 0x0bef, 0x0be2, 0x0bd5, 0x0bc8, 0x0bbb + .hword 0x0baf, 0x0ba2, 
0x0b96, 0x0b8a, 0x0b7e, 0x0b72, 0x0b67, 0x0b5b + .hword 0x0b50, 0x0b45, 0x0b39, 0x0b2e, 0x0b24, 0x0b19, 0x0b0e, 0x0b04 + .hword 0x0af9, 0x0aef, 0x0ae5, 0x0adb, 0x0ad1, 0x0ac7, 0x0abd, 0x0ab4 + .hword 0x0aaa, 0x0aa1, 0x0a97, 0x0a8e, 0x0a85, 0x0a7c, 0x0a73, 0x0a6a + .hword 0x0a61, 0x0a59, 0x0a50, 0x0a47, 0x0a3f, 0x0a37, 0x0a2e, 0x0a26 + .hword 0x0a1e, 0x0a16, 0x0a0e, 0x0a06, 0x09fe, 0x09f6, 0x09ef, 0x09e7 + .hword 0x09e0, 0x09d8, 0x09d1, 0x09c9, 0x09c2, 0x09bb, 0x09b4, 0x09ad + .hword 0x09a5, 0x099e, 0x0998, 0x0991, 0x098a, 0x0983, 0x097c, 0x0976 + .hword 0x096f, 0x0969, 0x0962, 0x095c, 0x0955, 0x094f, 0x0949, 0x0943 + .hword 0x093c, 0x0936, 0x0930, 0x092a, 0x0924, 0x091e, 0x0918, 0x0912 + .hword 0x090d, 0x0907, 0x0901, 0x08fb, 0x08f6, 0x08f0, 0x08eb, 0x08e5 + .hword 0x08e0, 0x08da, 0x08d5, 0x08cf, 0x08ca, 0x08c5, 0x08bf, 0x08ba + .hword 0x08b5, 0x08b0, 0x08ab, 0x08a6, 0x08a1, 0x089c, 0x0897, 0x0892 + .hword 0x088d, 0x0888, 0x0883, 0x087e, 0x087a, 0x0875, 0x0870, 0x086b + .hword 0x0867, 0x0862, 0x085e, 0x0859, 0x0855, 0x0850, 0x084c, 0x0847 + .hword 0x0843, 0x083e, 0x083a, 0x0836, 0x0831, 0x082d, 0x0829, 0x0824 + .hword 0x0820, 0x081c, 0x0818, 0x0814, 0x0810, 0x080c, 0x0808, 0x0804 diff --git a/libpsn00b/psxgte/vectornormals.s b/libpsn00b/psxgte/vectornormals.s deleted file mode 100644 index 85e94e6..0000000 --- a/libpsn00b/psxgte/vectornormals.s +++ /dev/null @@ -1,110 +0,0 @@ -.set noreorder -.set noat - -.include "gtereg.inc" -.include "inline_s.inc" - -.section .text - -# Implementation based from Sony libs - -.global VectorNormalS -.type VectorNormalS, @function -VectorNormalS: - - lw $t0, 0($a0) - lw $t1, 4($a0) - lw $t2, 8($a0) - - mtc2 $t0, C2_IR1 - mtc2 $t1, C2_IR2 - mtc2 $t2, C2_IR3 - - nSQR(0) - - mfc2 $t3, C2_MAC1 - mfc2 $t4, C2_MAC2 - mfc2 $t5, C2_MAC3 - - add $t3, $t4 - add $v0, $t3, $t5 - mtc2 $v0, C2_LZCS - nop - nop - mfc2 $v1, C2_LZCR - - addiu $at, $0 , -2 - and $v1, $at - - addiu $t6, $0 , 0x1f - sub $t6, $v1 - sra $t6, 1 - addiu $t3, $v1, -24 - - 
bltz $t3, .Lvalue_neg - nop - b .Lvalue_pos - sllv $t4, $v0, $t3 -.Lvalue_neg: - addiu $t3, $0 , 24 - sub $t3, $v1 - srav $t4, $v0, $t3 -.Lvalue_pos: - addi $t4, -64 - sll $t4, 1 - - la $t5, _norm_table - addu $t5, $t4 - lh $t5, 0($t5) - - mtc2 $t0, C2_IR1 - mtc2 $t1, C2_IR2 - mtc2 $t2, C2_IR3 - mtc2 $t5, C2_IR0 - - nGPF(0) - - mfc2 $t0, C2_MAC1 - mfc2 $t1, C2_MAC2 - mfc2 $t2, C2_MAC3 - - sra $t0, $t6 - sra $t1, $t6 - sra $t2, $t6 - - sh $t0, 0($a1) - sh $t1, 2($a1) - jr $ra - sh $t2, 4($a1) - - -.section .data - -.global _norm_table -.type _norm_table, @object -_norm_table: - .hword 0x1000, 0x0FE0, 0x0FC1, 0x0FA3, 0x0F85, 0x0F68, 0x0F4C, 0x0F30 - .hword 0x0F15, 0x0EFB, 0x0EE1, 0x0EC7, 0x0EAE, 0x0E96, 0x0E7E, 0x0E66 - .hword 0x0E4F, 0x0E38, 0x0E22, 0x0E0C, 0x0DF7, 0x0DE2, 0x0DCD, 0x0DB9 - .hword 0x0DA5, 0x0D91, 0x0D7E, 0x0D6B, 0x0D58, 0x0D45, 0x0D33, 0x0D21 - .hword 0x0D10, 0x0CFF, 0x0CEE, 0x0CDD, 0x0CCC, 0x0CBC, 0x0CAC, 0x0C9C - .hword 0x0C8D, 0x0C7D, 0x0C6E, 0x0C5F, 0x0C51, 0x0C42, 0x0C34, 0x0C26 - .hword 0x0C18, 0x0C0A, 0x0BFD, 0x0BEF, 0x0BE2, 0x0BD5, 0x0BC8, 0x0BBB - .hword 0x0BAF, 0x0BA2, 0x0B96, 0x0B8A, 0x0B7E, 0x0B72, 0x0B67, 0x0B5B - .hword 0x0B50, 0x0B45, 0x0B39, 0x0B2E, 0x0B24, 0x0B19, 0x0B0E, 0x0B04 - .hword 0x0AF9, 0x0AEF, 0x0AE5, 0x0ADB, 0x0AD1, 0x0AC7, 0x0ABD, 0x0AB4 - .hword 0x0AAA, 0x0AA1, 0x0A97, 0x0A8E, 0x0A85, 0x0A7C, 0x0A73, 0x0A6A - .hword 0x0A61, 0x0A59, 0x0A50, 0x0A47, 0x0A3F, 0x0A37, 0x0A2E, 0x0A26 - .hword 0x0A1E, 0x0A16, 0x0A0E, 0x0A06, 0x09FE, 0x09F6, 0x09EF, 0x09E7 - .hword 0x09E0, 0x09D8, 0x09D1, 0x09C9, 0x09C2, 0x09BB, 0x09B4, 0x09AD - .hword 0x09A5, 0x099E, 0x0998, 0x0991, 0x098A, 0x0983, 0x097C, 0x0976 - .hword 0x096F, 0x0969, 0x0962, 0x095C, 0x0955, 0x094F, 0x0949, 0x0943 - .hword 0x093C, 0x0936, 0x0930, 0x092A, 0x0924, 0x091E, 0x0918, 0x0912 - .hword 0x090D, 0x0907, 0x0901, 0x08FB, 0x08F6, 0x08F0, 0x08EB, 0x08E5 - .hword 0x08E0, 0x08DA, 0x08D5, 0x08CF, 0x08CA, 0x08C5, 0x08BF, 0x08BA - .hword 0x08B5, 0x08B0, 0x08AB, 0x08A6, 0x08A1, 
0x089C, 0x0897, 0x0892 - .hword 0x088D, 0x0888, 0x0883, 0x087E, 0x087A, 0x0875, 0x0870, 0x086B - .hword 0x0867, 0x0862, 0x085E, 0x0859, 0x0855, 0x0850, 0x084C, 0x0847 - .hword 0x0843, 0x083E, 0x083A, 0x0836, 0x0831, 0x082D, 0x0829, 0x0824 - .hword 0x0820, 0x081C, 0x0818, 0x0814, 0x0810, 0x080C, 0x0808, 0x0804 -
\ No newline at end of file diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index d43436f..d55dbbb 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -4,7 +4,7 @@ */ #include <stdint.h> -#include <psxetc.h> +#include <assert.h> #include <psxapi.h> #include <psxpress.h> #include <hwregs_c.h> @@ -127,7 +127,7 @@ void DecDCTin(const uint32_t *data, int mode) { // the stream. void DecDCTinRaw(const uint32_t *data, size_t length) { if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _sdk_log("psxpress: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } @@ -149,8 +149,7 @@ int DecDCTinSync(int mode) { return 0; } - _sdk_log("psxpress: DecDCTinSync() timeout\n"); - _sdk_dump_log(); + _sdk_log("DecDCTinSync() timeout\n"); return -1; } @@ -158,7 +157,7 @@ void DecDCTout(uint32_t *data, size_t length) { DecDCToutSync(0); if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _sdk_log("psxpress: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } @@ -180,7 +179,6 @@ int DecDCToutSync(int mode) { return 0; } - _sdk_log("psxpress: DecDCToutSync() timeout\n"); - _sdk_dump_log(); + _sdk_log("DecDCToutSync() timeout\n"); return -1; } diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s index 885a3f7..f3a1c67 100644 --- a/libpsn00b/psxpress/vlc.s +++ b/libpsn00b/psxpress/vlc.s @@ -29,17 +29,17 @@ .set VLC_Context_block_index, 20 .set VLC_Context_coeff_index, 21 -.set DECDCTSMALLTAB_lut0, 0 -.set DECDCTSMALLTAB_lut2, 4 -.set DECDCTSMALLTAB_lut3, 36 -.set DECDCTSMALLTAB_lut4, 292 -.set DECDCTSMALLTAB_lut5, 308 -.set DECDCTSMALLTAB_lut7, 324 -.set 
DECDCTSMALLTAB_lut8, 356 -.set DECDCTSMALLTAB_lut9, 420 -.set DECDCTSMALLTAB_lut10, 484 -.set DECDCTSMALLTAB_lut11, 548 -.set DECDCTSMALLTAB_lut12, 612 +.set DECDCTTAB_lut0, 0 +.set DECDCTTAB_lut2, 4 +.set DECDCTTAB_lut3, 36 +.set DECDCTTAB_lut4, 292 +.set DECDCTTAB_lut5, 308 +.set DECDCTTAB_lut7, 324 +.set DECDCTTAB_lut8, 356 +.set DECDCTTAB_lut9, 420 +.set DECDCTTAB_lut10, 484 +.set DECDCTTAB_lut11, 548 +.set DECDCTTAB_lut12, 612 .section .text.DecDCTvlcStart .global DecDCTvlcStart @@ -115,7 +115,7 @@ _vlc_skip_context_load: # Obtain the addresses of the lookup table and jump area in advance so that # they don't have to be retrieved for each coefficient decoded. lw $t8, _vlc_huffman_table - la $t9, .Lac_jump_area + la $t9, .Lac_prefix_10 beqz $a2, .Lstop_processing addiu $a1, 4 # output = (uint16_t *) &output[1] @@ -123,67 +123,67 @@ _vlc_skip_context_load: .Lprocess_next_code_loop: # while (max_size) # This is the "hot" part of the decoder, executed for each code in the # bitstream. The first step is to determine if the next code is a DC or AC - # coefficient. The GTE is also given the task of counting the number of - # leading zeroes/ones, which takes 2 more cycles. + # coefficient. bnez $t7, .Lprocess_ac_coefficient - mtc2 $t0, $30 + addiu $t7, 1 # coeff_index++ bnez $t4, .Lprocess_dc_v3_coefficient - #nop + li $v1, 0x01ff .Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) # The DC coefficient in version 2 frames is not compressed. Value 0x1ff is # used to signal the end of the bitstream. srl $v0, $t0, 22 # prefix = (window >> (32 - 10)) - li $v1, 0x01ff beq $v0, $v1, .Lstop_processing # if (prefix == 0x1ff) break or $v0, $t3 # *output = prefix | quant_scale sll $t0, 10 # window <<= 10 - addiu $t5, -10 # bit_offset -= 10 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -10 # bit_offset -= 10 .Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) # TODO: version 3 is currently not supported. 
jr $ra li $v0, -1 - #b .Lwrite_value .Lprocess_ac_coefficient: # if (coeff_index) - # Check whether the prefix code is one of the shorter, more common ones. + # Check whether the prefix code is one of the shorter, more common ones, + # and start counting the number of leading zeroes/ones using the GTE (which + # takes 2 more cycles). srl $v0, $t0, 30 li $v1, 3 beq $v0, $v1, .Lac_prefix_11 li $v1, 2 beq $v0, $v1, .Lac_prefix_10 li $v1, 1 + mtc2 $t0, $30 beq $v0, $v1, .Lac_prefix_01 - #srl $v0, $t0, 29 - #beq $v0, $v1, .Lac_prefix_001 - #nop + nop # If the code is longer, retrieve the number of leading zeroes from the GTE # and use it as an index into the jump area. Each block in the area is 8 # instructions long and handles decoding a specific prefix. mfc2 $v0, $31 - nop - andi $v0, 15 # jump_addr = &ac_jump_area[(prefix % 16) * 8 * sizeof(u32)] - sll $v0, 5 + li $v1, 11 + bgt $v0, $v1, .Lreturn_error # if (prefix > 11) return -1 + sll $v0, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(u32)] addu $v0, $t9 jr $v0 nop +.Lreturn_error: + jr $ra + li $v0, -1 + .Lac_prefix_11: # Prefix 11 is followed by a single bit. srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16) andi $v0, 2 addu $v0, $t8 # value = table->lut0[index] - lhu $v0, DECDCTSMALLTAB_lut0($v0) + lhu $v0, DECDCTTAB_lut0($v0) sll $t0, 3 # window <<= 3 - addiu $t5, -3 # bit_offset -= 3 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -3 # bit_offset -= 3 + #.word 0 -.Lac_jump_area: .Lac_prefix_10: # Prefix 10 marks the end of a block. 
li $v0, 0xfe00 # value = 0xfe00 @@ -202,11 +202,10 @@ _vlc_skip_context_load: srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32) andi $v0, 28 addu $v0, $t8 # value = table->lut2[index] - lw $v0, DECDCTSMALLTAB_lut2($v0) - addiu $t7, 1 # coeff_index++ + lw $v0, DECDCTTAB_lut2($v0) b .Lupdate_window_and_write srl $v1, $v0, 16 # length = value >> 16 - .word 0 + .word 0, 0 .Lac_prefix_001: # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a @@ -214,136 +213,106 @@ _vlc_skip_context_load: srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32) andi $v0, 252 addu $v0, $t8 # value = table->lut3[index] - lw $v0, DECDCTSMALLTAB_lut3($v0) - addiu $t7, 1 # coeff_index++ + lw $v0, DECDCTTAB_lut3($v0) b .Lupdate_window_and_write srl $v1, $v0, 16 # length = value >> 16 - .word 0 + .word 0, 0 .Lac_prefix_0001: # Prefix 0001 is followed by a 3-bit lookup index. srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16) andi $v0, 14 addu $v0, $t8 # value = table->lut4[index] - lhu $v0, DECDCTSMALLTAB_lut4($v0) + lhu $v0, DECDCTTAB_lut4($v0) sll $t0, 7 # window <<= 4 + 3 - addiu $t5, -7 # bit_offset -= 4 + 3 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -7 # bit_offset -= 4 + 3 + .word 0 .Lac_prefix_00001: # Prefix 00001 is followed by a 3-bit lookup index. srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16) andi $v0, 14 addu $v0, $t8 # value = table->lut5[index] - lhu $v0, DECDCTSMALLTAB_lut5($v0) + lhu $v0, DECDCTTAB_lut5($v0) sll $t0, 8 # window <<= 5 + 3 - addiu $t5, -8 # bit_offset -= 5 + 3 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -8 # bit_offset -= 5 + 3 + .word 0 .Lac_prefix_000001: # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. 
srl $v0, $t0, 10 # value = window >> (32 - 6 - 16) sll $t0, 22 # window <<= 6 + 16 - addiu $t5, -22 # bit_offset -= 6 + 16 b .Lwrite_value - addiu $t7, 1 # coeff_index++ - .word 0, 0, 0 + addiu $t5, -22 # bit_offset -= 6 + 16 + .word 0, 0, 0, 0 .Lac_prefix_0000001: # Prefix 0000001 is followed by a 4-bit lookup index. srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16) andi $v0, 30 addu $v0, $t8 # value = table->lut7[index] - lhu $v0, DECDCTSMALLTAB_lut7($v0) + lhu $v0, DECDCTTAB_lut7($v0) sll $t0, 11 # window <<= 7 + 4 - addiu $t5, -11 # bit_offset -= 7 + 4 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -11 # bit_offset -= 7 + 4 + .word 0 .Lac_prefix_00000001: # Prefix 00000001 is followed by a 5-bit lookup index. srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16) andi $v0, 62 addu $v0, $t8 # value = table->lut8[index] - lhu $v0, DECDCTSMALLTAB_lut8($v0) + lhu $v0, DECDCTTAB_lut8($v0) sll $t0, 13 # window <<= 8 + 5 - addiu $t5, -13 # bit_offset -= 8 + 5 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -13 # bit_offset -= 8 + 5 + .word 0 .Lac_prefix_000000001: # Prefix 000000001 is followed by a 5-bit lookup index. srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16) andi $v0, 62 addu $v0, $t8 # value = table->lut9[index] - lhu $v0, DECDCTSMALLTAB_lut9($v0) + lhu $v0, DECDCTTAB_lut9($v0) sll $t0, 14 # window <<= 9 + 5 - addiu $t5, -14 # bit_offset -= 9 + 5 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -14 # bit_offset -= 9 + 5 + .word 0 .Lac_prefix_0000000001: # Prefix 0000000001 is followed by a 5-bit lookup index. 
srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16) andi $v0, 62 addu $v0, $t8 # value = table->lut10[index] - lhu $v0, DECDCTSMALLTAB_lut10($v0) + lhu $v0, DECDCTTAB_lut10($v0) sll $t0, 15 # window <<= 10 + 5 - addiu $t5, -15 # bit_offset -= 10 + 5 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -15 # bit_offset -= 10 + 5 + .word 0 .Lac_prefix_00000000001: # Prefix 00000000001 is followed by a 5-bit lookup index. srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16) andi $v0, 62 addu $v0, $t8 # value = table->lut11[index] - lhu $v0, DECDCTSMALLTAB_lut11($v0) + lhu $v0, DECDCTTAB_lut11($v0) sll $t0, 16 # window <<= 11 + 5 - addiu $t5, -16 # bit_offset -= 11 + 5 b .Lwrite_value - addiu $t7, 1 # coeff_index++ + addiu $t5, -16 # bit_offset -= 11 + 5 + .word 0 .Lac_prefix_000000000001: # Prefix 000000000001 is followed by a 5-bit lookup index. srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16) andi $v0, 62 addu $v0, $t8 # value = table->lut12[index] - lhu $v0, DECDCTSMALLTAB_lut12($v0) + lhu $v0, DECDCTTAB_lut12($v0) sll $t0, 17 # window <<= 12 + 5 - addiu $t5, -17 # bit_offset -= 12 + 5 b .Lwrite_value - addiu $t7, 1 # coeff_index++ - - # Prefix 0000000000001 is not valid. - beqz $t0, .Lstop_processing - nop - jr $ra - li $v0, -1 - .word 0, 0, 0, 0 - - # Prefix 00000000000001 is not valid. - beqz $t0, .Lstop_processing - nop - jr $ra - li $v0, -1 - .word 0, 0, 0, 0 - - # Prefix 000000000000001 is not valid. - beqz $t0, .Lstop_processing - nop - jr $ra - li $v0, -1 - .word 0, 0, 0, 0 - - # Prefix 0000000000000001 is not valid. 
- beqz $t0, .Lstop_processing - nop - jr $ra - li $v0, -1 - #.word 0, 0, 0, 0 + addiu $t5, -17 # bit_offset -= 12 + 5 + .word 0 .Lupdate_window_and_write: sllv $t0, $t0, $v1 # window <<= length diff --git a/libpsn00b/psxsio/_sio_control.s b/libpsn00b/psxsio/_sio_control.s deleted file mode 100644 index 6378def..0000000 --- a/libpsn00b/psxsio/_sio_control.s +++ /dev/null @@ -1,184 +0,0 @@ -.set noreorder - -.include "hwregs_a.inc" - -.section .text - -# Currently implemented serial control functions: -# -# cmd(a0) sub(a1) -# 0 0 Get serial status -# 0 1 Get control line status -# 0 2 Get serial mode -# 0 3 Get baud rate -# 0 4 Read 1 byte -# 1 1 Set serial control -# 1 2 Set serial mode -# 1 3 Set baud rate -# 1 4 Write 1 byte -# 2 0 Reset serial -# 2 1 Acknowledge serial - -.global _sio_control -.type _sio_control, @function -_sio_control: - - # a0 - command - # a1 - subcommand - # a2 - argument - - lui $a3, IOBASE - - beq $a0, $0, .Lcmd0 - nop - beq $a0, 1, .Lcmd1 - nop - beq $a0, 2, .Lcmd2 - nop - jr $ra - nop - - -.Lcmd0: - - beq $a1, $0, .Lcmd0arg0 - nop - beq $a1, 1, .Lcmd0arg1 - nop - beq $a1, 2, .Lcmd0arg2 - nop - beq $a1, 3, .Lcmd0arg3 - nop - beq $a1, 4, .Lcmd0arg4 - nop - jr $ra - nop - -.Lcmd0arg0: # Get SIO status - - lhu $v0, SIO_STAT($a3) - nop - - jr $ra - andi $v0, 0x3FF - -.Lcmd0arg1: # Get control line status - - lhu $v1, SIO_CTRL($a3) - nop - srl $v0, $v1, 1 - andi $v0, 0x1 - srl $v1, 4 - andi $v1, 0x2 - - jr $ra - or $v0, $v1 - - -.Lcmd0arg2: # Get serial mode - - lhu $v0, SIO_MODE($a3) - nop - jr $ra - andi $v0, 0xFF - -.Lcmd0arg3: - - lui $v1, 0x1f - lhu $v0, SIO_BAUD($a3) - ori $v1, 0xa400 - div $v1, $v0 - nop - nop - mflo $v0 - jr $ra - nop - -.Lcmd0arg4: # Serial RX read - - lbu $v0, SIO_TXRX($a3) - nop - jr $ra - nop - - -.Lcmd1: - - beq $a1, 1, .Lcmd1arg1 - nop - beq $a1, 2, .Lcmd1arg2 - nop - beq $a1, 3, .Lcmd1arg3 - nop - beq $a1, 4, .Lcmd1arg4 - nop - jr $ra - nop - -.Lcmd1arg1: - - andi $v0, $a2, 0x1CFF - sh $a2, SIO_CTRL($a3) - jr 
$ra - nop - -.Lcmd1arg2: - - sh $a2, SIO_MODE($a3) - jr $ra - nop - -.Lcmd1arg3: - - lui $v0, 0x1f - ori $v0, 0xa400 - divu $v0, $a2 - bnez $a2, .Lgood_baud - nop - jr $ra - nop - -.Lgood_baud: - - mflo $v0 - sh $v0, SIO_BAUD($a3) - nop - jr $ra - nop - -.Lcmd1arg4: - - sb $a2, SIO_TXRX($a3) - nop - jr $ra - nop - -.Lcmd2: - - beq $a1, $0 , .Lcmd2arg0 - li $v0, 1 - beq $a1, $v0, .Lcmd2arg1 - nop - jr $ra - nop - -.Lcmd2arg0: - - li $v0, 0x40 - sh $v0, SIO_CTRL($a3) - sh $0 , SIO_MODE($a3) - sh $0 , SIO_BAUD($a3) - nop - jr $ra - nop - -.Lcmd2arg1: - - lhu $v0, SIO_CTRL($a3) - nop - ori $v0, 0x10 - sh $v0, SIO_CTRL($a3) - jr $ra - nop - -
\ No newline at end of file diff --git a/libpsn00b/psxsio/sio.c b/libpsn00b/psxsio/sio.c new file mode 100644 index 0000000..6b80352 --- /dev/null +++ b/libpsn00b/psxsio/sio.c @@ -0,0 +1,269 @@ +/* + * PSn00bSDK buffered serial port driver + * (C) 2022 spicyjpeg - MPL licensed + * + * TODO: add proper support for DTR/DSR flow control + */ + +#include <stdint.h> +#include <assert.h> +#include <psxetc.h> +#include <psxapi.h> +#include <psxsio.h> +#include <hwregs_c.h> + +#define BUFFER_LENGTH 128 +#define SIO_SYNC_TIMEOUT 0x100000 + +/* Private types */ + +typedef struct { + uint8_t data[BUFFER_LENGTH]; + uint8_t head, tail, length; +} RingBuffer; + +/* Internal globals */ + +static SIO_FlowControl _flow_control; +static uint16_t _ctrl_reg_flag; + +static int (*_read_callback)(uint8_t) = (void *) 0; +static void (*_old_sio_handler)(void) = (void *) 0; + +static volatile RingBuffer _tx_buffer, _rx_buffer; + +/* Private interrupt handler */ + +#define _ENTER_CRITICAL() uint16_t mask = IRQ_MASK; IRQ_MASK = 0; +#define _EXIT_CRITICAL() IRQ_MASK = mask; + +static void _sio_handler(void) { + // Handle any incoming bytes. + while (SIO_STAT & SR_RXRDY) { + uint8_t value = SIO_TXRX; + + // Skip storing this byte into the RX buffer if the callback returns a + // non-zero value. + if (_read_callback) { + if (_read_callback(value)) + continue; + } + + int length = _rx_buffer.length; + + if (length >= BUFFER_LENGTH) { + //_sdk_log("RX overrun, dropping bytes\n"); + break; + } + + int tail = _rx_buffer.tail; + _rx_buffer.tail = (tail + 1) % BUFFER_LENGTH; + _rx_buffer.length = length + 1; + + _rx_buffer.data[tail] = value; + } + + // Send the next byte in the buffer if the TX unit is ready. Note that + // checking for CTS is unnecessary as the serial port is already hardwired + // to do so. 
+ if (SIO_STAT & (SR_TXRDY | SR_TXU)) { + int length = _tx_buffer.length; + + if (length) { + int head = _tx_buffer.head; + _tx_buffer.head = (head + 1) % BUFFER_LENGTH; + _tx_buffer.length = length - 1; + + SIO_CTRL |= CR_TXIEN; + SIO_TXRX = _tx_buffer.data[head]; + } else { + SIO_CTRL &= CR_TXIEN ^ 0xffff; + } + } + + // Acknowledge the IRQ and update flow control signals. + if (_rx_buffer.length < BUFFER_LENGTH) + SIO_CTRL = CR_INTRST | (SIO_CTRL | _ctrl_reg_flag); + else + SIO_CTRL = CR_INTRST | (SIO_CTRL & (_ctrl_reg_flag ^ 0xffff)); +} + +/* Serial port initialization API */ + +void SIO_Init(int baud, uint16_t mode) { + EnterCriticalSection(); + _old_sio_handler = InterruptCallback(8, &_sio_handler); + + SIO_CTRL = CR_ERRRST; + SIO_MODE = (mode & 0xfffc) | MR_BR_16; + SIO_BAUD = (uint16_t) ((int) 0x1fa400 / baud); + SIO_CTRL = CR_TXEN | CR_RXEN | CR_RXIEN; + + _tx_buffer.head = 0; + _tx_buffer.tail = 0; + _tx_buffer.length = 0; + _rx_buffer.head = 0; + _rx_buffer.tail = 0; + _rx_buffer.length = 0; + + _flow_control = SIO_FC_NONE; + _ctrl_reg_flag = 0; + + ExitCriticalSection(); +} + +void SIO_Quit(void) { + EnterCriticalSection(); + InterruptCallback(8, _old_sio_handler); + + SIO_CTRL = CR_ERRRST; + + ExitCriticalSection(); +} + +void SIO_SetFlowControl(SIO_FlowControl mode) { + _ENTER_CRITICAL(); + + switch (mode) { + case SIO_FC_NONE: + _flow_control = SIO_FC_NONE; + _ctrl_reg_flag = 0; + + SIO_CTRL &= 0xffff ^ CR_DSRIEN; + break; + + case SIO_FC_RTS_CTS: + _flow_control = SIO_FC_RTS_CTS; + _ctrl_reg_flag = CR_RTS; + + SIO_CTRL &= 0xffff ^ CR_DSRIEN; + break; + + /*case SIO_FC_DTR_DSR: + _flow_control = SIO_FC_DTR_DSR; + _ctrl_reg_flag = CR_DTR; + + SIO_CTRL |= CR_DSRIEN; + break;*/ + } + + _EXIT_CRITICAL(); +} + +/* Reading API */ + +int SIO_ReadByte(void) { + /*for (int i = SIO_SYNC_TIMEOUT; i; i--) { + if (_rx_buffer.length) + return SIO_ReadByte2(); + }*/ + while (!_rx_buffer.length) + __asm__ volatile(""); + + return SIO_ReadByte2(); +} + +int 
SIO_ReadByte2(void) { + if (!_rx_buffer.length) + return -1; + + _ENTER_CRITICAL(); + + int head = _rx_buffer.head; + _rx_buffer.head = (head + 1) % BUFFER_LENGTH; + _rx_buffer.length--; + + _EXIT_CRITICAL(); + return _rx_buffer.data[head]; +} + +int SIO_ReadSync(int mode) { + if (mode) + return _rx_buffer.length; + + /*for (int i = SIO_SYNC_TIMEOUT; i; i--) { + if (_rx_buffer.length) + return 0; + }*/ + while (!_rx_buffer.length) + __asm__ volatile(""); + + return 0; +} + +void *SIO_ReadCallback(int (*func)(uint8_t)) { + EnterCriticalSection(); + + void *old_callback = _read_callback; + _read_callback = func; + + ExitCriticalSection(); +} + +/* Writing API */ + +int SIO_WriteByte(uint8_t value) { + for (int i = SIO_SYNC_TIMEOUT; i; i--) { + if (_tx_buffer.length < BUFFER_LENGTH) + return SIO_WriteByte2(value); + } + + //_sdk_log("SIO_WriteByte() timeout\n"); + return -1; +} + +int SIO_WriteByte2(uint8_t value) { + // If the TX unit is currently busy, append the byte to the buffer instead + // of sending it immediately. Note that interrupts must be disabled *prior* + // to checking if TX is busy; disabling them afterwards would create a race + // condition where the transfer could end while interrupts are being + // disabled. Interrupts are disabled through the IRQ_MASK register rather + // than via syscalls for performance reasons. + _ENTER_CRITICAL(); + + if (SIO_STAT & (SR_TXRDY | SR_TXU)) { + SIO_TXRX = value; + _EXIT_CRITICAL(); + return 0; + } + + int length = _tx_buffer.length; + + if (length >= BUFFER_LENGTH) { + _EXIT_CRITICAL(); + + //_sdk_log("TX overrun, dropping bytes\n"); + return -1; + } + + int tail = _tx_buffer.tail; + _tx_buffer.tail = (tail + 1) % BUFFER_LENGTH; + _tx_buffer.length = length + 1; + + _tx_buffer.data[tail] = value; + SIO_CTRL |= CR_TXIEN; + + _EXIT_CRITICAL(); + return length; +} + +int SIO_WriteSync(int mode) { + if (mode) + return _tx_buffer.length; + + // Wait for the buffer to become empty. 
+ for (int i = SIO_SYNC_TIMEOUT; i; i--) { + if (!_tx_buffer.length) + break; + } + + if (!_tx_buffer.length) { + // Wait for the TX unit to finish sending the last byte. + while (!(SIO_STAT & (SR_TXRDY | SR_TXU))) + __asm__ volatile(""); + } else { + //_sdk_log("SIO_WriteSync() timeout\n"); + } + + return _tx_buffer.length; +} diff --git a/libpsn00b/psxsio/siocons.c b/libpsn00b/psxsio/siocons.c deleted file mode 100644 index 5937920..0000000 --- a/libpsn00b/psxsio/siocons.c +++ /dev/null @@ -1,220 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <ioctl.h> -#include <psxapi.h> -#include <psxetc.h> -#include <psxsio.h> - -#define SIO_BUFF_LEN 32 - -static volatile int _sio_key_pending; - -static volatile int _sio_buff_rpos; -static volatile int _sio_buff_wpos; -static volatile char _sio_buff[SIO_BUFF_LEN]; - -static void _sio_init() { - - _sio_key_pending = 0; - - memset((void*)_sio_buff, 0, SIO_BUFF_LEN); - _sio_buff_rpos = 0; - _sio_buff_wpos = 0; - -} - -static int _sio_open(FCB *fcb, const char* file, int mode) { - - fcb->diskid = 1; - - return 0; - -} - -static int _sio_inout(FCB *fcb, int cmd) { - - int i; - - if(cmd == 2) { // Write - - for(i=0; i<fcb->trns_len; i++) { - while(!(_sio_control(0, 0, 0) & SR_TXU)); - _sio_control(1, 4, ((char*)fcb->trns_addr)[i]); - } - - return fcb->trns_len; - - } else if (cmd == 1) { // Read - - /*for(i=0; i<fcb->trns_len; i++) { - while(!(_sio_control(0, 0, 0) & SR_RXRDY)); - ((char*)fcb->trns_addr)[i] = _sio_control(0, 4, 0); - }*/ - - - - for(i=0; i<fcb->trns_len; i++) { - - while( _sio_key_pending == 0 ); - - ((char*)fcb->trns_addr)[i] = _sio_buff[_sio_buff_rpos]; - _sio_key_pending--; - _sio_buff_rpos++; - if( _sio_buff_rpos >= SIO_BUFF_LEN ) - { - _sio_buff_rpos = 0; - } - - } - - return fcb->trns_len; - - } - - return 0; - -} - -static int _sio_ioctl(FCB *fcb, int cmd, int arg) -{ - if( cmd == FIOCSCAN ) - { - if( _sio_key_pending ) - { - return 0; - } - } - - return -1; -} - -static int _sio_close(int h) { - 
- return h; - -} - -static void _sio_tty_cb(void) -{ - _sio_key_pending++; - - // Get received byte - if( _sio_key_pending < SIO_BUFF_LEN ) - { - _sio_buff[_sio_buff_wpos] = _sio_control(0, 4, 0); - _sio_buff_wpos++; - if( _sio_buff_wpos >= SIO_BUFF_LEN ) - { - _sio_buff_wpos = 0; - } - } - else - { - _sio_control(0, 4, 0); - } - - // Acknowledge SIO IRQ - _sio_control(2, 1, 0); -} - -static int _sio_dummy(void) -{ - return -1; -} - -static DCB _sio_dcb = { - "tty", - 0x3, - 0x1, - 0x0, - (void*)_sio_init, // init - (void*)_sio_open, // open - (void*)_sio_inout, // inout - _sio_close, // close - _sio_ioctl, // ioctl - _sio_dummy, // read - _sio_dummy, // write - _sio_dummy, // erase - _sio_dummy, // undelete - _sio_dummy, // firstfile - _sio_dummy, // nextfile - _sio_dummy, // format - _sio_dummy, // chdir - _sio_dummy, // rename - _sio_dummy, // remove - _sio_dummy // testdevice -}; - - -volatile void (*_sio_callback)(void) = NULL; - -void AddSIO(int baud) { - - _sio_control(2, 0, 0); - _sio_control(1, 2, MR_SB_01|MR_CHLEN_8|0x02); - _sio_control(1, 3, baud); - _sio_control(1, 1, CR_RXEN|CR_TXEN|CR_RXIEN); - - close(0); - close(1); - - DelDev(_sio_dcb.name); - AddDev(&_sio_dcb); - - Sio1Callback(_sio_tty_cb); - - open(_sio_dcb.name, 2); - open(_sio_dcb.name, 1); - -} - -void DelSIO(void) { - - Sio1Callback(NULL); - - // Reset serial interface - _sio_control(2, 0, 0); - - // Remove TTY device - DelDev(_sio_dcb.name); - - // Add dummy TTY device - AddDummyTty(); - -} - -void WaitSIO(void) { - - while((_sio_control(0, 0, 0)&(SR_RXRDY)) != (SR_RXRDY)); - _sio_control(0, 4, NULL); - -} - -void *Sio1Callback(void (*func)()) { - - void *old_isr; //= *((int*)&_sio_callback); - - EnterCriticalSection(); - - if( func ) { - - old_isr = InterruptCallback(8, func); - //_sio_callback = func; - - } else { - - old_isr = InterruptCallback(8, NULL); - //_sio_callback = NULL; - - } - - ExitCriticalSection(); - - return old_isr; - -} - -int kbhit() -{ - return(_sio_key_pending>0); -} 
diff --git a/libpsn00b/psxsio/tty.c b/libpsn00b/psxsio/tty.c new file mode 100644 index 0000000..4dc9fd1 --- /dev/null +++ b/libpsn00b/psxsio/tty.c @@ -0,0 +1,107 @@ +/* + * PSn00bSDK serial port BIOS TTY driver + * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * + * This driver is designed to be as simple and reliable as possible: as such it + * only relies on the SIO_*() API for receiving and sends data synchronously. + * This allows printf() to work without issues, albeit slowly, if called from a + * critical section or even from an interrupt handler. + */ + +#include <ioctl.h> +#include <psxapi.h> +#include <psxsio.h> +#include <hwregs_c.h> + +/* TTY device control block */ + +static int _sio_open(FCB *fcb, const char* file, int mode) { + fcb->diskid = 1; + return 0; +} + +static int _sio_inout(FCB *fcb, int cmd) { + char *ptr = (char*) fcb->trns_addr; + + switch (cmd) { + case 1: // read + for (int i = 0; i < fcb->trns_len; i++) + *(ptr++) = (char) SIO_ReadByte(); + + return fcb->trns_len; + + case 2: // write + for (int i = 0; i < fcb->trns_len; i++) { + while (!(SIO_STAT & (SR_TXRDY | SR_TXU))) + __asm__ volatile(""); + + SIO_TXRX = *(ptr++); + } + + return fcb->trns_len; + + default: + return 0; + } +} + +static int _sio_close(FCB *fcb) { + return 0; +} + +static int _sio_ioctl(FCB *fcb, int cmd, int arg) { + switch (cmd) { + case FIOCSCAN: + return SIO_ReadSync(1) ? 
0 : -1; + + default: + return -1; + } +} + +static int _sio_dummy(void) { + return -1; +} + +static DCB _sio_dcb = { + .name = "tty", + .flags = 3, + .ssize = 1, + .desc = "PSXSIO SERIAL CONSOLE", + .f_init = &_sio_dummy, + .f_open = &_sio_open, + .f_inout = &_sio_inout, + .f_close = &_sio_close, + .f_ioctl = &_sio_ioctl, + .f_read = &_sio_dummy, + .f_write = &_sio_dummy, + .f_erase = &_sio_dummy, + .f_undelete = &_sio_dummy, + .f_firstfile = &_sio_dummy, + .f_nextfile = &_sio_dummy, + .f_format = &_sio_dummy, + .f_chdir = &_sio_dummy, + .f_rename = &_sio_dummy, + .f_remove = &_sio_dummy, + .f_testdevice = &_sio_dummy +}; + +/* Public API */ + +void AddSIO(int baud) { + SIO_Init(baud, MR_SB_01 | MR_CHLEN_8); + + close(0); + close(1); + DelDev(_sio_dcb.name); + AddDev(&_sio_dcb); + open(_sio_dcb.name, 2); + open(_sio_dcb.name, 1); +} + +void DelSIO(void) { + SIO_Quit(); + + DelDev(_sio_dcb.name); + AddDummyTty(); +} diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c index 7d90858..d1dabfe 100644 --- a/libpsn00b/psxspu/common.c +++ b/libpsn00b/psxspu/common.c @@ -4,10 +4,12 @@ */ #include <stdint.h> -#include <psxetc.h> +#include <assert.h> #include <psxspu.h> #include <hwregs_c.h> +#define _min(x, y) (((x) < (y)) ? 
(x) : (y)) + #define WRITABLE_AREA_ADDR 0x200 #define DMA_CHUNK_LENGTH 16 #define STATUS_TIMEOUT 0x100000 @@ -25,16 +27,16 @@ static void _wait_status(uint16_t mask, uint16_t value) { return; } - _sdk_log("psxspu: status register timeout (0x%04x)\n", SPU_STAT); + _sdk_log("status register timeout (0x%04x)\n", SPU_STAT); } -static void _dma_transfer(uint32_t *data, size_t length, int write) { +static size_t _dma_transfer(uint32_t *data, size_t length, int write) { if (length % 4) - _sdk_log("psxspu: can't transfer a number of bytes that isn't multiple of 4\n"); + _sdk_log("can't transfer a number of bytes that isn't multiple of 4\n"); length /= 4; if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _sdk_log("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } @@ -42,9 +44,11 @@ static void _dma_transfer(uint32_t *data, size_t length, int write) { _wait_status(0x0030, 0x0000); // Enable DMA request for writing (2) or reading (3) + uint16_t ctrl = write ? 0x0020 : 0x0030; + SPU_ADDR = _transfer_addr; - SPU_CTRL |= write ? 
0x0020 : 0x0030; - _wait_status(0x0400, 0x0000); + SPU_CTRL |= ctrl; + _wait_status(0x0030, ctrl); DMA_MADR(4) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) @@ -53,6 +57,42 @@ static void _dma_transfer(uint32_t *data, size_t length, int write) { DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); DMA_CHCR(4) = 0x01000200 | write; + return length; +} + +static size_t _manual_write(const uint16_t *data, size_t length) { + if (length % 2) + _sdk_log("can't transfer a number of bytes that isn't multiple of 2\n"); + + length /= 2; + + SPU_CTRL &= 0xffcf; // Disable DMA request + _wait_status(0x0030, 0x0000); + + // Manual transfers have to be done by filling up the SPU's transfer buffer + // and then letting the SPU empty it one 64-byte chunk at a time. + uint16_t addr = _transfer_addr; + + while (length) { + size_t chunk = _min(DMA_CHUNK_LENGTH * 2, length); + length -= chunk; + + SPU_ADDR = addr; + addr += chunk / 4; + + for (; chunk; chunk--) + SPU_DATA = *(data++); + + SPU_CTRL |= 0x0010; // Manual transfer mode + _wait_status(0x0030, 0x0010); + _wait_status(0x0400, 0x0000); + + // This additional delay is required according to nocash docs. 
+ for (int i = 0; i < 1000; i++) + __asm__ volatile(""); + } + + return length; } /* Public API */ @@ -65,10 +105,14 @@ void SpuInit(void) { SPU_MASTER_VOL_R = 0; SPU_REVERB_VOL_L = 0; SPU_REVERB_VOL_R = 0; - SPU_KEY_OFF = 0x00ffffff; - SPU_FM_MODE = 0; - SPU_NOISE_MODE = 0; - SPU_REVERB_ON = 0; + SPU_KEY_OFF1 = 0xffff; + SPU_KEY_OFF2 = 0x00ff; + SPU_FM_MODE1 = 0; + SPU_FM_MODE2 = 0; + SPU_NOISE_MODE1 = 0; + SPU_NOISE_MODE2 = 0; + SPU_REVERB_ON1 = 0; + SPU_REVERB_ON2 = 0; SPU_REVERB_ADDR = 0xfffe; SPU_CD_VOL_L = 0; SPU_CD_VOL_R = 0; @@ -78,17 +122,16 @@ void SpuInit(void) { DMA_DPCR |= 0x000b0000; // Enable DMA4 DMA_CHCR(4) = 0x00000201; // Stop DMA4 - SPU_CTRL = 0xc011; // Enable SPU, DAC, CD audio, set manual transfer mode - _wait_status(0x001f, 0x0011); + SPU_DMA_CTRL = 0x0004; // Reset transfer mode + SPU_CTRL = 0xc001; // Enable SPU, DAC, CD audio, disable DMA request + _wait_status(0x003f, 0x0001); - // Upload a dummy ADPCM block to the first 16 bytes of SPU RAM. This may be - // freely used or overwritten. - SPU_ADDR = WRITABLE_AREA_ADDR; - _wait_status(0x0400, 0x0000); + // Upload a dummy looping ADPCM block to the first 16 bytes of SPU RAM. + // This may be freely used or overwritten. + uint32_t block[4] = { 0x0500, 0, 0, 0 }; - SPU_DATA = 0x0500; - for (int i = 7; i; i--) - SPU_DATA = 0x0000; + _transfer_addr = WRITABLE_AREA_ADDR; + _manual_write((const uint16_t *) block, 16); // "Play" the dummy block on all channels. This will reset the start // address and ADSR envelope status of each channel. @@ -101,38 +144,34 @@ void SpuInit(void) { // Sony's implementation leaves everything muted, however it makes sense to // turn up at least the master and CD audio volume by default. 
- SPU_KEY_ON = 0x00ffffff; + SPU_KEY_ON1 = 0xffff; + SPU_KEY_ON2 = 0x00ff; SPU_MASTER_VOL_L = 0x3fff; SPU_MASTER_VOL_R = 0x3fff; SPU_CD_VOL_L = 0x7fff; SPU_CD_VOL_R = 0x7fff; } -void SpuRead(uint32_t *data, size_t size) { - _dma_transfer(data, size, 0); +size_t SpuRead(uint32_t *data, size_t size) { + return _dma_transfer(data, size, 0) * 4; } -void SpuWrite(const uint32_t *data, size_t size) { +size_t SpuWrite(const uint32_t *data, size_t size) { if (_transfer_addr < WRITABLE_AREA_ADDR) - return; + return 0; // I/O transfer mode is not that useful, but whatever. - if (_transfer_mode) { - SPU_ADDR = _transfer_addr; - SPU_CTRL = (SPU_CTRL & 0xffcf) | 0x0010; // Manual transfer mode - _wait_status(0x0400, 0x0000); - - for (int i = size; i; i -= 4) { - uint32_t value = *(data++); - - SPU_DATA = (uint16_t) value; - SPU_DATA = (uint16_t) (value >> 16); - } + if (_transfer_mode) + return _manual_write((const uint16_t *) data, size) * 2; + else + return _dma_transfer((uint32_t *) data, size, 1) * 4; +} - return; - } +size_t SpuWritePartly(const uint32_t *data, size_t size) { + size_t _size = SpuWrite(data, size); - _dma_transfer((uint32_t *) data, size, 1); + _transfer_addr += (_size + 1) / 2; + return _size; } SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) { |
