diff options
101 files changed, 3472 insertions, 2150 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71417c1..e84dbca 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,5 @@ # PSn00bSDK GitHub Actions CI script -# (C) 2021 spicyjpeg - MPL licensed +# (C) 2021-2023 spicyjpeg - MPL licensed # The GCC toolchain is stored in the GitHub Actions cache after being built. To # minimize build times, all the toolchain build steps are skipped if there is a @@ -10,9 +10,11 @@ name: Build PSn00bSDK on: [ push, pull_request ] env: - BINUTILS_VERSION: 2.39 - GCC_VERSION: 12.2.0 - GCC_TARGET: mipsel-none-elf + BINUTILS_VERSION: '2.40' + BINUTILS_OPTIONS: '--disable-docs --disable-nls --disable-werror --with-float=soft' + GCC_VERSION: '13.1.0' + GCC_OPTIONS: '--disable-docs --disable-nls --disable-werror --disable-libada --disable-libssp --disable-libquadmath --disable-threads --disable-libgomp --disable-libstdcxx-pch --disable-hosted-libstdcxx --enable-languages=c,c++ --without-isl --without-headers --with-float=soft --with-gnu-as --with-gnu-ld' + GCC_TARGET: 'mipsel-none-elf' jobs: # This is based on doc/toolchain.md, no surprises here other than the cache. 
@@ -52,7 +54,7 @@ jobs: run: | mkdir binutils_linux cd binutils_linux - ../binutils-${{ env.BINUTILS_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/linux --target=${{ env.GCC_TARGET }} --disable-docs --disable-nls --with-float=soft + ../binutils-${{ env.BINUTILS_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/linux --target=${{ env.GCC_TARGET }} ${{ env.BINUTILS_OPTIONS }} make -j 2 make install-strip echo "${{ github.workspace }}/gcc/linux/bin" >>$GITHUB_PATH @@ -62,7 +64,7 @@ jobs: run: | mkdir gcc_linux cd gcc_linux - ../gcc-${{ env.GCC_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/linux --target=${{ env.GCC_TARGET }} --disable-docs --disable-nls --disable-libada --disable-libssp --disable-libquadmath --disable-libstdc++-v3 --with-float=soft --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + ../gcc-${{ env.GCC_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/linux --target=${{ env.GCC_TARGET }} ${{ env.GCC_OPTIONS }} make -j 2 make install-strip @@ -71,7 +73,7 @@ jobs: run: | mkdir binutils_windows cd binutils_windows - ../binutils-${{ env.BINUTILS_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/windows --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 --target=${{ env.GCC_TARGET }} --disable-docs --disable-nls --with-float=soft + ../binutils-${{ env.BINUTILS_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/windows --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 --target=${{ env.GCC_TARGET }} ${{ env.BINUTILS_OPTIONS }} make -j 2 make install-strip @@ -80,7 +82,7 @@ jobs: run: | mkdir gcc_windows cd gcc_windows - ../gcc-${{ env.GCC_VERSION }}/configure --prefix=${{ github.workspace }}/gcc/windows --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 --target=${{ env.GCC_TARGET }} --disable-docs --disable-nls --disable-libada --disable-libssp --disable-libquadmath --disable-libstdc++-v3 --with-float=soft --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + ../gcc-${{ env.GCC_VERSION }}/configure 
--prefix=${{ github.workspace }}/gcc/windows --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 --target=${{ env.GCC_TARGET }} ${{ env.GCC_OPTIONS }} make -j 2 make install-strip diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fa49db..971b71c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,63 @@ to ensure the changelog can be parsed correctly. ------------------------------------------------------------------------------- +## 2023-05-11: 0.23 + +- libc: Added some missing C++ STL headers (`cassert`, `cctype`, `cstdint`, + `cstdio`, `cstdlib`, `cstring`). + +- psxgpu: Added `SetDrawOpType()` and the `GPU_DrawOpType` enum for more + flexibility when using custom drawing queue callbacks with `EnqueueDrawOp()`. + Added GPU IRQ variants of all display list sending APIs (`DrawOTagIRQ()`, + `DrawOTagEnvIRQ()`, `DrawBufferIRQ()` and so on). + +- examples: Cleaned up `beginner/cppdemo` and updated it to use the new STL + headers. + +- docs: Added `drawing_queue.md`, a reference on the internals of the GPU + library's drawing queue implementation. + +## 2023-04-05 + +spicyjpeg: + +- libc: Fixed bugs in some string manipulation and number parsing functions. + Added `memccpy()`. Removed the `SYSTEM.CNF` argument parser as it would + interfere with manual argc/argv passing in some edge cases. + +- psxgpu: Added `IsIdleGPU()` and fixed a bug in `SetVideoMode()`. Added new + "tagless" primitive structures with the `_T` suffix (e.g. `POLY_FT4_T`, + `SPRT_T`, ...) and related macros (`setPolyFT4_T()`, `setSPRT_T()`, ...) that + can be used to group multiple primitives into a single display list packet + for better performance. Fixed some macros not working properly with negative + values. Renamed `DR_MASK` and `setDrawMask()` to `DR_STP` and `setDrawStp()` + respectively for consistency with the official SDK. + +- psxcd: Added `CdUnlock()`. Improved reliability of `CdGetRegion()` on early + Japanese console models. 
+ +- psxetc: Added `SetDMAPriority()` and `GetDMAPriority()`. + +- psxapi: Added PCDRV (host file I/O) API, declared in the `psxsn.h` header. + +- psxpress: Fixed bugs in the MDEC API. Added support for bitstream version 3 + to the GTE-accelerated decoder. Replaced `DecDCTvlcCopyTable()` with + `DecDCTvlcCopyTableV2()` and `DecDCTvlcCopyTableV3()` for better control over + how much of the scratchpad is used for lookup tables. + +- examples: `mdec/strvideo` is now (finally) functional on real hardware and + can properly detect the end of a video file even if immediately followed by + another video on the disc. Added previously missing CD image dependencies to + the CMake scripts of the examples that make use of CD images. + +- tools: `elf2x` no longer adds region strings to converted executables, fixing + issues with DuckStation's region autodetection. Updated `mkpsxiso` to 2.03 in + order to fix a bug with its thread pool implementation on Linux. + +- Updated binutils and recompiled the GCC toolchain with the + `--disable-hosted-libstdcxx` option, allowing a subset of the C++ STL to be + used in PSn00bSDK projects. + ## 2022-12-18: 0.22 spicyjpeg: @@ -457,16 +514,16 @@ Lameguy64: - Libpsn00b: Added `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, `uint32_t` and `uint64_t` variable types in `sys/types.h`. - + - psxgte: Replaced unsigned int variable types with `u_long` to further improve compatibility with code written for the official Sony SDK and to make my tutorial examples easier to compile on PSn00bSDK. Example programs have been updated to account for this change. - + - psxcd: Changed type of 2nd argument of `CdRead()` from `u_int` to `u_long`, as well as changing the type of the size element in `CdlFILE` from `u_int` to `u_long`. - + ## 2021-02-17 Lameguy64: @@ -480,13 +537,13 @@ Lameguy64: - Fixed prefixes to allow SDK libraries and examples to be built with `mipsel-none-elf`. 
- + - examples: Fixed typo in `plasma_tbl.h` causing multiple definitions when compiling with newer versions of GCC in `n00bdemo`. - examples: `cartrom` example now marked as obsoleted, but still kept for reference purposes. - + - Includes alextrevisan's GTE macros in `inline_c.h`. - Added makefile template. @@ -520,7 +577,7 @@ Lameguy64: - examples: Removed redundant toolchain executable definitions in the makefiles. - + - examples: Included HDTV example for Github repo. ## 2020-09-19 @@ -532,7 +589,7 @@ Lameguy64: environment variable. Library installation and linking is also made easier with the `PSN00BSDK_LIBS` environment variable. See readme in the `libpsn00b` directory for details. - + - examples: Fixed libgcc not found error when compiling some of the examples. - libc: Added `strtok()`. @@ -540,7 +597,7 @@ Lameguy64: - libc: Added support for command line arguments. Pass arguments via `SYSTEM.CNF` `BOOT=` string or a string array with `Exec()`. Arguments can be read via `argc`/`argv[]` in `main()` or `__argc`/`__argv` anywhere else. - + - libc: Added `SetHeapSize()`. - psxgpu: Moved ISR and callback subsystem to `psxetc`. You'll have to link @@ -554,7 +611,7 @@ Lameguy64: - psxcd: Fixed crashing on PSIO and possibly some emulators by implementing a response buffer read limiter. - + - psxgpu: Interrupts are now disabled before setting up ISR and callbacks in `ResetGraph()`, as `LoadExec()` still has interrupts enabled when jumping to the loaded PS-EXE's entrypoint. Fixes programs made with PSn00bSDK crashing @@ -574,10 +631,10 @@ Lameguy64: - psxcd: Updated media change detection logic, media change is checked by lid open status bit in all CD-ROM file functions. `CdControl()` calls will also trigger the media change status on lid open. - + - psxcd: Fixed bug in `CdGetVolumeLabel()` where it constantly reparses the file system regardless of media change status. - + - examples: Updated `cdrom/cdbrowse` example slightly. - psxcd: Added `CdLoadSession()`. 
@@ -585,10 +642,10 @@ Lameguy64: - psxcd: Fixed bug where `CdReadDir()` locks up in an infinite loop when it encounters a NULL directory record, and the parser has not yet exceeded the length of the directory record. - + - doc: Replaced library version numbers with SVN revision numbers in the introduced fields. - + - doc: Started work on CD-ROM library overview. ## 2020-04-24 @@ -602,10 +659,10 @@ Lameguy64: - psxapi: Added BIOS `atoi()` and `atol()` calls. Temporary, may be replaced with a faster implementation. - + - psxsio: Added `ioctl()` support for `FIOCSCAN` to probe for pending input in serial tty driver. - + - examples: Reorganized examples, added new `tty` and `console` examples. ## 2020-03-11 @@ -637,7 +694,7 @@ Lameguy64: flag internal to the libraries when CD lid is opened, so file system functions can update the cached ISO descriptor when the disc has been changed. - + - psxcd: Made internal variables and functions for iso9660 parsing static. ## 2020-02-25 @@ -671,13 +728,13 @@ Lameguy64: - psxgpu: Added parenthesis to argument value in `setlen()`, `setaddr()` and `setcode()` macros, preventing `addPrims()` from being used in a more sensible manner (ie. `addPrims(ot, sub_ot+3, sub_ot)`). - + - examples: Added render2tex render to texture example. - psxspu: Fixed typo in `spuinit.s` on section specifier specifying a data section instead of text section, resulting to jump to non-instruction-aligned linker errors. - + - psxgpu: Increased ISR stack size to 2048 bytes. - psxsio: Added `kbhit()` to poll keyboard input asynchronously and stdin @@ -718,7 +775,7 @@ Lameguy64: - libc: Fixed negative integers not displaying properly in `vsprintf()`/`vsnprintf()`. - + - libc: Fixed zero padding not working in `vsprintf()`/`vsnprintf()`. - fpscam: Added debug text using `FntOpen()`, `FntPrint()` and `FntFlush()`. 
@@ -791,7 +848,7 @@ Lameguy64: - libc: Updated build method which takes `libgcc` from the compiler and adds its own object files into it, eliminating linker problems caused by having to order `libc` and `libgcc` libraries in a specific manner. - + - psxgpu: Added `RestartCallback()`. - psxgpu: Added `StoreImage()` function. @@ -810,11 +867,11 @@ Lameguy64: so it's emulated like floats. `int64` still used for processing floats and doubles and old `vsprintf.c` file is still included for those who really want `int64` support for whatever reason. - + - libc: Removed `stdarg.h` which is part of GCC and not license compatible with MPL. The toolchain compiled with libgcc provides `stdarg.h` and other standard headers. - + - examples: Updated `sdk-common.mk` variable convention for better flexibility. - libpsn00b: Added `common.mk` file containing global values for all libraries. @@ -825,17 +882,17 @@ Lameguy64: to install due to `GetInterruptCallback()` retrieving the callback value immediately in the branch delay slot of a `jr` instruction, which results in an inconsistent return value. This also broke `DrawSyncCallback()`. - + - psxsio: Done fixes on `_sio_control()` from the aforementioned issues with load instructions in delay slots. - + - psxgte: Added `DVECTOR` struct. - psxgpu: Added `setLineF2()`, `setLineG2()`, `setLineF3()` and `setLineG3()` primitive macros. - + - Added more functions in documentation. - + ## 2019-07-01 williamblair: @@ -887,7 +944,7 @@ Lameguy64: - psxgpu: Implemented IRQ callback system with `InterruptCallback()` allowing to set interrupt callbacks very easily. - + - psxgpu: Implemented proper IRQ handler installed using HookEntryInt or `SetCustomExitFromException()` for handling VSync and other interrupts. `ChangeClearPAD(0)` must now be called after `_InitPad()` or vsync timeout @@ -897,7 +954,7 @@ Lameguy64: making them not appear in symbol lists resulting in a cleaner symbol dump. 
Still not possible to do function-scope local labels like in ASMPSX because GAS syntax is ASS (or ASS GAS which is farts, GAS is farts). - + - psxgpu: `DrawSync()` function now waits for DMA completion and GPU transfer ready instead of simply waiting for GPU transfer ready which is the likely cause of subtle GPU related timing issues, it also sets GPU DMA transfer diff --git a/CMakeLists.txt b/CMakeLists.txt index 19050f8..34afa52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ include(ExternalProject) project( PSn00bSDK LANGUAGES NONE - VERSION 0.22 + VERSION 0.23 DESCRIPTION "Open source PlayStation 1 SDK" HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk" ) @@ -1,85 +1,58 @@ # PSn00bSDK -PSn00bSDK is a 100% free and open source SDK project for the original Sony -PlayStation for developing homebrew applications and games for the console. -This SDK may be used for freeware, commercial, and open source homebrew -projects as far as what the SDK currently supports. Out of all the open -source PS1 SDK projects that have come and gone from active development -over the years, PSn00bSDK is arguably the most capable of them all. - -Much of the SDK is merely just a set of libraries (`libpsn00b`) and some -utilities for converting executables and data files to formats more usable -on the target platform. The compiler used is just the standard GNU GCC -toolchain compiled to target mipsel and has to be acquired separately. -The library API was deliberately written to resemble the library API of the -official libraries as closely as possible not only for familiarity reasons -to experienced programmers but also so that existing sample code and tutorials -that have been written over the years would still apply to this SDK, as well -as making the process of porting over existing homebrew originally made with -official SDKs easier with minimal modificationn provided they do not depend -on libgs. 
- -PSn00bSDK is currently a work in progress and cannot really be considered -production ready, but what is currently implemented should be enough to -produce some interesting homebrew with the SDK especially with its extensive -support for the GPU and GTE hardware. There's no reason not to fully support -hardware features of a target platform when said hardware features have been -fully documented for years (nocash's PSX specs document in this case). - -Most of `libpsn00b` is written mostly in MIPS assembly more so functions that -interface with the hardware. Many of the standard C functions are implemented -in custom MIPS assembly instead of equivalents found in the BIOS ROM, for both -stability (the BIOS `libc` implementation of the PlayStation is actually buggy) -and performance reasons. - - -## Notable features - -As of October 11, 2022: - -* Extensive GPU support with lines, flat shaded or textured polygon and sprite - primitives, high-speed DMA for VRAM transfers and ordering tables. All video - modes for both NTSC and PAL standards also supported with fully adjustable - display area and automatic video standard detection based on last GPU mode. +PSn00bSDK is an open source homebrew software development kit for the original +Sony PlayStation, consisting of a C/C++ compiler toolchain and a set of +libraries that provide a layer of abstraction over the raw hardware in order to +make game and app development easier. A CMake-based build system, CD-ROM image +packing tool (`mkpsxiso`) and asset conversion utilities are also provided. + +At the heart of PSn00bSDK is `libpsn00b`, a set of libraries that implements +most of the core functionality of the official Sony SDK (excluding higher-level +libraries) plus several new extensions to it. Most of the basic APIs commonly +used by homebrew apps and games built with the official SDK are available, +making PSn00bSDK a good starting point for those who have an existing codebase +but want to move away from Sony tools. 
+ +Currently supported features include: + +* Full support for the GPU's functionality including all primitive types (lines, + polygons, sprites) as well as DMA transfers managed through a software-driven + command queue that can optionally be extended with custom commands. Both NTSC + and PAL video modes are fully supported. * Extensive GTE support with rotate, translate, perspective correction and lighting calculation fully supported through C and/or assembly GTE macros paired with high speed matrix and vector helper functions. All calculations performed in fixed point integer math, not a single float used. -* Flexible interrupt service subsystem with easy to use callback mechanism for - simplified handling and hooking of hardware and DMA interrupts. No crude - event handler hooks or kernel hacks providing great compatibility with - HLE BIOS implementations and loader/menu type homebrew programs. +* BIOS-based interrupt dispatch system providing the ability to register custom + callbacks for all IRQs and DMA channels while preserving compatibility with + all functions provided by the BIOS. -* BIOS controller functions for polling controller input work as intended - thanks to proper handling of hardware interrupts. Optional limited support - for manual polling. +* Basic support for controller input through the BIOS, with optional limited + support for manual polling. -* Complete Serial I/O support and console driver to redirect standard input and - output to the serial port. Hardware flow control supported. +* Complete Serial I/O support with buffering and console driver to redirect BIOS + standard input and output to the serial port. Hardware flow control supported. -* Full CD-ROM support using `libpsxcd` featuring data reading, CD-DA and XA - audio playback, a built-in ISO9660 file system parser with no file count - limit and support for multi-session discs. 
+* CD-ROM support featuring asynchronous reading, CD-DA and XA-ADPCM audio + playback and a built-in ISO9660 file system parser with no file count limit. + Additional support for multi-session discs and bypassing region checks on + supported console models. -* MDEC support, lossy image decompression and video playback using - `libpsxpress` (currently only bitstream versions 1 and 2 are supported). +* Full MDEC support for hardware accelerated lossy image decompression and video + playback. * Preliminary limited support for Konami System 573 arcade hardware. -* Experimental support for dynamic linking at runtime, with support for - function and variable introspection by loading a map file generated at build - time. - -* Uses Sony SDK library syntax for familiarity to experienced programmers - and makes porting existing homebrew projects to PSn00bSDK easier. - -* Works on real hardware and most popular emulators. - -* Fully expandable and customizable to your heart's content. +* Experimental support for dynamic linking at runtime, including function and + variable introspection by loading a map file generated at build time. +Note that, while PSn00bSDK's API is to some extent compatible with the official +SDK's, the project is *not* meant to be a drop-in replacement for it, both +since reimplementing the entire SDK would be a major undertaking and because +many parts of it are inefficient, clunky and/or provide relatively little value. ## Obtaining PSn00bSDK @@ -97,27 +70,31 @@ used as a starting point. For more information on how to get started, or if you wish to build the SDK yourself from source instead, refer to [installation.md](doc/installation.md). +## Tutorials and examples -## Examples - -There are a few examples and complete source code of `n00bdemo` included in the -`examples` directory. More example programs may be added in the future and -contributed example programs are welcome. 
- -There's also [Lameguy's PlayStation Programming Tutorial Series](http://lameguy64.net/tutorials/pstutorials) -for learning how to program for the PlayStation. Much of the tutorials should -apply to PSn00bSDK. +The `examples` directory contains several example programs showcasing different +parts of the SDK (mostly graphics); the source code of `n00bdemo` can also be +found in the same directory. More example programs may be added in the future +and contributed example programs are welcome. +[Lameguy's PlayStation Programming Tutorial Series](http://lameguy64.net/tutorials/pstutorials) +was written with older versions of PSn00bSDK in mind and is outdated at this +point, but may still be useful as an introduction to the console's hardware and +the basics of the graphics and controller APIs. ## To-do List +* `libpsxgte`: Rewrite all assembly functions from scratch as parts of them have + been lifted as-is from Sony libraries. **PSn00bSDK is currently** (and + will probably always be) **in a legal gray area due to this.** + * `libpsxspu`: Plenty of work to be done. Some kind of MIDI sequencer (similar to the one present in the official SDK) should be added at some point, along with a proper API for audio streaming. * `libpsxcd`: Implement a command queue mechanism for the CD-ROM. -* `libpsxpress`: Add support for version 3 and IKI frame bitstreams. +* `libpsxpress`: Add an API for SPU-ADPCM audio encoding at runtime. * `libc`: Improve the memory allocation framework with multiple allocators, replace the string functions with optimized implementations and maybe add @@ -127,32 +104,33 @@ apply to PSn00bSDK. controller driver, and possibly a library for interfacing to IDE/ATAPI drives to make development for arcade systems easier. - ## Credits -Main developer/author/whatever: +Main developers/authors: * **Lameguy64** (John "Lameguy" Wilbert Villamor) +* **spicyjpeg** Contributors: -* **spicyjpeg**: dynamic linker, `libpsxpress`, CMake scripts, some docs and - examples. 
* **Silent**, **G4Vi**, **Chromaryu**: `mkpsxiso` and `dumpsxiso` (maintained as a [separate repo](https://github.com/Lameguy64/mkpsxiso)). Honorable mentions: +* **Soapy**: wrote the original version of the `inline_c.h` header containing + GTE macros. * **ijacquez**: helpful suggestions for getting C++ working. -* **Nicolas Noble**: his OpenBIOS project gave insight to how the BIOS works - internally. +* **Nicolas Noble**: author of the + [pcsx-redux](https://github.com/grumpycoders/pcsx-redux) emulator, OpenBIOS + and other projects which proved invaluable during development. Helpful contributors can be found in the changelog. References used: -* [nocash's PlayStation specs document](http://problemkaputt.de/psx-spx.htm) - and Nicolas Noble's [updated version](https://psx-spx.consoledev.net). +* [Martin Korth's psx-spx document](http://problemkaputt.de/psx-spx.htm) and the + [community-maintained version](https://psx-spx.consoledev.net). * MIPS and System V ABI specs (for the dynamic linker). * Tails92's PSXSDK project (during PSn00bSDK's infancy). diff --git a/doc/drawing_queue.md b/doc/drawing_queue.md new file mode 100644 index 0000000..4fa83f7 --- /dev/null +++ b/doc/drawing_queue.md @@ -0,0 +1,105 @@ + +# GPU drawing queue + +`libpsxgpu` manages access to the GPU by implementing a software driven queue. +This queue, separate from the GPU's internal command FIFO, allows for high-level +management of GPU operations such as display list sending, VRAM image uploads +and framebuffer readback, in a similar way to the drawing queue system +implemented behind the scenes by the official SDK. + +The queue is managed internally by the library and can hold up to 16 drawing +operations ("DrawOps"). Each DrawOp is represented by a pointer to a function, +alongside any arguments to be passed to it. 
Whenever the GPU is idle, +`libpsxgpu` fetches a DrawOp from the queue and calls its respective function, +which should then proceed to actually send commands to the GPU or set up and +start a DMA transfer. `DrawSync()` can be called to wait for the queue to become +empty or get its current length, while `DrawSyncCallback()` may be used to +register a callback that will be invoked once the GPU is idle and no more +DrawOps are pending. + +Completion of each DrawOp (and transition of the GPU from busy to idle state) is +signalled through one of two means: + +- the DMA channel 2 IRQ, fired automatically by the DMA unit when a data + transfer such as a VRAM upload or a display list has finished executing; +- the GPU IRQ, triggered manually using the `GP0(0x1f)` command or the `DR_IRQ` + primitive. + +Note that the end of a DMA transfer does not necessarily imply that the GPU has +finished executing all commands; the last command issued may not yet be done, +hence the ability to use the GPU IRQ instead is provided as a more reliable way +to detect the completion of certain commands. + +## Built-in DrawOps + +The library includes a number of built-in DrawOps for the most common use cases. +The following APIs are wrappers around DrawOps: + +- `DrawBuffer()` and `DrawBufferIRQ()` queue a new DrawOp to start a DMA + transfer in chunked mode (sending one word at a time) with the specified + starting address and number of words. `DrawBuffer2()` and `DrawBufferIRQ2()` + are the underlying DrawOp functions respectively. +- `DrawOTag()` and `DrawOTagIRQ()` queue a new DrawOp to start a DMA transfer in + linked-list mode with the specified starting address, with `DrawOTag2()` and + `DrawOTagIRQ2()` being the respective DrawOp functions. +- `PutDrawEnv()`, `PutDrawEnvFast()`, `DrawOTagEnv()` and `DrawOTagEnvIRQ()` + insert drawing environment setup commands as the first (or only) item in a + display list, then proceed to pass it to `DrawOTag()`. 
The setup packet + linked into the display list is stored as part of the `DRAWENV` structure. +- `LoadImage()` and `StoreImage()` copy the provided coordinates into a + temporary buffer, then proceed to enqueue a DrawOp to actually start the VRAM + transfer. The synchronous variants of these APIs are `LoadImage2()` and + `StoreImage2()` respectively. +- `MoveImage()` saves the provided coordinates into a temporary buffer, then + enqueues a DrawOp that will issue a `GP0(0x80)` VRAM blitting command. As + this command is handled entirely by the GPU with no DMA transfers involved, + the GPU IRQ is used to detect its completion. + +## Custom DrawOps + +Unlike the official SDK, `libpsxgpu` exposes the drawing queue by providing a +way to enqueue arbitrary custom DrawOps. This can be useful for profiling +purposes or to work around specific GPU bugs (see the use cases section). + +Custom DrawOps can be pushed into the queue by calling `EnqueueDrawOp()` and +passing a pointer to the callback function in charge of issuing the DrawOp's +commands to the GPU, as well as up to 3 arguments to be passed through to it. +The function must: + +- call `SetDrawOpType()` to let the library know which type of IRQ it shall wait + for before moving onto the next DrawOp (either `DRAWOP_TYPE_DMA` or + `DRAWOP_TYPE_GPU_IRQ`); +- wait until the GPU is ready to accept commands by polling the status bits in + `GPU_STAT` and make sure DMA channel 2 is also idle before proceeding; +- issue any commands to the GPU's GP0 register and/or set up a DMA transfer, + terminating them with a `GP0(0x1f)` IRQ command if appropriate. + +Note that DrawOps are called from within the exception handler's context and +must thus not block for significant periods of time, manipulate COP0 registers +or wait for any IRQs to occur. They are also restricted from manipulating the +drawing queue by e.g. calling `EnqueueDrawOp()`, `DrawOTag()` or any other +function that enqueues a DrawOp. 
+ +## Use cases + +### Scissoring commands + +The GPU provides commands to set the origin of all X/Y coordinates passed to it +as well as a scissoring region, all pixels outside of which are automatically +masked out during drawing. These commands are issued to the GP0 register and can +be inserted in a display list through the `DR_OFFSET` and `DR_AREA` primitives, +however they will *not* go through the GPU's command FIFO like most other +primitives. They will instead take effect immediately, resulting in graphical +glitches if the GPU is already busy processing a drawing command (i.e. if they +are not the very first commands in a display list). + +The software-driven drawing queue provides a way around this. By splitting up a +frame's display list into multiple chunks, one for each scissoring command +issued, it is possible to always place scissoring commands at the beginning of a +chunk. Each chunk can be terminated with a `DR_IRQ` primitive and queued for +drawing using `DrawOTagIRQ()` to ensure the GPU goes idle before the next chunk +is sent, preventing scissoring commands from being received by the GPU while +busy. + +----------------------------------------- +_Last updated on 2023-05-11 by spicyjpeg_ diff --git a/doc/toolchain.md b/doc/toolchain.md index 8e28c24..9b65720 100644 --- a/doc/toolchain.md +++ b/doc/toolchain.md @@ -28,6 +28,7 @@ tested extensively: - ~~GCC 7.4.0 with binutils 2.31~~ (the linker fails to build PS1 DLLs) - GCC **11.1.0** with binutils **2.36** - GCC **11.2.0** with binutils **2.37** +- GCC **12.2.0** with binutils **2.40** If you wish to pick an older GCC release but don't know which binutils version it requires, see [here](https://wiki.osdev.org/Cross-Compiler_Successful_Builds) @@ -78,7 +79,7 @@ for a compatibility table. 
```bash ../binutils-<VERSION>/configure \ --prefix=/usr/local/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --with-float=soft + --disable-docs --disable-nls --disable-werror --with-float=soft ``` Replace `<VERSION>` as usual. If you don't want to install the toolchain into @@ -117,9 +118,11 @@ options. ```bash ../gcc-<VERSION>/configure \ --prefix=/usr/local/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --disable-libada --disable-libssp \ - --disable-libquadmath --disable-libstdc++-v3 --with-float=soft \ - --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + --disable-docs --disable-nls --disable-werror --disable-libada \ + --disable-libssp --disable-libquadmath --disable-threads \ + --disable-libgomp --disable-libstdcxx-pch --disable-hosted-libstdcxx \ + --enable-languages=c,c++ --without-isl --without-headers \ + --with-float=soft --with-gnu-as --with-gnu-ld ``` If you previously set a custom installation path, remember to set it here as @@ -172,7 +175,7 @@ that runs on Windows. ../binutils-<VERSION>/configure \ --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 \ --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --with-float=soft + --disable-docs --disable-nls --disable-werror --with-float=soft ``` Then build binutils again: @@ -187,10 +190,12 @@ that runs on Windows. 
```bash ../gcc-<VERSION>/configure \ --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 \ - --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --disable-libada --disable-libssp \ - --disable-libquadmath --disable-libstdc++-v3 --with-float=soft \ - --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ + --disable-docs --disable-nls --disable-werror --disable-libada \ + --disable-libssp --disable-libquadmath --disable-threads \ + --disable-libgomp --disable-libstdcxx-pch --disable-hosted-libstdcxx \ + --enable-languages=c,c++ --without-isl --without-headers \ + --with-float=soft --with-gnu-as --with-gnu-ld ``` And build it as usual: @@ -211,14 +216,17 @@ that runs on Windows. ## Note regarding C++ support -C++ support in PSn00bSDK, besides compile-time features like `constexpr`, only -goes as far as basic classes, namespaces and the ability to dynamically create -and delete class objects at any point of the program. The required dependencies -(which are just wrappers around `malloc()` and `free()`) are supplied by `libc`. +C++ support in PSn00bSDK is limited to the freestanding subset of the standard +library provided by GCC, which includes most metaprogramming and compile-time +utilities but not higher level functionality that requires runtime support such +as exceptions, containers or streams. Basic C++ features that only depend on the +compiler (classes, templates, `constexpr` and so on) are fully supported. -Standard C++ libraries are not implemented and likely never going to be -implemented due to bloat concerns that it may introduce. Besides, the official -SDK lacks full C++ support as well. +Implementing a full STL, while technically possible, is currently out of the +scope of PSn00bSDK. 
There are other PS1 SDKs that provide an STL, such as +[psyqo](https://github.com/grumpycoders/pcsx-redux/tree/main/src/mips/psyqo), +and they might be a better fit for your project if you plan to make heavy use of +C++ features. ----------------------------------------- -_Last updated on 2021-11-23 by spicyjpeg_ +_Last updated on 2023-04-05 by spicyjpeg_ diff --git a/examples/beginner/cppdemo/main.cpp b/examples/beginner/cppdemo/main.cpp index fd2e3a8..f55f952 100644 --- a/examples/beginner/cppdemo/main.cpp +++ b/examples/beginner/cppdemo/main.cpp @@ -1,157 +1,151 @@ -/* Work in progress example, need to add comments. +/* Work in progress example, need to add comments. * * Basically a quick little example that showcases C++ classes are * functioning in PSn00bSDK. - Lameguy64 * - * First written in December ‎18, ‎2020. + * First written in December 18, 2020. * * Changelog: * - * May 10, 2021 - Variable types updated for psxgpu.h changes. + * May 11, 2023 - Updated the example to use C++ standard library headers, + * renamed the class and cleaned up some methods. + * May 10, 2021 - Variable types updated for psxgpu.h changes. 
* */ - -#include <sys/types.h> -#include <stdio.h> -#include <stdlib.h> + +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> #include <psxgte.h> #include <psxgpu.h> -class GraphClass -{ - u_long *_ot[2]; - u_char *_pri[2]; - u_char *_nextpri; +/* Example class */ + +class RenderContext { +private: + std::uint32_t *_ot[2]; + std::uint8_t *_pri[2]; + std::uint8_t *_nextpri; + + int _ot_count, _db; - int _ot_count; - int _db; - - DISPENV _disp[2]; + DISPENV _disp[2]; DRAWENV _draw[2]; - + public: + RenderContext(std::size_t ot_len = 8, std::size_t pri_len = 8192); + ~RenderContext(void); + void setupBuffers(int w, int h, int r, int g, int b); + void flip(void); + + template<typename T> inline T *addPrimitive(void) { + auto pri = reinterpret_cast<T *>(_nextpri); + addPrim(_ot[_db], pri); + + _nextpri += sizeof(T); + return pri; + } +}; + +RenderContext::RenderContext(std::size_t ot_len, std::size_t pri_len) { + _ot[0] = new std::uint32_t[ot_len]; + _ot[1] = new std::uint32_t[ot_len]; + + _db = 0; + _ot_count = ot_len; + ClearOTagR(_ot[0], _ot_count); + ClearOTagR(_ot[1], _ot_count); + + _pri[0] = new std::uint8_t[pri_len]; + _pri[1] = new std::uint8_t[pri_len]; + + _nextpri = _pri[0]; + + std::printf("RenderContext::RenderContext: Buffers allocated.\n"); +} + +RenderContext::~RenderContext(void) { + delete[] _ot[0]; + delete[] _ot[1]; + + delete[] _pri[0]; + delete[] _pri[1]; + + std::printf("RenderContext::RenderContext: Buffers freed.\n"); +} + +void RenderContext::setupBuffers(int w, int h, int r, int g, int b) { + SetDefDispEnv(&_disp[0], 0, h, w, h); + SetDefDispEnv(&_disp[1], 0, 0, w, h); + SetDefDrawEnv(&_draw[0], 0, 0, w, h); + SetDefDrawEnv(&_draw[1], 0, h, w, h); + + setRGB0(&_draw[0], r, g, b); + _draw[0].isbg = 1; + _draw[0].dtd = 1; - GraphClass( int ot_len = 8, int pri_len = 8192 ) - { - _ot[0] = (u_long*)malloc( sizeof(u_long)*ot_len ); - _ot[1] = (u_long*)malloc( sizeof(u_long)*ot_len ); - - _db = 0; - _ot_count = ot_len; - 
ClearOTagR( _ot[0], _ot_count ); - ClearOTagR( _ot[1], _ot_count ); - - _pri[0] = (u_char*)malloc( pri_len ); - _pri[1] = (u_char*)malloc( pri_len ); - - _nextpri = _pri[0]; - - printf( "GraphClass::GraphClass: Buffers allocated.\n" ); - - } /* GraphClass */ - - virtual ~GraphClass() - { - /* free the OTs and primitive buffers */ - free( _ot[0] ); - free( _ot[1] ); - - free( _pri[0] ); - free( _pri[1] ); - - printf( "GraphClass::GraphClass: Buffers freed.\n" ); - - } /* ~GraphClass */ - - void SetRes( int w, int h ) - { - SetDefDispEnv( &_disp[0], 0, h, w, h ); - SetDefDispEnv( &_disp[1], 0, 0, w, h ); - - SetDefDrawEnv( &_draw[0], 0, 0, w, h ); - SetDefDrawEnv( &_draw[1], 0, h, w, h ); - - setRGB0( &_draw[0], 63, 0, 127 ); - _draw[0].isbg = 1; - _draw[0].dtd = 1; - setRGB0( &_draw[1], 63, 0, 127 ); - _draw[1].isbg = 1; - _draw[1].dtd = 1; - - PutDispEnv( &_disp[0] ); - PutDrawEnv( &_draw[0] ); - - } /* SetRes */ - - void IncPri( int bytes ) - { - _nextpri += bytes; - - } /* IncPri */ - - void SetPri( u_char *ptr ) - { - _nextpri = ptr; - - } /* SetPri */ - - u_char *GetNextPri( void ) - { - return( _nextpri ); - - } /* GetNextPri */ - - u_long *GetOt( void ) - { - return( _ot[_db] ); - - } /* GetOt */ - - void Display( void ) - { - VSync( 0 ); - DrawSync( 0 ); - SetDispMask( 1 ); - - _db = !_db; - - PutDispEnv( &_disp[_db] ); - PutDrawEnv( &_draw[_db] ); - - DrawOTag( _ot[!_db]+(_ot_count-1) ); - - ClearOTagR( _ot[_db], _ot_count ); - _nextpri = _pri[_db]; - - } /* Display */ - -}; /* GraphClass */ - -GraphClass *otable; - -int main( int argc, const char *argv[] ) -{ - TILE *tile; - - ResetGraph( 0 ); - - otable = new GraphClass(); - - otable->SetRes( 320, 240 ); - - while( 1 ) - { - tile = (TILE*)otable->GetNextPri(); - setTile( tile ); - setXY0( tile, 32, 32 ); - setWH( tile, 128, 128 ); - setRGB0( tile, 255, 255, 0 ); - addPrim( otable->GetOt(), tile ); - otable->IncPri( sizeof(TILE) ); - - otable->Display(); + setRGB0(&_draw[1], r, g, b); + _draw[1].isbg = 1; 
+ _draw[1].dtd = 1; + + PutDispEnv(&_disp[0]); + PutDrawEnv(&_draw[0]); +} + +void RenderContext::flip(void) { + DrawSync(0); + VSync(0); + + _db ^= 1; + + PutDispEnv(&_disp[_db]); + PutDrawEnv(&_draw[_db]); + + DrawOTag(_ot[_db ^ 1] + _ot_count - 1); + ClearOTagR(_ot[_db], _ot_count); + + _nextpri = _pri[_db]; +} + +/* Main */ + +static constexpr int SCREEN_XRES = 320; +static constexpr int SCREEN_YRES = 240; + +static constexpr int BGCOLOR_R = 63; +static constexpr int BGCOLOR_G = 0; +static constexpr int BGCOLOR_B = 127; + +int main(int argc, const char **argv) { + ResetGraph(0); + SetDispMask(1); + + RenderContext ctx; + ctx.setupBuffers(SCREEN_XRES, SCREEN_YRES, BGCOLOR_R, BGCOLOR_G, BGCOLOR_B); + + int x = 0, y = 0; + int dx = 1, dy = 1; + + for (;;) { + // Update the position and velocity of the bouncing square. + if (x < 0 || x > (SCREEN_XRES - 64)) + dx = -dx; + if (y < 0 || y > (SCREEN_YRES - 64)) + dy = -dy; + + x += dx; + y += dy; + + // Draw the square. + auto tile = ctx.addPrimitive<TILE>(); + setTile(tile); + setXY0 (tile, x, y); + setWH (tile, 64, 64); + setRGB0(tile, 255, 255, 0); + + ctx.flip(); } - - return( 0 ); - -} /* main */
\ No newline at end of file + + return 0; +} diff --git a/examples/cdrom/cdbrowse/CMakeLists.txt b/examples/cdrom/cdbrowse/CMakeLists.txt index 0cc091f..70a4585 100644 --- a/examples/cdrom/cdbrowse/CMakeLists.txt +++ b/examples/cdrom/cdbrowse/CMakeLists.txt @@ -13,7 +13,10 @@ project( file(GLOB _sources *.c) psn00bsdk_add_executable(cdbrowse GPREL ${_sources}) -psn00bsdk_add_cd_image(cdbrowse_iso cdbrowse iso.xml DEPENDS cdbrowse) +psn00bsdk_add_cd_image( + cdbrowse_iso cdbrowse iso.xml + DEPENDS cdbrowse system.cnf +) psn00bsdk_target_incbin(cdbrowse PRIVATE ball16c ball16c.tim) diff --git a/examples/cdrom/cdbrowse/iso.xml b/examples/cdrom/cdbrowse/iso.xml index 771b0e9..f1c00f7 100644 --- a/examples/cdrom/cdbrowse/iso.xml +++ b/examples/cdrom/cdbrowse/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" diff --git a/examples/cdrom/cdxa/CMakeLists.txt b/examples/cdrom/cdxa/CMakeLists.txt index fd2f653..07e6ae4 100644 --- a/examples/cdrom/cdxa/CMakeLists.txt +++ b/examples/cdrom/cdxa/CMakeLists.txt @@ -11,10 +11,12 @@ project( HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk" ) -# TODO: add rules to actually generate a valid .XA file file(GLOB _sources *.c) psn00bsdk_add_executable(cdxa GPREL ${_sources}) -#psn00bsdk_add_cd_image(cdxa_iso cdxa iso.xml DEPENDS cdxa) +#psn00bsdk_add_cd_image( + #cdxa_iso cdxa iso.xml + #DEPENDS cdxa system.cnf xasample.xa +#) psn00bsdk_target_incbin(cdxa PRIVATE ball16c ball16c.tim) diff --git a/examples/cdrom/cdxa/iso.xml b/examples/cdrom/cdxa/iso.xml index 6715f94..f935d26 100644 --- a/examples/cdrom/cdxa/iso.xml +++ b/examples/cdrom/cdxa/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" diff 
--git a/examples/demos/n00bdemo/main.c b/examples/demos/n00bdemo/main.c index 6d0be3c..55dbbc4 100644 --- a/examples/demos/n00bdemo/main.c +++ b/examples/demos/n00bdemo/main.c @@ -358,10 +358,9 @@ void stencilstuff() { /* The stencil demo is achieved by utilizing the mask bit setting primitive GP0(E6h). The structure of this primitive is defined as - DR_MASK initialized and set by setDrawMask(). These are not available - in Sony's SDK by default. + DR_STP initialized and set by setDrawStp(). - The DR_MASK primitive controls mask bit operations for drawing + The DR_STP primitive controls mask bit operations for drawing primitives such as setting mask bits on every pixel drawn or mask bit test where pixels won't be drawn on pixels with the mask bit set. It applies to most graphics drawing primitives except VRAM fill. @@ -373,10 +372,10 @@ void stencilstuff() { bit operation disabled. The stencil effect featured in this demo is achieved by enabling set - mask bit with DR_MASK, drawing semi-transparent primitives using + mask bit with DR_STP, drawing semi-transparent primitives using additive blending but color is all zero to make it completely invisible but is enough to update the mask bits, disable mask set bit but enable - mask test with DR_MASK and then drawing a rectangle that fills the + mask test with DR_STP and then drawing a rectangle that fills the entire screen. Semi-transparency mask in textures must not be used when drawing the scene that will be 'below' the mask layer. 
*/ @@ -384,7 +383,7 @@ void stencilstuff() { int spin=0; - DR_MASK *mask; + DR_STP *mask; TILE *rect; SC_OT s_ot; @@ -430,10 +429,10 @@ void stencilstuff() { // Sort mask primitive that enables setting mask bits - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 1, 0 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 1, 0 ); addPrim( ot[db]+20, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort the stars @@ -465,10 +464,10 @@ void stencilstuff() { // Sort mask primitive that enables mask bit test - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 0, 1 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 0, 1 ); addPrim( ot[db]+18, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort rectangle that fills the screen @@ -482,10 +481,10 @@ void stencilstuff() { // Clear all mask settings - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 0, 0 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 0, 0 ); addPrim( ot[db]+15, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort overlay then display @@ -624,7 +623,7 @@ void plasmastuff() { // Simple stripe transition effect void transition() { - int i,count,comp; + int count = 0; int bheight[16] = { 0 }; TILE *tile = (TILE*)nextpri; @@ -632,9 +631,9 @@ void transition() { while( 1 ) { - comp = 0; + int comp = 0; - for( i=0; i<16; i++ ) { + for( int i=0; i<16; i++ ) { if( bheight[i] > 0 ) { @@ -657,19 +656,11 @@ void transition() { if( bheight[count>>1] == 0 ) bheight[count>>1] = 1; - display(); count++; if( comp >= 16 ) break; - - /* - I haven't yet managed to figure out why this loop hangs on no$psx - if I comment out this completely useless call to puts(). Some - alignment or timing crap perhaps? 
-- spicyjpeg - */ - puts("."); } DrawSync(0); diff --git a/examples/graphics/billboard/billboard.c b/examples/graphics/billboard/billboard.c index ea98b28..1ddc4dc 100644 --- a/examples/graphics/billboard/billboard.c +++ b/examples/graphics/billboard/billboard.c @@ -152,7 +152,7 @@ int main() { setRGB0(quad, 128, 128, 128); // Set tpage - quad->tpage = getTPage(tim.mode&0x8, 0, tim.prect->x, tim.prect->y); + quad->tpage = getTPage(tim.mode, 0, tim.prect->x, tim.prect->y); // Set CLUT setClut(quad, tim.crect->x, tim.crect->y); diff --git a/examples/io/system573/iso.xml b/examples/io/system573/iso.xml index 2226089..d22665c 100644 --- a/examples/io/system573/iso.xml +++ b/examples/io/system573/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" @@ -19,9 +16,9 @@ The System 573 BIOS does not parse SYSTEM.CNF, it's instead hardcoded to look for an executable named PSX.EXE. Some modded or hacked BIOS variants may instead look for slightly altered - file names (QSY.DXD, SSW.BXF, TSV.AXG) as an obfuscation - measure, so it's recommended to have multiple copies of the - executable on the disc. + file names (QSY.DXD, SSW.BXF, TSV.AXG, GSE.NXX, NSE.GXX) as an + obfuscation measure, so it's recommended to have multiple + copies of the executable on the disc. Note that this behavior can be abused to make multi-system CDs with different executables for PS1 and 573 (i.e. 
have both @@ -33,6 +30,8 @@ <file name="QSY.DXD" type="data" source="system573.exe" /> <file name="SSW.BXF" type="data" source="system573.exe" /> <file name="TSV.AXG" type="data" source="system573.exe" /> + <file name="GSE.NXX" type="data" source="system573.exe" /> + <file name="NSE.GXX" type="data" source="system573.exe" /> <dummy sectors="1024"/> </directory_tree> diff --git a/examples/mdec/strvideo/CMakeLists.txt b/examples/mdec/strvideo/CMakeLists.txt index d41556b..f6695c0 100644 --- a/examples/mdec/strvideo/CMakeLists.txt +++ b/examples/mdec/strvideo/CMakeLists.txt @@ -13,7 +13,10 @@ project( file(GLOB _sources *.c) psn00bsdk_add_executable(strvideo GPREL ${_sources}) -#psn00bsdk_add_cd_image(strvideo_iso strvideo iso.xml DEPENDS strvideo) +#psn00bsdk_add_cd_image( + #strvideo_iso strvideo iso.xml + #DEPENDS strvideo system.cnf video.str +#) install( FILES diff --git a/examples/mdec/strvideo/iso.xml b/examples/mdec/strvideo/iso.xml index 65e0ff5..8ba67dd 100644 --- a/examples/mdec/strvideo/iso.xml +++ b/examples/mdec/strvideo/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" diff --git a/examples/mdec/strvideo/main.c b/examples/mdec/strvideo/main.c index 28d39b2..853e0c2 100644 --- a/examples/mdec/strvideo/main.c +++ b/examples/mdec/strvideo/main.c @@ -1,6 +1,6 @@ /* * PSn00bSDK .STR FMV playback example - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * * This example demonstrates playback of full-motion video in the standard .STR * format, using the MDEC for frame decoding and XA for audio. Decoded frames @@ -34,9 +34,10 @@ * Playback is stopped once the .STR header is no longer present in sectors * read. 
* - * Note that PSn00bSDK's bitstream decoding API only supports version 1 and 2 - * bitstreams currently, so make sure your .STR files are encoded as v2 and not - * v3. + * PSn00bSDK's bitstream decoding API supports both version 2 and 3 bitstreams. + * Encoding your .STR files as v3 may result in slightly higher quality + * depending on the encoder, but also higher CPU usage during playback compared + * to the older v2. */ #include <stdint.h> @@ -102,13 +103,12 @@ void init_context(RenderContext *ctx) { FntOpen(4, 12, 312, 16, 2, 256); } -void display(RenderContext *ctx, int sync) { +void display(RenderContext *ctx) { Framebuffer *db; ctx->db_active ^= 1; DrawSync(0); - if (sync) - VSync(0); + //VSync(0); db = &(ctx->db[ctx->db_active]); PutDrawEnv(&(db->draw)); @@ -163,13 +163,13 @@ typedef struct { volatile int8_t cur_frame, cur_slice; } StreamContext; -StreamContext str_ctx; +static StreamContext str_ctx; // This buffer is used by cd_sector_handler() as a temporary area for sectors // read from the CD. Due to DMA limitations it can't be allocated on the stack // (especially not in the interrupt callbacks' stack, whose size is very // limited). -STR_Header sector_header; +static STR_Header sector_header; void cd_sector_handler(void) { StreamBuffer *frame = &str_ctx.frames[str_ctx.cur_frame]; @@ -189,8 +189,15 @@ void cd_sector_handler(void) { return; // If this sector is actually part of a new frame, validate the sectors - // that have been read so far and flip the bitstream data buffers. - if (sector_header.frame_id != str_ctx.frame_id) { + // that have been read so far and flip the bitstream data buffers. If the + // frame number is actually lower than the current one, assume the drive + // has started reading another .STR file and stop playback. 
+ if ((int) sector_header.frame_id < str_ctx.frame_id) { + str_ctx.frame_ready = -1; + return; + } + + if ((int) sector_header.frame_id > str_ctx.frame_id) { // Do not set the ready flag if any sector has been missed. if (str_ctx.sector_count) str_ctx.dropped_frames++; @@ -263,12 +270,10 @@ void init_stream(void) { CdReadyCallback(&cd_event_handler); ExitCriticalSection(); - // Set the maximum amount of data DecDCTvlc() can output and copy the - // lookup table used for decompression to the scratchpad area. This is - // optional but makes the decompressor slightly faster. See the libpsxpress - // documentation for more details. - DecDCTvlcSize(0x8000); - DecDCTvlcCopyTable((DECDCTTAB *) 0x1f800000); + // Copy the lookup table used for frame decompression to the scratchpad + // area. This is optional but makes the decompressor slightly faster. See + // the libpsxpress documentation for more details. + DecDCTvlcCopyTableV3((VLC_TableV3 *) 0x1f800000); str_ctx.cur_frame = 0; str_ctx.cur_slice = 0; @@ -309,7 +314,7 @@ void start_stream(CdlFILE *file) { static RenderContext ctx; -#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx, 1); } +#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx); } #define SHOW_ERROR(...) { SHOW_STATUS(__VA_ARGS__); while (1) __asm__("nop"); } int main(int argc, const char* argv[]) { @@ -318,7 +323,7 @@ int main(int argc, const char* argv[]) { SHOW_STATUS("INITIALIZING\n"); SpuInit(); CdInit(); - InitGeom(); // Required for PSn00bSDK's DecDCTvlc() + InitGeom(); // GTE initialization required by the VLC decompressor DecDCTReset(0); SHOW_STATUS("OPENING VIDEO FILE\n"); @@ -330,8 +335,9 @@ int main(int argc, const char* argv[]) { init_stream(); start_stream(&file); - // Disable framebuffer clearing to get rid of flickering during playback. - display(&ctx, 1); + // Clear the screen, then disable framebuffer clearing to get rid of + // flickering during playback. 
+ display(&ctx); ctx.db[0].draw.isbg = 0; ctx.db[1].draw.isbg = 0; #ifdef DISP_24BPP @@ -339,9 +345,13 @@ int main(int argc, const char* argv[]) { ctx.db[1].disp.isrgb24 = 1; #endif - int decode_errors = 0; + int frame_time = 1, decode_errors = 0; while (1) { +#ifdef DRAW_OVERLAY + int frame_start = TIMER_VALUE(1); +#endif + // Wait for a full frame to be read from the disc and decompress the // bitstream into the format expected by the MDEC. If the video has // ended, restart playback from the beginning. @@ -355,38 +365,45 @@ int main(int argc, const char* argv[]) { } #ifdef DRAW_OVERLAY - // Measure CPU usage of the decompressor using the hblank counter. - int total_time = TIMER_VALUE(1) + 1; - TIMER_VALUE(1) = 0; + int decode_time = TIMER_VALUE(1); #endif - if (DecDCTvlc(frame->bs_data, frame->mdec_data)) { + VLC_Context vlc_ctx; + if (DecDCTvlcStart( + &vlc_ctx, + frame->mdec_data, + sizeof(frame->mdec_data) / 4, + frame->bs_data + )) { decode_errors++; continue; } #ifdef DRAW_OVERLAY - int cpu_usage = TIMER_VALUE(1) * 100 / total_time; + // Calculate CPU usage of the decompressor. + decode_time = (TIMER_VALUE(1) - decode_time) & 0xffff; + int cpu_usage = decode_time * 100 / frame_time; #endif // Wait for the MDEC to finish decoding the previous frame, then flip // the framebuffers to display it and prepare the buffer for the next // frame. - // NOTE: you should *not* call VSync(0) during playback, as the refresh - // rate of the GPU is not synced to the video's frame rate. If you want - // to minimize screen tearing, consider triple buffering instead (i.e. - // always keep 2 fully decoded frames in VRAM and use VSyncCallback() - // to register a function that displays the next decoded frame whenever - // vblank occurs). + // NOTE: as the refresh rate of the GPU is not synced to the video's + // frame rate, this VSync(0) call may potentially end up waiting too + // long and desynchronizing playback. 
A better solution would be to + // implement triple buffering (i.e. always keep 2 fully decoded frames + // in VRAM and use VSyncCallback() to register a function that displays + // the next decoded frame if available whenever vblank occurs). + VSync(0); DecDCTinSync(0); DecDCToutSync(0); #ifdef DRAW_OVERLAY - FntPrint(-1, "FRAME:%5d READ ERRORS: %5d\n", str_ctx.frame_id, str_ctx.dropped_frames); - FntPrint(-1, "CPU: %5d%% DECODE ERRORS:%5d\n", cpu_usage, decode_errors); + FntPrint(-1, "FRAME:%6d READ ERRORS: %6d\n", str_ctx.frame_id, str_ctx.dropped_frames); + FntPrint(-1, "CPU: %6d%% DECODE ERRORS:%6d\n", cpu_usage, decode_errors); FntFlush(-1); #endif - display(&ctx, 0); + display(&ctx); // Feed the newly decompressed frame to the MDEC. The MDEC will not // actually start decoding it until an output buffer is also configured @@ -414,6 +431,10 @@ int main(int argc, const char* argv[]) { str_ctx.slices[str_ctx.cur_slice], BLOCK_SIZE * str_ctx.slice_pos.h / 2 ); + +#ifdef DRAW_OVERLAY + frame_time = (TIMER_VALUE(1) - frame_start) & 0xffff; +#endif } return 0; diff --git a/examples/sound/cdstream/CMakeLists.txt b/examples/sound/cdstream/CMakeLists.txt index e569449..d04761c 100644 --- a/examples/sound/cdstream/CMakeLists.txt +++ b/examples/sound/cdstream/CMakeLists.txt @@ -13,7 +13,10 @@ project( file(GLOB _sources *.c) psn00bsdk_add_executable(cdstream GPREL ${_sources}) -psn00bsdk_add_cd_image(cdstream_iso cdstream iso.xml DEPENDS cdstream) +psn00bsdk_add_cd_image( + cdstream_iso cdstream iso.xml + DEPENDS cdstream system.cnf stream.vag +) install( FILES diff --git a/examples/sound/cdstream/iso.xml b/examples/sound/cdstream/iso.xml index 66f1f74..10128eb 100644 --- a/examples/sound/cdstream/iso.xml +++ b/examples/sound/cdstream/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" diff --git 
a/examples/sound/cdstream/main.c b/examples/sound/cdstream/main.c index 324abb2..53b88e6 100644 --- a/examples/sound/cdstream/main.c +++ b/examples/sound/cdstream/main.c @@ -212,7 +212,7 @@ void spu_irq_handler(void) { // if str_ctx.state is set to STATE_DATA_NEEDED and fetch the next chunk. } -void cd_read_handler(int event, uint8_t *payload) { +void cd_read_handler(CdlIntrResult event, uint8_t *payload) { // Attempt to read the chunk again if an error has occurred, otherwise // start uploading it to SPU RAM. if (event == CdlDiskError) { diff --git a/examples/system/childexec/child/child.c b/examples/system/childexec/child/child.c index dcfbfaf..e5e16b9 100644 --- a/examples/system/childexec/child/child.c +++ b/examples/system/childexec/child/child.c @@ -1,5 +1,6 @@ #include <stdint.h> #include <stdio.h> +#include <psxetc.h> #include <psxapi.h> #include <psxgpu.h> #include <psxgte.h> @@ -238,11 +239,12 @@ int main(int argc, const char *argv[]) { display(); } - + + DrawSync(0); StopPAD(); - + StopCallback(); + return 0; - } void init(void) { diff --git a/examples/system/childexec/parent.c b/examples/system/childexec/parent.c index 83d964c..cfed11c 100644 --- a/examples/system/childexec/parent.c +++ b/examples/system/childexec/parent.c @@ -273,7 +273,7 @@ extern char child_exe[]; void run_child(void) { // Arguments for the child program - char *args[] = + const char *args[] = { "SAMPLE=0", "SESSION=1", @@ -285,31 +285,27 @@ void run_child(void) { // Copy child executable to its intended adddress memcpy((void*)exe->param.t_addr, child_exe+2048, exe->param.t_size); - - // Prepare for program execution and disable interrupts - //EnterCriticalSection(); - StopCallback(); - // Stop pads, enable auto acknowledge + // Prepare for program execution and disable interrupts + DrawSync(0); StopPAD(); - ChangeClearPAD(1); - ChangeClearRCnt(3, 1); + StopCallback(); + FlushCache(); // Execute child - printf("Child exec!\n"); + printf("Executing child...\n"); Exec(&exe->param, 3, 
args); - + // Restore interrupts for this PS-EXE RestartCallback(); - //ExitCriticalSection(); - + printf("Child returned\n"); + // Re-init and re-enable pads InitPAD(pad_buff[0], 34, pad_buff[1], 34); StartPAD(); ChangeClearPAD(0); - + // Set this program's display mode SetDispMask(0); PutDispEnv(&disp); - } diff --git a/examples/system/console/main.c b/examples/system/console/main.c index b4f91b4..845ca95 100644 --- a/examples/system/console/main.c +++ b/examples/system/console/main.c @@ -25,7 +25,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ioctl.h> +#include <sys/ioctl.h> #include <sys/fcntl.h> #include <psxapi.h> #include <psxetc.h> diff --git a/examples/system/dynlink/CMakeLists.txt b/examples/system/dynlink/CMakeLists.txt index e750fd1..a1fd24e 100644 --- a/examples/system/dynlink/CMakeLists.txt +++ b/examples/system/dynlink/CMakeLists.txt @@ -17,7 +17,7 @@ psn00bsdk_add_library (dynlink_cube SHARED library/cube.c) psn00bsdk_add_library (dynlink_balls SHARED library/balls.c) psn00bsdk_add_cd_image( dynlink_iso dynlink iso.xml - DEPENDS dynlink_main dynlink_cube dynlink_balls + DEPENDS dynlink_main dynlink_cube dynlink_balls system.cnf ) psn00bsdk_target_incbin(dynlink_balls PRIVATE ball16c library/ball16c.tim) diff --git a/examples/system/dynlink/iso.xml b/examples/system/dynlink/iso.xml index 8f40510..93cb948 100644 --- a/examples/system/dynlink/iso.xml +++ b/examples/system/dynlink/iso.xml @@ -1,8 +1,5 @@ <?xml version="1.0" encoding="utf-8"?> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <identifiers system ="PLAYSTATION" diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake index 9e7a4d3..e34ff63 100644 --- a/libpsn00b/cmake/internal_setup.cmake +++ b/libpsn00b/cmake/internal_setup.cmake @@ -280,23 +280,22 @@ function(psn00bsdk_add_cd_image name image_name config_file) cmake_path(HASH config_file _hash) - 
set(CD_IMAGE_NAME ${image_name}) - set(CD_CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml) - configure_file("${config_file}" ${CD_CONFIG_FILE}) + set(_xml_file ${CMAKE_CURRENT_BINARY_DIR}/cd_image_${_hash}.xml) + configure_file("${config_file}" ${_xml_file}) add_custom_command( - OUTPUT ${CD_IMAGE_NAME}.bin ${CD_IMAGE_NAME}.cue + OUTPUT ${image_name}.bin ${image_name}.cue COMMAND ${MKPSXISO} -y - -o ${CD_IMAGE_NAME}.bin -c ${CD_IMAGE_NAME}.cue ${CD_CONFIG_FILE} - COMMENT "Building CD image ${CD_IMAGE_NAME}" + -o ${image_name}.bin -c ${image_name}.cue ${_xml_file} + COMMENT "Building CD image ${image_name}" VERBATIM ${ARGN} ) add_custom_target( ${name} ALL DEPENDS - ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.bin - ${CMAKE_CURRENT_BINARY_DIR}/${CD_IMAGE_NAME}.cue + ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.bin + ${CMAKE_CURRENT_BINARY_DIR}/${image_name}.cue ) endfunction() diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h index 1b2bda2..8f8df74 100644 --- a/libpsn00b/include/assert.h +++ b/libpsn00b/include/assert.h @@ -1,13 +1,12 @@ /* * PSn00bSDK assert macro and internal logging - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * - * Note that the _sdk_log() macro is used internally by PSn00bSDK to output - * debug messages and warnings. + * The _sdk_*() macros are used internally by PSn00bSDK to output messages when + * building in debug mode. */ -#ifndef __ASSERT_H -#define __ASSERT_H +#pragma once #include <stdio.h> @@ -25,6 +24,9 @@ void _assert_abort(const char *file, int line, const char *expr); #define assert(expr) #define _sdk_log(fmt, ...) +#define _sdk_assert(expr, fmt, ...) +#define _sdk_validate_args_void(expr) +#define _sdk_validate_args(expr, ret) #else @@ -32,11 +34,27 @@ void _assert_abort(const char *file, int line, const char *expr); ((expr) ? ((void) 0) : _assert_abort(__FILE__, __LINE__, #expr)) #ifdef SDK_LIBRARY_NAME -#define _sdk_log(fmt, ...) 
printf(SDK_LIBRARY_NAME ": " fmt, ##__VA_ARGS__) +#define _sdk_log(fmt, ...) \ + printf(SDK_LIBRARY_NAME ": " fmt __VA_OPT__(,) __VA_ARGS__) #else -#define _sdk_log(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define _sdk_log(fmt, ...) \ + printf(fmt __VA_OPT__(,) __VA_ARGS__) #endif -#endif +#define _sdk_assert(expr, ret, fmt, ...) \ + if (!(expr)) { \ + _sdk_log(fmt, __VA_ARGS__); \ + return ret; \ + } +#define _sdk_validate_args_void(expr) \ + if (!(expr)) { \ + _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \ + return; \ + } +#define _sdk_validate_args(expr, ret) \ + if (!(expr)) { \ + _sdk_log("invalid args to %s() (%s)\n", __func__, #expr); \ + return ret; \ + } #endif diff --git a/libpsn00b/include/cassert b/libpsn00b/include/cassert new file mode 100644 index 0000000..0923486 --- /dev/null +++ b/libpsn00b/include/cassert @@ -0,0 +1,8 @@ +/* + * PSn00bSDK assert macro and internal logging + * (C) 2022-2023 spicyjpeg - MPL licensed + */ + +#pragma once + +#include <assert.h> diff --git a/libpsn00b/include/cctype b/libpsn00b/include/cctype new file mode 100644 index 0000000..b73ad34 --- /dev/null +++ b/libpsn00b/include/cctype @@ -0,0 +1,22 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +namespace std { +extern "C" { + +int isprint(int ch); +int isgraph(int ch); +int isspace(int ch); +int isblank(int ch); +int isalpha(int ch); +int isdigit(int ch); + +int tolower(int ch); +int toupper(int ch); + +} +} diff --git a/libpsn00b/include/cstdint b/libpsn00b/include/cstdint new file mode 100644 index 0000000..3b1bc4a --- /dev/null +++ b/libpsn00b/include/cstdint @@ -0,0 +1,34 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + * + * This is a replacement for the <cstdint> header included with GCC, which seems + * to be broken (at least in GCC 12.2.0) as it requires some macros to be set. 
+ */ + +#pragma once + +#include <stdint.h> + +namespace std { + +#define _DEF_TYPE(bits, prefix) \ + using ::prefix##bits##_t; \ + using ::prefix##_fast##bits##_t; \ + using ::prefix##_least##bits##_t; + +_DEF_TYPE( 8, int) +_DEF_TYPE( 8, uint) +_DEF_TYPE(16, int) +_DEF_TYPE(16, uint) +_DEF_TYPE(32, int) +_DEF_TYPE(32, uint) + +#undef _DEF_TYPE + +using ::intmax_t; +using ::uintmax_t; +using ::intptr_t; +using ::uintptr_t; + +} diff --git a/libpsn00b/include/cstdio b/libpsn00b/include/cstdio new file mode 100644 index 0000000..800d1a2 --- /dev/null +++ b/libpsn00b/include/cstdio @@ -0,0 +1,32 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstdarg> + +namespace std { +extern "C" { + +/* String I/O API (provided by BIOS) */ + +int printf(const char *fmt, ...); +char *gets(char *str); +void puts(const char *str); +int getchar(void); +void putchar(int ch); + +/* String formatting API (built-in) */ + +int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap); +int vsprintf(char *string, const char *fmt, va_list ap); +int sprintf(char *string, const char *fmt, ...); +int snprintf(char *string, unsigned int size, const char *fmt, ...); + +int vsscanf(const char *str, const char *format, va_list ap); +int sscanf(const char *str, const char *fmt, ...); + +} +} diff --git a/libpsn00b/include/cstdlib b/libpsn00b/include/cstdlib new file mode 100644 index 0000000..4fa859d --- /dev/null +++ b/libpsn00b/include/cstdlib @@ -0,0 +1,59 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstddef> + +namespace std { + +/* Definitions */ + +static constexpr int RAND_MAX = 0x7fff; + +/* Structure definitions */ + +struct HeapUsage { + size_t total; // Total size of heap + stack + size_t heap; // Amount of memory currently reserved for heap + size_t stack; // Amount of memory currently reserved for 
stack + size_t alloc; // Amount of memory currently allocated + size_t alloc_max; // Maximum amount of memory ever allocated +}; + +/* API */ + +extern "C" { + +extern int __argc; +extern const char **__argv; + +void abort(void); + +int rand(void); +void srand(int seed); + +int abs(int j); +long labs(long i); + +long strtol(const char *str, char **str_end, int base); +long long strtoll(const char *str, char **str_end, int base); +//float strtof(const char *str, char **str_end); +//double strtod(const char *str, char **str_end); +//long double strtold(const char *str, char **str_end); + +void InitHeap(void *addr, size_t size); +void *sbrk(ptrdiff_t incr); + +void TrackHeapUsage(ptrdiff_t alloc_incr); +void GetHeapUsage(HeapUsage *usage); + +void *malloc(size_t size); +void *calloc(size_t num, size_t size); +void *realloc(void *ptr, size_t size); +void free(void *ptr); + +} +} diff --git a/libpsn00b/include/cstring b/libpsn00b/include/cstring new file mode 100644 index 0000000..1ce7246 --- /dev/null +++ b/libpsn00b/include/cstring @@ -0,0 +1,38 @@ +/* + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#include <cstddef> + +namespace std { +extern "C" { + +void *memset(void *dest, int ch, size_t count); +void *memcpy(void *dest, const void *src, size_t count); +void *memccpy(void *dest, const void *src, int ch, size_t count); +void *memmove(void *dest, const void *src, size_t count); +int memcmp(const void *lhs, const void *rhs, size_t count); +void *memchr(const void *ptr, int ch, size_t count); + +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t count); +int strcmp(const char *lhs, const char *rhs); +int strncmp(const char *lhs, const char *rhs, size_t count); +char *strchr(const char *str, int ch); +char *strrchr(const char *str, int ch); +char *strpbrk(const char *str, const char *breakset); +char *strstr(const char *str, const char *substr); + 
+size_t strlen(const char *str); +char *strcat(char *dest, const char *src); +char *strncat(char *dest, const char *src, size_t count); +char *strdup(const char *str); +char *strndup(const char *str, size_t count); + +char *strtok(char *str, const char *delim); + +} +} diff --git a/libpsn00b/include/ctype.h b/libpsn00b/include/ctype.h index 24ee9d9..2fe0a42 100644 --- a/libpsn00b/include/ctype.h +++ b/libpsn00b/include/ctype.h @@ -1,20 +1,24 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __CTYPE_H -#define __CTYPE_H +#pragma once #ifdef __cplusplus extern "C" { #endif -int tolower(int chr); -int toupper(int chr); +int isprint(int ch); +int isgraph(int ch); +int isspace(int ch); +int isblank(int ch); +int isalpha(int ch); +int isdigit(int ch); + +int tolower(int ch); +int toupper(int ch); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/dlfcn.h b/libpsn00b/include/dlfcn.h index 6192430..5e1e3b6 100644 --- a/libpsn00b/include/dlfcn.h +++ b/libpsn00b/include/dlfcn.h @@ -3,8 +3,7 @@ * (C) 2021-2022 spicyjpeg - MPL licensed */ -#ifndef __DLFCN_H -#define __DLFCN_H +#pragma once #include <stdint.h> #include <stddef.h> @@ -215,5 +214,3 @@ void *DL_GetDLLSymbol(const DLL *dll, const char *name); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/elf.h b/libpsn00b/include/elf.h index abfb3d5..b0ddf71 100644 --- a/libpsn00b/include/elf.h +++ b/libpsn00b/include/elf.h @@ -9,8 +9,7 @@ * converted to enums. 
*/ -#ifndef __ELF_H -#define __ELF_H +#pragma once #include <stdint.h> @@ -121,18 +120,3 @@ typedef enum { STT_LOPROC = 13, /* Start of processor-specific */ STT_HIPROC = 15 /* End of processor-specific */ } Elf32_st_type; - -// If you need to add more constants, you may use the following Python snippet -// to quickly convert #defines to enums: -/* -import re -t = """<paste #defines here>""" -t = re.sub( - r"(0x[0-9a-f]+|0b[01]+|[0-9]+)", - lambda m: f"= {m.group(1)},", - t.replace("#define ", "\t").replace("#define\t", "\t") -) -print("typedef enum {\n" + t + "\n} NAME;") -*/ - -#endif diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h index 7015101..2152986 100644 --- a/libpsn00b/include/hwregs_c.h +++ b/libpsn00b/include/hwregs_c.h @@ -3,8 +3,7 @@ * (C) 2022 spicyjpeg - MPL licensed */ -#ifndef __HWREGS_C_H -#define __HWREGS_C_H +#pragma once #include <stdint.h> @@ -35,7 +34,7 @@ #define CD_DATA _MMIO8(IOBASE | 0x1802) #define CD_IRQ _MMIO8(IOBASE | 0x1803) -#define CD_REG(N) _MMIO8(IOBASE | 0x1800 + (N)) +#define CD_REG(N) _MMIO8((IOBASE | 0x1800) + (N)) /* SPU */ @@ -74,13 +73,13 @@ // These are not named SPU_VOICE_* to avoid name clashes with SPU attribute // flags defined in psxspu.h. 
-#define SPU_CH_VOL_L(N) _MMIO16(IOBASE | 0x1c00 + 16 * (N)) -#define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N)) -#define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N)) -#define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N)) -#define SPU_CH_ADSR1(N) _MMIO16(IOBASE | 0x1c08 + 16 * (N)) -#define SPU_CH_ADSR2(N) _MMIO16(IOBASE | 0x1c0a + 16 * (N)) -#define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N)) +#define SPU_CH_VOL_L(N) _MMIO16((IOBASE | 0x1c00) + (16 * (N))) +#define SPU_CH_VOL_R(N) _MMIO16((IOBASE | 0x1c02) + (16 * (N))) +#define SPU_CH_FREQ(N) _MMIO16((IOBASE | 0x1c04) + (16 * (N))) +#define SPU_CH_ADDR(N) _MMIO16((IOBASE | 0x1c06) + (16 * (N))) +#define SPU_CH_ADSR1(N) _MMIO16((IOBASE | 0x1c08) + (16 * (N))) +#define SPU_CH_ADSR2(N) _MMIO16((IOBASE | 0x1c0a) + (16 * (N))) +#define SPU_CH_LOOP_ADDR(N) _MMIO16((IOBASE | 0x1c0e) + (16 * (N))) /* MDEC */ @@ -92,11 +91,11 @@ // IMPORTANT: even though SIO_DATA is a 32-bit register, it should only be // accessed as 8-bit. Reading it as 16 or 32-bit works fine on real hardware, // but leads to problems in some emulators. 
-#define SIO_DATA(N) _MMIO8 (IOBASE | 0x1040 + 16 * (N)) -#define SIO_STAT(N) _MMIO16(IOBASE | 0x1044 + 16 * (N)) -#define SIO_MODE(N) _MMIO16(IOBASE | 0x1048 + 16 * (N)) -#define SIO_CTRL(N) _MMIO16(IOBASE | 0x104a + 16 * (N)) -#define SIO_BAUD(N) _MMIO16(IOBASE | 0x104e + 16 * (N)) +#define SIO_DATA(N) _MMIO8 ((IOBASE | 0x1040) + (16 * (N))) +#define SIO_STAT(N) _MMIO16((IOBASE | 0x1044) + (16 * (N))) +#define SIO_MODE(N) _MMIO16((IOBASE | 0x1048) + (16 * (N))) +#define SIO_CTRL(N) _MMIO16((IOBASE | 0x104a) + (16 * (N))) +#define SIO_BAUD(N) _MMIO16((IOBASE | 0x104e) + (16 * (N))) /* IRQ controller */ @@ -108,15 +107,15 @@ #define DMA_DPCR _MMIO32(IOBASE | 0x10f0) #define DMA_DICR _MMIO32(IOBASE | 0x10f4) -#define DMA_MADR(N) _MMIO32(IOBASE | 0x1080 + 16 * (N)) -#define DMA_BCR(N) _MMIO32(IOBASE | 0x1084 + 16 * (N)) -#define DMA_CHCR(N) _MMIO32(IOBASE | 0x1088 + 16 * (N)) +#define DMA_MADR(N) _MMIO32((IOBASE | 0x1080) + (16 * (N))) +#define DMA_BCR(N) _MMIO32((IOBASE | 0x1084) + (16 * (N))) +#define DMA_CHCR(N) _MMIO32((IOBASE | 0x1088) + (16 * (N))) /* Timers */ -#define TIMER_VALUE(N) _MMIO32(IOBASE | 0x1100 + 16 * (N)) -#define TIMER_CTRL(N) _MMIO32(IOBASE | 0x1104 + 16 * (N)) -#define TIMER_RELOAD(N) _MMIO32(IOBASE | 0x1108 + 16 * (N)) +#define TIMER_VALUE(N) _MMIO32((IOBASE | 0x1100) + (16 * (N))) +#define TIMER_CTRL(N) _MMIO32((IOBASE | 0x1104) + (16 * (N))) +#define TIMER_RELOAD(N) _MMIO32((IOBASE | 0x1108) + (16 * (N))) /* Memory/bus control */ @@ -130,5 +129,3 @@ #define BUS_EXP2_CFG _MMIO32(IOBASE | 0x101c) #define BUS_COM_DELAY _MMIO32(IOBASE | 0x1020) #define BUS_RAM_SIZE _MMIO32(IOBASE | 0x1060) - -#endif diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h index 5facc1c..cb550b9 100644 --- a/libpsn00b/include/inline_c.h +++ b/libpsn00b/include/inline_c.h @@ -16,8 +16,7 @@ * compiled object files. 
*/ -#ifndef _INLINE_C_H -#define _INLINE_C_H +#pragma once /* GTE load macros */ @@ -1612,5 +1611,3 @@ : "g"( r0 ) ) #define gte_mvmva_b(sf, mx, v, cv, lm) gte_mvmva_core_b( 0x0400012 | \ ((sf)<<19) | ((mx)<<17) | ((v)<<15) | ((cv)<<13) | ((lm)<<10) ) - -#endif // _INLINE_C_H diff --git a/libpsn00b/include/ioctl.h b/libpsn00b/include/ioctl.h deleted file mode 100644 index 5c56422..0000000 --- a/libpsn00b/include/ioctl.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _IOCTL_H -#define _IOCTL_H - -#ifndef NULL -#define NULL 0 -#endif - -#ifndef EOF -#define EOF -1 -#endif - -// General -#define FIONBLOCK (('f'<<8)|1) -#define FIOCSCAN (('f'<<8)|2) - -// disk -#define DIO_FORMAT (('d'<<8)|1) - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/lzconfig.h b/libpsn00b/include/lzconfig.h deleted file mode 100644 index cb8a830..0000000 --- a/libpsn00b/include/lzconfig.h +++ /dev/null @@ -1,68 +0,0 @@ -/*! \file lzconfig.h - * \brief Library configuration header - * \details Define settings will only take effect when you recompile the library. - */ - -#ifndef _LZP_CONFIG_H -#define _LZP_CONFIG_H - - -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - - -/* Set to TRUE to compile without data compression routines useful if you - * plan to use this library on a program that does not require said routines - * especially on a platform with limited memory (such as the PlayStation). - * - * This define will rule out lzCompress(), lzSetHashSizes() and - * lzResetHashSizes() functions and their associated functions. - */ -#define LZP_NO_COMPRESS TRUE - - -/* Set to TRUE to make default compression table sizes to maximum and works best - * when compressing large amounts of data. LZP_USE_MALLOC must be set to TRUE to - * prevent stack overflow errors. - * - * Do not enable this if you plan to compile for a platform with limited memory - * otherwise, the library will consume all memory and crash the system. - * - * This define only affects lzCompress(). - */ -#define LZP_MAX_COMPRESS FALSE - - -/* Uncomment to make the library use malloc() instead of array initializers to - * allocate hash tables. Enabling this is a must if you plan to use large hash - * and window table sizes. - */ -#define LZP_USE_MALLOC FALSE - - -/* Hash table sizes (in power-of-two multiple units) - * - * These define only affect lzCompress(). 
- */ -#if LZP_MAX_COMPRESS == TRUE - -// Minimal defaults -#define LZP_WINDOW_SIZE 17 -#define LZP_HASH1_SIZE 8 -#define LZP_HASH2_SIZE 10 - -#else - -// Maximum defaults -#define LZP_WINDOW_SIZE 17 -#define LZP_HASH1_SIZE 22 -#define LZP_HASH2_SIZE 24 - -#endif - - -#endif // _LZP_CONFIG_H diff --git a/libpsn00b/include/lzp/lzp.h b/libpsn00b/include/lzp/lzp.h index 456de02..1aeea30 100644 --- a/libpsn00b/include/lzp/lzp.h +++ b/libpsn00b/include/lzp/lzp.h @@ -1,20 +1,29 @@ -/*! \file lzp.h - * \brief Main library header +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed */ -/*! \mainpage - * \version 0.20b - * \author John Wilbert 'Lameguy64' Villamor +/** + * @file lzp.h + * @brief Utility library for file bundling and compression * - * \section creditsSection Credits - * - LZ77 data compression/decompression routines based from Ilya Muravyov's - * crush.cpp released under public domain. Refined and ported to C by Lameguy64. - * - CRC calculation routines based from Lammert Bies' lib_crc routines. + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). * + * This header provides the LZ77 compression API and functions to parse and + * decompress .LZP archives after they have been loaded into memory. + * + * @section creditsSection Credits + * - LZ77 data compression/decompression routines based from Ilya Muravyov's + * crush.cpp released under public domain. Refined and ported to C by + * Lameguy64. + * - CRC calculation routines based from Lammert Bies' lib_crc routines. 
*/ -#ifndef _LZPACK_H -#define _LZPACK_H +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum); #ifdef __cplusplus } #endif - - -#endif // _LZPACK_H diff --git a/libpsn00b/include/lzp/lzqlp.h b/libpsn00b/include/lzp/lzqlp.h index 32ce0d7..127f263 100644 --- a/libpsn00b/include/lzp/lzqlp.h +++ b/libpsn00b/include/lzp/lzqlp.h @@ -1,5 +1,23 @@ -#ifndef _QLP_H -#define _QLP_H +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed + */ + +/** + * @file lzqlp.h + * @brief Utility library for file bundling + * + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). + * + * This header provides functions to parse .QLP archives and retrieve pointers + * to their contents after they have been loaded into memory. + */ + +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -23,9 +41,17 @@ typedef struct { uint32_t offs; } QLP_FILE; + +// Function prototypes +#ifdef __cplusplus +extern "C" { +#endif + int qlpFileCount(const QLP_HEAD* qlpfile); const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile); const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile); int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile); -#endif // _QLP_H
\ No newline at end of file +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/include/malloc.h b/libpsn00b/include/malloc.h deleted file mode 100644 index 75c3711..0000000 --- a/libpsn00b/include/malloc.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _MALLOC_H -#define _MALLOC_H - -#warning "<malloc.h> is deprecated, include <stdlib.h> instead" - -#include <stdlib.h> - -#endif // _MALLOC_H
\ No newline at end of file diff --git a/libpsn00b/include/psxapi.h b/libpsn00b/include/psxapi.h index 7353ed2..35ee040 100644 --- a/libpsn00b/include/psxapi.h +++ b/libpsn00b/include/psxapi.h @@ -1,10 +1,21 @@ /* * PSn00bSDK kernel API library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXAPI_H -#define __PSXAPI_H +/** + * @file psxapi.h + * @brief Kernel API library header + * + * @details This header provides access to most of the APIs made available by + * the system's BIOS, including basic file I/O, TTY output, controller and + * memory card drivers, threads, events as well as kernel memory allocation. + * + * For more information and up-to-date documentation on kernel APIs, see: + * https://psx-spx.consoledev.net/kernelbios/ + */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -12,13 +23,38 @@ /* Definitions */ -#define DescHW 0xf0000000 -#define DescSW 0xf4000000 - -#define HwCARD (DescHW|0x11) -#define HwCARD_1 (DescHW|0x12) -#define HwCARD_0 (DescHW|0x13) -#define SwCARD (DescHW|0x02) +// TODO: these desperately need to be cleaned up + +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +#define DescMask 0xff000000 // Event descriptor mask +#define DescTH DescMask +#define DescHW 0xf0000000 // Hardware event (IRQ) +#define DescEV 0xf1000000 // Event event +#define DescRC 0xf2000000 // Root counter event +#define DescUEV 0xf3000000 // User event +#define DescSW 0xf4000000 // BIOS event + +#define HwVBLANK (DescHW|0x01) // VBlank +#define HwGPU (DescHW|0x02) // GPU +#define HwCdRom (DescHW|0x03) // CDROM +#define HwDMAC (DescHW|0x04) // DMA +#define HwRTC0 (DescHW|0x05) // Timer 0 +#define HwRTC1 (DescHW|0x06) // Timer 1 +#define HwRTC2 (DescHW|0x07) // Timer 2 +#define HwCNTL (DescHW|0x08) // Controller +#define HwSPU (DescHW|0x09) // SPU +#define HwPIO (DescHW|0x0a) // PIO & lightgun +#define HwSIO (DescHW|0x0b) // Serial + +#define HwCPU (DescHW|0x10) // 
Processor exception +#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions) +#define HwCard_0 (DescHW|0x12) +#define HwCard_1 (DescHW|0x13) +#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions) +#define SwMATH (DescSW|0x02) #define EvSpIOE 0x0004 #define EvSpERROR 0x8000 @@ -135,8 +171,6 @@ struct JMP_BUF { uint32_t gp; }; -// Not recommended to use these functions to install IRQ handlers - typedef struct { uint32_t *next; uint32_t *func2; @@ -158,7 +192,8 @@ typedef struct { #define FastExitCriticalSection() \ (IRQ_MASK = __saved_irq_mask) -/*#define FastEnterCriticalSection() { \ +#if 0 +#define FastEnterCriticalSection() { \ uint32_t r0, r1; \ __asm__ volatile( \ "mfc0 %0, $12;" \ @@ -179,9 +214,10 @@ typedef struct { "nop;" \ : "=r"(r0) :: \ ); \ -}*/ +} +#endif -/* API */ +/* BIOS API */ #ifdef __cplusplus extern "C" { @@ -199,23 +235,28 @@ int DisableEvent(int event); void DeliverEvent(uint32_t cl, uint32_t spec); void UnDeliverEvent(uint32_t cl, uint32_t spec); -int open(const char *name, int mode); +int open(const char *path, int mode); int close(int fd); -int seek(int fd, uint32_t offset, int mode); -int read(int fd, uint8_t *buff, size_t len); -int write(int fd, const uint8_t *buff, size_t len); +int lseek(int fd, uint32_t offset, int mode); +int read(int fd, void *buff, size_t len); +int write(int fd, const void *buff, size_t len); +int getc(int fd); +int putc(int ch, int fd); int ioctl(int fd, int cmd, int arg); +int isatty(int fd); struct DIRENTRY *firstfile(const char *wildcard, struct DIRENTRY *entry); struct DIRENTRY *nextfile(struct DIRENTRY *entry); -int erase(const char *name); -int chdir(const char *path); +int erase(const char *path); +int undelete(const char *path); +int cd(const char *path); -//#define cd(p) chdir(p) +int _get_errno(void); +int _get_error(int fd); -int AddDev(DCB *dcb); -int DelDev(const char *name); -void ListDev(void); -void AddDummyTty(void); +int AddDrv(DCB *dcb); +int DelDrv(const 
char *name); +void ListDrv(void); +void add_nullcon_driver(void); int EnterCriticalSection(void); void ExitCriticalSection(void); @@ -254,30 +295,33 @@ int ResetRCnt(int spec); void ChangeClearPAD(int mode); void ChangeClearRCnt(int t, int m); -uint32_t OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp); -int CloseTh(uint32_t thread); -int ChangeTh(uint32_t thread); +int OpenTh(uint32_t (*func)(), uint32_t sp, uint32_t gp); +int CloseTh(int thread); +int ChangeTh(int thread); -int Exec(struct EXEC *exec, int argc, char **argv); +int Exec(struct EXEC *exec, int argc, const char **argv); +int LoadExec(const char *path, int argc, const char **argv); void FlushCache(void); void b_setjmp(struct JMP_BUF *buf); void b_longjmp(const struct JMP_BUF *buf, int param); -void SetDefaultExitFromException(void); -void SetCustomExitFromException(const struct JMP_BUF *buf); +void ResetEntryInt(void); +void HookEntryInt(const struct JMP_BUF *buf); void ReturnFromException(void); +int SetConf(int evcb, int tcb, uint32_t sp); +void GetConf(int *evcb, int *tcb, uint32_t *sp); +void SetMem(int size); + int GetSystemInfo(int index); void *GetB0Table(void); void *GetC0Table(void); -void *_kernel_malloc(int size); -void _kernel_free(void *ptr); +void *alloc_kernel_memory(int size); +void free_kernel_memory(void *ptr); void _boot(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h index 503bc83..fc9c391 100644 --- a/libpsn00b/include/psxcd.h +++ b/libpsn00b/include/psxcd.h @@ -21,8 +21,7 @@ * library extension is considered for future development. */ -#ifndef __PSXCD_H -#define __PSXCD_H +#pragma once #include <stdint.h> @@ -811,6 +810,48 @@ int CdMode(void); int CdStatus(void); /** + * @brief Returns the CD-ROM controller's region code. + * + * @details Reads region information from the drive using a CdlTest command. 
+ * This can be used to reliably determine the system's region without having to + * resort to workarounds like probing the BIOS ROM. + * + * This function may return incorrect results and trigger error callbacks on + * emulators or consoles equipped with CD-ROM drive emulation devices such as + * the PSIO. It is not affected by modchips. + * + * @return Region code or 0 if the region cannot be determined + */ +CdlRegionCode CdGetRegion(void); + +/** + * @brief Attempts to disable the CD-ROM controller's region check. + * + * @details Sends undocumented commands to the drive in an attempt to disable + * the region string check, in order to allow reading data from non-PS1 discs + * as well as CD-Rs without needing a modchip. As unlocking commands are region + * specific, the drive's region must be obtained beforehand using CdGetRegion() + * and passed to this function. The unlock persists even if the lid is opened, + * but not if a CdlReset command is issued. + * + * Unlocking is only supported on US, European and Net Yaroze consoles (not on + * Japanese models, devkits and most emulators). This function will return 1 + * without doing anything if CdlRegionDebug is passed as region, as debug + * consoles can already read unlicensed discs. + * + * NOTE: if any callbacks were set using CdReadyCallback() or CdSyncCallback() + * prior to calling CdUnlock(), they will be called with an error code as part + * of the unlocking sequence, even if the unlock was successful. It is thus + * recommended to call this function before setting any callbacks. + * + * @param region + * @return 1 if the drive was successfully unlocked, 0 otherwise + * + * @see CdGetRegion() + */ +int CdUnlock(CdlRegionCode region); + +/** * @brief Retrieves the disc's table of contents. * * @details Retrieves the track entries from a CD's table of contents (TOC). The @@ -832,21 +873,6 @@ int CdStatus(void); int CdGetToc(CdlLOC *toc); /** - * @brief Returns the CD-ROM controller's region code. 
- * - * @details Attempts to fetch region information from the drive using a CdlTest - * command. This can be used to reliably determine the system's region without - * having to resort to workarounds like probing the BIOS ROM. - * - * This function may return incorrect results on emulators or consoles equipped - * with CD-ROM drive emulation devices such as the PSIO. It is not affected by - * modchips. - * - * @return Region code or 0 if the region cannot be determined - */ -CdlRegionCode CdGetRegion(void); - -/** * @brief Sets the CD-ROM volume mixing matrix. * * @details Sets the volume levels of the CD-ROM drive's audio output (used for @@ -1046,5 +1072,3 @@ int CdLoadSession(int session); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h index ae4611e..8dd1dd5 100644 --- a/libpsn00b/include/psxetc.h +++ b/libpsn00b/include/psxetc.h @@ -1,6 +1,6 @@ /* * PSn00bSDK interrupt management library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ /** @@ -13,8 +13,7 @@ * separate header). */ -#ifndef __PSXETC_H -#define __PSXETC_H +#pragma once /* IRQ and DMA channel definitions */ @@ -68,7 +67,7 @@ extern "C" { * | ID | Channel | Used by | * | --: | :--------------- | :-------------------------------------- | * | 0 | IRQ_VBLANK | psxgpu (use VSyncCallback() instead) | - * | 1 | IRQ_GPU | | + * | 1 | IRQ_GPU | psxgpu (use DrawSyncCallback() instead) | * | 2 | IRQ_CD | psxcd (use CdReadyCallback() instead) | * | 3 | IRQ_DMA | psxetc (use DMACallback() instead) | * | 4 | IRQ_TIMER0 | | @@ -156,19 +155,50 @@ void *DMACallback(DMA_Channel dma, void (*func)(void)); void *GetDMACallback(DMA_Channel dma); /** - * @brief Initializes the interrupt dispatcher. + * @brief Enables, disables or sets the priority of a DMA channel. + * + * @details Enables the specified DMA channel and configures its priority (if + * priority >= 0) or disables it (if priority = -1). 
The priority value must be + * in 0-7 range, with 0 being the highest priority and 7 the lowest. + * + * All channels are disabled upon calling ResetCallback(); most libraries will + * re-enable them as needed. By default the priority is set to 3 for all + * channels. + * + * @param dma + * @param priority Priority in 0-7 range or -1 to disable the channel + * @return Previously set priority in 0-7 range, -1 if the channel was disabled + */ +int SetDMAPriority(DMA_Channel dma, int priority); + +/** + * @brief Gets the priority of a DMA channel. + * + * @details Returns the currently set priority value for the specified DMA + * channel in 0-7 range, with 0 being the highest priority and 7 the lowest. + * Returns -1 if the channel is not enabled. + * + * @param dma + * @return Priority in 0-7 range, -1 if the channel is disabled + * + * @see SetDMAPriority() + */ +int GetDMAPriority(DMA_Channel dma); + +/** + * @brief Initializes the interrupt dispatcher and DMA controller. * * @details Sets up the interrupt handling system, hooks the BIOS to dispatch - * interrupts to the library and clears all registered callbacks. This function - * must be called once at the beginning of the program, prior to registering - * any IRQ or DMA callbacks. + * interrupts to the library, clears all registered callbacks and disables all + * DMA channels. This function must be called once at the beginning of the + * program, prior to registering any IRQ or DMA callbacks. * * ResetCallback() is called by psxgpu's ResetGraph(), so invoking it manually * is usually not required. Calling ResetCallback() after ResetGraph() will * actually result in improper initialization, as ResetGraph() registers * several callbacks used internally by psxgpu. 
* - * @return 0 or -1 if the was already initialized + * @return 0 or -1 if the dispatcher was already initialized */ int ResetCallback(void); @@ -196,6 +226,11 @@ void RestartCallback(void); * Note that interrupts are (obviously) disabled until RestartCallback() is * called. * + * WARNING: any ongoing background processing or DMA transfer must be stopped + * before calling StopCallback(), otherwise crashes may occur. This includes + * flushing psxgpu's command queue using DrawSync(), stopping CD-ROM reading + * and calling StopPAD() to disable the BIOS controller driver if used. + * * @see RestartCallback() */ void StopCallback(void); @@ -203,5 +238,3 @@ void StopCallback(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 26e560f..d7f1ad5 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -1,10 +1,26 @@ /* * PSn00bSDK GPU library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXGPU_H -#define __PSXGPU_H +/** + * @file psxgpu.h + * @brief GPU library header + * + * @details This library provides access to the PS1's GPU through a fully + * asynchronous command queue, which allows GPU commands to be batched and sent + * efficiently in the background without stalling the CPU. Helper structures + * and macros to initialize, generate and link GPU display lists in memory are + * also provided, in addition to support for asynchronous VRAM data transfers + * and a debug font API that can be used to easily draw text overlays for + * debugging purposes. + * + * This library is for the most part a drop-in replacement for the official + * SDK's GPU library and is only missing a handful of functions, mainly related + * to Kanji debug fonts and command queue pausing. 
+ */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -28,6 +44,11 @@ typedef enum _GPU_VideoMode { MODE_PAL = 1 } GPU_VideoMode; +typedef enum _GPU_DrawOpType { + DRAWOP_TYPE_DMA = 1, + DRAWOP_TYPE_GPU_IRQ = 2 +} GPU_DrawOpType; + /* Structure macros */ #define setVector(v, _x, _y, _z) \ @@ -83,7 +104,7 @@ typedef enum _GPU_VideoMode { (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ (p)->u2 = (_u2), (p)->v2 = (_v2) - + #define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \ (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ @@ -101,9 +122,12 @@ typedef enum _GPU_VideoMode { #define setlen(p, _len) (((P_TAG *) (p))->len = (uint8_t) (_len)) #define setaddr(p, _addr) (((P_TAG *) (p))->addr = (uint32_t) (_addr)) #define setcode(p, _code) (((P_TAG *) (p))->code = (uint8_t) (_code)) +#define setcode_T(p, _code) (((P_TAG_T *) (p))->code = (uint8_t) (_code)) + #define getlen(p) (((P_TAG *) (p))->len) #define getaddr(p) (((P_TAG *) (p))->addr) #define getcode(p) (((P_TAG *) (p))->code) +#define getcode_T(p) (((P_TAG_T *) (p))->code) #define nextPrim(p) (void *) (0x80000000 | (((P_TAG *) (p))->addr)) #define isendprim(p) ((((P_TAG *) (p))->addr) == 0xffffff) @@ -114,16 +138,20 @@ typedef enum _GPU_VideoMode { #define setSemiTrans(p, abe) \ ((abe) ? (getcode(p) |= 2) : (getcode(p) &= ~2)) +#define setSemiTrans_T(p, abe) \ + ((abe) ? (getcode_T(p) |= 2) : (getcode_T(p) &= ~2)) #define setShadeTex(p, tge) \ ((tge) ? (getcode(p) |= 1) : (getcode(p) &= ~1)) +#define setShadeTex_T(p, tge) \ + ((tge) ? 
(getcode_T(p) |= 1) : (getcode_T(p) &= ~1)) #define getTPage(tp, abr, x, y) ( \ - (((x) / 64) & 15) | \ - ((((y) / 256) & 1) << 4) | \ - (((abr) & 3) << 5) | \ - (((tp) & 3) << 7) | \ - ((((y) / 512) & 1) << 11) \ + (((x) & 0x3c0) >> 6) | \ + (((y) & 0x100) >> 4) | \ + (((y) & 0x200) << 2) | \ + (((abr) & 3) << 5) | \ + (((tp) & 3) << 7) \ ) #define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f)) @@ -147,59 +175,109 @@ typedef enum _GPU_VideoMode { #define setTile(p) setlen(p, 3), setcode(p, 0x60) #define setLineF2(p) setlen(p, 3), setcode(p, 0x40) #define setLineG2(p) setlen(p, 4), setcode(p, 0x50) -#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), \ - (p)->pad = 0x55555555 -#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), \ - (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0 -#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), \ - (p)->pad = 0x55555555 -#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), \ - (p)->pad = 0x55555555, (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 -#define setFill(p) setlen(p, 3), setcode(p, 0x02) -#define setVram2Vram(p) setlen(p, 8), setcode(p, 0x80), \ +#define setLineF3(p) setlen(p, 5), setcode(p, 0x48), (p)->pad = 0x55555555 +#define setLineG3(p) setlen(p, 7), setcode(p, 0x58), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0 +#define setLineF4(p) setlen(p, 6), setcode(p, 0x4c), (p)->pad = 0x55555555 +#define setLineG4(p) setlen(p, 9), setcode(p, 0x5c), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 +#define setFill(p) setlen(p, 3), setcode(p, 0x02) +#define setBlit(p) setlen(p, 8), setcode(p, 0x80), \ (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0 -#define setDrawTPage(p, dfe, dtd, tpage) \ - setlen(p, 1), \ +#define setPolyF3_T(p) setcode_T(p, 0x20) +#define setPolyFT3_T(p) setcode_T(p, 0x24) +#define setPolyG3_T(p) setcode_T(p, 0x30) +#define setPolyGT3_T(p) setcode_T(p, 0x34) +#define setPolyF4_T(p) setcode_T(p, 0x28) +#define setPolyFT4_T(p) setcode_T(p, 0x2c) +#define setPolyG4_T(p) 
setcode_T(p, 0x38) +#define setPolyGT4_T(p) setcode_T(p, 0x3c) +#define setSprt8_T(p) setcode_T(p, 0x74) +#define setSprt16_T(p) setcode_T(p, 0x7c) +#define setSprt_T(p) setcode_T(p, 0x64) +#define setTile1_T(p) setcode_T(p, 0x68) +#define setTile8_T(p) setcode_T(p, 0x70) +#define setTile16_T(p) setcode_T(p, 0x78) +#define setTile_T(p) setcode_T(p, 0x60) +#define setLineF2_T(p) setcode_T(p, 0x40) +#define setLineG2_T(p) setcode_T(p, 0x50) +#define setLineF3_T(p) setcode_T(p, 0x48), (p)->pad = 0x55555555 +#define setLineG3_T(p) setcode_T(p, 0x58), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0 +#define setLineF4_T(p) setcode_T(p, 0x4c), (p)->pad = 0x55555555 +#define setLineG4_T(p) setcode_T(p, 0x5c), (p)->pad = 0x55555555, \ + (p)->p1 = 0, (p)->p2 = 0, (p)->p3 = 0 +#define setFill_T(p) setcode_T(p, 0x02) +#define setBlit_T(p) setcode_T(p, 0x80), \ + (p)->pad[0] = 0, (p)->pad[1] = 0, (p)->pad[2] = 0, (p)->pad[3] = 0 + +#define setDrawTPage_T(p, dfe, dtd, tpage) \ (p)->code[0] = (0xe1000000 | \ (tpage) | \ - ((dtd) << 9) | \ - ((dfe) << 10) \ + (((dtd) & 1) << 9) | \ + (((dfe) & 1) << 10) \ ) +#define setDrawTPage(p, dfe, dtd, tpage) \ + setlen(p, 1), setDrawTPage_T(p, dfe, dtd, tpage) -#define setDrawOffset(p, _x, _y) \ - setlen(p, 1), \ - (p)->code[0] = (0xe5000000 | \ - ((_x) % 1024) | \ - (((_y) % 1024) << 11) \ +#define setTexWindow_T(p, r) \ + (p)->code[0] = (0xe2000000 | \ + ((r)->w & 0x1f) | \ + (((r)->h & 0x1f) << 5) | \ + (((r)->x & 0x1f) << 10) | \ + (((r)->y & 0x1f) << 15) \ ) +#define setTexWindow(p, r) \ + setlen(p, 1), setTexWindow_T(p, r) -#define setDrawMask(p, sb, mt) \ - setlen(p, 1), \ - (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1)) - -#define setDrawArea(p, r) \ - setlen(p, 2), \ +#define setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) \ (p)->code[0] = (0xe3000000 | \ - ((r)->x % 1024) | \ - (((r)->y % 1024) << 10) \ + ((_x0) & 0x3ff) | \ + (((_y0) & 0x3ff) << 10) \ ), \ (p)->code[1] = (0xe4000000 | \ - (((r)->x + (r)->w - 1) % 1024) | \ - 
((((r)->y + (r)->h - 1) % 1024) << 10) \ + ((_x1) & 0x3ff) | \ + (((_y1) & 0x3ff) << 10) \ ) +#define setDrawAreaXY(p, _x0, _y0, _x1, _y1) \ + setlen(p, 2), setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) + +#define setDrawArea_T(p, r) \ + setDrawAreaXY_T(p, \ + (r)->x, \ + (r)->y, \ + (r)->x + (r)->w - 1, \ + (r)->y + (r)->h - 1 \ + ) +#define setDrawArea(p, r) \ + setlen(p, 2), setDrawArea_T(p, r) -#define setTexWindow(p, r) \ - setlen(p, 1), \ - (p)->code[0] = (0xe2000000 | \ - ((r)->w % 32) | \ - (((r)->h % 32) << 5) | \ - (((r)->x % 32) << 10) | \ - (((r)->y % 32) << 15) \ +#define setDrawOffset_T(p, _x, _y) \ + (p)->code[0] = (0xe5000000 | \ + ((_x) & 0x7ff) | \ + (((_y) & 0x7ff) << 11) \ ) +#define setDrawOffset(p, _x, _y) \ + setlen(p, 1), setDrawOffset_T(p, _x, _y) + +#define setDrawStp_T(p, pbw, mt) \ + (p)->code[0] = (0xe6000000 | (pbw) | ((mt) << 1)) +#define setDrawStp(p, pbw, mt) \ + setlen(p, 1), setDrawStp_T(p, pbw, mt) + +#define setDrawIRQ_T(p) \ + (p)->code[0] = 0x1f000000 +#define setDrawIRQ(p) \ + setlen(p, 1), setDrawIRQ_T(p) /* Primitive structure definitions */ +typedef struct _P_TAG_T { + uint32_t color:24; + uint32_t code:8; +} P_TAG_T; + typedef struct _P_TAG { uint32_t addr:24; uint32_t len:8; @@ -212,25 +290,31 @@ typedef struct _P_COLOR { uint32_t pad:8; } P_COLOR; -typedef struct _POLY_F3 { - uint32_t tag; +// These macros are used to define two variants of each primitive, a regular one +// and a "tagless" one (_T suffix) without the OT/display list header. +#define _DEF_PRIM(name, ...) 
\ + typedef struct _##name##_T { __VA_ARGS__ } name##_T; \ + typedef struct _##name { uint32_t tag; __VA_ARGS__ } name; +#define _DEF_ALIAS(name, target) \ + typedef struct _##target##_T name##_T; \ + typedef struct _##target name; + +_DEF_PRIM(POLY_F3, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; -} POLY_F3; +) -typedef struct _POLY_F4 { - uint32_t tag; +_DEF_PRIM(POLY_F4, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; int16_t x3, y3; -} POLY_F4; +) -typedef struct _POLY_FT3 { - uint32_t tag; +_DEF_PRIM(POLY_FT3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -241,10 +325,9 @@ typedef struct _POLY_FT3 { int16_t x2, y2; uint8_t u2, v2; uint16_t pad; -} POLY_FT3; +) -typedef struct _POLY_FT4 { - uint32_t tag; +_DEF_PRIM(POLY_FT4, uint8_t r0, g0, b0, code; uint16_t x0, y0; uint8_t u0, v0; @@ -258,20 +341,18 @@ typedef struct _POLY_FT4 { int16_t x3, y3; uint8_t u3, v3; uint16_t pad1; -} POLY_FT4; +) -typedef struct _POLY_G3 { - uint32_t tag; +_DEF_PRIM(POLY_G3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, pad0; int16_t x1, y1; uint8_t r2, g2, b2, pad1; int16_t x2, y2; -} POLY_G3; +) -typedef struct _POLY_G4 { - uint32_t tag; +_DEF_PRIM(POLY_G4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, pad0; @@ -280,10 +361,9 @@ typedef struct _POLY_G4 { int16_t x2, y2; uint8_t r3, g3, b3, pad2; int16_t x3, y3; -} POLY_G4; +) -typedef struct _POLY_GT3 { - uint32_t tag; +_DEF_PRIM(POLY_GT3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -296,10 +376,9 @@ typedef struct _POLY_GT3 { int16_t x2, y2; uint8_t u2, v2; uint16_t pad2; -} POLY_GT3; +) -typedef struct _POLY_GT4 { - uint32_t tag; +_DEF_PRIM(POLY_GT4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; @@ -316,34 +395,30 @@ typedef struct _POLY_GT4 { int16_t x3, y3; uint8_t u3, v3; uint16_t pad4; -} POLY_GT4; +) -typedef struct _LINE_F2 { - uint32_t tag; +_DEF_PRIM(LINE_F2, uint8_t r0, g0, b0, code; 
int16_t x0, y0; int16_t x1, y1; -} LINE_F2; +) -typedef struct _LINE_G2 { - uint32_t tag; +_DEF_PRIM(LINE_G2, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; int16_t x1, y1; -} LINE_G2; +) -typedef struct _LINE_F3 { - uint32_t tag; +_DEF_PRIM(LINE_F3, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; uint32_t pad; -} LINE_F3; +) -typedef struct _LINE_G3 { - uint32_t tag; +_DEF_PRIM(LINE_G3, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; @@ -351,20 +426,18 @@ typedef struct _LINE_G3 { uint8_t r2, g2, b2, p2; int16_t x2, y2; uint32_t pad; -} LINE_G3; +) -typedef struct _LINE_F4 { - uint32_t tag; +_DEF_PRIM(LINE_F4, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t x1, y1; int16_t x2, y2; int16_t x3, y3; uint32_t pad; -} LINE_F4; +) -typedef struct _LINE_G4 { - uint32_t tag; +_DEF_PRIM(LINE_G4, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t r1, g1, b1, p1; @@ -374,88 +447,80 @@ typedef struct _LINE_G4 { uint8_t r3, g3, b3, p3; int16_t x3, y3; uint32_t pad; -} LINE_G4; +) -typedef struct _TILE { - uint32_t tag; +_DEF_PRIM(TILE, uint8_t r0, g0, b0, code; int16_t x0, y0; int16_t w, h; -} TILE; +) -struct _TILE_FIXED { - uint32_t tag; +_DEF_PRIM(TILE_1, uint8_t r0, g0, b0, code; int16_t x0, y0; -}; -typedef struct _TILE_FIXED TILE_1; -typedef struct _TILE_FIXED TILE_8; -typedef struct _TILE_FIXED TILE_16; +) +_DEF_ALIAS(TILE_8, TILE_1) +_DEF_ALIAS(TILE_16, TILE_1) -typedef struct _SPRT { - uint32_t tag; +_DEF_PRIM(SPRT, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; uint16_t clut; uint16_t w, h; -} SPRT; +) -struct _SPRT_FIXED { - uint32_t tag; +_DEF_PRIM(SPRT_1, uint8_t r0, g0, b0, code; int16_t x0, y0; uint8_t u0, v0; uint16_t clut; -}; -typedef struct _SPRT_FIXED SPRT_8; -typedef struct _SPRT_FIXED SPRT_16; - -typedef struct _DR_ENV { - uint32_t tag; - uint32_t code[8]; -} DR_ENV; - -typedef struct _DR_AREA { - uint32_t tag; - uint32_t code[2]; -} DR_AREA; - -typedef struct _DR_OFFSET { - 
uint32_t tag; - uint32_t code[1]; -} DR_OFFSET; - -typedef struct _DR_TWIN { - uint32_t tag; - uint32_t code[2]; -} DR_TWIN; - -typedef struct _DR_TPAGE { - uint32_t tag; - uint32_t code[1]; -} DR_TPAGE; - -typedef struct _DR_MASK { - uint32_t tag; - uint32_t code[1]; -} DR_MASK; +) +_DEF_ALIAS(SPRT_8, SPRT_1) +_DEF_ALIAS(SPRT_16, SPRT_1) -typedef struct _FILL { - uint32_t tag; +_DEF_PRIM(FILL, uint8_t r0, g0, b0, code; - uint16_t x0, y0; // Note: coordinates must be in 16 pixel steps + uint16_t x0, y0; uint16_t w, h; -} FILL; +) -typedef struct _VRAM2VRAM { - uint32_t tag; +_DEF_PRIM(DR_MOVE, uint8_t p0, p1, p2, code; uint16_t x0, y0; uint16_t x1, y1; uint16_t w, h; - uint32_t pad[4]; -} VRAM2VRAM; +) + +_DEF_PRIM(DR_AREA, + uint32_t code[2]; +) +_DEF_PRIM(DR_OFFSET, + uint32_t code[1]; +) +_DEF_PRIM(DR_TWIN, + uint32_t code[1]; +) +_DEF_PRIM(DR_TPAGE, + uint32_t code[1]; +) +_DEF_PRIM(DR_STP, + uint32_t code[1]; +) +_DEF_PRIM(DR_IRQ, + uint32_t code[1]; +) + +_DEF_PRIM(DR_ENV, + DR_TPAGE_T tpage; + DR_TWIN_T twin; + DR_AREA_T area; + DR_OFFSET_T offset; + FILL_T fill; +) + +#undef _DEF_PRIM +#undef _DEF_ALIAS /* Structure definitions */ @@ -478,13 +543,13 @@ typedef struct _DISPENV { typedef struct _DRAWENV { RECT clip; // Drawing area int16_t ofs[2]; // GPU draw offset (relative to draw area) - RECT tw; // Texture window (doesn't do anything atm) + RECT tw; // Texture window uint16_t tpage; // Initial tpage value uint8_t dtd; // Dither processing flag (simply OR'ed to tpage) uint8_t dfe; // Drawing to display area blocked/allowed (simply OR'ed to tpage) uint8_t isbg; // Clear draw area if non-zero uint8_t r0, g0, b0; // Draw area clear color (if isbg iz nonzero) - DR_ENV dr_env; // Draw mode packet area (used by PutDrawEnv) + DR_ENV dr_env; // GPU primitive cache area (used internally) } DRAWENV; typedef struct _TIM_IMAGE { @@ -521,31 +586,35 @@ void PutDrawEnv(DRAWENV *env); void PutDrawEnvFast(DRAWENV *env); int GetODE(void); +int IsIdleGPU(int timeout); int 
VSync(int mode); void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); -int EnqueueDrawOp( - void (*func)(uint32_t, uint32_t, uint32_t), - uint32_t arg1, - uint32_t arg2, - uint32_t arg3 -); +void SetDrawOpType(GPU_DrawOpType type); +int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3); int DrawSync(int mode); void *DrawSyncCallback(void (*func)(void)); int LoadImage(const RECT *rect, const uint32_t *data); int StoreImage(const RECT *rect, uint32_t *data); -//int MoveImage(const RECT *rect, int x, int y); +int MoveImage(const RECT *rect, int x, int y); void LoadImage2(const RECT *rect, const uint32_t *data); void StoreImage2(const RECT *rect, uint32_t *data); -//void MoveImage2(const RECT *rect, int x, int y); +void MoveImage2(const RECT *rect, int x, int y); void ClearOTagR(uint32_t *ot, size_t length); void ClearOTag(uint32_t *ot, size_t length); int DrawOTag(const uint32_t *ot); +int DrawOTagIRQ(const uint32_t *ot); int DrawOTagEnv(const uint32_t *ot, DRAWENV *env); +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env); void DrawOTag2(const uint32_t *ot); +void DrawOTagIRQ2(const uint32_t *ot); +int DrawBuffer(const uint32_t *buf, size_t length); +int DrawBufferIRQ(const uint32_t *buf, size_t length); +void DrawBuffer2(const uint32_t *buf, size_t length); +void DrawBufferIRQ2(const uint32_t *buf, size_t length); void DrawPrim(const uint32_t *pri); void AddPrim(uint32_t *ot, const void *pri); @@ -565,5 +634,3 @@ char *FntFlush(int id); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h index 91dfd6a..2200a55 100644 --- a/libpsn00b/include/psxgte.h +++ b/libpsn00b/include/psxgte.h @@ -14,8 +14,7 @@ * registers and issue commands to the GTE. 
*/ -#ifndef __PSXGTE_H -#define __PSXGTE_H +#pragma once #include <stdint.h> @@ -259,5 +258,3 @@ void Square0(VECTOR *v0, VECTOR *v1); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxkernel.h b/libpsn00b/include/psxkernel.h deleted file mode 100644 index 0c55bcb..0000000 --- a/libpsn00b/include/psxkernel.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _PSXKERNEL_H -#define _PSXKERNEL_H - -// Event descriptors -#define DescMask 0xff000000 // Event descriptor mask -#define DescTH DescMask -#define DescHW 0xf0000000 // Hardware event (IRQ) -#define DescEV 0xf1000000 // Event event -#define DescRC 0xf2000000 // Root counter event -#define DescUEV 0xf3000000 // User event -#define DescSW 0xf4000000 // BIOS event - -// Hardware events -#define HwVBLANK (DescHW|0x01) // VBlank -#define HwGPU (DescHW|0x02) // GPU -#define HwCdRom (DescHW|0x03) // CDROM -#define HwDMAC (DescHW|0x04) // DMA -#define HwRTC0 (DescHW|0x05) // Timer 0 -#define HwRTC1 (DescHW|0x06) // Timer 1 -#define HwRTC2 (DescHW|0x07) // Timer 2 -#define HwCNTL (DescHW|0x08) // Controller -#define HwSPU (DescHW|0x09) // SPU -#define HwPIO (DescHW|0x0a) // PIO & lightgun -#define HwSIO (DescHW|0x0b) // Serial - -#define HwCPU (DescHW|0x10) // Processor exception -#define HwCARD (DescHW|0x11) // Memory card (lower level BIOS functions) -#define HwCard_0 (DescHW|0x12) // Memory card (unused) -#define HwCard_1 (DescHW|0x13) // Memory card (unused) -#define SwCARD (DescSW|0x01) // Memory card (higher level BIOS functions) -#define SwMATH (DescSW|0x02) // Libmath related apparently, unknown purpose - -#define RCntCNT0 (DescRC|0x00) // Root counter 0 (dot clock) -#define RCntCNT1 (DescRC|0x01) // Horizontal sync -#define RCntCNT2 (DescRC|0x02) // 1/8 of system clock -#define RCntCNT3 (DescRC|0x03) // Vertical blank - -#define RCntMdINTR 0x1000 // General interrupt -#define RCntMdNOINTR 0x2000 // New device -#define RCntMdSC 0x0001 // Counter becomes zero -#define RCntMdSP 0x0000 // Unknown purpose 
-#define RCntMdFR 0x0000 -#define RCntMdGATE 0x0010 // Command acknowledged - -#endif // _PSXKERNEL_H
\ No newline at end of file diff --git a/libpsn00b/include/psxpad.h b/libpsn00b/include/psxpad.h index 32f7f8a..09f28c4 100644 --- a/libpsn00b/include/psxpad.h +++ b/libpsn00b/include/psxpad.h @@ -11,8 +11,7 @@ * Reference: https://gist.github.com/scanlime/5042071 */ -#ifndef _PSXPAD_H -#define _PSXPAD_H +#pragma once #include <stdint.h> @@ -234,5 +233,3 @@ typedef struct __attribute__((packed)) _MemCardRequest { uint8_t checksum; // = lba_h ^ lba_l ^ data (CMD_WRITE only) uint8_t dummy2[3]; } MemCardRequest; - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index dc1d52c..f26e030 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -1,6 +1,6 @@ /* * PSn00bSDK MDEC library - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ /** @@ -17,11 +17,12 @@ * FMV playback is not part of this library per se, but can implemented using * the APIs defined here alongside some code to stream data from the CD drive. * - * Currently only version 1 and 2 .BS files are supported. + * Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI + * bitstreams are not supported, but no encoder is publicly available for those + * anyway. */ -#ifndef __PSXPRESS_H -#define __PSXPRESS_H +#pragma once #include <stdint.h> #include <stddef.h> @@ -34,28 +35,26 @@ typedef struct _DECDCTENV { int16_t dct[64]; // Inverse DCT matrix (2.14 fixed-point) } DECDCTENV; -// This is the "small" lookup table used by DecDCTvlc(). It can be copied to -// the scratchpad. +typedef struct _VLC_TableV2 { + uint16_t ac0[2]; + uint32_t ac2[8], ac3[64]; + uint16_t ac4[8], ac5[8], ac7[16], ac8[32]; + uint16_t ac9[32], ac10[32], ac11[32], ac12[32]; +} VLC_TableV2; + +typedef struct _VLC_TableV3 { + uint16_t ac0[2]; + uint32_t ac2[8], ac3[64]; + uint16_t ac4[8], ac5[8], ac7[16], ac8[32]; + uint16_t ac9[32], ac10[32], ac11[32], ac12[32]; + uint8_t dc[128], dc_len[9]; + uint8_t _reserved[3]; +} VLC_TableV3; + typedef struct _DECDCTTAB { - uint16_t lut0[2]; - uint32_t lut2[8]; - uint32_t lut3[64]; - uint16_t lut4[8]; - uint16_t lut5[8]; - uint16_t lut7[16]; - uint16_t lut8[32]; - uint16_t lut9[32]; - uint16_t lut10[32]; - uint16_t lut11[32]; - uint16_t lut12[32]; + uint32_t ac[8192], ac00[512]; } DECDCTTAB; -// This is the "large" table used by DecDCTvlc2(). 
-typedef struct _DECDCTTAB2 { - uint32_t lut[8192]; - uint32_t lut00[512]; -} DECDCTTAB2; - typedef enum _DECDCTMODE { DECDCT_MODE_24BPP = 1, DECDCT_MODE_16BPP = 0, @@ -66,8 +65,9 @@ typedef enum _DECDCTMODE { typedef struct _VLC_Context { const uint32_t *input; uint32_t window, next_window, remaining; - uint16_t quant_scale; int8_t is_v3, bit_offset, block_index, coeff_index; + uint16_t quant_scale; + int16_t last_y, last_cr, last_cb; } VLC_Context; // Despite what some docs claim, the "number of 32-byte blocks" and "always @@ -233,8 +233,9 @@ int DecDCToutSync(int mode); * frame) into a buffer that can be passed to DecDCTin(). This function uses a * small (<1 KB) lookup table combined with the GTE to accelerate the process; * performance is roughly on par with DecDCTvlcStart2() if the lookup table - * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTable(). The - * contents of the GTE's LZCR register, if any, will be destroyed. + * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTableV2() or + * DecDCTvlcCopyTableV3(). The contents of the GTE's LZCS and LZCR registers, + * if any, will be destroyed. * * A VLC_Context object must be created and passed to this function, which will * then proceed to initialize its fields. The max_size argument sets the @@ -244,8 +245,6 @@ int DecDCToutSync(int mode); * can be different). If max_size = 0, the entire frame will always be decoded * in one shot. * - * Only bitstream version 2 is currently supported. - * * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the * first time. Attempting to call this function with the GTE disabled will * result in a crash. 
@@ -256,7 +255,7 @@ int DecDCToutSync(int mode); * @param bs * @return 0, 1 if more data needs to be output or -1 in case of failure * - * @see DecDCTvlcContinue(), DecDCTvlcCopyTable() + * @see DecDCTvlcContinue(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3() */ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); @@ -275,7 +274,8 @@ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint3 * context returned 0; in that case the context shall be discarded or reused to * decode another bitstream. * - * The contents of the GTE's LZCR register, if any, will be destroyed. + * The contents of the GTE's LZCS and LZCR registers, if any, will be + * destroyed. * * See DecDCTvlcStart() for more details. * @@ -309,7 +309,7 @@ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); * @param buf * @return 0, 1 if more data needs to be output or -1 in case of failure * - * @see DecDCTvlcSize(), DecDCTvlcCopyTable() + * @see DecDCTvlcSize(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3() */ int DecDCTvlc(const uint32_t *bs, uint32_t *buf); @@ -332,23 +332,60 @@ int DecDCTvlc(const uint32_t *bs, uint32_t *buf); size_t DecDCTvlcSize(size_t size); /** - * @brief Moves the lookup table used by the .BS decompressor to the scratchpad - * region. + * @brief Copies the lookup tables used by the .BS decompressor (v1/v2) to the + * scratchpad region. * - * @details Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), - * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified - * address. A copy of this table is always present in main RAM, however this - * function can be used to copy it to the scratchpad region to boost - * decompression performance. + * @details Copies the lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() to the specified address. 
A copy of this + * table is always present in main RAM, however this function can be used to + * copy it to the scratchpad region to boost decompression performance. + * + * This function copies a 676-byte table (VLC_TableV2 structure) containing + * only the data necessary for decoding version 1 and 2 bitstreams, to help + * save scratchpad space. If support for version 3 is required, + * DecDCTvlcCopyTableV3() can be used instead to copy the full 816-byte table. * * The address passed to this function is saved. Calls to DecDCTvlcStart(), * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table - * copied. Call DecDCTvlcCopyTable(0) to revert to using the library's internal - * table in main RAM. + * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to + * using the library's internal table in main RAM. + * + * WARNING: attempting to decode a version 3 .BS file or .STR frame after + * calling this function will result in undefined behavior and potentially a + * crash. To re-enable version 3 decoding, use DecDCTvlcCopyTableV3() to copy + * the full table to the scratchpad or revert to using the built-in table in + * main RAM. + * + * @param addr Pointer to free 676-byte area in scratchpad region or 0 to reset * - * @param addr Pointer to free area in scratchpad region or 0 to reset + * @see DecDCTvlcCopyTableV3() */ -void DecDCTvlcCopyTable(DECDCTTAB *addr); +void DecDCTvlcCopyTableV2(VLC_TableV2 *addr); + +/** + * @brief Copies the lookup tables used by the .BS decompressor (v1/v2/v3) to + * the scratchpad region. + * + * @details Copies the lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this + * table is always present in main RAM, however this function can be used to + * copy it to the scratchpad region to boost decompression performance. 
+ * + * This function copies the full 816-byte table (VLC_TableV3 structure), + * including the data used to decode version 3 bitstreams. If support for + * version 3 is not required, DecDCTvlcCopyTableV2() can be used instead to + * save scratchpad space by only copying the first 676 bytes of the table. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart(), + * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table + * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to + * using the library's internal table in main RAM. + * + * @param addr Pointer to free 816-byte area in scratchpad region or 0 to reset + * + * @see DecDCTvlcCopyTableV2() + */ +void DecDCTvlcCopyTableV3(VLC_TableV3 *addr); /** * @brief Decompresses or begins decompressing a .BS file into MDEC codes @@ -360,8 +397,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr); * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad. * Depending on the specific bitstream being decoded DecDCTvlcStart2() might be * slightly faster or slower than DecDCTvlcStart() with its lookup table copied - * to the scratchpad (see DecDCTvlcCopyTable()). DecDCTvlcStart() with the - * table in main RAM tends to be much slower. + * to the scratchpad (see DecDCTvlcCopyTableV2() and DecDCTvlcCopyTableV3()). + * DecDCTvlcStart() with the table in main RAM tends to be much slower. * * A VLC_Context object must be created and passed to this function, which will * then proceed to initialize its fields. The max_size argument sets the @@ -371,7 +408,8 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr); * buffer can be different). If max_size = 0, the entire frame will always be * decoded in one shot. * - * Only bitstream version 2 is currently supported. + * This function only supports decoding version 1 and 2 bitstreams. Use + * DecDCTvlcStart() to decode a version 3 bitstream. 
* * @param ctx Pointer to VLC_Context structure (which will be initialized) * @param buf @@ -432,7 +470,7 @@ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); * * @see DecDCTvlcSize2(), DecDCTvlcBuild() */ -int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table); +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table); /** * @brief Sets the maximum amount of data to be decompressed (alternate @@ -458,7 +496,7 @@ size_t DecDCTvlcSize2(size_t size); * the .BS decompressor. * * @details Generates the lookup table required by DecDCTvlcStart2(), - * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the + * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB structure) into the * specified buffer. Since the table is relatively large (34 KB), it is * recommended to only generate it in a dynamically-allocated buffer when * needed and deallocate the buffer afterwards. @@ -468,10 +506,8 @@ size_t DecDCTvlcSize2(size_t size); * * @param table */ -void DecDCTvlcBuild(DECDCTTAB2 *table); +void DecDCTvlcBuild(DECDCTTAB *table); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxsio.h b/libpsn00b/include/psxsio.h index 449e43a..8932830 100644 --- a/libpsn00b/include/psxsio.h +++ b/libpsn00b/include/psxsio.h @@ -18,8 +18,7 @@ * debugging purposes. 
*/ -#ifndef __PSXSIO_H -#define __PSXSIO_H +#pragma once #include <stdint.h> @@ -280,5 +279,3 @@ void DelSIO(void); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/psxsn.h b/libpsn00b/include/psxsn.h new file mode 100644 index 0000000..1acbc18 --- /dev/null +++ b/libpsn00b/include/psxsn.h @@ -0,0 +1,51 @@ +/* + * PSn00bSDK kernel API library (host file access) + * (C) 2023 spicyjpeg - MPL licensed + */ + +/** + * @file psxsn.h + * @brief Host file access API header + * + * @details This header provides stubs for the PCDRV API, which grants read and + * write access to a directory on the host's filesystem when the executable is + * running on an emulator or through a debugger that supports the PCDRV + * protocol, such as Unirom or pcsx-redux. These functions are completely + * separate and independent from the BIOS file API and do not register any + * device drivers. + * + * Note that in the official SDK these functions are provided by libsn, while + * in PSn00bSDK they are part of libpsxapi. 
+ */ + +#pragma once + +#include <stddef.h> + +typedef enum _PCDRV_OpenMode { + PCDRV_MODE_READ = 0, + PCDRV_MODE_WRITE = 1, + PCDRV_MODE_READ_WRITE = 2 +} PCDRV_OpenMode; + +typedef enum _PCDRV_SeekMode { + PCDRV_SEEK_SET = 0, + PCDRV_SEEK_CUR = 1, + PCDRV_SEEK_END = 2 +} PCDRV_SeekMode; + +#ifdef __cplusplus +extern "C" { +#endif + +int PCinit(void); +int PCcreat(const char *path); +int PCopen(const char *path, PCDRV_OpenMode mode); +int PCclose(int fd); +int PCread(int fd, void *data, size_t length); +int PCwrite(int fd, const void *data, size_t length); +int PClseek(int fd, int offset, PCDRV_SeekMode mode); + +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h index cdc3ac7..b544952 100644 --- a/libpsn00b/include/psxspu.h +++ b/libpsn00b/include/psxspu.h @@ -1,10 +1,25 @@ /* * PSn00bSDK SPU library - * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __PSXSPU_H -#define __PSXSPU_H +/** + * @file psxspu.h + * @brief SPU library header + * + * @details The PSn00bSDK SPU library allows for SPU initialization, DMA + * transfers (both sample data uploads and capture buffer reads) and provides + * helper macros for accessing SPU control registers, which can be used to + * control sample playback on each channel, configure reverb and enable more + * advanced features such as interrupts. + * + * This library currently has fewer functions than its Sony SDK counterpart, in + * part because it is not yet complete but also since the vast majority of the + * Sony library's functions are redundant, inefficient and can be replaced with + * simple SPU register writes. 
+ */ + +#pragma once #include <stdint.h> #include <stddef.h> @@ -12,6 +27,7 @@ /* Definitions */ +#if 0 typedef enum _SPU_AttrMask { SPU_VOICE_VOLL = 1 << 0, // Left volume SPU_VOICE_VOLR = 1 << 1, // Right volume @@ -33,6 +49,7 @@ typedef enum _SPU_AttrMask { SPU_VOICE_ADSR_ADSR1 = 1 << 17, SPU_VOICE_ADSR_ADSR2 = 1 << 18 } SPU_AttrMask; +#endif typedef enum _SPU_TransferMode { SPU_TRANSFER_BY_DMA = 0, @@ -46,6 +63,7 @@ typedef enum _SPU_WaitMode { /* Structure definitions */ +#if 0 typedef struct _SpuVolume { int16_t left, right; } SpuVolume; @@ -72,6 +90,7 @@ typedef struct _SpuCommonAttr { SpuVolume mvol, mvolmode, mvolx; SpuExtAttr cd, ext; } SpuCommonAttr; +#endif /* Macros */ @@ -137,11 +156,11 @@ size_t SpuRead(uint32_t *data, size_t size); size_t SpuWrite(const uint32_t *data, size_t size); size_t SpuWritePartly(const uint32_t *data, size_t size); SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode); +SPU_TransferMode SpuGetTransferMode(void); uint32_t SpuSetTransferStartAddr(uint32_t addr); +uint32_t SpuGetTransferStartAddr(void); int SpuIsTransferCompleted(int mode); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/stdio.h b/libpsn00b/include/stdio.h index 8aaf4c7..1bb5b74 100644 --- a/libpsn00b/include/stdio.h +++ b/libpsn00b/include/stdio.h @@ -1,39 +1,26 @@ -#ifndef _STDIO_H -#define _STDIO_H +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ -#include <stdarg.h> +#pragma once -// BIOS seek modes -#ifndef SEEK_SET -#define SEEK_SET 0 -#endif -#ifndef SEEK_CUR -#define SEEK_CUR 1 -#endif -#ifndef SEEK_END -#define SEEK_END 2 /* warning: reportedly buggy */ -#endif +#include <stdarg.h> #ifdef __cplusplus extern "C" { #endif -// The following functions use the BIOS -extern void printf (const char *__format, ...); - -extern int getc(int __fd); -extern int putc(int __char, int __fd); +/* String I/O API (provided by BIOS) */ -#define fputc(__char, __fd) putc(__char, __fd) -#define 
fgetc(__char, __fd) getc(__char, __fd) +int printf(const char *fmt, ...); +char *gets(char *str); +void puts(const char *str); +int getchar(void); +void putchar(int ch); -// Console TTY -extern void gets(char *__s); -extern void puts(const char *__s); -extern int getchar(void); -extern void putchar(int __c); +/* String formatting API (built-in) */ -// The following functions do not use the BIOS int vsnprintf(char *string, unsigned int size, const char *fmt, va_list ap); int vsprintf(char *string, const char *fmt, va_list ap); int sprintf(char *string, const char *fmt, ...); @@ -45,5 +32,3 @@ int sscanf(const char *str, const char *fmt, ...); #ifdef __cplusplus } #endif - -#endif // _STDIO_H
\ No newline at end of file diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h index 049d067..19761df 100644 --- a/libpsn00b/include/stdlib.h +++ b/libpsn00b/include/stdlib.h @@ -1,10 +1,9 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STDLIB_H -#define __STDLIB_H +#pragma once #include <stddef.h> @@ -39,11 +38,11 @@ void srand(int seed); int abs(int j); long labs(long i); -long strtol(const char *nptr, char **endptr, int base); -long long strtoll(const char *nptr, char **endptr, int base); -float strtof(const char *nptr, char **endptr); -double strtod(const char *nptr, char **endptr); -long double strtold(const char *nptr, char **endptr); +long strtol(const char *str, char **str_end, int base); +long long strtoll(const char *str, char **str_end, int base); +//float strtof(const char *str, char **str_end); +//double strtod(const char *str, char **str_end); +//long double strtold(const char *str, char **str_end); void InitHeap(void *addr, size_t size); void *sbrk(ptrdiff_t incr); @@ -59,5 +58,3 @@ void free(void *ptr); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/string.h b/libpsn00b/include/string.h index ceee066..6310b1a 100644 --- a/libpsn00b/include/string.h +++ b/libpsn00b/include/string.h @@ -1,37 +1,40 @@ /* * PSn00bSDK standard library - * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STRING_H -#define __STRING_H +#pragma once + +#include <stddef.h> #ifdef __cplusplus extern "C" { #endif -int strcmp(const char *dst , const char *src); -int strncmp(const char *dst , const char *src , int len); -char *strpbrk(const char *dst , const char *src); -char *strtok(char *s , char *set); -char *strstr(const char *big , const char *little); - -char *strcat(char *s , const char *append); 
-char *strncat(char *s , const char *append, int n); -char *strcpy(char *dst , const char *src); -char *strncpy(char *dst , const char *src , int n); -int strlen(const char *s); -char *strchr(const char *s , int c); -char *strrchr(const char *s , int c); - -void *memmove(void *dst , const void *src , int n); -void *memchr(void *s , int c , int n); -void *memcpy(void *dst , const void *src , int n); -void *memset(void *dst , char c , int n); -int memcmp(const void *b1 , const void *b2 , int n); +void *memset(void *dest, int ch, size_t count); +void *memcpy(void *dest, const void *src, size_t count); +void *memccpy(void *dest, const void *src, int ch, size_t count); +void *memmove(void *dest, const void *src, size_t count); +int memcmp(const void *lhs, const void *rhs, size_t count); +void *memchr(const void *ptr, int ch, size_t count); + +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t count); +int strcmp(const char *lhs, const char *rhs); +int strncmp(const char *lhs, const char *rhs, size_t count); +char *strchr(const char *str, int ch); +char *strrchr(const char *str, int ch); +char *strpbrk(const char *str, const char *breakset); +char *strstr(const char *str, const char *substr); + +size_t strlen(const char *str); +char *strcat(char *dest, const char *src); +char *strncat(char *dest, const char *src, size_t count); +char *strdup(const char *str); +char *strndup(const char *str, size_t count); + +char *strtok(char *str, const char *delim); #ifdef __cplusplus } #endif - -#endif diff --git a/libpsn00b/include/strings.h b/libpsn00b/include/strings.h index 7223ab9..0595637 100644 --- a/libpsn00b/include/strings.h +++ b/libpsn00b/include/strings.h @@ -3,8 +3,7 @@ * (C) 2019-2022 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#ifndef __STRINGS_H -#define __STRINGS_H +#pragma once #include <string.h> @@ -15,5 +14,3 @@ #define bcmp(b1, b2, len) memcmp(b1, b2, len) #define index(s, c) strchr(s, c) #define rindex(s, 
c) strrchr(s, c) - -#endif diff --git a/libpsn00b/include/sys/fcntl.h b/libpsn00b/include/sys/fcntl.h index dfbf5b2..54c2d05 100644 --- a/libpsn00b/include/sys/fcntl.h +++ b/libpsn00b/include/sys/fcntl.h @@ -1,8 +1,10 @@ -#ifndef _SYS_FCNTL_H -#define _SYS_FCNTL_H +/* + * PSn00bSDK kernel API library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once -// File control mode flags for BIOS file functions -// (many weren't documented in nocash docs) #define FREAD 0x1 // Read #define FWRITE 0x2 // Write #define FNBLOCK 0x4 // Non-blocking read access @@ -16,5 +18,3 @@ #define FNBUF 0x4000 // No ring buffer and terminal interrupt #define FASYNC 0x8000 // Asynchronous I/O #define FNBLOCKS(a) (a<<16) // Number of blocks? (from nocash docs) - -#endif
\ No newline at end of file diff --git a/libpsn00b/include/sys/ioctl.h b/libpsn00b/include/sys/ioctl.h new file mode 100644 index 0000000..af65e5d --- /dev/null +++ b/libpsn00b/include/sys/ioctl.h @@ -0,0 +1,13 @@ +/* + * PSn00bSDK kernel API library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ + +#pragma once + +#define EOF -1 + +#define FIONBLOCK (('f'<<8)|1) +#define FIOCSCAN (('f'<<8)|2) + +#define DIOFORMAT (('d'<<8)|1) diff --git a/libpsn00b/include/sys/types.h b/libpsn00b/include/sys/types.h index da43590..9075b5e 100644 --- a/libpsn00b/include/sys/types.h +++ b/libpsn00b/include/sys/types.h @@ -1,13 +1,13 @@ -#ifndef _TYPES_H -#define _TYPES_H +/* + * PSn00bSDK standard library + * (C) 2019-2023 Lameguy64, spicyjpeg - MPL licensed + */ -//#warning "<sys/types.h> and u_* types are deprecated, include <stdint.h> instead" +#pragma once -//#include <stdint.h> +//#warning "<sys/types.h> and u_* types are deprecated, use <stdint.h> instead" typedef unsigned char u_char; typedef unsigned short u_short; typedef unsigned int u_int; typedef unsigned long u_long; - -#endif // _TYPES_H
\ No newline at end of file diff --git a/libpsn00b/libc/memcmp.s b/libpsn00b/libc/memcmp.s deleted file mode 100644 index ec1e729..0000000 --- a/libpsn00b/libc/memcmp.s +++ /dev/null @@ -1,31 +0,0 @@ -# High speed ASM memcmp implementation by Lameguy64 -# -# Part of PSn00bSDK - -.set noreorder - -.section .text - -# Arguments: -# a0 - buffer 1 address -# a1 - buffer 2 address -# a2 - bytes to compare -.global memcmp -.type memcmp, @function -memcmp: - blez $a2, .Lexit - addi $a2, -1 - lbu $v0, 0($a0) - lbu $v1, 0($a1) - addiu $a0, 1 - bne $v0, $v1, .Lmismatch - addiu $a1, 1 - b memcmp - nop -.Lmismatch: - jr $ra - sub $v0, $v1 -.Lexit: - jr $ra - move $v0, $0 -
\ No newline at end of file diff --git a/libpsn00b/libc/memcpy.s b/libpsn00b/libc/memcpy.s deleted file mode 100644 index 26edb37..0000000 --- a/libpsn00b/libc/memcpy.s +++ /dev/null @@ -1,28 +0,0 @@ -# High speed ASM memcpy implementation by Lameguy64 -# -# Part of PSn00bSDK - -.set noreorder - -.section .text - -# Arguments: -# a0 - destination address -# a1 - source adress -# a2 - bytes to copy -.global memcpy -.type memcpy, @function -memcpy: - move $v0, $a0 -.Lloop: - blez $a2, .Lexit - addi $a2, -1 - lbu $a3, 0($a1) - addiu $a1, 1 - sb $a3, 0($a0) - b .Lloop - addiu $a0, 1 -.Lexit: - jr $ra - nop -
\ No newline at end of file diff --git a/libpsn00b/libc/memmove.s b/libpsn00b/libc/memmove.s deleted file mode 100644 index 843ece7..0000000 --- a/libpsn00b/libc/memmove.s +++ /dev/null @@ -1,42 +0,0 @@ -.set noreorder - -.section .text - -# Arguments -# a0 - destination address -# a1 - source address -# a2 - bytes to move -.global memmove -.type memmove, @function -memmove: - move $v0, $a0 - sltu $v1, $a0, $a1 - blez $v1, .Linit_backward -.Lloop_forward: - blez $a2, .Lexit - addi $a2, -1 - lbu $v1, 0($a1) - addiu $a1, 1 - sb $v1, 0($a0) - addiu $a0, 1 - b .Lloop_forward - nop -.Linit_backward: - addu $a0, $a2 - addu $a1, $a2 - addiu $a0, -1 - addiu $a1, -1 - b .Lloop_backward - nop -.Lloop_backward: - blez $a2, .Lexit - addi $a2, -1 - lbu $v1, 0($a1) - addiu $a1, -1 - sb $v1, 0($a0) - addiu $a0, -1 - b .Lloop_backward - nop -.Lexit: - jr $ra - nop
\ No newline at end of file diff --git a/libpsn00b/libc/start.c b/libpsn00b/libc/start.c index 9ff09c8..dcbad2d 100644 --- a/libpsn00b/libc/start.c +++ b/libpsn00b/libc/start.c @@ -11,11 +11,13 @@ #define KERNEL_ARG_STRING ((const char *) 0x80000180) #define KERNEL_RETURN_VALUE ((volatile int *) 0x8000dffc) -/* Argument parsing */ +/* BIOS argv parser (unused, interferes with child executable argv passing) */ int __argc; const char **__argv; +#if 0 + #define ARGC_MAX 16 static const char *_argv_buffer[ARGC_MAX]; @@ -48,6 +50,8 @@ static void _parse_kernel_args(void) { } } +#endif + /* Main */ // These are defined by the linker script. Note that these are *NOT* pointers, @@ -66,11 +70,10 @@ extern int main(int argc, const char* argv[]); // Even though _start() usually takes no arguments, this implementation allows // parent executables to pass args directly to child executables without having // to overwrite the arg strings in kernel RAM. -void _start_inner(int32_t override_argc, const char **override_argv) { +void _start_inner(int argc, const char **argv) { //__asm__ volatile("la $gp, _gp;"); - // Clear BSS 4 bytes at a time. BSS is always aligned to 4 bytes by the - // linker script. + // BSS is always aligned to 4 bytes by the linker script. for (uint32_t *i = (uint32_t *) __bss_start; i < (uint32_t *) _end; i++) *i = 0; @@ -78,17 +81,14 @@ void _start_inner(int32_t override_argc, const char **override_argv) { // RAM. Note that InitHeap() can be called again in main(). InitHeap((void *) _end + 4, (void *) 0x801ffff8 - (void *) _end); - if (override_argv) { - __argc = override_argc; - __argv = override_argv; - } else { - _parse_kernel_args(); - } + //_parse_kernel_args(); + __argc = argc; + __argv = argv; // Call the global constructors (if any) to initialize global objects // before calling main(). Constructors are put by the linker script in a // length-prefixed array in reverse order. 
- for (uint32_t i = (uint32_t) __CTOR_LIST__[0]; i >= 1; i--) + for (int i = (int) __CTOR_LIST__[0]; i >= 1; i--) __CTOR_LIST__[i](); // Store main()'s return value into the kernel return value area (for child @@ -96,6 +96,6 @@ void _start_inner(int32_t override_argc, const char **override_argv) { *KERNEL_RETURN_VALUE = main(__argc, __argv); // Call global destructors (in forward order). - for (uint32_t i = 0; i < (uint32_t) __DTOR_LIST__[0]; i++) + for (int i = 0; i < (int) __DTOR_LIST__[0]; i++) __DTOR_LIST__[i + 1](); } diff --git a/libpsn00b/libc/string.c b/libpsn00b/libc/string.c index a1a9a05..dbc2621 100644 --- a/libpsn00b/libc/string.c +++ b/libpsn00b/libc/string.c @@ -1,295 +1,457 @@ /* - * string.c - * - * Inherited from PSXSDK C library + * PSn00bSDK standard library + * (C) 2019-2023 PSXSDK authors, Lameguy64, spicyjpeg - MPL licensed */ -#include <stdio.h> -#include <string.h> +#include <stdint.h> +#include <stddef.h> #include <stdlib.h> +#include <string.h> // Uncomment to enable strtod(), strtold() and strtof(). Note that these // functions use extremely slow software floats. //#define ALLOW_FLOAT -int tolower(int chr) -{ - return (chr >='A' && chr<='Z') ? (chr + 32) : (chr); +/* Character manipulation */ + +int isprint(int ch) { + return (ch >= ' ') && (ch <= '~'); } -int toupper(int chr) -{ - return (chr >='a' && chr<='z') ? 
(chr - 32) : (chr); +int isgraph(int ch) { + return (ch > ' ') && (ch <= '~'); } -// Need to be replaced with MIPS assembler equivalents +int isspace(int ch) { + return (ch == ' ') || ((ch >= '\t') && (ch <= '\r')); +} -void *memchr(void *s , int c , int n) -{ - while(n--) - { - if(*((unsigned char*)s) == (unsigned char)c) - return s; - - s++; - } - - return NULL; +int isblank(int ch) { + return (ch == ' ') || (ch == '\t'); } -char *strncpy(char *dst, const char *src, int len) -{ - char *odst=dst; +int isalpha(int ch) { + return ((ch >= 'A') && (ch <= 'Z')) || ((ch >= 'a') && (ch <= 'z')); +} - while(*src && len) - { - *(dst++) = *(src++); - len--; - } - - *dst = 0; - - return odst; +int isdigit(int ch) { + return (ch >= '0') && (ch <= '9'); } -char *strcpy(char *dst, const char *src) -{ - char *odst = dst; +int tolower(int ch) { + if ((ch >= 'A') && (ch <= 'Z')) + ch += 'a' - 'A'; - while(*(dst++) = *(src++)); - return odst; + return ch; } -char *strcat(char *dst, const char *src) -{ - char *o=dst; - - while(*dst) - dst++; - - strcpy(dst, src); - - return o; +int toupper(int ch) { + if ((ch >= 'a') && (ch <= 'z')) + ch += 'A' - 'a'; + + return ch; } -char *strncat(char *s, const char *append, int len) -{ - char *o=s; - - while(*s) - s++; - - strncpy(s, append, len); - - return o; +/* Memory buffer manipulation */ + +// TODO: replace more of these with optimized assembly implementations + +/*void *memset(void *dest, int ch, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + + for (; count; count--) + *(_dest++) = (uint8_t) ch; + + return dest; +}*/ + +void *memcpy(void *restrict dest, const void *restrict src, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; + + for (; count; count--) + *(_dest++) = *(_src++); + + return dest; } -int strlen(const char *str) -{ - int i = 0; - while(*(str++))i++; - return i; +void *memccpy(void *restrict dest, const void *restrict src, int ch, size_t count) { + uint8_t *_dest = 
(uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; + + for (; count; count--) { + uint8_t a = *(_src++); + + *(_dest++) = a; + if (a == ch) + return (void *) _dest; + } + + return 0; } -char *strchr(const char *s, int c) -{ - int x; +void *memmove(void *dest, const void *src, size_t count) { + uint8_t *_dest = (uint8_t *) dest; + const uint8_t *_src = (const uint8_t *) src; - for(x = 0; x <= strlen(s); x++) - if(s[x] == c) return (char*)&s[x]; + if (_dest == _src) + return dest; + if ((_dest >= &_src[count]) || (&_dest[count] <= _src)) + return memcpy(dest, src, count); - return NULL; + if (_dest < _src) { // Copy forwards + for (; count; count--) + *(_dest++) = *(_src++); + } else { // Copy backwards + _src += count; + _dest += count; + + for (; count; count--) + *(--_dest) = *(--_src); + } + + return dest; } -char *strrchr(const char *s, int c) -{ - int x; +int memcmp(const void *lhs, const void *rhs, size_t count) { + const uint8_t *_lhs = (const uint8_t *) lhs; + const uint8_t *_rhs = (const uint8_t *) rhs; + + for (; count; count--) { + uint8_t a = *(_lhs++), b = *(_rhs++); - for(x = strlen(s); x>=0; x--) - if(s[x] == c) return (char*)&s[x]; + if (a != b) + return a - b; + } - return NULL; + return 0; } -char *strpbrk(const char *s, const char *charset) -{ - int x,y; +void *memchr(const void *ptr, int ch, size_t count) { + const uint8_t *_ptr = (const uint8_t *) ptr; - for(x = 0; x < strlen(s); x++) - for(y = 0; y < strlen(charset); y++) - if(s[x] == charset[y]) return (char*)&s[x]; + for (; count; count--, _ptr++) { + if (*_ptr == ch) + return (void *) _ptr; + } - return NULL; + return 0; } -char *strstr(const char *big, const char *little) -{ - int ls = strlen(little); - int bs = strlen(big); - int x; +/* String manipulation */ - if(ls == 0) - return (char*)big; - - if(ls > bs) - return NULL; +char *strcpy(char *restrict dest, const char *restrict src) { + char *_dest = dest; - for(x = 0; x <= bs-ls; x++) - if(memcmp(little, &big[x], ls) == 0) 
- return (char*)&big[x]; + while (*src) + *(_dest++) = *(src++); - return NULL; + *_dest = 0; + return dest; } -int strcmp(const char *s1, const char *s2) -{ - while((*s1) && (*s2) && (*s1 == *s2)) - { - s1++; - s2++; +char *strncpy(char *restrict dest, const char *restrict src, size_t count) { + char *_dest = dest; + + for (; count && *src; count--) + *(_dest++) = *(src++); + for (; count; count--) + *(_dest++) = 0; + + return dest; +} + +int strcmp(const char *lhs, const char *rhs) { + for (;;) { + char a = *(lhs++), b = *(rhs++); + + if (a != b) + return a - b; + if (!a && !b) + return 0; + } +} + +int strncmp(const char *lhs, const char *rhs, size_t count) { + for (; count && *lhs && *rhs; count--) { + char a = *(lhs++), b = *(rhs++); + + if (a != b) + return a - b; } - return(*s1-*s2); + return 0; } -int strncmp(const char *s1, const char *s2, int len) -{ - int p = 0; +char *strchr(const char *str, int ch) { + for (; *str; str++) { + if (*str == ch) + return (char *) str; + } - while(*s1 && *s2 && (*s1 == *s2) && p<len) - { - p++; + return 0; +} + +char *strrchr(const char *str, int ch) { + size_t length = strlen(str); + + for (str += length; length; length--) { + str--; + if (*str == ch) + return (char *) str; + } + + return 0; +} + +char *strpbrk(const char *str, const char *breakset) { + for (; *str; str++) { + char a = *str; - if(p<len) - { - s1++; - s2++; + for (const char *ch = breakset; *ch; ch++) { + if (a == *ch) + return (char *) str; } } - return *s1-*s2; + return 0; } -// Requires a malloc implementation -char *strdup(const char *str) -{ - char *ns = (void*)malloc(strlen(str) + 1); +char *strstr(const char *str, const char *substr) { + size_t length = strlen(substr); - if(ns == NULL) - return NULL; - - strcpy(ns, str); - return ns; + if (!length) + return (char *) str; + + for (; *str; str++) { + if (!memcmp(str, substr, length)) + return (char *) str; + } + + return 0; } -char *strndup(const char *str, int len) -{ - int n=strlen(str); - char *ns = 
(void*)malloc((n+1)>len?len:(n+1)); +size_t strlen(const char *str) { + size_t length = 0; - if(ns == NULL) - return NULL; - - strncpy(ns, str, (n+1)>len?len:(n+1)); - return ns; + for (; *str; str++) + length++; + + return length; } - -long long strtoll(const char *nptr, char **endptr, int base) -{ - int r = 0; - int t = 0; - int n = 0; - - if(*nptr == '-') - { - nptr++; - n = 1; + +// Non-standard, used internally +size_t strnlen(const char *str, size_t count) { + size_t length = 0; + + for (; *str && (length < count); str++) + length++; + + return length; +} + +char *strcat(char *restrict dest, const char *restrict src) { + char *_dest = &dest[strlen(dest)]; + + while (*src) + *(_dest++) = *(src++); + + *_dest = 0; + return dest; +} + +char *strncat(char *restrict dest, const char *restrict src, size_t count) { + char *_dest = &dest[strlen(dest)]; + + for (; count && *src; count--) + *(_dest++) = *(src++); + + *_dest = 0; + return dest; +} + +char *strdup(const char *str) { + size_t length = strlen(str) + 1; + char *copy = malloc(length); + + if (!copy) + return 0; + + memcpy(copy, str, length); + return copy; +} + +char *strndup(const char *str, size_t count) { + size_t length = strnlen(str, count) + 1; + char *copy = malloc(length); + + if (!copy) + return 0; + + memcpy(copy, str, length); + return copy; +} + +/* String tokenizer */ + +static char *_strtok_ptr = 0, *_strtok_end_ptr = 0; + +char *strtok(char *restrict str, const char *restrict delim) { + if (str) { + _strtok_ptr = str; + _strtok_end_ptr = &str[strlen(str)]; } - if(base == 0) - if(*nptr == '0') - base = 8; - else - base = 10; + if (_strtok_ptr >= _strtok_end_ptr) + return 0; + if (!(*_strtok_ptr)) + return 0; + + char *split = strstr(_strtok_ptr, delim); + char *token = _strtok_ptr; + + if (split) { + *(split++) = 0; + _strtok_ptr = split; + } else { + _strtok_ptr += strlen(token); + } - if(!(base >= 2 && base <= 36)) + return token; +} + +/* Number parsers */ + +long long strtoll(const char 
*restrict str, char **restrict str_end, int base) { + if (!str) return 0; - if(base == 16 && *nptr == '0') - { - if(*(nptr+1) == 'x' || *(nptr+1) == 'X') - nptr+=2; + while (isspace(*str)) + str++; + + int negative = (*str == '-'); + if (negative) + str++; + + while (isspace(*str)) + str++; + + // Parse any base prefix if present. If a base was specified make sure it + // matches, otherwise use it to determine which base the value is in. + long long value = 0; + + if (*str == '0') { + int _base; + + switch (str[1]) { + case 0: + goto _exit_loop; + + case 'X': + case 'x': + _base = 16; + str += 2; + break; + + case 'O': + case 'o': + _base = 8; + str += 2; + break; + + case 'B': + case 'b': + _base = 2; + str += 2; + break; + + default: + // Numbers starting with a zero are *not* interpreted as octal + // unless base = 8. + _base = 0; + str++; + } + + if (!base) + base = _base; + else if (base != _base) + return 0; } - while(*nptr) - { - switch(*nptr) - { - case '0'...'9': - t = *nptr - '0'; - break; - case 'a' ... 'z': - t = (*nptr - 'a') + 10; - break; + if (!base) + base = 10; + else if ((base < 2) || (base > 36)) + return 0; + + // Parse the actual value. + for (; *str; str++) { + char ch = *str; + int digit; + + switch (ch) { + case '0' ... '9': + digit = ch - '0'; + break; + case 'A' ... 'Z': - t = (*nptr - 'A') + 10; - break; + digit = (ch - 'A') + 10; + break; + + case 'a' ... 'z': + digit = (ch - 'a') + 10; + break; + default: - t = 1000; - break; + goto _exit_loop; } - if(t>=base) - break; - - r*=base; - r+=t; - nptr++; + value = (value * base) + digit; } - if(endptr)*endptr = (char*)nptr; - return n?-r:r; +_exit_loop: + if (str_end) + *str_end = (char *) str; + + return negative ? 
(-value) : value; } -long strtol(const char *nptr, char **endptr, int base) -{ - return (long)strtoll(nptr, endptr, base); +long strtol(const char *restrict str, char **restrict str_end, int base) { + return (long) strtoll(str, str_end, base); } #ifdef ALLOW_FLOAT -double strtod(const char *nptr, char **endptr) -{ +double strtod(const char *restrict str, char **restrict str_end) { char strbuf[64]; int x = 0; int y; double i=0, d=0; int s=1; - if(*nptr == '-') + if(*str == '-') { - nptr++; + str++; s=-1; } - while(*nptr >= '0' && *nptr <= '9' && x < 18) - strbuf[x++] = *(nptr++); + while(*str >= '0' && *str <= '9' && x < 18) + strbuf[x++] = *(str++); strbuf[x] = 0; i = (double)strtoll(strbuf, NULL, 10); - if(*nptr == '.') + if(*str == '.') { - nptr++; + str++; x = 0; - while(*nptr >= '0' && *nptr <= '9' && x < 7) - strbuf[x++] = *(nptr++); + while(*str >= '0' && *str <= '9' && x < 7) + strbuf[x++] = *(str++); strbuf[x] = 0; - if(endptr != NULL) *endptr = (char*)nptr; + if(str_end != NULL) *str_end = (char*)str; y=1; @@ -301,67 +463,19 @@ double strtod(const char *nptr, char **endptr) } else { - if(endptr != NULL) - *endptr = (char*)nptr; + if(str_end != NULL) + *str_end = (char*)str; } return (i + d)*s; } -#endif - -/* implementation by Lameguy64, behaves like OpenWatcom's strtok() */ -/* BIOS strtok seemed either bugged, or designed for wide chars */ - -static char *_strtok_curpos; -static char *_strtok_endpos; - -char *strtok( char *s1, char *s2 ) -{ - char *c,*t; - - if( s1 ) - { - _strtok_curpos = s1; - _strtok_endpos = s1+strlen( s1 ); - } - else - { - if( _strtok_curpos >= _strtok_endpos ) - return( NULL ); - } - - if( !*_strtok_curpos ) - return( NULL ); - - if( c = strstr( _strtok_curpos, s2 ) ) - { - *c = 0; - t = _strtok_curpos; - _strtok_curpos = c+1; - return( t ); - } - else - { - t = _strtok_curpos; - _strtok_curpos += strlen( t ); - return( t ); - } - - return( NULL ); - -} /* strtok */ - -#ifdef ALLOW_FLOAT - -long double strtold(const char *nptr, 
char **endptr) -{ - return (long double)strtod(nptr, endptr); +long double strtold(const char *restrict str, char **restrict str_end) { + return (long double) strtod(str, str_end); } -float strtof(const char *nptr, char **endptr) -{ - return (float)strtod(nptr, endptr); +float strtof(const char *restrict str, char **restrict str_end) { + return (float) strtod(str, str_end); } #endif diff --git a/libpsn00b/lzp/bit.h b/libpsn00b/lzp/bit.h index 321160a..5e7ed23 100644 --- a/libpsn00b/lzp/bit.h +++ b/libpsn00b/lzp/bit.h @@ -1,5 +1,5 @@ -#ifndef _LZP_BIT_H -#define _LZP_BIT_H + +#pragma once extern const unsigned char* inPtr; extern int inBytes; @@ -21,6 +21,3 @@ int get_bits(int n); #ifdef __cplusplus } #endif - - -#endif // _LZP_BIT_H diff --git a/libpsn00b/lzp/compress.c b/libpsn00b/lzp/compress.c index 9cfc64d..16cb606 100644 --- a/libpsn00b/lzp/compress.c +++ b/libpsn00b/lzp/compress.c @@ -1,7 +1,7 @@ // Based on ilia muraviev's CRUSH compressor program which falls under public domain #include <string.h> -#if LZP_USE_MALLOC == TRUE +#ifdef LZP_USE_MALLOC #include <stdlib.h> #endif @@ -11,7 +11,7 @@ // Internal structure for hash table allocation sizes -#if LZP_NO_COMPRESS == FALSE +#ifndef LZP_NO_COMPRESS struct { short WindowSize; // Window size (17 - 23) @@ -67,7 +67,7 @@ struct { // LZ77 // -#if LZP_NO_COMPRESS == FALSE +#ifndef LZP_NO_COMPRESS int update_hash1(int h, int c) { @@ -108,13 +108,13 @@ int get_penalty(int a, int b) { int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) { - #if LZP_USE_MALLOC == FALSE +#ifndef LZP_USE_MALLOC int head[HASH1_SIZE+HASH2_SIZE]; int prev[W_SIZE]; - #else +#else int* head = malloc(4*(HASH1_SIZE+HASH2_SIZE)); int* prev = malloc(4*W_SIZE); - #endif +#endif int max_chain[] = {4, 256, 1<<12}; @@ -319,10 +319,10 @@ int lzCompress(void* outBuff, const void* inBuff, int inSize, int level) { flush_bits(); - #if LZP_USE_MALLOC == TRUE +#ifdef LZP_USE_MALLOC free(head); free(prev); - #endif +#endif 
return(outBytes); diff --git a/tools/lzpack/lzp/lzconfig.h b/libpsn00b/lzp/lzconfig.h index 65e623c..83579a3 100644 --- a/tools/lzpack/lzp/lzconfig.h +++ b/libpsn00b/lzp/lzconfig.h @@ -3,29 +3,9 @@ * \details Define settings will only take effect when you recompile the library. */ -#ifndef _LZP_CONFIG_H -#define _LZP_CONFIG_H +#pragma once - -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - - -/* Set to TRUE to compile without data compression routines useful if you - * plan to use this library on a program that does not require said routines - * especially on a platform with limited memory (such as the PlayStation). - * - * This define will rule out lzCompress(), lzSetHashSizes() and - * lzResetHashSizes() functions and their associated functions. - */ -#define LZP_NO_COMPRESS FALSE - - -/* Set to TRUE to make default compression table sizes to maximum and works best +/* Uncomment to make default compression table sizes to maximum and works best * when compressing large amounts of data. LZP_USE_MALLOC must be set to TRUE to * prevent stack overflow errors. * @@ -34,21 +14,16 @@ * * This define only affects lzCompress(). */ -#define LZP_MAX_COMPRESS TRUE - +//#define LZP_MAX_COMPRESS /* Uncomment to make the library use malloc() instead of array initializers to * allocate hash tables. Enabling this is a must if you plan to use large hash * and window table sizes. */ -#define LZP_USE_MALLOC TRUE +//#define LZP_USE_MALLOC -/* Hash table sizes (in power-of-two multiple units) - * - * These define only affect lzCompress(). 
- */ -#if LZP_MAX_COMPRESS == TRUE +#if defined(PSN00BSDK) && !defined(LZP_MAX_COMPRESS) // Minimal defaults #define LZP_WINDOW_SIZE 17 @@ -57,12 +32,11 @@ #else +#define LZP_USE_MALLOC + // Maximum defaults #define LZP_WINDOW_SIZE 17 #define LZP_HASH1_SIZE 22 #define LZP_HASH2_SIZE 24 #endif - - -#endif // _LZP_CONFIG_H diff --git a/libpsn00b/lzp/lzp.h b/libpsn00b/lzp/lzp.h index 456de02..1aeea30 100644 --- a/libpsn00b/lzp/lzp.h +++ b/libpsn00b/lzp/lzp.h @@ -1,20 +1,29 @@ -/*! \file lzp.h - * \brief Main library header +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed */ -/*! \mainpage - * \version 0.20b - * \author John Wilbert 'Lameguy64' Villamor +/** + * @file lzp.h + * @brief Utility library for file bundling and compression * - * \section creditsSection Credits - * - LZ77 data compression/decompression routines based from Ilya Muravyov's - * crush.cpp released under public domain. Refined and ported to C by Lameguy64. - * - CRC calculation routines based from Lammert Bies' lib_crc routines. + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). * + * This header provides the LZ77 compression API and functions to parse and + * decompress .LZP archives after they have been loaded into memory. + * + * @section creditsSection Credits + * - LZ77 data compression/decompression routines based from Ilya Muravyov's + * crush.cpp released under public domain. Refined and ported to C by + * Lameguy64. + * - CRC calculation routines based from Lammert Bies' lib_crc routines. 
*/ -#ifndef _LZPACK_H -#define _LZPACK_H +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -218,6 +227,3 @@ int lzpUnpackFile(void* buff, const LZP_HEAD* lzpack, int fileNum); #ifdef __cplusplus } #endif - - -#endif // _LZPACK_H diff --git a/libpsn00b/lzp/lzqlp.h b/libpsn00b/lzp/lzqlp.h index 32ce0d7..127f263 100644 --- a/libpsn00b/lzp/lzqlp.h +++ b/libpsn00b/lzp/lzqlp.h @@ -1,5 +1,23 @@ -#ifndef _QLP_H -#define _QLP_H +/* + * liblzp data compression library + * (C) 2019 Lameguy64 - MPL licensed + */ + +/** + * @file lzqlp.h + * @brief Utility library for file bundling + * + * @details This library implements a simple in-memory archive format which + * can be used to package and compress assets for faster loading, as well as a + * generic LZ77 compressor and matching decompressor. Two archive formats are + * supported, one uncompressed (.QLP) and one with individually compressed + * entries (.LZP). + * + * This header provides functions to parse .QLP archives and retrieve pointers + * to their contents after they have been loaded into memory. + */ + +#pragma once #include <stdint.h> #ifdef _WIN32 @@ -23,9 +41,17 @@ typedef struct { uint32_t offs; } QLP_FILE; + +// Function prototypes +#ifdef __cplusplus +extern "C" { +#endif + int qlpFileCount(const QLP_HEAD* qlpfile); const QLP_FILE* qlpFileEntry(int index, const QLP_HEAD* qlpfile); const void* qlpFileAddr(int index, const QLP_HEAD* qlpfile); int qlpFindFile(char* fileName, const QLP_HEAD* qlpfile); -#endif // _QLP_H
\ No newline at end of file +#ifdef __cplusplus +} +#endif diff --git a/libpsn00b/psxapi/_syscalls.s b/libpsn00b/psxapi/_syscalls.s index 6eaed72..5062b15 100644 --- a/libpsn00b/psxapi/_syscalls.s +++ b/libpsn00b/psxapi/_syscalls.s @@ -1,26 +1,28 @@ # PSn00bSDK syscall wrappers -# (C) 2022 spicyjpeg - MPL licensed +# (C) 2022-2023 spicyjpeg - MPL licensed .set noreorder +## Interrupt enable/disable + .section .text.EnterCriticalSection .global EnterCriticalSection .type EnterCriticalSection, @function EnterCriticalSection: - li $a0, 0x01 + li $a0, 0x01 syscall 0 - jr $ra + jr $ra nop .section .text.ExitCriticalSection .global ExitCriticalSection .type ExitCriticalSection, @function ExitCriticalSection: - li $a0, 0x02 + li $a0, 0x02 syscall 0 - jr $ra + jr $ra nop .section .text.SwEnterCriticalSection @@ -31,7 +33,7 @@ SwEnterCriticalSection: li $a1, -1026 and $a1, $a0 mtc0 $a1, $12 - andi $a0, 0x0401 # return ((cop0r12_prev & 0x401) == 0x401) + andi $a0, 0x0401 # return !((cop0r12_prev & 0x401) < 0x401) sltiu $v0, $a0, 0x0401 jr $ra @@ -49,3 +51,100 @@ SwExitCriticalSection: jr $ra nop + +## PCDRV (host file access) API + +.section .text.PCinit +.global PCinit +.type PCinit, @function +PCinit: + break 0, 0x101 # () -> error + + jr $ra + nop + +.section .text.PCcreat +.global PCcreat +.type PCcreat, @function +PCcreat: + li $a2, 0 + move $a1, $a0 + break 0, 0x102 # (path, path, 0) -> error, fd + + bgez $v0, .Lcreate_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lcreate_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCopen +.global PCopen +.type PCopen, @function +PCopen: + move $a2, $a1 + move $a1, $a0 + break 0, 0x103 # (path, path, mode) -> error, fd + + bgez $v0, .Lopen_ok # if (error < 0) fd = error + nop + move $v1, $v0 +.Lopen_ok: + jr $ra # return fd + move $v0, $v1 + +.section .text.PCclose +.global PCclose +.type PCclose, @function +PCclose: + move $a1, $a0 + break 0, 0x104 # (fd, fd) -> error + + jr $ra + nop + +.section .text.PCread 
+.global PCread +.type PCread, @function +PCread: + move $a3, $a1 + move $a1, $a0 + break 0, 0x105 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lread_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lread_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PCwrite +.global PCwrite +.type PCwrite, @function +PCwrite: + move $a3, $a1 + move $a1, $a0 + break 0, 0x106 # (fd, fd, length, data) -> error, length + + bgez $v0, .Lwrite_ok # if (error < 0) length = error + nop + move $v1, $v0 +.Lwrite_ok: + jr $ra # return length + move $v0, $v1 + +.section .text.PClseek +.global PClseek +.type PClseek, @function +PClseek: + move $a3, $a2 + move $a2, $a1 + move $a1, $a0 + break 0, 0x107 # (fd, fd, offset, mode) -> error, offset + + bgez $v0, .Lseek_ok # if (error < 0) offset = error + nop + move $v1, $v0 +.Lseek_ok: + jr $ra # return offset + move $v0, $v1 diff --git a/libpsn00b/psxapi/drivers.s b/libpsn00b/psxapi/drivers.s index d991f90..c601201 100644 --- a/libpsn00b/psxapi/drivers.s +++ b/libpsn00b/psxapi/drivers.s @@ -32,10 +32,10 @@ _96_remove: jr $t2 li $t1, 0x72 -.section .text.AddDummyTty -.global AddDummyTty -.type AddDummyTty, @function -AddDummyTty: +.section .text.add_nullcon_driver +.global add_nullcon_driver +.type add_nullcon_driver, @function +add_nullcon_driver: li $t2, 0xa0 jr $t2 li $t1, 0x99 @@ -66,26 +66,26 @@ _card_clear: ## B0 table functions (12) -.section .text.AddDev -.global AddDev -.type AddDev, @function -AddDev: +.section .text.AddDrv +.global AddDrv +.type AddDrv, @function +AddDrv: li $t2, 0xb0 jr $t2 li $t1, 0x47 -.section .text.DelDev -.global DelDev -.type DelDev, @function -DelDev: +.section .text.DelDrv +.global DelDrv +.type DelDrv, @function +DelDrv: li $t2, 0xb0 jr $t2 li $t1, 0x48 -.section .text.ListDev -.global ListDev -.type ListDev, @function -ListDev: +.section .text.ListDrv +.global ListDrv +.type ListDrv, @function +ListDrv: li $t2, 0xb0 jr $t2 li $t1, 0x49 diff --git a/libpsn00b/psxapi/fs.s 
b/libpsn00b/psxapi/fs.s index f225d64..8b6d57a 100644 --- a/libpsn00b/psxapi/fs.s +++ b/libpsn00b/psxapi/fs.s @@ -6,12 +6,12 @@ .set noreorder -## B0 table functions (5) +## B0 table functions (6) -.section .text.chdir -.global chdir -.type chdir, @function -chdir: +.section .text.cd +.global cd +.type cd, @function +cd: li $t2, 0xb0 jr $t2 li $t1, 0x40 @@ -48,3 +48,11 @@ erase: jr $t2 li $t1, 0x45 +.section .text.undelete +.global undelete +.type undelete, @function +undelete: + li $t2, 0xb0 + jr $t2 + li $t1, 0x46 + diff --git a/libpsn00b/psxapi/stdio.s b/libpsn00b/psxapi/stdio.s index e65f871..14c6d03 100644 --- a/libpsn00b/psxapi/stdio.s +++ b/libpsn00b/psxapi/stdio.s @@ -6,7 +6,7 @@ .set noreorder -## A0 table functions (13) +## A0 table functions (14) .section .text.open .global open @@ -16,10 +16,10 @@ open: jr $t2 li $t1, 0x00 -.section .text.seek -.global seek -.type seek, @function -seek: +.section .text.lseek +.global lseek +.type lseek, @function +lseek: li $t2, 0xa0 jr $t2 li $t1, 0x01 @@ -56,6 +56,14 @@ ioctl: jr $t2 li $t1, 0x05 +.section .text.isatty +.global isatty +.type isatty, @function +isatty: + li $t2, 0xa0 + jr $t2 + li $t1, 0x07 + .section .text.getc .global getc .type getc, @function @@ -112,3 +120,21 @@ printf: jr $t2 li $t1, 0x3f +## B0 table functions (2) + +.section .text._get_errno +.global _get_errno +.type _get_errno, @function +_get_errno: + li $t2, 0xb0 + jr $t2 + li $t1, 0x54 + +.section .text._get_error +.global _get_error +.type _get_error, @function +_get_error: + li $t2, 0xb0 + jr $t2 + li $t1, 0x55 + diff --git a/libpsn00b/psxapi/stubs.json b/libpsn00b/psxapi/stubs.json index 50ffb55..afa83c6 100644 --- a/libpsn00b/psxapi/stubs.json +++ b/libpsn00b/psxapi/stubs.json @@ -8,7 +8,7 @@ { "type": "a", "id": 1, - "name": "seek", + "name": "lseek", "file": "stdio.s" }, { @@ -37,6 +37,12 @@ }, { "type": "a", + "id": 7, + "name": "isatty", + "file": "stdio.s" + }, + { + "type": "a", "id": 8, "name": "getc", "file": "stdio.s" @@ 
-109,6 +115,12 @@ }, { "type": "a", + "id": 81, + "name": "LoadExec", + "file": "sys.s" + }, + { + "type": "a", "id": 85, "name": "_bu_init", "file": "drivers.s" @@ -128,7 +140,7 @@ { "type": "a", "id": 153, - "name": "AddDummyTty", + "name": "add_nullcon_driver", "file": "drivers.s" }, { @@ -139,6 +151,18 @@ }, { "type": "a", + "id": 157, + "name": "GetConf", + "file": "sys.s" + }, + { + "type": "a", + "id": 159, + "name": "SetMem", + "file": "sys.s" + }, + { + "type": "a", "id": 160, "name": "_boot", "file": "sys.s" @@ -170,13 +194,13 @@ { "type": "b", "id": 0, - "name": "_kernel_malloc", + "name": "alloc_kernel_memory", "file": "sys.s" }, { "type": "b", "id": 1, - "name": "_kernel_free", + "name": "free_kernel_memory", "file": "sys.s" }, { @@ -296,13 +320,13 @@ { "type": "b", "id": 24, - "name": "SetDefaultExitFromException", + "name": "ResetEntryInt", "file": "sys.s" }, { "type": "b", "id": 25, - "name": "SetCustomExitFromException", + "name": "HookEntryInt", "file": "sys.s" }, { @@ -314,7 +338,7 @@ { "type": "b", "id": 64, - "name": "chdir", + "name": "cd", "file": "fs.s" }, { @@ -343,20 +367,26 @@ }, { "type": "b", + "id": 70, + "name": "undelete", + "file": "fs.s" + }, + { + "type": "b", "id": 71, - "name": "AddDev", + "name": "AddDrv", "file": "drivers.s" }, { "type": "b", "id": 72, - "name": "DelDev", + "name": "DelDrv", "file": "drivers.s" }, { "type": "b", "id": 73, - "name": "ListDev", + "name": "ListDrv", "file": "drivers.s" }, { @@ -397,6 +427,18 @@ }, { "type": "b", + "id": 84, + "name": "_get_errno", + "file": "stdio.s" + }, + { + "type": "b", + "id": 85, + "name": "_get_error", + "file": "stdio.s" + }, + { + "type": "b", "id": 86, "name": "GetC0Table", "file": "sys.s" diff --git a/libpsn00b/psxapi/sys.s b/libpsn00b/psxapi/sys.s index e2505e1..40dcdff 100644 --- a/libpsn00b/psxapi/sys.s +++ b/libpsn00b/psxapi/sys.s @@ -6,7 +6,7 @@ .set noreorder -## A0 table functions (8) +## A0 table functions (11) .section .text.b_setjmp .global b_setjmp @@ -48,6 
+48,14 @@ FlushCache: jr $t2 li $t1, 0x44 +.section .text.LoadExec +.global LoadExec +.type LoadExec, @function +LoadExec: + li $t2, 0xa0 + jr $t2 + li $t1, 0x51 + .section .text.SetConf .global SetConf .type SetConf, @function @@ -56,6 +64,22 @@ SetConf: jr $t2 li $t1, 0x9c +.section .text.GetConf +.global GetConf +.type GetConf, @function +GetConf: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9d + +.section .text.SetMem +.global SetMem +.type SetMem, @function +SetMem: + li $t2, 0xa0 + jr $t2 + li $t1, 0x9f + .section .text._boot .global _boot .type _boot, @function @@ -74,18 +98,18 @@ GetSystemInfo: ## B0 table functions (27) -.section .text._kernel_malloc -.global _kernel_malloc -.type _kernel_malloc, @function -_kernel_malloc: +.section .text.alloc_kernel_memory +.global alloc_kernel_memory +.type alloc_kernel_memory, @function +alloc_kernel_memory: li $t2, 0xb0 jr $t2 li $t1, 0x00 -.section .text._kernel_free -.global _kernel_free -.type _kernel_free, @function -_kernel_free: +.section .text.free_kernel_memory +.global free_kernel_memory +.type free_kernel_memory, @function +free_kernel_memory: li $t2, 0xb0 jr $t2 li $t1, 0x01 @@ -242,18 +266,18 @@ ReturnFromException: jr $t2 li $t1, 0x17 -.section .text.SetDefaultExitFromException -.global SetDefaultExitFromException -.type SetDefaultExitFromException, @function -SetDefaultExitFromException: +.section .text.ResetEntryInt +.global ResetEntryInt +.type ResetEntryInt, @function +ResetEntryInt: li $t2, 0xb0 jr $t2 li $t1, 0x18 -.section .text.SetCustomExitFromException -.global SetCustomExitFromException -.type SetCustomExitFromException, @function -SetCustomExitFromException: +.section .text.HookEntryInt +.global HookEntryInt +.type HookEntryInt, @function +HookEntryInt: li $t2, 0xb0 jr $t2 li $t1, 0x19 diff --git a/libpsn00b/psxcd/cdread.c b/libpsn00b/psxcd/cdread.c index d211a01..1adc255 100644 --- a/libpsn00b/psxcd/cdread.c +++ b/libpsn00b/psxcd/cdread.c @@ -89,6 +89,8 @@ static int _poll_retry(void) { /* Public API 
*/ int CdReadRetry(int sectors, uint32_t *buf, int mode, int attempts) { + _sdk_validate_args((sectors > 0) && buf && (attempts > 0), -1); + if (CdReadSync(1, 0) > 0) { _sdk_log("CdRead() failed, another read in progress (%d sectors pending)\n", _pending_sectors); return 0; diff --git a/libpsn00b/psxcd/common.c b/libpsn00b/psxcd/common.c index 8b8030b..461ab91 100644 --- a/libpsn00b/psxcd/common.c +++ b/libpsn00b/psxcd/common.c @@ -208,6 +208,9 @@ int CdInit(void) { BUS_CD_CFG = 0x00020943; + SetDMAPriority(DMA_CD, 3); + DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA + CD_REG(0) = 1; CD_REG(3) = 0x1f; // Acknowledge all IRQs CD_REG(2) = 0x1f; // Enable all IRQs @@ -217,9 +220,6 @@ int CdInit(void) { CdlATV mix = { 0x80, 0x00, 0x80, 0x00 }; CdMix(&mix); - DMA_DPCR |= 0x0000b000; // Enable DMA3 - DMA_CHCR(DMA_CD) = 0x00000000; // Stop DMA3 - _last_mode = 0; _ack_pending = 0; _sync_pending = 0; @@ -244,6 +244,8 @@ int CdInit(void) { /* Low-level command API */ int CdCommandF(CdlCommand cmd, const void *param, int length) { + _sdk_validate_args(param || (length <= 0), -1); + const uint8_t *_param = (const uint8_t *) param; _last_command = (uint8_t) cmd; @@ -283,7 +285,7 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) { __asm__ volatile(""); CD_REG(0) = 0; - for (; length; length--) + for (; length > 0; length--) CD_REG(2) = *(_param++); CD_REG(0) = 0; @@ -292,6 +294,8 @@ int CdCommandF(CdlCommand cmd, const void *param, int length) { } int CdCommand(CdlCommand cmd, const void *param, int length, uint8_t *result) { + _sdk_validate_args(param || (length <= 0), -1); + /*if (_ack_pending) { _sdk_log("CdCommand(0x%02x) failed, drive busy\n", cmd); return 0; @@ -329,8 +333,10 @@ int CdControlF(CdlCommand cmd, const void *param) { } else { // The command takes a mandatory parameter or no parameter. 
length = flags & 3; - if (length && !param) + if (length && !param) { + _sdk_log("CdControl() param is required for command 0x%02x\n", cmd); return -1; + } } return CdCommandF(cmd, param, length); diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c index 0ac782b..31ed00c 100644 --- a/libpsn00b/psxcd/isofs.c +++ b/libpsn00b/psxcd/isofs.c @@ -92,7 +92,7 @@ static int _CdReadIsoDescriptor(int session_offs) // Verify if volume descriptor is present descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff; - if( strncmp("CD001", descriptor->header.id, 5) ) + if( memcmp("CD001", descriptor->header.id, 5) ) { _sdk_log("Disc does not contain a ISO9660 file system.\n"); @@ -211,7 +211,7 @@ static int _CdReadIsoDirectory(int lba) return 0; } -#ifndef NDEBUG +#if 0 static void dump_directory(void) { @@ -228,8 +228,12 @@ static void dump_directory(void) { dir_entry = (ISO_DIR_ENTRY*)(_cd_iso_directory_buff+dir_pos); - strncpy(namebuff, - _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), dir_entry->identifierLen); + memcpy( + namebuff, + _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), + dir_entry->identifierLen + ); + namebuff[dir_entry->identifierLen] = 0; _sdk_log("P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff); @@ -271,9 +275,12 @@ static void dump_pathtable(void) while( (int)(tbl_pos-_cd_iso_pathtable_buff) < descriptor->pathTableSize.lsb ) { - strncpy(namebuff, + memcpy( + namebuff, tbl_pos+sizeof(ISO_PATHTABLE_ENTRY), - tbl_entry->nameLength); + tbl_entry->nameLength + ); + namebuff[tbl_entry->nameLength] = 0; _sdk_log("%s\n", namebuff); @@ -308,9 +315,12 @@ static int get_pathtable_entry(int entry, ISO_PATHTABLE_ENTRY *tbl, char *namebu { if( namebuff ) { - strncpy(namebuff, + memcpy( + namebuff, tbl_pos+sizeof(ISO_PATHTABLE_ENTRY), - tbl_entry->nameLength); + tbl_entry->nameLength + ); + namebuff[tbl_entry->nameLength] = 0; } if( tbl ) @@ -381,9 +391,12 @@ static int find_dir_entry(const char *name, ISO_DIR_ENTRY *dirent) if( 
!(dir_entry->flags & 0x2) ) { - strncpy(namebuff, + memcpy( + namebuff, _cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), - dir_entry->identifierLen); + dir_entry->identifierLen + ); + namebuff[dir_entry->identifierLen] = 0; if( strcmp(namebuff, name) == 0 ) { @@ -422,7 +435,8 @@ static char* get_pathname(char *path, const char *filename) return NULL; } - strncpy(path, filename, (int)(c-filename)); + memcpy(path, filename, c - filename); + path[c - filename] = 0; return path; } @@ -450,6 +464,8 @@ static char* get_filename(char *name, const char *filename) CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) { + _sdk_validate_args(fp && filename, NULL); + int i,j,found_dir,num_dirs; int dir_len; char tpath_rbuff[128]; @@ -553,6 +569,8 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename) CdlDIR *CdOpenDir(const char* path) { + _sdk_validate_args(path, NULL); + CdlDIR_INT* dir; int num_dirs; int i,found_dir; @@ -605,7 +623,11 @@ CdlDIR *CdOpenDir(const char* path) _sdk_log( "Directory LBA = %d\n", tbl_entry.dirOffs ); _CdReadIsoDirectory( tbl_entry.dirOffs ); - + +#ifndef NDEBUG + //dump_directory(); +#endif + dir = (CdlDIR_INT*)malloc( sizeof(CdlDIR_INT) ); dir->_len = _cd_iso_directory_len; @@ -631,6 +653,8 @@ CdlDIR *CdOpenDir(const char* path) int CdReadDir(CdlDIR *dir, CdlFILE* file) { + _sdk_validate_args(dir && file, 0); + CdlDIR_INT* d_dir; ISO_DIR_ENTRY* dir_entry; @@ -658,9 +682,12 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file) } else { - strncpy( file->name, + memcpy( + file->name, d_dir->_dir+d_dir->_pos+sizeof(ISO_DIR_ENTRY), - dir_entry->identifierLen ); + dir_entry->identifierLen + ); + file->name[dir_entry->identifierLen] = 0; } CdIntToPos( dir_entry->entryOffs.lsb, &file->pos ); @@ -683,6 +710,9 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file) void CdCloseDir(CdlDIR *dir) { + if (!dir) + return; + CdlDIR_INT* d_dir; d_dir = (CdlDIR_INT*)dir; @@ -698,6 +728,8 @@ int CdIsoError() int CdGetVolumeLabel(char *label) { + _sdk_validate_args(label, 
-1); + int i, length = 31; ISO_DESCRIPTOR* descriptor; @@ -735,7 +767,7 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result) if( _ses_scanbuff[0] == 0x1 ) { - if( strncmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 ) + if( memcmp((const char*)_ses_scanbuff+1, "CD001", 5) == 0 ) { CdControlF(CdlPause, 0); _ses_scancomplete = 1; @@ -761,6 +793,8 @@ static void _scan_callback(CdlIntrResult status, unsigned char *result) int CdLoadSession(int session) { + _sdk_validate_args(session >= 0, -1); + CdlLOC *loc; CdlCB ready_oldcb; char scanbuff[2048]; diff --git a/libpsn00b/psxcd/misc.c b/libpsn00b/psxcd/misc.c index 8fd2a4d..2f04821 100644 --- a/libpsn00b/psxcd/misc.c +++ b/libpsn00b/psxcd/misc.c @@ -12,15 +12,29 @@ #define DATA_SYNC_TIMEOUT 0x100000 -/* Private types */ - -typedef struct { - uint8_t status, first_track, last_track; -} TrackInfo; +/* Unlock command strings */ + +static const char *_unlock_strings[] = { + "", + "Licensed by", + "Sony", + "Computer", + "Entertainment", + "", + "" +}; + +static const char *const _unlock_regions[] = { + "of America", // CdlRegionSCEA + "(Europe)", // CdlRegionSCEE + "World wide" // CdlRegionSCEW +}; /* Sector DMA transfer functions */ int CdGetSector(void *madr, int size) { + _sdk_validate_args(madr && (size > 0), 0); + //while (!(CD_REG(0) & (1 << 6))) //__asm__ volatile(""); @@ -35,6 +49,8 @@ int CdGetSector(void *madr, int size) { } int CdGetSector2(void *madr, int size) { + _sdk_validate_args(madr && (size > 0), 0); + //while (!(CD_REG(0) & (1 << 6))) //__asm__ volatile(""); @@ -54,7 +70,7 @@ int CdDataSync(int mode) { return 0; } - _sdk_log("CdDataSync() timeout\n"); + _sdk_log("CdDataSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_CD)); return -1; } @@ -77,52 +93,40 @@ int CdPosToInt(const CdlLOC *p) { ) - 150; } -/* Misc. 
functions */ - -int CdGetToc(CdlLOC *toc) { - TrackInfo track_info; - - if (!CdCommand(CdlGetTN, 0, 0, (uint8_t *) &track_info)) - return 0; - if (CdSync(1, 0) != CdlComplete) - return 0; - - int first = btoi(track_info.first_track); - int tracks = btoi(track_info.last_track) + 1 - first; - //assert(first == 1); +/* Drive unlocking API */ - for (int i = 0; i < tracks; i++) { - uint8_t track = itob(first + i); +CdlRegionCode CdGetRegion(void) { + uint8_t param; + uint8_t result[16]; - if (!CdCommand(CdlGetTD, &track, 1, (uint8_t *) &toc[i])) - return 0; - if (CdSync(1, 0) != CdlComplete) - return 0; + // Firmware version C0 does not support test command 0x22 to retrieve the + // region, but it was only used in the SCPH-1000 Japanese model. Version D1 + // (and possibly others?) is used in debug consoles. + // https://psx-spx.consoledev.net/cdromdrive/#19h20h-int3yymmddver + // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe + param = 0x20; + memset(result, 0, 4); - toc[i].sector = 0; - toc[i].track = track; + if (!CdCommand(CdlTest, &param, 1, result)) { + _sdk_log("failed to probe drive firmware version\n"); + return CdlRegionUnknown; } - return tracks; -} - -CdlRegionCode CdGetRegion(void) { - uint8_t param = 0x22; - uint8_t result[16]; + _sdk_log("drive firmware version: 0x%02x\n", result[3]); + if (result[3] == 0xc0) + return CdlRegionSCEI; + if (result[3] >= 0xd0) + return CdlRegionDebug; - // Test command 0x22 is unsupported in firmware version C0, which was used - // exclusively in the SCPH-1000 Japanese model. It's thus safe to assume - // that the console is Japanese if the command returns a valid error. - // https://psx-spx.consoledev.net/cdromdrive/#19h22h-int3for-europe + param = 0x22; memset(result, 0, 16); if (!CdCommand(CdlTest, &param, 1, result)) { _sdk_log("failed to probe drive region\n"); - return (result[1] == 0x10) ?
CdlRegionSCEI : CdlRegionUnknown; + return CdlRegionUnknown; } _sdk_log("drive region: %s\n", result); - if (!strcmp(result, "for Japan")) return CdlRegionSCEI; if (!strcmp(result, "for U/C")) @@ -137,7 +141,72 @@ CdlRegionCode CdGetRegion(void) { return CdlRegionUnknown; } +int CdUnlock(CdlRegionCode region) { + if (region <= CdlRegionSCEI) + return 0; + if (region >= CdlRegionDebug) + return 1; + + // This is by far the most efficient way to do it. + _unlock_strings[5] = _unlock_regions[region - CdlRegionSCEA]; + + for (int i = 0; i < 7; i++) { + uint8_t result[4]; + + if (!CdCommand( + 0x50 + i, + _unlock_strings[i], + strlen(_unlock_strings[i]), + result + )) + return 0; + + if (!(result[0] & CdlStatError) || (result[1] != 0x40)) { + _sdk_log("unlock failed, status=0x%02x, code=0x%02x\n", result[0], result[1]); + return 0; + } + } + + _sdk_log("unlock successful\n"); + return CdCommand(CdlNop, 0, 0, 0); +} + +/* Misc. functions */ + +int CdGetToc(CdlLOC *toc) { + _sdk_validate_args(toc, 0); + + uint8_t result[4]; + + if (!CdCommand(CdlGetTN, 0, 0, result)) + return 0; + if (CdSync(1, 0) != CdlComplete) + return 0; + + int first = btoi(result[1]); + int tracks = btoi(result[2]) + 1 - first; + //assert(first == 1); + + for (int i = 0; i < tracks; i++) { + uint8_t track = itob(first + i); + + if (!CdCommand(CdlGetTD, &track, 1, result)) + return 0; + if (CdSync(1, 0) != CdlComplete) + return 0; + + toc[i].minute = result[1]; + toc[i].second = result[2]; + toc[i].sector = 0; + toc[i].track = track; + } + + return tracks; +} + int CdMix(const CdlATV *vol) { + _sdk_validate_args(vol, 0); + CD_REG(0) = 2; CD_REG(2) = vol->val0; CD_REG(3) = vol->val1; diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c index ff712eb..06302e2 100644 --- a/libpsn00b/psxetc/dl.c +++ b/libpsn00b/psxetc/dl.c @@ -112,6 +112,8 @@ static uint32_t _elf_hash(const char *str) { /* Symbol map loading/introspection API */ int DL_InitSymbolMap(int num_entries) { + 
_sdk_validate_args(num_entries, -1); + if (_symbol_map.entries) DL_UnloadSymbolMap(); @@ -151,6 +153,8 @@ void DL_UnloadSymbolMap(void) { } void DL_AddMapSymbol(const char *name, void *ptr) { + _sdk_validate_args_void(name); + uint32_t hash = _elf_hash(name); int index = _symbol_map.index; _symbol_map.index = index + 1; @@ -168,6 +172,8 @@ void DL_AddMapSymbol(const char *name, void *ptr) { } int DL_ParseSymbolMap(const char *ptr, size_t size) { + _sdk_validate_args(ptr && size, 0); + int entries = 0; // Perform a quick scan over the entire map text and count the number of @@ -232,6 +238,8 @@ int DL_ParseSymbolMap(const char *ptr, size_t size) { } void *DL_GetMapSymbol(const char *name) { + _sdk_validate_args(name, 0); + if (!_symbol_map.entries) { _sdk_log("DL_GetMapSymbol() with no map loaded\n"); return 0; @@ -275,8 +283,7 @@ void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) { /* Library loading and linking API */ DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) { - if (!dll || !ptr) - return 0; + _sdk_validate_args(dll && ptr && size, 0); dll->ptr = ptr; dll->malloc_ptr = (mode & DL_FREE_ON_DESTROY) ? 
ptr : 0; @@ -463,6 +470,8 @@ void DL_DestroyDLL(DLL *dll) { } void *DL_GetDLLSymbol(const DLL *dll, const char *name) { + _sdk_validate_args(name, 0); + if (!dll) return DL_GetMapSymbol(name); //return _dl_resolve_callback(0, name); diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c index f2a273c..8bd11fc 100644 --- a/libpsn00b/psxetc/interrupts.c +++ b/libpsn00b/psxetc/interrupts.c @@ -4,6 +4,7 @@ */ #include <stdint.h> +#include <assert.h> #include <psxapi.h> #include <psxetc.h> #include <hwregs_c.h> @@ -99,8 +100,7 @@ static void _global_dma_handler(void) { /* IRQ and DMA handler API */ void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) { - if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) - return 0; + _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0); void *old_callback = _irq_handlers[irq]; _irq_handlers[irq] = func; @@ -116,15 +116,13 @@ void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) { } void *GetInterruptCallback(IRQ_Channel irq) { - if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS)) - return 0; + _sdk_validate_args((irq >= 0) && (irq < NUM_IRQ_CHANNELS), 0); return _irq_handlers[irq]; } void *DMACallback(DMA_Channel dma, void (*func)(void)) { - if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) - return 0; + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0); void *old_callback = _dma_handlers[dma]; _dma_handlers[dma] = func; @@ -151,12 +149,34 @@ void *DMACallback(DMA_Channel dma, void (*func)(void)) { } void *GetDMACallback(DMA_Channel dma) { - if ((dma < 0) || (dma >= NUM_DMA_CHANNELS)) - return 0; + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), 0); return _dma_handlers[dma]; } +/* DMA channel priority API */ + +int SetDMAPriority(DMA_Channel dma, int priority) { + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); + + uint32_t dpcr = DMA_DPCR; + uint32_t channel = dpcr >> (dma * 4); + + dpcr &= ~(0xf << (dma * 4)); + if (priority >= 0) + dpcr |= ((priority & 7) | 8) << (dma * 
4); + + DMA_DPCR = dpcr; + return (channel & 8) ? (channel & 7) : -1; +} + +int GetDMAPriority(DMA_Channel dma) { + _sdk_validate_args((dma >= 0) && (dma < NUM_DMA_CHANNELS), -1); + + uint32_t channel = DMA_DPCR >> (dma * 4); + return (channel & 8) ? (channel & 7) : -1; +} + /* Hook installation/removal API */ int ResetCallback(void) { @@ -190,7 +210,7 @@ void RestartCallback(void) { // Install the ISR hook and prevent the kernel's internal handlers from // automatically acknowledging SPI and timer IRQs. - SetCustomExitFromException(&_isr_jmp_buf); + HookEntryInt(&_isr_jmp_buf); ChangeClearPAD(0); ChangeClearRCnt(0, 0); ChangeClearRCnt(1, 0); @@ -217,7 +237,7 @@ void StopCallback(void) { DMA_DPCR = _saved_dma_dpcr & 0x07777777; DMA_DICR = 0; - SetDefaultExitFromException(); + ResetEntryInt(); ChangeClearPAD(1); ChangeClearRCnt(0, 1); ChangeClearRCnt(1, 1); diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index e41bd31..7e0758b 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -1,6 +1,6 @@ /* * PSn00bSDK GPU library (common functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> @@ -10,9 +10,8 @@ #include <psxgpu.h> #include <hwregs_c.h> -#define QUEUE_LENGTH 16 -#define DMA_CHUNK_LENGTH 8 -#define VSYNC_TIMEOUT 0x100000 +#define QUEUE_LENGTH 16 +#define VSYNC_TIMEOUT 0x100000 static void _default_vsync_halt(void); @@ -21,7 +20,7 @@ static void _default_vsync_halt(void); typedef struct { void (*func)(uint32_t, uint32_t, uint32_t); uint32_t arg1, arg2, arg3; -} QueueEntry; +} DrawOp; /* Internal globals */ @@ -31,10 +30,10 @@ static void (*_vsync_halt_func)(void) = &_default_vsync_halt; static void (*_vsync_callback)(void) = (void *) 0; static void (*_drawsync_callback)(void) = (void *) 0; -static volatile QueueEntry _draw_queue[QUEUE_LENGTH]; -static volatile uint8_t _queue_head, _queue_tail, _queue_length; -static volatile uint32_t _vblank_counter; -static volatile 
uint16_t _last_hblank; +static volatile DrawOp _draw_queue[QUEUE_LENGTH]; +static volatile uint8_t _queue_head, _queue_tail, _queue_length, _drawop_type; +static volatile uint32_t _vblank_counter, _last_vblank; +static volatile uint16_t _last_hblank; /* Private interrupt handlers */ @@ -45,16 +44,16 @@ static void _vblank_handler(void) { _vsync_callback(); } -static void _gpu_dma_handler(void) { - //while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) - while (!(GPU_GP1 & (1 << 26))) - __asm__ volatile(""); +static void _process_drawop(void) { + int length = _queue_length; + if (!length) + return; - if (--_queue_length) { + if (--length) { int head = _queue_head; _queue_head = (head + 1) % QUEUE_LENGTH; - volatile QueueEntry *entry = &_draw_queue[head]; + volatile DrawOp *entry = &_draw_queue[head]; entry->func(entry->arg1, entry->arg2, entry->arg3); } else { GPU_GP1 = 0x04000000; // Disable DMA request @@ -62,16 +61,36 @@ static void _gpu_dma_handler(void) { if (_drawsync_callback) _drawsync_callback(); } + + _queue_length = length; +} + +static void _gpu_irq_handler(void) { + GPU_GP1 = 0x02000000; // Reset IRQ + + if (_drawop_type == DRAWOP_TYPE_GPU_IRQ) + _process_drawop(); +} + +static void _gpu_dma_handler(void) { + if (_drawop_type == DRAWOP_TYPE_DMA) + _process_drawop(); } /* GPU reset and system initialization */ void ResetGraph(int mode) { + _queue_head = 0; + _queue_tail = 0; + _queue_length = 0; + _drawop_type = 0; + // Perform some basic system initialization when ResetGraph() is called for // the first time. if (!ResetCallback()) { EnterCriticalSection(); InterruptCallback(IRQ_VBLANK, &_vblank_handler); + InterruptCallback(IRQ_GPU, &_gpu_irq_handler); DMACallback(DMA_GPU, &_gpu_dma_handler); _gpu_video_mode = (GPU_GP1 >> 20) & 1; @@ -80,28 +99,27 @@ void ResetGraph(int mode) { _sdk_log("setup done, default mode is %s\n", _gpu_video_mode ? 
"PAL" : "NTSC"); } - if (mode == 3) { + if (mode) { GPU_GP1 = 0x01000000; // Reset command buffer - return; - } - - DMA_DPCR |= 0x0b000b00; // Enable DMA2 and DMA6 - DMA_CHCR(2) = 0x00000201; // Stop DMA2 - DMA_CHCR(6) = 0x00000200; // Stop DMA6 + GPU_GP1 = 0x02000000; // Reset IRQ + GPU_GP1 = 0x04000000; // Disable DMA request - if (mode == 1) { - GPU_GP1 = 0x01000000; // Reset command buffer - return; + if (mode == 1) + return; + } else { + GPU_GP1 = 0x00000000; // Reset GPU } - GPU_GP1 = 0x00000000; // Reset GPU + SetDMAPriority(DMA_GPU, 3); + SetDMAPriority(DMA_OTC, 3); + DMA_CHCR(DMA_GPU) = 0x00000201; // Stop DMA + DMA_CHCR(DMA_OTC) = 0x00000200; // Stop DMA + TIMER_CTRL(0) = 0x0500; TIMER_CTRL(1) = 0x0500; - _queue_head = 0; - _queue_tail = 0; - _queue_length = 0; _vblank_counter = 0; + _last_vblank = 0; _last_hblank = 0; } @@ -127,10 +145,13 @@ int VSync(int mode) { if (mode < 0) return _vblank_counter; - uint32_t status = GPU_GP1; + // Wait for the specified number of vertical blank events since the last + // call to VSync() to occur (if mode >= 2) or just for a single vertical + // blank (if mode = 0). + uint32_t target = mode ? (_last_vblank + mode) : (_vblank_counter + 1); - // Wait for at least one vertical blank event to occur. - do { + while (_vblank_counter < target) { + uint32_t status = GPU_GP1; _vsync_halt_func(); // If interlaced mode is enabled, wait until the GPU starts displaying @@ -139,9 +160,11 @@ int VSync(int mode) { while (!((GPU_GP1 ^ status) & (1 << 31))) __asm__ volatile(""); } - } while ((--mode) > 0); + } + _last_vblank = _vblank_counter; _last_hblank = TIMER_VALUE(1); + return delta; } @@ -167,14 +190,13 @@ void *VSyncCallback(void (*func)(void)) { /* Command queue API */ -// This function is normally only used internally, but it is exposed for -// advanced use cases. 
-int EnqueueDrawOp( - void (*func)(uint32_t, uint32_t, uint32_t), - uint32_t arg1, - uint32_t arg2, - uint32_t arg3 -) { +void SetDrawOpType(GPU_DrawOpType type) { + _drawop_type = type; +} + +int EnqueueDrawOp(void (*func)(), uint32_t arg1, uint32_t arg2, uint32_t arg3) { + _sdk_validate_args(func, -1); + // If GPU DMA is currently busy, append the command to the queue instead of // executing it immediately. Note that interrupts must be disabled *prior* // to checking if DMA is busy; disabling them afterwards would create a @@ -202,7 +224,7 @@ int EnqueueDrawOp( _queue_tail = (tail + 1) % QUEUE_LENGTH; _queue_length = length + 1; - volatile QueueEntry *entry = &_draw_queue[tail]; + volatile DrawOp *entry = &_draw_queue[tail]; entry->func = func; entry->arg1 = arg1; entry->arg2 = arg2; @@ -225,7 +247,7 @@ int DrawSync(int mode) { if (!_queue_length) { // Wait for any DMA transfer to finish if DMA is enabled. if (GPU_GP1 & (3 << 29)) { - while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24))) + while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(DMA_GPU) & (1 << 24))) __asm__ volatile(""); } @@ -248,88 +270,17 @@ void *DrawSyncCallback(void (*func)(void)) { return old_callback; } -/* OT and primitive drawing API */ - -void ClearOTagR(uint32_t *ot, size_t length) { - DMA_MADR(6) = (uint32_t) &ot[length - 1]; - DMA_BCR(6) = length & 0xffff; - DMA_CHCR(6) = 0x11000002; +/* Queue pause/resume API */ - while (DMA_CHCR(6) & (1 << 24)) - __asm__ volatile(""); -} +int IsIdleGPU(int timeout) { + if (timeout <= 0) + timeout = 1; -void ClearOTag(uint32_t *ot, size_t length) { - // DMA6 only supports writing to RAM in reverse order (last to first), so - // the OT has to be cleared in software here. This function is thus much - // slower than ClearOTagR(). 
- // https://problemkaputt.de/psx-spx.htm#dmachannels - for (int i = 0; i < (length - 1); i++) - ot[i] = (uint32_t) &ot[i + 1] & 0x00ffffff; - - ot[length - 1] = 0x00ffffff; -} - -void AddPrim(uint32_t *ot, const void *pri) { - addPrim(ot, pri); -} - -void DrawPrim(const uint32_t *pri) { - size_t length = getlen(pri); - - DrawSync(0); - GPU_GP1 = 0x04000002; - - // NOTE: if length >= DMA_CHUNK_LENGTH then it also has to be a multiple of - // DMA_CHUNK_LENGTH, otherwise the DMA channel will get stuck waiting for - // more data indefinitely. - DMA_MADR(2) = (uint32_t) &pri[1]; - if (length < DMA_CHUNK_LENGTH) - DMA_BCR(2) = 0x00010000 | length; - else - DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); - - DMA_CHCR(2) = 0x01000201; -} - -int DrawOTag(const uint32_t *ot) { - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) ot, 0, 0); -} - -void DrawOTag2(const uint32_t *ot) { - GPU_GP1 = 0x04000002; - - while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) - __asm__ volatile(""); - - DMA_MADR(2) = (uint32_t) ot; - DMA_BCR(2) = 0; - DMA_CHCR(2) = 0x01000401; -} - -/* Misc. functions */ - -GPU_VideoMode GetVideoMode(void) { - return _gpu_video_mode; -} - -void SetVideoMode(GPU_VideoMode mode) { - uint32_t _mode, stat = GPU_GP1; - - _gpu_video_mode = mode & 1; - - _mode = (mode & 1) << 3; - _mode |= (stat >> 17) & 0x37; // GPUSTAT 17-22 -> cmd bits 0-5 - _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> cmd bit 6 - _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> cmd bit 7 - - GPU_GP1 = 0x08000000 | mode; -} - -int GetODE(void) { - return (GPU_GP1 >> 31); -} + for (; timeout; timeout--) { + if (GPU_GP1 & (1 << 26)) + return 0; + } -void SetDispMask(int mask) { - GPU_GP1 = 0x03000000 | (mask ? 
0 : 1); + //_sdk_log("IsIdleGPU() timeout\n"); + return -1; } diff --git a/libpsn00b/psxgpu/drawing.c b/libpsn00b/psxgpu/drawing.c new file mode 100644 index 0000000..161b2f7 --- /dev/null +++ b/libpsn00b/psxgpu/drawing.c @@ -0,0 +1,148 @@ +/* + * PSn00bSDK GPU library (drawing/display list functions) + * (C) 2022-2023 spicyjpeg - MPL licensed + */ + +#include <stdint.h> +#include <assert.h> +#include <psxetc.h> +#include <psxgpu.h> +#include <hwregs_c.h> + +/* Private utilities */ + +// This function is actually referenced in env.c as well, so it can't be static. +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) ot; + DMA_BCR(DMA_GPU) = 0; + DMA_CHCR(DMA_GPU) = 0x01000401; +} + +static void _send_buffer( + GPU_DrawOpType type, const uint32_t *buf, size_t length +) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) buf; + DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16); + DMA_CHCR(DMA_GPU) = 0x01000201; +} + +/* Buffer and primitive drawing API */ + +int DrawOTag(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) ot, + 0 + ); +} + +int DrawOTagIRQ(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) ot, + 0 + ); +} + +int DrawBuffer(const uint32_t *buf, size_t length) { + _sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &DrawBuffer2, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) buf, + (uint32_t) length + ); +} + +int DrawBufferIRQ(const uint32_t *buf, size_t length) { + 
_sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &DrawBuffer2, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) buf, + (uint32_t) length + ); +} + +void DrawOTag2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_DMA, ot); +} + +void DrawOTagIRQ2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_GPU_IRQ, ot); +} + +void DrawBuffer2(const uint32_t *buf, size_t length) { + _sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_DMA, buf, length); +} + +void DrawBufferIRQ2(const uint32_t *buf, size_t length) { + _sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_GPU_IRQ, buf, length); +} + +void DrawPrim(const uint32_t *pri) { + _sdk_validate_args_void(pri); + + DrawSync(0); + DrawBuffer2(&pri[1], getlen(pri)); +} + +/* Helper functions */ + +void ClearOTagR(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1]; + DMA_BCR(DMA_OTC) = length & 0xffff; + DMA_CHCR(DMA_OTC) = 0x11000002; + + while (DMA_CHCR(DMA_OTC) & (1 << 24)) + __asm__ volatile(""); +} + +void ClearOTag(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + // DMA6 only supports writing to RAM in reverse order (last to first), so + // the OT has to be cleared in software here. This function is thus much + // slower than ClearOTagR(). 
+ // https://problemkaputt.de/psx-spx.htm#dmachannels + for (int i = 0; i < (length - 1); i++) + ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff; + + ot[length - 1] = 0xffffff; +} + +void AddPrim(uint32_t *ot, const void *pri) { + _sdk_validate_args_void(ot && pri); + + addPrim(ot, pri); +} diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c index 8784947..236ae4b 100644 --- a/libpsn00b/psxgpu/env.c +++ b/libpsn00b/psxgpu/env.c @@ -1,9 +1,10 @@ /* * PSn00bSDK GPU library (DRAWENV/DISPENV functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> +#include <assert.h> #include <psxgpu.h> #include <hwregs_c.h> @@ -33,9 +34,53 @@ static inline uint32_t _get_window_mask(int size) { return mask & 0x1f; } +static const uint32_t *_build_drawenv_ot(const uint32_t *ot, DRAWENV *env) { + // All commands are grouped into a single display list packet for + // performance reasons using tagless primitives (the GPU does not care + // about the grouping as the display list is parsed by the CPU). + DR_ENV *prim = &(env->dr_env); + setaddr(prim, ot); + setlen(prim, 5); + + // Texture page (reset active page and set dither/mask bits) + setDrawTPage_T(&(prim->tpage), env->dfe & 1, env->dtd & 1, env->tpage); + + // Texture window + //setTexWindow_T(&(prim->twin), &(env->tw)); + prim->twin.code[0] = 0xe2000000; + prim->twin.code[0] |= _get_window_mask(env->tw.w); + prim->twin.code[0] |= _get_window_mask(env->tw.h) << 5; + prim->twin.code[0] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10 + prim->twin.code[0] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15 + + // Set drawing area + setDrawArea_T(&(prim->area), &(env->clip)); + setDrawOffset_T( + &(prim->offset), env->clip.x + env->ofs[0], env->clip.y + env->ofs[1] + ); + + if (env->isbg) { + FILL_T *fill = &(prim->fill); + setlen(prim, 8); + + // Rectangle fill + // FIXME: reportedly this command doesn't accept height values >511... 
+ setFill_T(fill); + setColor0(fill, *((const uint32_t *) &(env->isbg)) >> 8); + setXY0(fill, env->clip.x, env->clip.y); + setWH(fill, env->clip.w, _min(env->clip.h, 0x1ff)); + } + + return (const uint32_t *) prim; +} + /* Drawing API */ +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot); + DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { + _sdk_validate_args(env && (w > 0) && (h > 0), 0); + env->clip.x = x; env->clip.y = y; env->clip.w = w; @@ -60,69 +105,41 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { } int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { - DR_ENV *prim = &(env->dr_env); - - // All commands are grouped into a single display list packet for - // performance reasons (the GPU does not care about the grouping as the - // display list is parsed by the DMA unit in the CPU). - setaddr(prim, ot); - setlen(prim, 5); - - // Texture page (reset active page and set dither/mask bits) - prim->code[0] = 0xe1000000 | env->tpage; - prim->code[0] |= (env->dtd & 1) << 9; - prim->code[0] |= (env->dfe & 1) << 10; + _sdk_validate_args(ot && env, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); +} - // Texture window - prim->code[1] = 0xe2000000; - prim->code[1] |= _get_window_mask(env->tw.w); - prim->code[1] |= _get_window_mask(env->tw.h) << 5; - prim->code[1] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10 - prim->code[1] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15 - - // Set drawing area top left - prim->code[2] = 0xe3000000; - prim->code[2] |= env->clip.x & 0x3ff; - prim->code[2] |= (env->clip.y & 0x3ff) << 10; - - // Set drawing area bottom right - prim->code[3] = 0xe4000000; - prim->code[3] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff; - prim->code[3] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10; - - // Set drawing offset - prim->code[4] = 0xe5000000; - prim->code[4] |= (env->clip.x + 
env->ofs[0]) & 0x7ff; - prim->code[4] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11; +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env) { + _sdk_validate_args(ot && env, -1); - if (env->isbg) { - setlen(prim, 8); - - // Rectangle fill - // FIXME: reportedly this command doesn't accept height values >511... - prim->code[5] = 0x02000000; - //prim->code[5] |= env->r0 | (env->g0 << 8) | (env->b0 << 16); - //prim->code[6] = env->clip.x; - //prim->code[6] |= env->clip.y << 16; - prim->code[5] |= *((const uint32_t *) &(env->isbg)) >> 8; - prim->code[6] = *((const uint32_t *) &(env->clip.x)); - prim->code[7] = env->clip.w; - prim->code[7] |= _min(env->clip.h, 0x1ff) << 16; - } - - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0); + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); } void PutDrawEnv(DRAWENV *env) { - DrawOTagEnv((const uint32_t *) 0x00ffffff, env); + _sdk_validate_args_void(env); + + DrawOTagEnv((const uint32_t *) 0xffffff, env); } // This function skips rebuilding the cached packet whenever possible and is // useful if the DRAWENV structure is never modified (which is the case most of // the time). 
void PutDrawEnvFast(DRAWENV *env) { + _sdk_validate_args_void(env); + if (!(env->dr_env.tag)) - DrawOTagEnv((const uint32_t *) 0x00ffffff, env); + DrawOTagEnv((const uint32_t *) 0xffffff, env); else DrawOTag((const uint32_t *) &(env->dr_env)); } @@ -130,6 +147,8 @@ void PutDrawEnvFast(DRAWENV *env) { /* Display API */ DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) { + _sdk_validate_args(env && (w > 0) && (h > 0), 0); + env->disp.x = x; env->disp.y = y; env->disp.w = w; @@ -148,6 +167,8 @@ DISPENV *SetDefDispEnv(DISPENV *env, int x, int y, int w, int h) { } void PutDispEnv(const DISPENV *env) { + _sdk_validate_args_void(env); + uint32_t h_range, v_range, mode, fb_pos; mode = _gpu_video_mode << 3; @@ -216,6 +237,8 @@ void PutDispEnv(const DISPENV *env) { /* Deprecated "raw" display API */ void PutDispEnvRaw(const DISPENV_RAW *env) { + _sdk_validate_args_void(env); + uint32_t h_range, v_range, fb_pos; h_range = 608 + env->vid_xpos; @@ -233,3 +256,30 @@ void PutDispEnvRaw(const DISPENV_RAW *env) { GPU_GP1 = 0x08000000 | env->vid_mode; // Set video mode GPU_GP1 = 0x05000000 | fb_pos; // Set VRAM location to display } + +/* Misc. display functions */ + +GPU_VideoMode GetVideoMode(void) { + return _gpu_video_mode; +} + +void SetVideoMode(GPU_VideoMode mode) { + uint32_t _mode, stat = GPU_GP1; + + _gpu_video_mode = mode & 1; + + _mode = (mode & 1) << 3; + _mode |= (stat >> 17) & 0x37; // GPUSTAT bits 17-22 -> command bits 0-5 + _mode |= (stat >> 10) & 0x40; // GPUSTAT bit 16 -> command bit 6 + _mode |= (stat >> 7) & 0x80; // GPUSTAT bit 14 -> command bit 7 + + GPU_GP1 = 0x08000000 | _mode; +} + +int GetODE(void) { + return (GPU_GP1 >> 31); +} + +void SetDispMask(int mask) { + GPU_GP1 = 0x03000000 | (mask ? 
0 : 1); +} diff --git a/libpsn00b/psxgpu/font.c b/libpsn00b/psxgpu/font.c index b1c3c7a..c9d60f1 100644 --- a/libpsn00b/psxgpu/font.c +++ b/libpsn00b/psxgpu/font.c @@ -1,4 +1,5 @@ #include <stdint.h> +#include <assert.h> #include <stdio.h> #include <string.h> #include <stdlib.h> @@ -23,6 +24,7 @@ uint16_t _font_clut; extern uint8_t _gpu_debug_font[]; void FntLoad(int x, int y) { + _sdk_validate_args_void((x >= 0) && (y >= 0) && (x < 1024) && (y < 1024)); RECT pos; TIM_IMAGE tim; @@ -66,7 +68,8 @@ void FntLoad(int x, int y) { } int FntOpen(int x, int y, int w, int h, int isbg, int n) { - + _sdk_validate_args((w > 0) && (h > 0) && (n > 0), -1); + int i; // Initialize a text stream @@ -98,7 +101,8 @@ int FntOpen(int x, int y, int w, int h, int isbg, int n) { } int FntPrint(int id, const char *fmt, ...) { - + _sdk_validate_args((id < _nstreams) && fmt, -1); + int n; va_list ap; @@ -124,7 +128,8 @@ int FntPrint(int id, const char *fmt, ...) { } char *FntFlush(int id) { - + _sdk_validate_args(id < _nstreams, 0); + char *opri; SPRT_8 *sprt; DR_TPAGE *tpage; @@ -226,7 +231,8 @@ char *FntFlush(int id) { } char *FntSort(uint32_t *ot, char *pri, int x, int y, const char *text) { - + _sdk_validate_args(ot && pri, 0); + DR_TPAGE *tpage; SPRT_8 *sprt = (SPRT_8*)pri; int i; diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index fc018a4..e02c3c2 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -1,6 +1,6 @@ /* * PSn00bSDK GPU library (image and VRAM transfer functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * * TODO: MoveImage() is currently commented out as it won't trigger a DMA IRQ, * making it unusable as a draw queue command. 
A way around this (perhaps using @@ -9,11 +9,12 @@ #include <stdint.h> #include <assert.h> +#include <psxetc.h> #include <psxgpu.h> #include <hwregs_c.h> #define QUEUE_LENGTH 16 -#define DMA_CHUNK_LENGTH 8 +#define DMA_CHUNK_LENGTH 16 /* Internal globals */ @@ -37,6 +38,10 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { length += DMA_CHUNK_LENGTH - 1; } + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + + SetDrawOpType(DRAWOP_TYPE_DMA); GPU_GP1 = 0x04000000; // Disable DMA request GPU_GP0 = 0x01000000; // Flush cache @@ -49,18 +54,24 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { // Enable DMA request, route to GP0 (2) or from GPU_READ (3) GPU_GP1 = 0x04000002 | (write ^ 1); - DMA_MADR(2) = (uint32_t) data; + while ((DMA_CHCR(DMA_GPU) & (1 << 24)) || !(GPU_GP1 & (1 << 28))) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(2) = 0x00010000 | length; + DMA_BCR(DMA_GPU) = 0x00010000 | length; else - DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_GPU) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(2) = 0x01000200 | write; + DMA_CHCR(DMA_GPU) = 0x01000200 | write; } /* VRAM transfer API */ int LoadImage(const RECT *rect, const uint32_t *data) { + _sdk_validate_args(rect && data, -1); + int index = _next_saved_rect; _saved_rects[index] = *rect; @@ -75,6 +86,8 @@ int LoadImage(const RECT *rect, const uint32_t *data) { } int StoreImage(const RECT *rect, uint32_t *data) { + _sdk_validate_args(rect && data, -1); + int index = _next_saved_rect; _saved_rects[index] = *rect; @@ -88,22 +101,53 @@ int StoreImage(const RECT *rect, uint32_t *data) { ); } +int MoveImage(const RECT *rect, int x, int y) { + _sdk_validate_args(rect, -1); + + int index = _next_saved_rect; + + _saved_rects[index] = *rect; + _next_saved_rect = (index + 1) % QUEUE_LENGTH; + + return EnqueueDrawOp( + (void *) &MoveImage2, + 
(uint32_t) &_saved_rects[index], + (uint32_t) x, + (uint32_t) y + ); +} + void LoadImage2(const RECT *rect, const uint32_t *data) { + _sdk_validate_args_void(rect && data); + _dma_transfer(rect, (uint32_t *) data, 1); } void StoreImage2(const RECT *rect, uint32_t *data) { + _sdk_validate_args_void(rect && data); + _dma_transfer(rect, data, 0); } -/*void MoveImage2(const RECT *rect, int x, int y) { +void MoveImage2(const RECT *rect, int x, int y) { + _sdk_validate_args_void(rect); + + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + + SetDrawOpType(DRAWOP_TYPE_GPU_IRQ); + GPU_GP0 = 0x80000000; //GPU_GP0 = rect->x | (rect->y << 16); GPU_GP0 = *((const uint32_t *) &(rect->x)); GPU_GP0 = (x & 0xffff) | (y << 16); //GPU_GP0 = rect->w | (rect->h << 16); GPU_GP0 = *((const uint32_t *) &(rect->w)); -}*/ + + // As no DMA transfer is performed by this command, the GPU IRQ is used + // instead of the DMA IRQ to trigger the draw queue callback. + GPU_GP0 = 0x1f000000; +} /* .TIM image parsers */ @@ -112,6 +156,8 @@ void StoreImage2(const RECT *rect, uint32_t *data) { // returning pointers to them, which become useless once the .TIM file is // unloaded from main RAM. int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) { + _sdk_validate_args(tim && info, 1); + if ((*(tim++) & 0xffff) != 0x0010) return 1; @@ -138,6 +184,8 @@ int GsGetTimInfo(const uint32_t *tim, GsIMAGE *info) { } int GetTimInfo(const uint32_t *tim, TIM_IMAGE *info) { + _sdk_validate_args(tim && info, 1); + if ((*(tim++) & 0xffff) != 0x0010) return 1; diff --git a/libpsn00b/psxpress/README.md b/libpsn00b/psxpress/README.md index a894874..df18ec5 100644 --- a/libpsn00b/psxpress/README.md +++ b/libpsn00b/psxpress/README.md @@ -1,14 +1,19 @@ # PSn00bSDK MDEC library -This is a fully open source reimplementation of the official SDK's "data +This is a fully original reimplementation of the official SDK's "data compression" library. 
This library is made up of two parts, the MDEC API and functions to decompress Huffman-encoded bitstreams (.BS files, or frames in -.STR files) into data to be fed to the MDEC. FMV playback is not part of this -library (nor the official one) per se, but can implemented by using these APIs -alongside some code to stream data from the CD drive. +.STR files) into data to be fed to the MDEC. Two different implementations of +the latter are provided, one using the GTE and scratchpad region and an older +one using a large lookup table in main RAM. -**Currently only version 1 and 2 bitstreams are supported**. +FMV playback is not part of this library per se, but can implemented using the +APIs defined here alongside some code to stream data from the CD drive. + +Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI +bitstreams are not supported, but no encoder is publicly available for those +anyway. ## MDEC API @@ -26,14 +31,16 @@ The following functions are currently provided: - `DecDCTvlcStart()`, `DecDCTvlcContinue()`: a decompressor implementation that uses a small (<1 KB) lookup table and leverages the GTE, written in assembly. - `DecDCTvlcCopyTable()` can optionally be called to temporarily move the table - to the scratchpad region to improve decompression speed. -- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: a different implementation using + `DecDCTvlcCopyTableV2()` or `DecDCTvlcCopyTableV3()` may optionally be called + to temporarily move the table to the scratchpad region in order to boost + decompression speed. +- `DecDCTvlcStart2()`, `DecDCTvlcContinue2()`: an older implementation using a large (34 KB) lookup table in main RAM, written in C. The table must be - decompressed ahead of time using `DecDCTvlcBuild()`, but can be deallocated - when no longer needed. + decompressed ahead of time manually using `DecDCTvlcBuild()`, but can be + deallocated when no longer needed. **This implementation does not support** + **version 3 bitstreams**. 
- `DecDCTvlc()`, `DecDCTvlc2()`: wrappers around the functions listed above, - for compatibility with the Sony SDK. Using them is not recommended. + for compatibility with the Sony SDK. ## SPU ADPCM encoding API diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index d15a4db..b3aa837 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -1,11 +1,11 @@ /* * PSn00bSDK MDEC library (low-level MDEC/DMA API) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> #include <assert.h> -#include <psxapi.h> +#include <psxetc.h> #include <psxpress.h> #include <hwregs_c.h> @@ -14,14 +14,14 @@ /* Default IDCT matrix and quantization tables */ -#define S0 0x5a82 // 0x4000 * cos(0/16 * pi) * sqrt(2) -#define S1 0x7d8a // 0x4000 * cos(1/16 * pi) * 2 -#define S2 0x7641 // 0x4000 * cos(2/16 * pi) * 2 -#define S3 0x6a6d // 0x4000 * cos(3/16 * pi) * 2 -#define S4 0x5a82 // 0x4000 * cos(4/16 * pi) * 2 -#define S5 0x471c // 0x4000 * cos(5/16 * pi) * 2 -#define S6 0x30fb // 0x4000 * cos(6/16 * pi) * 2 -#define S7 0x18f8 // 0x4000 * cos(7/16 * pi) * 2 +#define S0 0x5a82 // (1 << 14) * cos(0/16 * pi) * sqrt(2) +#define S1 0x7d8a // (1 << 14) * cos(1/16 * pi) * 2 +#define S2 0x7641 // (1 << 14) * cos(2/16 * pi) * 2 +#define S3 0x6a6d // (1 << 14) * cos(3/16 * pi) * 2 +#define S4 0x5a82 // (1 << 14) * cos(4/16 * pi) * 2 +#define S5 0x471c // (1 << 14) * cos(5/16 * pi) * 2 +#define S6 0x30fb // (1 << 14) * cos(6/16 * pi) * 2 +#define S7 0x18f8 // (1 << 14) * cos(7/16 * pi) * 2 static const DECDCTENV _default_mdec_env = { // The default luma and chroma quantization table is based on the MPEG-1 @@ -84,34 +84,38 @@ static const DECDCTENV _default_mdec_env = { /* Public API */ void DecDCTReset(int mode) { - FastEnterCriticalSection(); + SetDMAPriority(DMA_MDEC_IN, 3); + SetDMAPriority(DMA_MDEC_OUT, 3); + DMA_CHCR(DMA_MDEC_IN) = 0x00000201; // Stop DMA + DMA_CHCR(DMA_MDEC_OUT) = 0x00000200; // Stop DMA - DMA_DPCR 
|= 0x000000bb; // Enable DMA0 and DMA1 - DMA_CHCR(0) = 0x00000201; // Stop DMA0 - DMA_CHCR(1) = 0x00000200; // Stop DMA1 - MDEC1 = 0x80000000; // Reset MDEC - MDEC1 = 0x60000000; // Enable DMA in/out requests + MDEC1 = 0x80000000; // Reset MDEC + MDEC1 = 0x60000000; // Enable DMA in/out requests - FastExitCriticalSection(); if (!mode) DecDCTPutEnv(0, 0); } void DecDCTPutEnv(const DECDCTENV *env, int mono) { - const DECDCTENV *_env = env ? env : &_default_mdec_env; DecDCTinSync(0); + if (!env) + env = &_default_mdec_env; MDEC0 = 0x60000000; // Set IDCT matrix - DecDCTinRaw((const uint32_t *) _env->dct, 32); + DecDCTinRaw((const uint32_t *) env->dct, 32); DecDCTinSync(0); - MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s) - DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32); + MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set quantization table(s) + DecDCTinRaw((const uint32_t *) env->iq_y, mono ? 16 : 32); DecDCTinSync(0); } void DecDCTin(const uint32_t *data, int mode) { + _sdk_validate_args_void(data); + uint32_t header = *data; + DecDCTinSync(0); + if (mode == DECDCT_MODE_RAW) MDEC0 = header; else if (mode & DECDCT_MODE_24BPP) @@ -126,18 +130,21 @@ void DecDCTin(const uint32_t *data, int mode) { // data length as an argument rather than parsing it from the first 4 bytes of // the stream. 
void DecDCTinRaw(const uint32_t *data, size_t length) { + _sdk_validate_args_void(data && length); + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { _sdk_log("input data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } - DMA_MADR(0) = (uint32_t) data; + DMA_MADR(DMA_MDEC_IN) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(0) = 0x00010000 | length; + DMA_BCR(DMA_MDEC_IN) = 0x00010000 | length; else - DMA_BCR(0) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_MDEC_IN) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(0) = 0x01000201; + DMA_CHCR(DMA_MDEC_IN) = 0x01000201; } int DecDCTinSync(int mode) { @@ -149,11 +156,13 @@ int DecDCTinSync(int mode) { return 0; } - _sdk_log("DecDCTinSync() timeout\n"); + _sdk_log("DecDCTinSync() timeout, MDEC1=0x%08x\n", MDEC1); return -1; } void DecDCTout(uint32_t *data, size_t length) { + _sdk_validate_args_void(data && length); + DecDCToutSync(0); if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { @@ -161,24 +170,25 @@ void DecDCTout(uint32_t *data, size_t length) { length += DMA_CHUNK_LENGTH - 1; } - DMA_MADR(1) = (uint32_t) data; + DMA_MADR(DMA_MDEC_OUT) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(1) = 0x00010000 | length; + DMA_BCR(DMA_MDEC_OUT) = 0x00010000 | length; else - DMA_BCR(1) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_MDEC_OUT) = DMA_CHUNK_LENGTH | + ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(1) = 0x01000200; + DMA_CHCR(DMA_MDEC_OUT) = 0x01000200; } int DecDCToutSync(int mode) { if (mode) - return (DMA_CHCR(1) >> 24) & 1; + return (DMA_CHCR(DMA_MDEC_OUT) >> 24) & 1; for (int i = MDEC_SYNC_TIMEOUT; i; i--) { - if (!(DMA_CHCR(1) & (1 << 24))) + if (!(DMA_CHCR(DMA_MDEC_OUT) & (1 << 24))) return 0; } - _sdk_log("DecDCToutSync() timeout\n"); + _sdk_log("DecDCToutSync() timeout, CHCR=0x%08x\n", DMA_CHCR(DMA_MDEC_OUT)); 
return -1; } diff --git a/libpsn00b/psxpress/vlc.c b/libpsn00b/psxpress/vlc.c index 4e3e283..36cfbe2 100644 --- a/libpsn00b/psxpress/vlc.c +++ b/libpsn00b/psxpress/vlc.c @@ -1,6 +1,6 @@ /* * PSn00bSDK MDEC library (support code for the main VLC decompressor) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> @@ -10,87 +10,120 @@ /* Huffman code lookup table */ -#define _val1(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff)) -#define _val2(rl, dc, len) (_val1(rl, dc) | (len << 16)) +#define _DC(y, c) (((y) << 4) | (c)) +#define _AC(rl, dc) (((rl) << 10) | ((uint16_t) (dc) & 0x3ff)) +#define _ACL(rl, dc, len) (_AC(rl, dc) | ((len) << 16)) -#define _pair(rl, dc) _val1(rl, dc), _val1(rl, -(dc)) -#define _pair2(rl, dc, len) _val2(rl, dc, len), _val2(rl, -(dc), len) -#define _pair3(rl, dc, len) \ - _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len) -#define _pair4(rl, dc, len) \ - _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), _val2(rl, dc, len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), \ - _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len), _val2(rl, -(dc), len) +#define _DC2(y, c) _DC(y, c), _DC(y, c) +#define _DC3(y, c) _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c) +#define _DC4(y, c) \ + _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c), \ + _DC(y, c), _DC(y, c), _DC(y, c), _DC(y, c) +#define _AC2(rl, dc) _AC(rl, dc), _AC(rl, -(dc)) +#define _ACL2(rl, dc, len) _ACL(rl, dc, len), _ACL(rl, -(dc), len) +#define _ACL3(rl, dc, len) \ + _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, -(dc), len), _ACL(rl, -(dc), len) +#define _ACL4(rl, dc, len) \ + _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), _ACL(rl, dc, len), \ + _ACL(rl, -(dc), 
len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), \ + _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len), _ACL(rl, -(dc), len) // This table isn't compressed since it makes no sense to compress less than a // kilobyte's worth of data. -static const DECDCTTAB _default_huffman_table = { - .lut0 = { +static const VLC_TableV3 _default_huffman_table = { + .ac0 = { // 11 x - _pair( 0, 1) + _AC2( 0, 1) }, - .lut2 = { + .ac2 = { // 01 0xx - _pair2( 0, 2, 5), _pair2( 2, 1, 5), + _ACL2( 0, 2, 5), _ACL2( 2, 1, 5), // 01 1x- - _pair3( 1, 1, 4) + _ACL3( 1, 1, 4) }, - .lut3 = { + .ac3 = { // 001 00xxxx - _pair2(13, 1, 9), _pair2( 0, 6, 9), _pair2(12, 1, 9), _pair2(11, 1, 9), - _pair2( 3, 2, 9), _pair2( 1, 3, 9), _pair2( 0, 5, 9), _pair2(10, 1, 9), + _ACL2(13, 1, 9), _ACL2( 0, 6, 9), _ACL2(12, 1, 9), _ACL2(11, 1, 9), + _ACL2( 3, 2, 9), _ACL2( 1, 3, 9), _ACL2( 0, 5, 9), _ACL2(10, 1, 9), // 001 xxx--- - _pair4( 0, 3, 6), _pair4( 4, 1, 6), _pair4( 3, 1, 6) + _ACL4( 0, 3, 6), _ACL4( 4, 1, 6), _ACL4( 3, 1, 6) }, - .lut4 = { + .ac4 = { // 0001 xxx - _pair( 7, 1), _pair( 6, 1), _pair( 1, 2), _pair( 5, 1) + _AC2( 7, 1), _AC2( 6, 1), _AC2( 1, 2), _AC2( 5, 1) }, - .lut5 = { + .ac5 = { // 00001 xxx - _pair( 2, 2), _pair( 9, 1), _pair( 0, 4), _pair( 8, 1) + _AC2( 2, 2), _AC2( 9, 1), _AC2( 0, 4), _AC2( 8, 1) }, - .lut7 = { + .ac7 = { // 0000001 xxxx - _pair(16, 1), _pair( 5, 2), _pair( 0, 7), _pair( 2, 3), - _pair( 1, 4), _pair(15, 1), _pair(14, 1), _pair( 4, 2) + _AC2(16, 1), _AC2( 5, 2), _AC2( 0, 7), _AC2( 2, 3), + _AC2( 1, 4), _AC2(15, 1), _AC2(14, 1), _AC2( 4, 2) }, - .lut8 = { + .ac8 = { // 00000001 xxxxx - _pair( 0, 11), _pair( 8, 2), _pair( 4, 3), _pair( 0, 10), - _pair( 2, 4), _pair( 7, 2), _pair(21, 1), _pair(20, 1), - _pair( 0, 9), _pair(19, 1), _pair(18, 1), _pair( 1, 5), - _pair( 3, 3), _pair( 0, 8), _pair( 6, 2), _pair(17, 1) + _AC2( 0, 11), _AC2( 8, 2), _AC2( 4, 3), _AC2( 0, 10), + _AC2( 2, 4), _AC2( 7, 2), _AC2(21, 1), _AC2(20, 1), + _AC2( 0, 
9), _AC2(19, 1), _AC2(18, 1), _AC2( 1, 5), + _AC2( 3, 3), _AC2( 0, 8), _AC2( 6, 2), _AC2(17, 1) }, - .lut9 = { + .ac9 = { // 000000001 xxxxx - _pair(10, 2), _pair( 9, 2), _pair( 5, 3), _pair( 3, 4), - _pair( 2, 5), _pair( 1, 7), _pair( 1, 6), _pair( 0, 15), - _pair( 0, 14), _pair( 0, 13), _pair( 0, 12), _pair(26, 1), - _pair(25, 1), _pair(24, 1), _pair(23, 1), _pair(22, 1) + _AC2(10, 2), _AC2( 9, 2), _AC2( 5, 3), _AC2( 3, 4), + _AC2( 2, 5), _AC2( 1, 7), _AC2( 1, 6), _AC2( 0, 15), + _AC2( 0, 14), _AC2( 0, 13), _AC2( 0, 12), _AC2(26, 1), + _AC2(25, 1), _AC2(24, 1), _AC2(23, 1), _AC2(22, 1) }, - .lut10 = { + .ac10 = { // 0000000001 xxxxx - _pair( 0, 31), _pair( 0, 30), _pair( 0, 29), _pair( 0, 28), - _pair( 0, 27), _pair( 0, 26), _pair( 0, 25), _pair( 0, 24), - _pair( 0, 23), _pair( 0, 22), _pair( 0, 21), _pair( 0, 20), - _pair( 0, 19), _pair( 0, 18), _pair( 0, 17), _pair( 0, 16) + _AC2( 0, 31), _AC2( 0, 30), _AC2( 0, 29), _AC2( 0, 28), + _AC2( 0, 27), _AC2( 0, 26), _AC2( 0, 25), _AC2( 0, 24), + _AC2( 0, 23), _AC2( 0, 22), _AC2( 0, 21), _AC2( 0, 20), + _AC2( 0, 19), _AC2( 0, 18), _AC2( 0, 17), _AC2( 0, 16) }, - .lut11 = { + .ac11 = { // 00000000001 xxxxx - _pair( 0, 40), _pair( 0, 39), _pair( 0, 38), _pair( 0, 37), - _pair( 0, 36), _pair( 0, 35), _pair( 0, 34), _pair( 0, 33), - _pair( 0, 32), _pair( 1, 14), _pair( 1, 13), _pair( 1, 12), - _pair( 1, 11), _pair( 1, 10), _pair( 1, 9), _pair( 1, 8) + _AC2( 0, 40), _AC2( 0, 39), _AC2( 0, 38), _AC2( 0, 37), + _AC2( 0, 36), _AC2( 0, 35), _AC2( 0, 34), _AC2( 0, 33), + _AC2( 0, 32), _AC2( 1, 14), _AC2( 1, 13), _AC2( 1, 12), + _AC2( 1, 11), _AC2( 1, 10), _AC2( 1, 9), _AC2( 1, 8) }, - .lut12 = { + .ac12 = { // 000000000001 xxxxx - _pair( 1, 18), _pair( 1, 17), _pair( 1, 16), _pair( 1, 15), - _pair( 6, 3), _pair(16, 2), _pair(15, 2), _pair(14, 2), - _pair(13, 2), _pair(12, 2), _pair(11, 2), _pair(31, 1), - _pair(30, 1), _pair(29, 1), _pair(28, 1), _pair(27, 1) + _AC2( 1, 18), _AC2( 1, 17), _AC2( 1, 16), _AC2( 1, 15), + _AC2( 6, 
3), _AC2(16, 2), _AC2(15, 2), _AC2(14, 2), + _AC2(13, 2), _AC2(12, 2), _AC2(11, 2), _AC2(31, 1), + _AC2(30, 1), _AC2(29, 1), _AC2(28, 1), _AC2(27, 1) + }, + .dc = { + // 00----- + _DC4(1, 0), _DC4(1, 0), _DC4(1, 0), _DC4(1, 0), + // 01----- + _DC4(2, 1), _DC4(2, 1), _DC4(2, 1), _DC4(2, 1), + // 100---- + _DC4(0, 2), _DC4(0, 2), + // 101---- + _DC4(3, 2), _DC4(3, 2), + // 110---- + _DC4(4, 3), _DC4(4, 3), + // 1110--- + _DC4(5, 4), + // 11110-- + _DC3(6, 5), + // 111110- + _DC2(7, 6), + // 1111110 + _DC(8, 7), + // 1111111(0) + _DC(0, 8) + }, + .dc_len = { + _DC(3, 2), _DC(2, 2), _DC(2, 2), _DC(3, 3), + _DC(3, 4), _DC(4, 5), _DC(5, 6), _DC(6, 7), + _DC(7, 8) } }; @@ -100,7 +133,7 @@ static const DECDCTTAB _default_huffman_table = { static VLC_Context _default_context; static size_t _max_buffer_size = 0; -const DECDCTTAB *_vlc_huffman_table = &_default_huffman_table; +const VLC_TableV3 *_vlc_huffman_table = &_default_huffman_table; /* Stateful VLC decoder API (for Sony SDK compatibility) */ @@ -120,10 +153,19 @@ size_t DecDCTvlcSize(size_t size) { /* Lookup table relocation API */ -void DecDCTvlcCopyTable(DECDCTTAB *addr) { +void DecDCTvlcCopyTableV2(VLC_TableV2 *addr) { + if (addr) { + _vlc_huffman_table = (const VLC_TableV3 *) addr; + memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV2)); + } else { + _vlc_huffman_table = &_default_huffman_table; + } +} + +void DecDCTvlcCopyTableV3(VLC_TableV3 *addr) { if (addr) { - _vlc_huffman_table = addr; - memcpy(addr, &_default_huffman_table, sizeof(DECDCTTAB)); + _vlc_huffman_table = (const VLC_TableV3 *) addr; + memcpy(addr, &_default_huffman_table, sizeof(VLC_TableV3)); } else { _vlc_huffman_table = &_default_huffman_table; } diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s index f3a1c67..2de22f7 100644 --- a/libpsn00b/psxpress/vlc.s +++ b/libpsn00b/psxpress/vlc.s @@ -1,375 +1,576 @@ # PSn00bSDK MDEC library (GTE-accelerated VLC decompressor) -# (C) 2022 spicyjpeg - MPL licensed +# (C) 2022-2023 
spicyjpeg - MPL licensed # -# Register map: -# - $a0 = ctx -# - $a1 = output -# - $a2 = max_size -# - $a3 = input -# - $t0 = window -# - $t1 = next_window -# - $t2 = remaining -# - $t3 = quant_scale -# - $t4 = is_v3 -# - $t5 = bit_offset -# - $t6 = block_index -# - $t7 = coeff_index -# - $t8 = _vlc_huffman_table -# - $t9 = &ac_jump_area +# TODO: reduce the size of the v3 DC coefficient decoder; currently the code is +# duplicated for each block type, but it can probably be shortened with no +# performance impact... -.set noreorder +.include "gtereg.inc" -.set VLC_Context_input, 0 -.set VLC_Context_window, 4 -.set VLC_Context_next_window, 8 -.set VLC_Context_remaining, 12 -.set VLC_Context_quant_scale, 16 -.set VLC_Context_is_v3, 18 -.set VLC_Context_bit_offset, 19 -.set VLC_Context_block_index, 20 -.set VLC_Context_coeff_index, 21 - -.set DECDCTTAB_lut0, 0 -.set DECDCTTAB_lut2, 4 -.set DECDCTTAB_lut3, 36 -.set DECDCTTAB_lut4, 292 -.set DECDCTTAB_lut5, 308 -.set DECDCTTAB_lut7, 324 -.set DECDCTTAB_lut8, 356 -.set DECDCTTAB_lut9, 420 -.set DECDCTTAB_lut10, 484 -.set DECDCTTAB_lut11, 548 -.set DECDCTTAB_lut12, 612 +.set noreorder +.set noat + +.set value, $v0 +.set length, $v1 +.set ctx, $a0 +.set output, $a1 +.set max_size, $a2 +.set input, $a3 +.set temp, $t0 +.set window, $t1 +.set next_window, $t2 +.set remaining, $t3 +.set is_v3, $t4 +.set bit_offset, $t5 +.set block_index, $t6 +.set coeff_index, $t7 +.set quant_scale, $s0 +.set last_y, $s1 +.set last_cr, $s2 +.set last_cb, $s3 +.set huffman_table, $t8 +.set ac_jump_area, $t9 + +.set VLC_Context_input, 0x0 +.set VLC_Context_window, 0x4 +.set VLC_Context_next_window, 0x8 +.set VLC_Context_remaining, 0xc +.set VLC_Context_is_v3, 0x10 +.set VLC_Context_bit_offset, 0x11 +.set VLC_Context_block_index, 0x12 +.set VLC_Context_coeff_index, 0x13 +.set VLC_Context_quant_scale, 0x14 +.set VLC_Context_last_y, 0x16 +.set VLC_Context_last_cr, 0x18 +.set VLC_Context_last_cb, 0x1a + +.set VLC_Table_ac0, 0x0 +.set VLC_Table_ac2, 
0x4 +.set VLC_Table_ac3, 0x24 +.set VLC_Table_ac4, 0x124 +.set VLC_Table_ac5, 0x134 +.set VLC_Table_ac7, 0x144 +.set VLC_Table_ac8, 0x164 +.set VLC_Table_ac9, 0x1a4 +.set VLC_Table_ac10, 0x1e4 +.set VLC_Table_ac11, 0x224 +.set VLC_Table_ac12, 0x264 +.set VLC_Table_dc, 0x2a4 +.set VLC_Table_dc_len, 0x324 .section .text.DecDCTvlcStart .global DecDCTvlcStart .type DecDCTvlcStart, @function DecDCTvlcStart: + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + # Create a new context on-the-fly without writing it to memory then jump # into DecDCTvlcContinue(), skipping context loading. - lw $t0, 8($a3) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) - nop - srl $v0, $t0, 16 - sll $t0, 16 - - lw $t1, 12($a3) # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) - or $t0, $v0 - srl $v0, $t1, 16 - sll $t1, 16 - - lhu $t2, 0($a3) # remaining = bs->uncomp_length * 2 - or $t1, $v0 - - lhu $t3, 4($a3) # quant_scale = (bs->quant_scale & 63) << 10 - sll $t2, 1 - andi $t3, 63 - - lhu $t4, 6($a3) # is_v3 = !(bs->version < 3) - sll $t3, 10 - sltiu $t4, $t4, 3 - xori $t4, 1 - - li $t5, 32 # bit_offset = 32 - li $t6, 5 # block_index = 5 - li $t7, 0 # coeff_index = 0 + lw window, 8(input) # window = (bs->data[0] << 16) | (bs->data[0] >> 16) + li last_y, 0 + srl temp, window, 16 + sll window, 16 + or window, temp + + # next_window = (bs->data[1] << 16) | (bs->data[1] >> 16) + lw next_window, 12(input) + li last_cr, 0 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + + lhu remaining, 0(input) # remaining = bs->uncomp_length * 2 + li last_cb, 0 + sll remaining, 1 + + lw temp, 4(input) # quant_scale = (bs->quant_scale & 63) << 10 + li bit_offset, 32 + andi quant_scale, temp, 63 + sll quant_scale, 10 + + srl temp, 16 # is_v3 = !(bs->version < 3) + sltiu is_v3, temp, 3 + xori is_v3, 1 + + li block_index, 5 + li coeff_index, 0 j _vlc_skip_context_load - addiu $a3, 16 # input = &(bs->data[2]) + addiu input, 16 # input = 
&(bs->data[2]) .section .text.DecDCTvlcContinue .global DecDCTvlcContinue .type DecDCTvlcContinue, @function DecDCTvlcContinue: - lw $a3, VLC_Context_input($a0) - lw $t0, VLC_Context_window($a0) - lw $t1, VLC_Context_next_window($a0) - lw $t2, VLC_Context_remaining($a0) - lhu $t3, VLC_Context_quant_scale($a0) - lb $t4, VLC_Context_is_v3($a0) - lb $t5, VLC_Context_bit_offset($a0) - lb $t6, VLC_Context_block_index($a0) - lb $t7, VLC_Context_coeff_index($a0) + addiu $sp, -16 + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + + lw input, VLC_Context_input(ctx) + lw window, VLC_Context_window(ctx) + lw next_window, VLC_Context_next_window(ctx) + lw remaining, VLC_Context_remaining(ctx) + lb is_v3, VLC_Context_is_v3(ctx) + lb bit_offset, VLC_Context_bit_offset(ctx) + lb block_index, VLC_Context_block_index(ctx) + lb coeff_index, VLC_Context_coeff_index(ctx) + lhu quant_scale, VLC_Context_quant_scale(ctx) + lh last_y, VLC_Context_last_y(ctx) + lh last_cr, VLC_Context_last_cr(ctx) + lh last_cb, VLC_Context_last_cb(ctx) _vlc_skip_context_load: - # Determine how many bytes to output. This whole block of code basically - # does this: + # Determine how many bytes to output. 
+ # if (max_size <= 0) max_size = 0x3fff0000 # max_size = min((max_size - 1) * 2, remaining) # remaining -= max_size - bgtz $a2, .Lmax_size_valid # if (max_size <= 0) max_size = 0x7ffe0000 - addiu $a2, -1 # else max_size = (max_size - 1) * 2 - lui $a2, 0x3fff + bgtz max_size, .Lmax_size_valid + addiu max_size, -1 + lui max_size, 0x3fff .Lmax_size_valid: - sll $a2, 1 + sll max_size, 1 - blt $a2, $t2, .Lmax_size_ok # if (max_size > remaining) max_size = remaining - lui $v1, 0x3800 - move $a2, $t2 -.Lmax_size_ok: - subu $t2, $a2 # remaining -= max_size + subu remaining, max_size + bgez remaining, .Lmax_size_ok + lui temp, 0x3800 + addu max_size, remaining + li remaining, 0 + +.Lmax_size_ok: # Write the length of the data that will be decoded to first 4 bytes of the # output buffer, which will be then parsed by DecDCTin(). - srl $v0, $a2, 1 # output[0] = 0x38000000 | (max_size / 2) - or $v0, $v1 - sw $v0, 0($a1) + srl value, max_size, 1 # output[0] = 0x38000000 | (max_size / 2) + or value, temp + sw value, 0(output) # Obtain the addresses of the lookup table and jump area in advance so that # they don't have to be retrieved for each coefficient decoded. - lw $t8, _vlc_huffman_table - la $t9, .Lac_prefix_10 + lw huffman_table, _vlc_huffman_table + la ac_jump_area, .Lac_prefix_01 - 32 - beqz $a2, .Lstop_processing - addiu $a1, 4 # output = (uint16_t *) &output[1] + beqz max_size, .Lstop_processing + addiu output, 4 .Lprocess_next_code_loop: # while (max_size) # This is the "hot" part of the decoder, executed for each code in the # bitstream. The first step is to determine if the next code is a DC or AC - # coefficient. - bnez $t7, .Lprocess_ac_coefficient - addiu $t7, 1 # coeff_index++ - bnez $t4, .Lprocess_dc_v3_coefficient - li $v1, 0x01ff + # coefficient; at the same time the GTE is given the task of counting the + # number of leading zeroes/ones in the code (which takes 2 more cycles). 
+ mtc2 window, C2_LZCS + + bnez coeff_index, .Lprocess_ac_coefficient + addiu coeff_index, 1 + bnez is_v3, .Lprocess_dc_v3_coefficient + li temp, 0x1ff .Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3) # The DC coefficient in version 2 frames is not compressed. Value 0x1ff is # used to signal the end of the bitstream. - srl $v0, $t0, 22 # prefix = (window >> (32 - 10)) - beq $v0, $v1, .Lstop_processing # if (prefix == 0x1ff) break - or $v0, $t3 # *output = prefix | quant_scale - sll $t0, 10 # window <<= 10 - b .Lwrite_value - addiu $t5, -10 # bit_offset -= 10 + # prefix = window >> (32 - 10) + # if (prefix == 0x1ff) break + # *output = prefix | quant_scale + srl value, window, 22 + beq value, temp, .Lstop_processing + or value, quant_scale + sll window, 10 + addiu bit_offset, -10 + + b .Lfeed_bitstream + sh value, 0(output) .Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3) - # TODO: version 3 is currently not supported. - jr $ra - li $v0, -1 - -.Lprocess_ac_coefficient: # if (coeff_index) - # Check whether the prefix code is one of the shorter, more common ones, - # and start counting the number of leading zeroes/ones using the GTE (which - # takes 2 more cycles). - srl $v0, $t0, 30 - li $v1, 3 - beq $v0, $v1, .Lac_prefix_11 - li $v1, 2 - beq $v0, $v1, .Lac_prefix_10 - li $v1, 1 - mtc2 $t0, $30 - beq $v0, $v1, .Lac_prefix_01 + # Version 3 DC coefficients are variable-length deltas, prefixed with a + # Huffman code indicating their length. Since the prefix code is up to 7 + # bits long, it makes sense to decode it with a simple 128-byte lookup + # table rather than using the GTE. The codes are different for luma and + # chroma blocks, so each table entry contains the decoded length for both + # block types (packed as two nibbles). Prefix 111111111 is used to signal + # the end of the bitstream. 
+ # prefix = window >> (32 - 9) + # if (prefix == 0x1ff) break + # lengths = huffman_table->dc[prefix >> 2] + srl length, window, 23 + beq length, temp, .Lstop_processing + srl length, 2 + addu length, huffman_table + + addiu $at, block_index, -4 + bltz $at, .Ldc_block_y + lbu length, VLC_Table_dc(length) + beqz $at, .Ldc_block_cb + andi length, 15 # if (block_index >= Cb) dc_length = lengths & 15 + +.Ldc_block_cr: # if (block_index > Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cr_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. + # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cr_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cr_positive: + addu last_cr, value + andi last_cr, 0x3ff + +.Ldc_cr_zero: + sll temp, last_cr, 2 # *output = (last_cr << 2) | quant_scale + or temp, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) + +.Ldc_block_cb: # if (block_index == Cb) + # prefix_length = huffman_table->dc_len[dc_length] & 15 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + andi temp, 15 + + sllv window, window, temp + beqz length, .Ldc_cb_zero # if (dc_length) + subu bit_offset, temp + + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. 
+ # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_cb_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_cb_positive: + addu last_cb, value + andi last_cb, 0x3ff + +.Ldc_cb_zero: + sll value, last_cb, 2 # *output = (last_cb << 2) | quant_scale + or value, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh value, 0(output) + +.Ldc_block_y: # if (block_index < Cb) nop + srl length, 4 # dc_length = lengths >> 4 + + # prefix_length = huffman_table->dc_len[dc_length] >> 4 + addu temp, length, huffman_table + lbu temp, VLC_Table_dc_len(temp) + li $at, 32 + srl temp, 4 + + sllv window, window, temp + beqz length, .Ldc_y_zero # if (dc_length) + subu bit_offset, temp + + sll temp, last_y, 2 + subu $at, length # value = window >> (32 - dc_length) + srlv value, window, $at + + # Decode the sign bit, then add the decoded delta to the current value. + # if (!(window >> 31)) value -= (1 << dc_length) - 1 + bltz window, .Ldc_y_positive + li temp, -1 + srlv temp, temp, $at + subu value, temp +.Ldc_y_positive: + addu last_y, value + andi last_y, 0x3ff + +.Ldc_y_zero: + sll temp, last_y, 2 # *output = (last_y << 2) | quant_scale + or temp, quant_scale + b .Lupdate_window_dc # update_window(dc_length) + sh temp, 0(output) - # If the code is longer, retrieve the number of leading zeroes from the GTE - # and use it as an index into the jump area. Each block in the area is 8 - # instructions long and handles decoding a specific prefix. - mfc2 $v0, $31 - li $v1, 11 - bgt $v0, $v1, .Lreturn_error # if (prefix > 11) return -1 - sll $v0, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(u32)] - addu $v0, $t9 - jr $v0 +.Lprocess_ac_coefficient: # if (coeff_index) + # Check whether the prefix code is 10 or 11 (i.e. if it starts with 1). If + # not, retrieve the number of leading zeroes from the GTE and use it as an + # index into the jump area. Each block in the area is 8 instructions long + # and handles decoding a specific prefix. 
+ mfc2 temp, C2_LZCR + + bltz window, .Lac_prefix_1 # if (!(window >> 31)) + addiu $at, temp, -11 # if (prefix > 11) return -1 + bgtz $at, .Lreturn_error + sll temp, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(uint32_t)] + addu temp, ac_jump_area + jr temp nop .Lreturn_error: - jr $ra + b .Lreturn li $v0, -1 -.Lac_prefix_11: - # Prefix 11 is followed by a single bit. - srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16) - andi $v0, 2 - addu $v0, $t8 # value = table->lut0[index] - lhu $v0, DECDCTTAB_lut0($v0) - sll $t0, 3 # window <<= 3 - b .Lwrite_value - addiu $t5, -3 # bit_offset -= 3 - #.word 0 +.Lac_prefix_1: # if (window >> 31) + sll window, 1 + bltz window, .Lac_prefix_11 + li temp, 0xfe00 .Lac_prefix_10: # Prefix 10 marks the end of a block. - li $v0, 0xfe00 # value = 0xfe00 - sll $t0, 2 # window <<= 2 - addiu $t5, -2 # bit_offset -= 2 - addiu $t6, -1 # block_index-- - bgez $t6, .Lwrite_value - li $t7, 0 # coeff_index = 0 - b .Lwrite_value - li $t6, 5 # if (block_index < 0) block_index = 5 + # *output = 0xfe00 + # coeff_index = 0 + # if (--block_index < Y3) block_index = Cr + sll window, 1 + addiu bit_offset, -2 + sh temp, 0(output) + + addiu block_index, -1 + bgez block_index, .Lfeed_bitstream + li coeff_index, 0 + b .Lfeed_bitstream + li block_index, 5 + +.Lac_prefix_11: + # Prefix 11 is followed by a single bit. Note that the 10/11 prefix check + # already shifts the window by one bit (without updating the bit offset). + # index = ((window >> (32 - 1 - 1)) & 1) * sizeof(uint16_t) + # *output = huffman_table->ac0[index] + srl value, window, 29 + andi value, 2 + addu value, huffman_table + lhu value, VLC_Table_ac0(value) + sll window, 2 + addiu bit_offset, -3 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_01: # Prefix 01 can be followed by a 2-bit lookup index starting with 1, or a # 3-bit lookup index starting with 0. A 32-bit lookup table is used, # containing both MDEC codes and lengths. 
- srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32) - andi $v0, 28 - addu $v0, $t8 # value = table->lut2[index] - lw $v0, DECDCTTAB_lut2($v0) - b .Lupdate_window_and_write - srl $v1, $v0, 16 # length = value >> 16 + # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(uint32_t) + # *output = huffman_table->ac2[index] & 0xffff + # length = huffman_table->ac2[index] >> 16 + srl value, window, 25 + andi value, 28 + addu value, huffman_table + lw value, VLC_Table_ac2(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) .word 0, 0 .Lac_prefix_001: # Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a # 3-bit lookup index starting with 01/10/11. - srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32) - andi $v0, 252 - addu $v0, $t8 # value = table->lut3[index] - lw $v0, DECDCTTAB_lut3($v0) - b .Lupdate_window_and_write - srl $v1, $v0, 16 # length = value >> 16 + # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(uint32_t) + # *output = huffman_table->ac3[index] & 0xffff + # length = huffman_table->ac3[index] >> 16 + srl value, window, 21 + andi value, 252 + addu value, huffman_table + lw value, VLC_Table_ac3(value) + + b .Lupdate_window_ac # update_window(value >> 16) + sh value, 0(output) .word 0, 0 .Lac_prefix_0001: # Prefix 0001 is followed by a 3-bit lookup index. - srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16) - andi $v0, 14 - addu $v0, $t8 # value = table->lut4[index] - lhu $v0, DECDCTTAB_lut4($v0) - sll $t0, 7 # window <<= 4 + 3 - b .Lwrite_value - addiu $t5, -7 # bit_offset -= 4 + 3 - .word 0 + # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac4[index] + srl value, window, 24 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac4(value) + sll window, 7 + addiu bit_offset, -7 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00001: # Prefix 00001 is followed by a 3-bit lookup index. 
- srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16) - andi $v0, 14 - addu $v0, $t8 # value = table->lut5[index] - lhu $v0, DECDCTTAB_lut5($v0) - sll $t0, 8 # window <<= 5 + 3 - b .Lwrite_value - addiu $t5, -8 # bit_offset -= 5 + 3 - .word 0 + # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(uint16_t) + # *output = huffman_table->ac5[index] + srl value, window, 23 + andi value, 14 + addu value, huffman_table + lhu value, VLC_Table_ac5(value) + sll window, 8 + addiu bit_offset, -8 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000001: # Prefix 000001 is an escape code followed by a full 16-bit MDEC value. - srl $v0, $t0, 10 # value = window >> (32 - 6 - 16) - sll $t0, 22 # window <<= 6 + 16 - b .Lwrite_value - addiu $t5, -22 # bit_offset -= 6 + 16 - .word 0, 0, 0, 0 + # *output = window >> (32 - 6 - 16) + srl value, window, 10 + sll window, 22 + addiu bit_offset, -22 + + b .Lfeed_bitstream + sh value, 0(output) + .word 0, 0, 0 .Lac_prefix_0000001: # Prefix 0000001 is followed by a 4-bit lookup index. - srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16) - andi $v0, 30 - addu $v0, $t8 # value = table->lut7[index] - lhu $v0, DECDCTTAB_lut7($v0) - sll $t0, 11 # window <<= 7 + 4 - b .Lwrite_value - addiu $t5, -11 # bit_offset -= 7 + 4 - .word 0 + # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(uint16_t) + # *output = huffman_table->ac7[index] + srl value, window, 20 + andi value, 30 + addu value, huffman_table + lhu value, VLC_Table_ac7(value) + sll window, 11 + addiu bit_offset, -11 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00000001: # Prefix 00000001 is followed by a 5-bit lookup index. 
- srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut8[index] - lhu $v0, DECDCTTAB_lut8($v0) - sll $t0, 13 # window <<= 8 + 5 - b .Lwrite_value - addiu $t5, -13 # bit_offset -= 8 + 5 - .word 0 + # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac8[index] + srl value, window, 18 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac8(value) + sll window, 13 + addiu bit_offset, -13 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000000001: # Prefix 000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut9[index] - lhu $v0, DECDCTTAB_lut9($v0) - sll $t0, 14 # window <<= 9 + 5 - b .Lwrite_value - addiu $t5, -14 # bit_offset -= 9 + 5 - .word 0 + # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac9[index] + srl value, window, 17 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac9(value) + sll window, 14 + addiu bit_offset, -14 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_0000000001: # Prefix 0000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut10[index] - lhu $v0, DECDCTTAB_lut10($v0) - sll $t0, 15 # window <<= 10 + 5 - b .Lwrite_value - addiu $t5, -15 # bit_offset -= 10 + 5 - .word 0 + # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac10[index] + srl value, window, 16 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac10(value) + sll window, 15 + addiu bit_offset, -15 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_00000000001: # Prefix 00000000001 is followed by a 5-bit lookup index. 
- srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut11[index] - lhu $v0, DECDCTTAB_lut11($v0) - sll $t0, 16 # window <<= 11 + 5 - b .Lwrite_value - addiu $t5, -16 # bit_offset -= 11 + 5 - .word 0 + # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac11[index] + srl value, window, 15 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac11(value) + sll window, 16 + addiu bit_offset, -16 + + b .Lfeed_bitstream + sh value, 0(output) .Lac_prefix_000000000001: # Prefix 000000000001 is followed by a 5-bit lookup index. - srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16) - andi $v0, 62 - addu $v0, $t8 # value = table->lut12[index] - lhu $v0, DECDCTTAB_lut12($v0) - sll $t0, 17 # window <<= 12 + 5 - b .Lwrite_value - addiu $t5, -17 # bit_offset -= 12 + 5 - .word 0 - -.Lupdate_window_and_write: - sllv $t0, $t0, $v1 # window <<= length - subu $t5, $v1 # bit_offset -= length -.Lwrite_value: - sh $v0, 0($a1) + # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(uint16_t) + # *output = huffman_table->ac12[index] + srl value, window, 14 + andi value, 62 + addu value, huffman_table + lhu value, VLC_Table_ac12(value) + sll window, 17 + addiu bit_offset, -17 + + b .Lfeed_bitstream + sh value, 0(output) + +.Lupdate_window_ac: + srl length, value, 16 +.Lupdate_window_dc: + sllv window, window, length + subu bit_offset, length + .Lfeed_bitstream: # Update the window. This makes sure the next iteration of the loop will be # able to read up to 32 bits from the bitstream. 
- bgez $t5, .Lskip_feeding # if (bit_offset < 0) - addiu $a2, -1 # max_size-- - - subu $v0, $0, $t5 # window = next_window << (-bit_offset) - sllv $t0, $t1, $v0 - lw $t1, 0($a3) # next_window = (*input << 16) | (*input >> 16) - addiu $t5, 32 # bit_offset += 32 - srl $v0, $t1, 16 - sll $t1, 16 - or $t1, $v0 - addiu $a3, 4 # input++ + bgez bit_offset, .Lskip_feeding # if (bit_offset < 0) + addiu max_size, -1 + + subu temp, $0, bit_offset # window = next_window << (-bit_offset) + sllv window, next_window, temp + lw next_window, 0(input) # next_window = (*input << 16) | (*input >> 16) + addiu bit_offset, 32 + srl temp, next_window, 16 + sll next_window, 16 + or next_window, temp + addiu input, 4 .Lskip_feeding: - srlv $v0, $t1, $t5 # window |= next_window >> bit_offset - or $t0, $v0 + srlv temp, next_window, bit_offset # window |= next_window >> bit_offset + or window, temp - bnez $a2, .Lprocess_next_code_loop - addiu $a1, 2 # output++ + bnez max_size, .Lprocess_next_code_loop + addiu output, 2 .Lstop_processing: # If remaining = 0, skip flushing the context, pad the output buffer with # end-of-block codes if necessary and return 0. Otherwise flush the context # and return 1. 
- beqz $t2, .Lpad_output_buffer - nop - - sw $a3, VLC_Context_input($a0) - sw $t0, VLC_Context_window($a0) - sw $t1, VLC_Context_next_window($a0) - sw $t2, VLC_Context_remaining($a0) - sh $t3, VLC_Context_quant_scale($a0) - sb $t4, VLC_Context_is_v3($a0) - sb $t5, VLC_Context_bit_offset($a0) - sb $t6, VLC_Context_block_index($a0) - sb $t7, VLC_Context_coeff_index($a0) - - jr $ra + beqz remaining, .Lpad_output_buffer + li temp, 0xfe00 + + sw input, VLC_Context_input(ctx) + sw window, VLC_Context_window(ctx) + sw next_window, VLC_Context_next_window(ctx) + sw remaining, VLC_Context_remaining(ctx) + sb bit_offset, VLC_Context_bit_offset(ctx) + sb block_index, VLC_Context_block_index(ctx) + sb coeff_index, VLC_Context_coeff_index(ctx) + sh last_y, VLC_Context_last_y(ctx) + sh last_cr, VLC_Context_last_cr(ctx) + sh last_cb, VLC_Context_last_cb(ctx) + + b .Lreturn li $v0, 1 .Lpad_output_buffer: - beqz $a2, .Lreturn_zero - li $v0, 0xfe00 -.Lpad_output_buffer_loop: # while (max_size) - sh $v0, 0($a1) # *output = 0xfe00 - addiu $a2, -1 # max_size-- - bnez $a2, .Lpad_output_buffer_loop - addiu $a1, 2 # output++ + beqz max_size, .Lreturn + li $v0, 0 -.Lreturn_zero: +.Lpad_output_buffer_loop: # while (max_size) + sh temp, 0(output) + addiu max_size, -1 + bnez max_size, .Lpad_output_buffer_loop + addiu output, 2 + +.Lreturn: + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) jr $ra - li $v0, 0 + addiu $sp, 16 diff --git a/libpsn00b/psxpress/vlc2.c b/libpsn00b/psxpress/vlc2.c index 9eb99bf..7d9d9f3 100644 --- a/libpsn00b/psxpress/vlc2.c +++ b/libpsn00b/psxpress/vlc2.c @@ -63,7 +63,7 @@ static const uint32_t _compressed_table[TABLE_LENGTH] = { static VLC_Context _default_context; static size_t _max_buffer_size = 0; -const DECDCTTAB2 *_vlc_huffman_table2 = 0; +const DECDCTTAB *_vlc_huffman_table2 = 0; /* VLC decoder */ @@ -77,14 +77,17 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( VLC_Context *ctx, uint32_t *buf, size_t max_size ) { const uint32_t 
*input = ctx->input; - uint32_t remaining = ctx->remaining; uint32_t window = ctx->window; uint32_t next_window = ctx->next_window; - uint16_t quant_scale = ctx->quant_scale; + uint32_t remaining = ctx->remaining; + int is_v3 = ctx->is_v3; + int bit_offset = ctx->bit_offset; int block_index = ctx->block_index; int coeff_index = ctx->coeff_index; - int bit_offset = ctx->bit_offset; - int is_v3 = ctx->is_v3; + uint16_t quant_scale = ctx->quant_scale; + int16_t last_y = ctx->last_y; + int16_t last_cr = ctx->last_cr; + int16_t last_cb = ctx->last_cb; //if (!_vlc_huffman_table2) //return -1; @@ -120,25 +123,22 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( *output = (uint16_t) _get_bits_unsigned(22); _advance_window(22); } else if (window >> 24) { - // The first lookup table is for codes that not start with + // The first lookup table is for codes that do not start with // 00000000. - value = _vlc_huffman_table2->lut[_get_bits_unsigned(13)]; + value = _vlc_huffman_table2->ac[_get_bits_unsigned(13)]; _advance_window(value >> 16); *output = (uint16_t) value; } else { // If the code starts with 00000000, use the second lookup // table. - value = _vlc_huffman_table2->lut00[_get_bits_unsigned(17)]; + value = _vlc_huffman_table2->ac00[_get_bits_unsigned(17)]; _advance_window(value >> 16); *output = (uint16_t) value; } } else { - // Parse the DC (first) coefficient for this block. Version 2 - // simply stores the signed 10-bit value as-is, while version 3 - // uses a delta encoding combined with a compression method similar - // to exp-Golomb. + // Parse the DC (first) coefficient for this block. if (is_v3) { - // TODO: version 3 is currently not supported. + // This implementation does not support version 3. 
return -1; } else { value = _get_bits_unsigned(10); @@ -158,7 +158,7 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( // time and processes each 16-bit word starting from the MSB, so an // endianness conversion is necessary to preserve bit order when // reading 32 bits at a time. Also note that the PS1 CPU is not capable - of shifting by more than 31 bits - it will shift by 0 bits instead! + of shifting by >=32 bits - it will shift by (N % 32) bits instead! if (bit_offset < 0) { window = next_window << (-bit_offset); bit_offset += 32; @@ -176,12 +176,15 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( return 0; ctx->input = input; - ctx->remaining = remaining; ctx->window = window; ctx->next_window = next_window; + ctx->remaining = remaining; + ctx->bit_offset = bit_offset; ctx->block_index = block_index; ctx->coeff_index = coeff_index; - ctx->bit_offset = bit_offset; + ctx->last_y = last_y; + ctx->last_cr = last_cr; + ctx->last_cb = last_cb; return 1; } @@ -197,21 +200,24 @@ int DecDCTvlcStart2( return -1; ctx->input = &input[2]; - ctx->remaining = (header->mdec0_header & 0xffff) * 2; ctx->window = (input[0] << 16) | (input[0] >> 16); ctx->next_window = (input[1] << 16) | (input[1] >> 16); - ctx->quant_scale = (header->quant_scale & 63) << 10; + ctx->remaining = (header->mdec0_header & 0xffff) * 2; + ctx->is_v3 = (header->version >= 3); + ctx->bit_offset = 32; ctx->block_index = 0; ctx->coeff_index = 0; - ctx->bit_offset = 32; - ctx->is_v3 = (header->version == 3); + ctx->quant_scale = (header->quant_scale & 63) << 10; + ctx->last_y = 0; + ctx->last_cr = 0; + ctx->last_cb = 0; return DecDCTvlcContinue2(ctx, buf, max_size); } /* Stateful VLC decoder API (for Sony SDK compatibility) */ -int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table) { +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table) { if (table) _vlc_huffman_table2 = table; @@ -230,7 +236,7 @@ size_t DecDCTvlcSize2(size_t size) { /* Lookup table 
decompressor */ -void DecDCTvlcBuild(DECDCTTAB2 *table) { +void DecDCTvlcBuild(DECDCTTAB *table) { uint32_t *output = (uint32_t *) table; _vlc_huffman_table2 = table; diff --git a/libpsn00b/psxsio/tty.c b/libpsn00b/psxsio/tty.c index a1b33c8..a88af85 100644 --- a/libpsn00b/psxsio/tty.c +++ b/libpsn00b/psxsio/tty.c @@ -8,7 +8,7 @@ * critical section or even from an interrupt handler. */ -#include <ioctl.h> +#include <sys/ioctl.h> #include <psxapi.h> #include <psxsio.h> #include <hwregs_c.h> @@ -93,8 +93,8 @@ void AddSIO(int baud) { close(0); close(1); - DelDev(_sio_dcb.name); - AddDev(&_sio_dcb); + DelDrv(_sio_dcb.name); + AddDrv(&_sio_dcb); open(_sio_dcb.name, 2); open(_sio_dcb.name, 1); } @@ -102,6 +102,6 @@ void AddSIO(int baud) { void DelSIO(void) { SIO_Quit(); - DelDev(_sio_dcb.name); - AddDummyTty(); + DelDrv(_sio_dcb.name); + add_nullcon_driver(); } diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c index 45654ad..64c6d1b 100644 --- a/libpsn00b/psxspu/common.c +++ b/libpsn00b/psxspu/common.c @@ -1,10 +1,11 @@ /* * PSn00bSDK SPU library (common functions) - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed */ #include <stdint.h> #include <assert.h> +#include <psxetc.h> #include <psxspu.h> #include <hwregs_c.h> @@ -31,7 +32,7 @@ static void _wait_status(uint16_t mask, uint16_t value) { return; } - _sdk_log("status register timeout (0x%04x)\n", SPU_STAT); + _sdk_log("timeout, status=0x%04x\n", SPU_STAT); } static size_t _dma_transfer(uint32_t *data, size_t length, int write) { @@ -59,13 +60,14 @@ static size_t _dma_transfer(uint32_t *data, size_t length, int write) { SPU_CTRL |= ctrl; _wait_status(0x0030, ctrl); - DMA_MADR(4) = (uint32_t) data; + DMA_MADR(DMA_SPU) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) - DMA_BCR(4) = 0x00010000 | length; + DMA_BCR(DMA_SPU) = 0x00010000 | length; else - DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + DMA_BCR(DMA_SPU) = DMA_CHUNK_LENGTH | + ((length / 
DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(4) = 0x01000200 | write; + DMA_CHCR(DMA_SPU) = 0x01000200 | write; return length; } @@ -130,8 +132,8 @@ void SpuInit(void) { SPU_EXT_VOL_L = 0; SPU_EXT_VOL_R = 0; - DMA_DPCR |= 0x000b0000; // Enable DMA4 - DMA_CHCR(4) = 0x00000201; // Stop DMA4 + SetDMAPriority(DMA_SPU, 3); + DMA_CHCR(DMA_SPU) = 0x00000201; // Stop DMA SPU_DMA_CTRL = 0x0004; // Reset transfer mode SPU_CTRL = 0xc001; // Enable SPU, DAC, CD audio, disable DMA request @@ -162,12 +164,18 @@ void SpuInit(void) { } size_t SpuRead(uint32_t *data, size_t size) { + _sdk_validate_args(data && size, 0); + return _dma_transfer(data, size, 0) * 4; } size_t SpuWrite(const uint32_t *data, size_t size) { - if (_transfer_addr < WRITABLE_AREA_ADDR) + _sdk_validate_args(data && size, 0); + + if (_transfer_addr < WRITABLE_AREA_ADDR) { + _sdk_log("ignoring attempt to write to capture buffers at 0x%05x\n", _transfer_addr); return 0; + } // I/O transfer mode is not that useful, but whatever. if (_transfer_mode) @@ -177,6 +185,8 @@ size_t SpuWrite(const uint32_t *data, size_t size) { } size_t SpuWritePartly(const uint32_t *data, size_t size) { + //_sdk_validate_args(data && size, 0); + size_t _size = SpuWrite(data, size); _transfer_addr += (_size + 1) / 2; @@ -188,6 +198,10 @@ SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) { return mode; } +SPU_TransferMode SpuGetTransferMode(void) { + return _transfer_mode; +} + uint32_t SpuSetTransferStartAddr(uint32_t addr) { if (addr > 0x7ffff) return 0; @@ -196,6 +210,10 @@ uint32_t SpuSetTransferStartAddr(uint32_t addr) { return addr; } +uint32_t SpuGetTransferStartAddr(void) { + return _transfer_addr * 8; +} + int SpuIsTransferCompleted(int mode) { if (!mode) return ((SPU_STAT >> 10) & 1) ^ 1; diff --git a/template/CMakeLists.txt b/template/CMakeLists.txt index a3399db..e665c7e 100644 --- a/template/CMakeLists.txt +++ b/template/CMakeLists.txt @@ -17,5 +17,5 @@ psn00bsdk_add_cd_image( iso # Target name template # Output file name (= 
template.bin + template.cue) iso.xml # Path to config file - DEPENDS template + DEPENDS template system.cnf ) diff --git a/template/iso.xml b/template/iso.xml index 29fbd2d..96ea23a 100644 --- a/template/iso.xml +++ b/template/iso.xml @@ -6,10 +6,7 @@ a file from the source tree, you'll have to prepend its path with ${PROJECT_SOURCE_DIR}. --> -<iso_project - image_name="${CD_IMAGE_NAME}.bin" - cue_sheet="${CD_IMAGE_NAME}.cue" -> +<iso_project> <track type="data"> <!-- The "volume", "volume_set", "publisher", "data_preparer" and diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index c48bc10..9246e19 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -38,11 +38,7 @@ file( ) add_library(lzp STATIC ${_sources}) -target_include_directories( - lzp PUBLIC - lzpack/lzp - ${LIBPSN00B_PATH}/include/lzp -) +target_include_directories(lzp PUBLIC ${LIBPSN00B_PATH}/lzp) ## Executables diff --git a/tools/mkpsxiso b/tools/mkpsxiso -Subproject 7bcb4ad2086a8004da32d0ee2289093ed8d4872 +Subproject 9f6275f08829ea9de8122c8232a019e8724acbb diff --git a/tools/tinyxml2 b/tools/tinyxml2 -Subproject e45d9d16d430a3f5d3eee9fe40d5e194e1e5e63 +Subproject e05956094c27117f989d22f25b75633123d72a8 diff --git a/tools/util/elf2x.c b/tools/util/elf2x.c index 38ed60d..fc0ea9f 100644 --- a/tools/util/elf2x.c +++ b/tools/util/elf2x.c @@ -237,13 +237,10 @@ int main(int argc, char** argv) { exe.params.t_size = exe_tsize; exe.params.pc0 = head.prg_entry_addr; - // Some later PAL BIOS versions seem to actually verify the license string - // in the executable (despite what the nocash docs claim) and display the - // dreaded "insert PlayStation CD-ROM" screen if it's not valid. strncpy( exe.header, "PS-X EXE", 8 ); - strcpy( exe.license, - "Sony Computer Entertainment Inc. for Europe area" ); - strcpy( exe.pad2, "Built using GCC and PSn00bSDK libraries" ); + //strcpy( exe.license, + //"Sony Computer Entertainment Inc. 
for Europe area" ); + //strcpy( exe.pad2, "Built using GCC and PSn00bSDK libraries" ); // Write file |
