From 0a797d2964517ac88e818b0741c5e7674c6fa018 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Thu, 6 Apr 2023 00:40:22 +0200 Subject: Update binutils and mkpsxiso, enable GCC STL headers --- doc/toolchain.md | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'doc') diff --git a/doc/toolchain.md b/doc/toolchain.md index 8e28c24..9b65720 100644 --- a/doc/toolchain.md +++ b/doc/toolchain.md @@ -28,6 +28,7 @@ tested extensively: - ~~GCC 7.4.0 with binutils 2.31~~ (the linker fails to build PS1 DLLs) - GCC **11.1.0** with binutils **2.36** - GCC **11.2.0** with binutils **2.37** +- GCC **12.2.0** with binutils **2.40** If you wish to pick an older GCC release but don't know which binutils version it requires, see [here](https://wiki.osdev.org/Cross-Compiler_Successful_Builds) @@ -78,7 +79,7 @@ for a compatibility table. ```bash ../binutils-<version>/configure \ --prefix=/usr/local/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --with-float=soft + --disable-docs --disable-nls --disable-werror --with-float=soft ``` Replace `<version>` as usual. If you don't want to install the toolchain into @@ -117,9 +118,11 @@ options. ```bash ../gcc-<version>/configure \ --prefix=/usr/local/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --disable-libada --disable-libssp \ - --disable-libquadmath --disable-libstdc++-v3 --with-float=soft \ - --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + --disable-docs --disable-nls --disable-werror --disable-libada \ + --disable-libssp --disable-libquadmath --disable-threads \ + --disable-libgomp --disable-libstdcxx-pch --disable-hosted-libstdcxx \ + --enable-languages=c,c++ --without-isl --without-headers \ + --with-float=soft --with-gnu-as --with-gnu-ld ``` If you previously set a custom installation path, remember to set it here as @@ -172,7 +175,7 @@ that runs on Windows. 
../binutils-<version>/configure \ --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 \ --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --with-float=soft + --disable-docs --disable-nls --disable-werror --with-float=soft ``` Then build binutils again: @@ -187,10 +190,12 @@ that runs on Windows. ```bash ../gcc-<version>/configure \ --build=x86_64-linux-gnu --host=x86_64-w64-mingw32 \ - --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ - --disable-docs --disable-nls --disable-libada --disable-libssp \ - --disable-libquadmath --disable-libstdc++-v3 --with-float=soft \ - --enable-languages=c,c++ --with-gnu-as --with-gnu-ld + --prefix=/tmp/mipsel-none-elf --target=mipsel-none-elf \ + --disable-docs --disable-nls --disable-werror --disable-libada \ + --disable-libssp --disable-libquadmath --disable-threads \ + --disable-libgomp --disable-libstdcxx-pch --disable-hosted-libstdcxx \ + --enable-languages=c,c++ --without-isl --without-headers \ + --with-float=soft --with-gnu-as --with-gnu-ld ``` And build it as usual: @@ -211,14 +216,17 @@ that runs on Windows. ## Note regarding C++ support -C++ support in PSn00bSDK, besides compile-time features like `constexpr`, only -goes as far as basic classes, namespaces and the ability to dynamically create -and delete class objects at any point of the program. The required dependencies -(which are just wrappers around `malloc()` and `free()`) are supplied by `libc`. +C++ support in PSn00bSDK is limited to the freestanding subset of the standard +library provided by GCC, which includes most metaprogramming and compile-time +utilities but not higher level functionality that requires runtime support such +as exceptions, containers or streams. Basic C++ features that only depend on the +compiler (classes, templates, `constexpr` and so on) are fully supported. -Standard C++ libraries are not implemented and likely never going to be -implemented due to bloat concerns that it may introduce. 
Besides, the official -SDK lacks full C++ support as well. +Implementing a full STL, while technically possible, is currently out of the +scope of PSn00bSDK. There are other PS1 SDKs that provide an STL, such as +[psyqo](https://github.com/grumpycoders/pcsx-redux/tree/main/src/mips/psyqo), +and they might be a better fit for your project if you plan to make heavy use of +C++ features. ----------------------------------------- -_Last updated on 2021-11-23 by spicyjpeg_ +_Last updated on 2023-04-05 by spicyjpeg_ -- cgit v1.2.3 From 2021cdfca29dc5c98e570a674ac97f92f47a1129 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Thu, 11 May 2023 23:08:11 +0200 Subject: Add GPU IRQ variants of all display list APIs --- doc/drawing_queue.md | 105 ++++++++++++++++++++++++++++++++ libpsn00b/include/psxgpu.h | 5 ++ libpsn00b/psxgpu/common.c | 81 ------------------------- libpsn00b/psxgpu/drawing.c | 148 +++++++++++++++++++++++++++++++++++++++++++++ libpsn00b/psxgpu/env.c | 86 ++++++++++++++++---------- libpsn00b/psxgpu/image.c | 12 +++- 6 files changed, 323 insertions(+), 114 deletions(-) create mode 100644 doc/drawing_queue.md create mode 100644 libpsn00b/psxgpu/drawing.c (limited to 'doc') diff --git a/doc/drawing_queue.md b/doc/drawing_queue.md new file mode 100644 index 0000000..4fa83f7 --- /dev/null +++ b/doc/drawing_queue.md @@ -0,0 +1,105 @@ + +# GPU drawing queue + +`libpsxgpu` manages access to the GPU by implementing a software driven queue. +This queue, separate from the GPU's internal command FIFO, allows for high-level +management of GPU operations such as display list sending, VRAM image uploads +and framebuffer readback, in a similar way to the drawing queue system +implemented behind the scenes by the official SDK. + +The queue is managed internally by the library and can hold up to 16 drawing +operations ("DrawOps"). Each DrawOp is represented by a pointer to a function, +alongside any arguments to be passed to it. 
Whenever the GPU is idle, +`libpsxgpu` fetches a DrawOp from the queue and calls its respective function, +which should then proceed to actually send commands to the GPU or set up and +start a DMA transfer. `DrawSync()` can be called to wait for the queue to become +empty or get its current length, while `DrawSyncCallback()` may be used to +register a callback that will be invoked once the GPU is idle and no more +DrawOps are pending. + +Completion of each DrawOp (and transition of the GPU from busy to idle state) is +signalled through one of two means: + +- the DMA channel 2 IRQ, fired automatically by the DMA unit when a data + transfer such as a VRAM upload or a display list has finished executing; +- the GPU IRQ, triggered manually using the `GP0(0x1f)` command or the `DR_IRQ` + primitive. + +Note that the end of a DMA transfer does not necessarily imply that the GPU has +finished executing all commands; the last command issued may not yet be done, +hence the ability to use the GPU IRQ instead is provided as a more reliable way +to detect the completion of certain commands. + +## Built-in DrawOps + +The library includes a number of built-in DrawOps for the most common use cases. +The following APIs are wrappers around DrawOps: + +- `DrawBuffer()` and `DrawBufferIRQ()` queue a new DrawOp to start a DMA + transfer in chunked mode (sending one word at a time) with the specified + starting address and number of words. `DrawBuffer2()` and `DrawBufferIRQ2()` + are the underlying DrawOp functions respectively. +- `DrawOTag()` and `DrawOTagIRQ()` queue a new DrawOp to start a DMA transfer in + linked-list mode with the specified starting address, with `DrawOTag2()` and + `DrawOTagIRQ2()` being the respective DrawOp functions. +- `PutDrawEnv()`, `PutDrawEnvFast()`, `DrawOTagEnv()` and `DrawOTagEnvIRQ()` + insert drawing environment setup commands as the first (or only) item in a + display list, then proceed to pass it to `DrawOTag()`. 
The setup packet + linked into the display list is stored as part of the `DRAWENV` structure. +- `LoadImage()` and `StoreImage()` copy the provided coordinates into a + temporary buffer, then proceed to enqueue a DrawOp to actually start the VRAM + transfer. The synchronous variants of these APIs are `LoadImage2()` and + `StoreImage2()` respectively. +- `MoveImage()` saves the provided coordinates into a temporary buffer, then + enqueues a DrawOp that will issue a `GP0(0x80)` VRAM blitting command. As + this command is handled entirely by the GPU with no DMA transfers involved, + the GPU IRQ is used to detect its completion. + +## Custom DrawOps + +Unlike the official SDK, `libpsxgpu` exposes the drawing queue by providing a +way to enqueue arbitrary custom DrawOps. This can be useful for profiling +purposes or to work around specific GPU bugs (see the use cases section). + +Custom DrawOps can be pushed into the queue by calling `EnqueueDrawOp()` and +passing a pointer to the callback function in charge of issuing the DrawOp's +commands to the GPU, as well as up to 3 arguments to be passed through to it. +The function must: + +- call `SetDrawOpType()` to let the library know which type of IRQ it shall wait + for before moving onto the next DrawOp (either `DRAWOP_TYPE_DMA` or + `DRAWOP_TYPE_GPU_IRQ`); +- wait until the GPU is ready to accept commands by polling the status bits in + `GPU_STAT` and make sure DMA channel 2 is also idle before proceeding; +- issue any commands to the GPU's GP0 register and/or set up a DMA transfer, + terminating them with a `GP0(0x1f)` IRQ command if appropriate. + +Note that DrawOps are called from within the exception handler's context and +must thus not block for significant periods of time, manipulate COP0 registers +or wait for any IRQs to occur. They are also restricted from manipulating the +drawing queue by e.g. calling `EnqueueDrawOp()`, `DrawOTag()` or any other +function that enqueues a DrawOp. 
+ +## Use cases + +### Scissoring commands + +The GPU provides commands to set the origin of all X/Y coordinates passed to it +as well as a scissoring region, all pixels outside of which are automatically +masked out during drawing. These commands are issued to the GP0 register and can +be inserted in a display list through the `DR_OFFSET` and `DR_AREA` primitives, +however they will *not* go through the GPU's command FIFO like most other +primitives. They will instead take effect immediately, resulting in graphical +glitches if the GPU is already busy processing a drawing command (i.e. if they +are not the very first commands in a display list). + +The software-driven drawing queue provides a way around this. By splitting up a +frame's display list into multiple chunks, one for each scissoring command +issued, it is possible to always place scissoring commands at the beginning of a +chunk. Each chunk can be terminated with a `DR_IRQ` primitive and queued for +drawing using `DrawOTagIRQ()` to ensure the GPU goes idle before the next chunk +is sent, preventing scissoring commands from being received by the GPU while +busy. 
+ +----------------------------------------- +_Last updated on 2023-05-11 by spicyjpeg_ diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 2e9e4eb..d7f1ad5 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -606,10 +606,15 @@ void MoveImage2(const RECT *rect, int x, int y); void ClearOTagR(uint32_t *ot, size_t length); void ClearOTag(uint32_t *ot, size_t length); int DrawOTag(const uint32_t *ot); +int DrawOTagIRQ(const uint32_t *ot); int DrawOTagEnv(const uint32_t *ot, DRAWENV *env); +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env); void DrawOTag2(const uint32_t *ot); +void DrawOTagIRQ2(const uint32_t *ot); int DrawBuffer(const uint32_t *buf, size_t length); +int DrawBufferIRQ(const uint32_t *buf, size_t length); void DrawBuffer2(const uint32_t *buf, size_t length); +void DrawBufferIRQ2(const uint32_t *buf, size_t length); void DrawPrim(const uint32_t *pri); void AddPrim(uint32_t *ot, const void *pri); diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index 8af6c5b..7e0758b 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -270,87 +270,6 @@ void *DrawSyncCallback(void (*func)(void)) { return old_callback; } -/* OT and primitive drawing API */ - -void ClearOTagR(uint32_t *ot, size_t length) { - _sdk_validate_args_void(ot && length); - - DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1]; - DMA_BCR(DMA_OTC) = length & 0xffff; - DMA_CHCR(DMA_OTC) = 0x11000002; - - while (DMA_CHCR(DMA_OTC) & (1 << 24)) - __asm__ volatile(""); -} - -void ClearOTag(uint32_t *ot, size_t length) { - _sdk_validate_args_void(ot && length); - - // DMA6 only supports writing to RAM in reverse order (last to first), so - // the OT has to be cleared in software here. This function is thus much - // slower than ClearOTagR(). 
- // https://problemkaputt.de/psx-spx.htm#dmachannels - for (int i = 0; i < (length - 1); i++) - ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff; - //setaddr(&ot[i], &ot[i + 1]); - - ot[length - 1] = 0xffffff; - //termPrim(&ot[length - 1]); -} - -void AddPrim(uint32_t *ot, const void *pri) { - _sdk_validate_args_void(ot && pri); - - addPrim(ot, pri); -} - -int DrawOTag(const uint32_t *ot) { - _sdk_validate_args(ot, -1); - - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) ot, 0, 0); -} - -void DrawOTag2(const uint32_t *ot) { - _sdk_validate_args_void(ot); - - SetDrawOpType(DRAWOP_TYPE_DMA); - GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 - - while (DMA_CHCR(DMA_GPU) & (1 << 24)) - __asm__ volatile(""); - - DMA_MADR(DMA_GPU) = (uint32_t) ot; - DMA_BCR(DMA_GPU) = 0; - DMA_CHCR(DMA_GPU) = 0x01000401; -} - -int DrawBuffer(const uint32_t *buf, size_t length) { - _sdk_validate_args(buf && length && (length <= 0xffff), -1); - - return EnqueueDrawOp((void *) &DrawBuffer2, (uint32_t) buf, length, 0); -} - -void DrawBuffer2(const uint32_t *buf, size_t length) { - _sdk_validate_args_void(buf && length && (length <= 0xffff)); - - SetDrawOpType(DRAWOP_TYPE_DMA); - GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 - - while (DMA_CHCR(DMA_GPU) & (1 << 24)) - __asm__ volatile(""); - - DMA_MADR(DMA_GPU) = (uint32_t) buf; - DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16); - DMA_CHCR(DMA_GPU) = 0x01000201; -} - -void DrawPrim(const uint32_t *pri) { - _sdk_validate_args_void(pri); - - DrawSync(0); - DrawBuffer2(&pri[1], getlen(pri)); -} - /* Queue pause/resume API */ int IsIdleGPU(int timeout) { diff --git a/libpsn00b/psxgpu/drawing.c b/libpsn00b/psxgpu/drawing.c new file mode 100644 index 0000000..161b2f7 --- /dev/null +++ b/libpsn00b/psxgpu/drawing.c @@ -0,0 +1,148 @@ +/* + * PSn00bSDK GPU library (drawing/display list functions) + * (C) 2022-2023 spicyjpeg - MPL licensed + */ + +#include +#include +#include +#include +#include + +/* Private utilities */ + +// This 
function is actually referenced in env.c as well, so it can't be static. +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) ot; + DMA_BCR(DMA_GPU) = 0; + DMA_CHCR(DMA_GPU) = 0x01000401; +} + +static void _send_buffer( + GPU_DrawOpType type, const uint32_t *buf, size_t length +) { + SetDrawOpType(type); + GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0 + + while (DMA_CHCR(DMA_GPU) & (1 << 24)) + __asm__ volatile(""); + + DMA_MADR(DMA_GPU) = (uint32_t) buf; + DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16); + DMA_CHCR(DMA_GPU) = 0x01000201; +} + +/* Buffer and primitive drawing API */ + +int DrawOTag(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) ot, + 0 + ); +} + +int DrawOTagIRQ(const uint32_t *ot) { + _sdk_validate_args(ot, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) ot, + 0 + ); +} + +int DrawBuffer(const uint32_t *buf, size_t length) { + _sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &_send_buffer, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) buf, + (uint32_t) length + ); +} + +int DrawBufferIRQ(const uint32_t *buf, size_t length) { + _sdk_validate_args(buf && length && (length <= 0xffff), -1); + + return EnqueueDrawOp( + (void *) &_send_buffer, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) buf, + (uint32_t) length + ); +} + +void DrawOTag2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_DMA, ot); +} + +void DrawOTagIRQ2(const uint32_t *ot) { + _sdk_validate_args_void(ot); + + _send_linked_list(DRAWOP_TYPE_GPU_IRQ, ot); +} + +void DrawBuffer2(const uint32_t *buf, size_t length) { + 
_sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_DMA, buf, length); +} + +void DrawBufferIRQ2(const uint32_t *buf, size_t length) { + _sdk_validate_args_void(buf && length && (length <= 0xffff)); + + _send_buffer(DRAWOP_TYPE_GPU_IRQ, buf, length); +} + +void DrawPrim(const uint32_t *pri) { + _sdk_validate_args_void(pri); + + DrawSync(0); + DrawBuffer2(&pri[1], getlen(pri)); +} + +/* Helper functions */ + +void ClearOTagR(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1]; + DMA_BCR(DMA_OTC) = length & 0xffff; + DMA_CHCR(DMA_OTC) = 0x11000002; + + while (DMA_CHCR(DMA_OTC) & (1 << 24)) + __asm__ volatile(""); +} + +void ClearOTag(uint32_t *ot, size_t length) { + _sdk_validate_args_void(ot && length); + + // DMA6 only supports writing to RAM in reverse order (last to first), so + // the OT has to be cleared in software here. This function is thus much + // slower than ClearOTagR(). 
+ // https://problemkaputt.de/psx-spx.htm#dmachannels + for (int i = 0; i < (length - 1); i++) + ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff; + + ot[length - 1] = 0xffffff; +} + +void AddPrim(uint32_t *ot, const void *pri) { + _sdk_validate_args_void(ot && pri); + + addPrim(ot, pri); +} diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c index c9e6142..236ae4b 100644 --- a/libpsn00b/psxgpu/env.c +++ b/libpsn00b/psxgpu/env.c @@ -34,37 +34,7 @@ static inline uint32_t _get_window_mask(int size) { return mask & 0x1f; } -/* Drawing API */ - -DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { - _sdk_validate_args(env && (w > 0) && (h > 0), 0); - - env->clip.x = x; - env->clip.y = y; - env->clip.w = w; - env->clip.h = h; - - env->ofs[0] = 0; - env->ofs[1] = 0; - - env->tw.x = 0; - env->tw.y = 0; - env->tw.w = 256; - env->tw.h = 256; - - env->tpage = 0x0a; - env->dtd = 1; - env->dfe = 0; - env->isbg = 0; - setRGB0(env, 0, 0, 0); - - env->dr_env.tag = 0; - return env; -} - -int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { - _sdk_validate_args(ot && env, -1); - +static const uint32_t *_build_drawenv_ot(const uint32_t *ot, DRAWENV *env) { // All commands are grouped into a single display list packet for // performance reasons using tagless primitives (the GPU does not care // about the grouping as the display list is parsed by the CPU). 
@@ -101,7 +71,59 @@ int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { setWH(fill, env->clip.w, _min(env->clip.h, 0x1ff)); } - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0); + return (const uint32_t *) prim; +} + +/* Drawing API */ + +void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot); + +DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { + _sdk_validate_args(env && (w > 0) && (h > 0), 0); + + env->clip.x = x; + env->clip.y = y; + env->clip.w = w; + env->clip.h = h; + + env->ofs[0] = 0; + env->ofs[1] = 0; + + env->tw.x = 0; + env->tw.y = 0; + env->tw.w = 256; + env->tw.h = 256; + + env->tpage = 0x0a; + env->dtd = 1; + env->dfe = 0; + env->isbg = 0; + setRGB0(env, 0, 0, 0); + + env->dr_env.tag = 0; + return env; +} + +int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { + _sdk_validate_args(ot && env, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_DMA, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); +} + +int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env) { + _sdk_validate_args(ot && env, -1); + + return EnqueueDrawOp( + (void *) &_send_linked_list, + (uint32_t) DRAWOP_TYPE_GPU_IRQ, + (uint32_t) _build_drawenv_ot(ot, env), + 0 + ); } void PutDrawEnv(DRAWENV *env) { diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index a08e293..e02c3c2 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -104,7 +104,17 @@ int StoreImage(const RECT *rect, uint32_t *data) { int MoveImage(const RECT *rect, int x, int y) { _sdk_validate_args(rect, -1); - return EnqueueDrawOp((void *) &MoveImage2, (uint32_t) rect, x, y); + int index = _next_saved_rect; + + _saved_rects[index] = *rect; + _next_saved_rect = (index + 1) % QUEUE_LENGTH; + + return EnqueueDrawOp( + (void *) &MoveImage2, + (uint32_t) &_saved_rects[index], + (uint32_t) x, + (uint32_t) y + ); } void LoadImage2(const RECT *rect, const uint32_t *data) { -- cgit v1.2.3