aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: spicyjpeg <thatspicyjpeg@gmail.com> 2023-05-11 23:08:11 +0200
committer: spicyjpeg <thatspicyjpeg@gmail.com> 2023-05-11 23:08:11 +0200
commit: 2021cdfca29dc5c98e570a674ac97f92f47a1129 (patch)
tree: a7355b8852ae4e9d217560b0cab2dcc02ab8c249
parent: 3b696fc431a9c3f2aa7ea4f27aec20ce5dd67859 (diff)
download: psn00bsdk-2021cdfca29dc5c98e570a674ac97f92f47a1129.tar.gz
Add GPU IRQ variants of all display list APIs
-rw-r--r-- doc/drawing_queue.md 105
-rw-r--r-- libpsn00b/include/psxgpu.h 5
-rw-r--r-- libpsn00b/psxgpu/common.c 81
-rw-r--r-- libpsn00b/psxgpu/drawing.c 148
-rw-r--r-- libpsn00b/psxgpu/env.c 86
-rw-r--r-- libpsn00b/psxgpu/image.c 12
6 files changed, 323 insertions, 114 deletions
diff --git a/doc/drawing_queue.md b/doc/drawing_queue.md
new file mode 100644
index 0000000..4fa83f7
--- /dev/null
+++ b/doc/drawing_queue.md
@@ -0,0 +1,105 @@
+
+# GPU drawing queue
+
+`libpsxgpu` manages access to the GPU by implementing a software-driven queue.
+This queue, separate from the GPU's internal command FIFO, allows for high-level
+management of GPU operations such as display list sending, VRAM image uploads
+and framebuffer readback, in a similar way to the drawing queue system
+implemented behind the scenes by the official SDK.
+
+The queue is managed internally by the library and can hold up to 16 drawing
+operations ("DrawOps"). Each DrawOp is represented by a pointer to a function,
+alongside any arguments to be passed to it. Whenever the GPU is idle,
+`libpsxgpu` fetches a DrawOp from the queue and calls its respective function,
+which should then proceed to actually send commands to the GPU or set up and
+start a DMA transfer. `DrawSync()` can be called to wait for the queue to become
+empty or get its current length, while `DrawSyncCallback()` may be used to
+register a callback that will be invoked once the GPU is idle and no more
+DrawOps are pending.
+
+Completion of each DrawOp (and transition of the GPU from busy to idle state) is
+signalled through one of two means:
+
+- the DMA channel 2 IRQ, fired automatically by the DMA unit when a data
+ transfer such as a VRAM upload or a display list has finished executing;
+- the GPU IRQ, triggered manually using the `GP0(0x1f)` command or the `DR_IRQ`
+ primitive.
+
+Note that the end of a DMA transfer does not necessarily imply that the GPU has
+finished executing all commands, as the last command issued may still be in
+progress. The GPU IRQ is thus provided as a more reliable way to detect the
+completion of certain commands.
+
+## Built-in DrawOps
+
+The library includes a number of built-in DrawOps for the most common use cases.
+The following APIs are wrappers around DrawOps:
+
+- `DrawBuffer()` and `DrawBufferIRQ()` queue a new DrawOp to start a DMA
+ transfer in chunked mode (sending one word at a time) with the specified
+ starting address and number of words. `DrawBuffer2()` and `DrawBufferIRQ2()`
+ are the underlying DrawOp functions respectively.
+- `DrawOTag()` and `DrawOTagIRQ()` queue a new DrawOp to start a DMA transfer in
+ linked-list mode with the specified starting address, with `DrawOTag2()` and
+ `DrawOTagIRQ2()` being the respective DrawOp functions.
+- `PutDrawEnv()`, `PutDrawEnvFast()`, `DrawOTagEnv()` and `DrawOTagEnvIRQ()`
+ insert drawing environment setup commands as the first (or only) item in a
+ display list, then proceed to pass it to `DrawOTag()`. The setup packet
+ linked into the display list is stored as part of the `DRAWENV` structure.
+- `LoadImage()` and `StoreImage()` copy the provided coordinates into a
+ temporary buffer, then proceed to enqueue a DrawOp to actually start the VRAM
+ transfer. The synchronous variants of these APIs are `LoadImage2()` and
+ `StoreImage2()` respectively.
+- `MoveImage()` saves the provided coordinates into a temporary buffer, then
+ enqueues a DrawOp that will issue a `GP0(0x80)` VRAM blitting command. As
+ this command is handled entirely by the GPU with no DMA transfers involved,
+ the GPU IRQ is used to detect its completion.
+
+## Custom DrawOps
+
+Unlike the official SDK, `libpsxgpu` exposes the drawing queue by providing a
+way to enqueue arbitrary custom DrawOps. This can be useful for profiling
+purposes or to work around specific GPU bugs (see the use cases section).
+
+Custom DrawOps can be pushed into the queue by calling `EnqueueDrawOp()` and
+passing a pointer to the callback function in charge of issuing the DrawOp's
+commands to the GPU, as well as up to 3 arguments to be passed through to it.
+The function must:
+
+- call `SetDrawOpType()` to let the library know which type of IRQ it shall wait
+ for before moving on to the next DrawOp (either `DRAWOP_TYPE_DMA` or
+ `DRAWOP_TYPE_GPU_IRQ`);
+- wait until the GPU is ready to accept commands by polling the status bits in
+ `GPU_STAT` and make sure DMA channel 2 is also idle before proceeding;
+- issue any commands to the GPU's GP0 register and/or set up a DMA transfer,
+ terminating them with a `GP0(0x1f)` IRQ command if appropriate.
+
+Note that DrawOps are called from within the exception handler's context and
+must thus not block for significant periods of time, manipulate COP0 registers
+or wait for any IRQs to occur. They are also restricted from manipulating the
+drawing queue by e.g. calling `EnqueueDrawOp()`, `DrawOTag()` or any other
+function that enqueues a DrawOp.
+
+## Use cases
+
+### Scissoring commands
+
+The GPU provides commands to set the origin of all X/Y coordinates passed to it
+as well as a scissoring region, all pixels outside of which are automatically
+masked out during drawing. These commands are issued to the GP0 register and can
+be inserted in a display list through the `DR_OFFSET` and `DR_AREA` primitives,
+however they will *not* go through the GPU's command FIFO like most other
+primitives. They will instead take effect immediately, resulting in graphical
+glitches if the GPU is already busy processing a drawing command (i.e. if they
+are not the very first commands in a display list).
+
+The software-driven drawing queue provides a way around this. By splitting up a
+frame's display list into multiple chunks, one for each scissoring command
+issued, it is possible to always place scissoring commands at the beginning of a
+chunk. Each chunk can be terminated with a `DR_IRQ` primitive and queued for
+drawing using `DrawOTagIRQ()` to ensure the GPU goes idle before the next chunk
+is sent, preventing scissoring commands from being received by the GPU while
+busy.
+
+-----------------------------------------
+_Last updated on 2023-05-11 by spicyjpeg_
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h
index 2e9e4eb..d7f1ad5 100644
--- a/libpsn00b/include/psxgpu.h
+++ b/libpsn00b/include/psxgpu.h
@@ -606,10 +606,15 @@ void MoveImage2(const RECT *rect, int x, int y);
void ClearOTagR(uint32_t *ot, size_t length);
void ClearOTag(uint32_t *ot, size_t length);
int DrawOTag(const uint32_t *ot);
+int DrawOTagIRQ(const uint32_t *ot);
int DrawOTagEnv(const uint32_t *ot, DRAWENV *env);
+int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env);
void DrawOTag2(const uint32_t *ot);
+void DrawOTagIRQ2(const uint32_t *ot);
int DrawBuffer(const uint32_t *buf, size_t length);
+int DrawBufferIRQ(const uint32_t *buf, size_t length);
void DrawBuffer2(const uint32_t *buf, size_t length);
+void DrawBufferIRQ2(const uint32_t *buf, size_t length);
void DrawPrim(const uint32_t *pri);
void AddPrim(uint32_t *ot, const void *pri);
diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c
index 8af6c5b..7e0758b 100644
--- a/libpsn00b/psxgpu/common.c
+++ b/libpsn00b/psxgpu/common.c
@@ -270,87 +270,6 @@ void *DrawSyncCallback(void (*func)(void)) {
return old_callback;
}
-/* OT and primitive drawing API */
-
-void ClearOTagR(uint32_t *ot, size_t length) {
- _sdk_validate_args_void(ot && length);
-
- DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1];
- DMA_BCR(DMA_OTC) = length & 0xffff;
- DMA_CHCR(DMA_OTC) = 0x11000002;
-
- while (DMA_CHCR(DMA_OTC) & (1 << 24))
- __asm__ volatile("");
-}
-
-void ClearOTag(uint32_t *ot, size_t length) {
- _sdk_validate_args_void(ot && length);
-
- // DMA6 only supports writing to RAM in reverse order (last to first), so
- // the OT has to be cleared in software here. This function is thus much
- // slower than ClearOTagR().
- // https://problemkaputt.de/psx-spx.htm#dmachannels
- for (int i = 0; i < (length - 1); i++)
- ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff;
- //setaddr(&ot[i], &ot[i + 1]);
-
- ot[length - 1] = 0xffffff;
- //termPrim(&ot[length - 1]);
-}
-
-void AddPrim(uint32_t *ot, const void *pri) {
- _sdk_validate_args_void(ot && pri);
-
- addPrim(ot, pri);
-}
-
-int DrawOTag(const uint32_t *ot) {
- _sdk_validate_args(ot, -1);
-
- return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) ot, 0, 0);
-}
-
-void DrawOTag2(const uint32_t *ot) {
- _sdk_validate_args_void(ot);
-
- SetDrawOpType(DRAWOP_TYPE_DMA);
- GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
-
- while (DMA_CHCR(DMA_GPU) & (1 << 24))
- __asm__ volatile("");
-
- DMA_MADR(DMA_GPU) = (uint32_t) ot;
- DMA_BCR(DMA_GPU) = 0;
- DMA_CHCR(DMA_GPU) = 0x01000401;
-}
-
-int DrawBuffer(const uint32_t *buf, size_t length) {
- _sdk_validate_args(buf && length && (length <= 0xffff), -1);
-
- return EnqueueDrawOp((void *) &DrawBuffer2, (uint32_t) buf, length, 0);
-}
-
-void DrawBuffer2(const uint32_t *buf, size_t length) {
- _sdk_validate_args_void(buf && length && (length <= 0xffff));
-
- SetDrawOpType(DRAWOP_TYPE_DMA);
- GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
-
- while (DMA_CHCR(DMA_GPU) & (1 << 24))
- __asm__ volatile("");
-
- DMA_MADR(DMA_GPU) = (uint32_t) buf;
- DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16);
- DMA_CHCR(DMA_GPU) = 0x01000201;
-}
-
-void DrawPrim(const uint32_t *pri) {
- _sdk_validate_args_void(pri);
-
- DrawSync(0);
- DrawBuffer2(&pri[1], getlen(pri));
-}
-
/* Queue pause/resume API */
int IsIdleGPU(int timeout) {
diff --git a/libpsn00b/psxgpu/drawing.c b/libpsn00b/psxgpu/drawing.c
new file mode 100644
index 0000000..161b2f7
--- /dev/null
+++ b/libpsn00b/psxgpu/drawing.c
@@ -0,0 +1,148 @@
+/*
+ * PSn00bSDK GPU library (drawing/display list functions)
+ * (C) 2022-2023 spicyjpeg - MPL licensed
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <psxetc.h>
+#include <psxgpu.h>
+#include <hwregs_c.h>
+
+/* Private utilities */
+
+// This function is actually referenced in env.c as well, so it can't be static.
+void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot) {
+ SetDrawOpType(type); // Select which IRQ marks this DrawOp as complete
+ GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
+
+ while (DMA_CHCR(DMA_GPU) & (1 << 24)) // Spin while CHCR bit 24 is set (presumably the channel's busy flag)
+ __asm__ volatile("");
+
+ DMA_MADR(DMA_GPU) = (uint32_t) ot; // Address of the first display list entry
+ DMA_BCR(DMA_GPU) = 0;
+ DMA_CHCR(DMA_GPU) = 0x01000401; // Start the transfer (linked-list mode per doc/drawing_queue.md)
+}
+
+static void _send_buffer( // Starts a DMA transfer of `length` words from `buf` to the GPU
+ GPU_DrawOpType type, const uint32_t *buf, size_t length
+) {
+ SetDrawOpType(type); // Select which IRQ marks this DrawOp as complete
+ GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
+
+ while (DMA_CHCR(DMA_GPU) & (1 << 24)) // Spin while CHCR bit 24 is set (presumably the channel's busy flag)
+ __asm__ volatile("");
+
+ DMA_MADR(DMA_GPU) = (uint32_t) buf; // Address of the first word to send
+ DMA_BCR(DMA_GPU) = 0x00000001 | (length << 16); // Low half = 1, high half = length (one word per chunk per doc/drawing_queue.md)
+ DMA_CHCR(DMA_GPU) = 0x01000201; // Start the transfer (chunked mode per doc/drawing_queue.md)
+}
+
+/* Buffer and primitive drawing API */
+
+int DrawOTag(const uint32_t *ot) { // Queues a display list; the DrawOp completes on the DMA IRQ
+ _sdk_validate_args(ot, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list, // DrawOp callback, receives the arguments below
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) ot,
+ 0 // Third argument slot is unused
+ );
+}
+
+int DrawOTagIRQ(const uint32_t *ot) { // Queues a display list; the DrawOp completes on the GPU IRQ
+ _sdk_validate_args(ot, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list, // DrawOp callback, receives the arguments below
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ, // The list is expected to trigger the IRQ itself (e.g. via DR_IRQ)
+ (uint32_t) ot,
+ 0 // Third argument slot is unused
+ );
+}
+
+int DrawBuffer(const uint32_t *buf, size_t length) { // Queues a chunked DMA transfer of `length` words; completes on the DMA IRQ
+ _sdk_validate_args(buf && length && (length <= 0xffff), -1); // length must fit the 16-bit DMA block count
+
+ return EnqueueDrawOp(
+ (void *) &_send_buffer, // Takes (type, buf, length); DrawBuffer2() only takes (buf, length)
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) buf,
+ (uint32_t) length
+ );
+}
+
+int DrawBufferIRQ(const uint32_t *buf, size_t length) { // Queues a chunked DMA transfer of `length` words; completes on the GPU IRQ
+ _sdk_validate_args(buf && length && (length <= 0xffff), -1); // length must fit the 16-bit DMA block count
+
+ return EnqueueDrawOp(
+ (void *) &_send_buffer, // Takes (type, buf, length); DrawBuffer2() would ignore the IRQ type
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ,
+ (uint32_t) buf,
+ (uint32_t) length
+ );
+}
+
+void DrawOTag2(const uint32_t *ot) { // Starts the linked-list transfer directly, without enqueuing
+ _sdk_validate_args_void(ot);
+
+ _send_linked_list(DRAWOP_TYPE_DMA, ot); // Completion is signalled through the DMA IRQ
+}
+
+void DrawOTagIRQ2(const uint32_t *ot) { // Starts the linked-list transfer directly, without enqueuing
+ _sdk_validate_args_void(ot);
+
+ _send_linked_list(DRAWOP_TYPE_GPU_IRQ, ot); // Completion is signalled through the GPU IRQ
+}
+
+void DrawBuffer2(const uint32_t *buf, size_t length) { // Starts the buffer transfer directly, without enqueuing
+ _sdk_validate_args_void(buf && length && (length <= 0xffff));
+
+ _send_buffer(DRAWOP_TYPE_DMA, buf, length); // Completion is signalled through the DMA IRQ
+}
+
+void DrawBufferIRQ2(const uint32_t *buf, size_t length) { // Starts the buffer transfer directly, without enqueuing
+ _sdk_validate_args_void(buf && length && (length <= 0xffff));
+
+ _send_buffer(DRAWOP_TYPE_GPU_IRQ, buf, length); // Completion is signalled through the GPU IRQ
+}
+
+void DrawPrim(const uint32_t *pri) { // Sends a single primitive outside of the queue
+ _sdk_validate_args_void(pri);
+
+ DrawSync(0); // Presumably waits for the drawing queue to drain first - TODO confirm the meaning of 0
+ DrawBuffer2(&pri[1], getlen(pri)); // Skips the first word of the primitive; getlen() presumably yields the payload length in words
+}
+
+/* Helper functions */
+
+void ClearOTagR(uint32_t *ot, size_t length) { // Clears an ordering table using the OTC DMA channel
+ _sdk_validate_args_void(ot && length);
+
+ DMA_MADR(DMA_OTC) = (uint32_t) &ot[length - 1]; // Begin at the last entry of the table
+ DMA_BCR(DMA_OTC) = length & 0xffff; // Count is truncated to 16 bits
+ DMA_CHCR(DMA_OTC) = 0x11000002;
+
+ while (DMA_CHCR(DMA_OTC) & (1 << 24)) // Spin while CHCR bit 24 is set (presumably transfer still running)
+ __asm__ volatile("");
+}
+
+void ClearOTag(uint32_t *ot, size_t length) { // Clears an ordering table in software, linking entries first to last
+ _sdk_validate_args_void(ot && length);
+
+ // DMA6 only supports writing to RAM in reverse order (last to first), so
+ // the OT has to be cleared in software here. This function is thus much
+ // slower than ClearOTagR().
+ // https://problemkaputt.de/psx-spx.htm#dmachannels
+ for (int i = 0; i < (length - 1); i++) // Covers every entry except the last
+ ot[i] = (uint32_t) &ot[i + 1] & 0x7fffff; // Point entry i at entry i + 1, keeping only the low 23 address bits
+
+ ot[length - 1] = 0xffffff; // Last entry gets 0xffffff (presumably the end-of-list marker)
+}
+
+void AddPrim(uint32_t *ot, const void *pri) { // Argument-checked wrapper around addPrim()
+ _sdk_validate_args_void(ot && pri);
+
+ addPrim(ot, pri);
+}
diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c
index c9e6142..236ae4b 100644
--- a/libpsn00b/psxgpu/env.c
+++ b/libpsn00b/psxgpu/env.c
@@ -34,37 +34,7 @@ static inline uint32_t _get_window_mask(int size) {
return mask & 0x1f;
}
-/* Drawing API */
-
-DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) {
- _sdk_validate_args(env && (w > 0) && (h > 0), 0);
-
- env->clip.x = x;
- env->clip.y = y;
- env->clip.w = w;
- env->clip.h = h;
-
- env->ofs[0] = 0;
- env->ofs[1] = 0;
-
- env->tw.x = 0;
- env->tw.y = 0;
- env->tw.w = 256;
- env->tw.h = 256;
-
- env->tpage = 0x0a;
- env->dtd = 1;
- env->dfe = 0;
- env->isbg = 0;
- setRGB0(env, 0, 0, 0);
-
- env->dr_env.tag = 0;
- return env;
-}
-
-int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
- _sdk_validate_args(ot && env, -1);
-
+static const uint32_t *_build_drawenv_ot(const uint32_t *ot, DRAWENV *env) {
// All commands are grouped into a single display list packet for
// performance reasons using tagless primitives (the GPU does not care
// about the grouping as the display list is parsed by the CPU).
@@ -101,7 +71,59 @@ int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
setWH(fill, env->clip.w, _min(env->clip.h, 0x1ff));
}
- return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0);
+ return (const uint32_t *) prim;
+}
+
+/* Drawing API */
+
+void _send_linked_list(GPU_DrawOpType type, const uint32_t *ot);
+
+DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { // Fills env with defaults for the given area and returns it
+ _sdk_validate_args(env && (w > 0) && (h > 0), 0); // Width and height must be positive
+
+ env->clip.x = x; // clip takes the rectangle passed by the caller
+ env->clip.y = y;
+ env->clip.w = w;
+ env->clip.h = h;
+
+ env->ofs[0] = 0; // Both ofs components start at zero
+ env->ofs[1] = 0;
+
+ env->tw.x = 0; // tw defaults to a 256x256 area at (0, 0)
+ env->tw.y = 0;
+ env->tw.w = 256;
+ env->tw.h = 256;
+
+ env->tpage = 0x0a;
+ env->dtd = 1;
+ env->dfe = 0;
+ env->isbg = 0;
+ setRGB0(env, 0, 0, 0); // Color fields default to black
+
+ env->dr_env.tag = 0; // Reset the tag of the embedded setup packet
+ return env;
+}
+
+int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { // Queues a display list preceded by env's setup packet; completes on the DMA IRQ
+ _sdk_validate_args(ot && env, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list, // DrawOp callback (declared above, not static per the comment in drawing.c)
+ (uint32_t) DRAWOP_TYPE_DMA,
+ (uint32_t) _build_drawenv_ot(ot, env), // Setup packet is built now, at enqueue time
+ 0 // Third argument slot is unused
+ );
+}
+
+int DrawOTagEnvIRQ(const uint32_t *ot, DRAWENV *env) { // Queues a display list preceded by env's setup packet; completes on the GPU IRQ
+ _sdk_validate_args(ot && env, -1);
+
+ return EnqueueDrawOp(
+ (void *) &_send_linked_list, // DrawOp callback (declared above, not static per the comment in drawing.c)
+ (uint32_t) DRAWOP_TYPE_GPU_IRQ,
+ (uint32_t) _build_drawenv_ot(ot, env), // Setup packet is built now, at enqueue time
+ 0 // Third argument slot is unused
+ );
}
void PutDrawEnv(DRAWENV *env) {
diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c
index a08e293..e02c3c2 100644
--- a/libpsn00b/psxgpu/image.c
+++ b/libpsn00b/psxgpu/image.c
@@ -104,7 +104,17 @@ int StoreImage(const RECT *rect, uint32_t *data) {
int MoveImage(const RECT *rect, int x, int y) {
_sdk_validate_args(rect, -1);
- return EnqueueDrawOp((void *) &MoveImage2, (uint32_t) rect, x, y);
+ int index = _next_saved_rect; // Slot in the saved-rect buffer used for this request
+
+ _saved_rects[index] = *rect; // Copy the rect so the queued DrawOp does not reference the caller's storage
+ _next_saved_rect = (index + 1) % QUEUE_LENGTH; // Advance the slot index, wrapping at QUEUE_LENGTH
+
+ return EnqueueDrawOp(
+ (void *) &MoveImage2, // DrawOp callback, receives the arguments below
+ (uint32_t) &_saved_rects[index],
+ (uint32_t) x,
+ (uint32_t) y
+ );
}
void LoadImage2(const RECT *rect, const uint32_t *data) {