about summary refs log tree commit diff
diff options
context:
space:
mode:
author spicyjpeg <thatspicyjpeg@gmail.com> 2022-10-18 15:51:52 +0200
committer spicyjpeg <thatspicyjpeg@gmail.com> 2022-10-18 15:51:52 +0200
commit b71a55bc489db6bc9beca5cee9cd584e82846ac8 (patch)
tree 11b668df8f90b92451ef468fa5f01d54c8204e38
parent 2f100c78c0f12b56bcd73c203e6216d415d9f772 (diff)
download psn00bsdk-b71a55bc489db6bc9beca5cee9cd584e82846ac8.tar.gz
Add MoveImage(), use draw queue for psxgpu VRAM APIs
-rw-r--r-- libpsn00b/include/psxgpu.h 22
-rw-r--r-- libpsn00b/psxgpu/common.c 176
-rw-r--r-- libpsn00b/psxgpu/env.c 4
-rw-r--r-- libpsn00b/psxgpu/image.c 43
-rw-r--r-- libpsn00b/psxspu/common.c 58
5 files changed, 181 insertions, 122 deletions
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h
index 0e7ec00..f2568b0 100644
--- a/libpsn00b/include/psxgpu.h
+++ b/libpsn00b/include/psxgpu.h
@@ -488,20 +488,30 @@ void PutDrawEnvFast(DRAWENV *env);
int GetODE(void);
int VSync(int mode);
-int DrawSync(int mode);
-
void *VSyncHaltFunction(void (*func)(void));
void *VSyncCallback(void (*func)(void));
+
+int EnqueueDrawOp(
+ void (*func)(uint32_t, uint32_t, uint32_t),
+ uint32_t arg1,
+ uint32_t arg2,
+ uint32_t arg3
+);
+int DrawSync(int mode);
void *DrawSyncCallback(void (*func)(void));
-void LoadImage(const RECT *rect, const uint32_t *data);
-void StoreImage(const RECT *rect, uint32_t *data);
+int LoadImage(const RECT *rect, const uint32_t *data);
+int StoreImage(const RECT *rect, uint32_t *data);
+int MoveImage(const RECT *rect, int x, int y);
+void LoadImage2(const RECT *rect, const uint32_t *data);
+void StoreImage2(const RECT *rect, uint32_t *data);
+void MoveImage2(const RECT *rect, int x, int y);
void ClearOTagR(uint32_t *ot, size_t length);
void ClearOTag(uint32_t *ot, size_t length);
-void DrawOTag(const uint32_t *ot);
+int DrawOTag(const uint32_t *ot);
+int DrawOTagEnv(const uint32_t *ot, DRAWENV *env);
void DrawOTag2(const uint32_t *ot);
-void DrawOTagEnv(const uint32_t *ot, DRAWENV *env);
void DrawPrim(const uint32_t *pri);
void AddPrim(uint32_t *ot, const void *pri);
diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c
index 1e3d9e5..bf70b72 100644
--- a/libpsn00b/psxgpu/common.c
+++ b/libpsn00b/psxgpu/common.c
@@ -16,6 +16,13 @@
static void _default_vsync_halt(void);
+/* Private types */
+
+typedef struct { // One deferred draw operation stored in _draw_queue
+ void (*func)(uint32_t, uint32_t, uint32_t); // Function called to start the operation
+ uint32_t arg1, arg2, arg3; // Arguments passed to func, in this order
+} QueueEntry;
+
/* Internal globals */
GPU_VideoMode _gpu_video_mode;
@@ -24,10 +31,10 @@ static void (*_vsync_halt_func)(void) = &_default_vsync_halt;
static void (*_vsync_callback)(void) = (void *) 0;
static void (*_drawsync_callback)(void) = (void *) 0;
-static const uint32_t *volatile _draw_queue[QUEUE_LENGTH];
-static volatile uint8_t _queue_head, _queue_tail, _queue_length;
-static volatile uint32_t _vblank_counter;
-static volatile uint16_t _last_hblank;
+static volatile QueueEntry _draw_queue[QUEUE_LENGTH];
+static volatile uint8_t _queue_head, _queue_tail, _queue_length;
+static volatile uint32_t _vblank_counter;
+static volatile uint16_t _last_hblank;
/* Private utilities and interrupt handlers */
@@ -49,11 +56,11 @@ static void _gpu_dma_handler(void) {
while (!(GPU_GP1 & (1 << 26)))
__asm__ volatile("");
- if (_queue_length) {
- DrawOTag2(_draw_queue[_queue_head++]);
+ if (--_queue_length) {
+ QueueEntry *entry = &_draw_queue[_queue_head++];
+ _queue_head %= QUEUE_LENGTH;
- _queue_length--;
- _queue_head %= QUEUE_LENGTH;
+ entry->func(entry->arg1, entry->arg2, entry->arg3);
} else {
GPU_GP1 = 0x04000000; // Disable DMA request
@@ -103,7 +110,7 @@ void ResetGraph(int mode) {
_last_hblank = 0;
}
-/* Syncing API */
+/* VSync() API */
// TODO: add support for no$psx's "halt" register
static void _default_vsync_halt(void) {
@@ -144,27 +151,6 @@ int VSync(int mode) {
return delta;
}
-int DrawSync(int mode) {
- if (mode)
- return (DMA_BCR(2) >> 16);
-
- // Wait for the queue to become empty.
- // TODO: add a timeout
- while (_queue_length)
- __asm__ volatile("");
-
- // Wait for any DMA transfer to finish if DMA is enabled.
- if (GPU_GP1 & (3 << 29)) {
- while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24)))
- __asm__ volatile("");
- }
-
- while (!(GPU_GP1 & (1 << 26)))
- __asm__ volatile("");
-
- return 0;
-}
-
void *VSyncHaltFunction(void (*func)(void)) {
void *old_callback = _vsync_halt_func;
_vsync_halt_func = func;
@@ -182,6 +168,80 @@ void *VSyncCallback(void (*func)(void)) {
return old_callback;
}
+/* Command queue API */
+
+// Runs func(arg1, arg2, arg3) immediately if no draw operation is pending, or
+// appends it to the draw queue otherwise.
+//
+// Returns 0 if the operation was started right away, the number of operations
+// that were already pending if it was queued, or -1 if the queue is full (in
+// which case the operation is dropped).
+//
+// This function is normally only used internally, but it is exposed for
+// advanced use cases.
+int EnqueueDrawOp(
+	void (*func)(uint32_t, uint32_t, uint32_t),
+	uint32_t arg1,
+	uint32_t arg2,
+	uint32_t arg3
+) {
+	// If another operation is still pending (_queue_length is non-zero),
+	// append the command to the queue instead of executing it immediately.
+	// Note that interrupts must be disabled *prior* to checking the queue
+	// state; disabling them afterwards would create a race condition where
+	// the DMA transfer could end while interrupts are being disabled.
+	// Interrupts are disabled through the IRQ_MASK register rather than by
+	// calling EnterCriticalSection() for performance reasons.
+	uint16_t mask = IRQ_MASK;
+	IRQ_MASK = 0;
+
+	if (_queue_length) {
+		if (_queue_length >= QUEUE_LENGTH) {
+			IRQ_MASK = mask;
+			_LOG("psxgpu: draw queue overflow, dropping commands\n");
+			return -1;
+		}
+
+		int length = _queue_length;
+		_queue_length = length + 1;
+
+		// _draw_queue is declared volatile (it is also read by the DMA
+		// handler), so the slot must be accessed through a volatile-qualified
+		// pointer rather than discarding the qualifier.
+		volatile QueueEntry *entry = &_draw_queue[_queue_tail++];
+		_queue_tail %= QUEUE_LENGTH;
+
+		entry->func = func;
+		entry->arg1 = arg1;
+		entry->arg2 = arg2;
+		entry->arg3 = arg3;
+
+		IRQ_MASK = mask;
+		return length;
+	}
+
+	// Nothing is pending: count this operation as in progress, then start it
+	// with interrupts restored.
+	_queue_length = 1;
+
+	IRQ_MASK = mask;
+	func(arg1, arg2, arg3);
+	return 0;
+}
+
+// Waits for pending draw operations to finish, or reports how many there are.
+//
+// If mode is non-zero, returns the current number of pending operations
+// (including the one in progress) without waiting. If mode is zero, polls
+// until the queue is empty or VSYNC_TIMEOUT iterations have elapsed; if the
+// queue did become empty, it then busy-waits on the GPU_GP1 and DMA_CHCR(2)
+// status bits. The return value is the number of operations still pending:
+// 0 on success, non-zero if the wait timed out.
+int DrawSync(int mode) {
+	if (mode)
+		return _queue_length;
+
+	// Wait for the queue to become empty.
+	for (int i = VSYNC_TIMEOUT; i; i--) {
+		if (!_queue_length)
+			break;
+	}
+
+	if (!_queue_length) {
+		// Wait for any DMA transfer to finish if DMA is enabled.
+		if (GPU_GP1 & (3 << 29)) {
+			while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24)))
+				__asm__ volatile("");
+		}
+
+		while (!(GPU_GP1 & (1 << 26)))
+			__asm__ volatile("");
+	} else {
+		// Report through _LOG like the other diagnostics in this file,
+		// instead of calling printf() directly.
+		_LOG("psxgpu: DrawSync() timeout\n");
+	}
+
+	return _queue_length;
+}
+
void *DrawSyncCallback(void (*func)(void)) {
EnterCriticalSection();
@@ -214,45 +274,8 @@ void ClearOTag(uint32_t *ot, size_t length) {
ot[length - 1] = 0x00ffffff;
}
-void DrawOTag(const uint32_t *ot) {
- // If GPU DMA is currently busy, append the OT to the queue instead of
- // drawing it immediately. Note that interrupts must be disabled *prior* to
- // checking if DMA is busy; disabling them afterwards would create a race
- // condition where the DMA transfer could end while interrupts are being
- // disabled. Interrupts are disabled through the IRQ_MASK register rather
- // than by calling EnterCriticalSection() for performance reasons.
- uint16_t mask = IRQ_MASK;
- IRQ_MASK = 0;
-
- if (DMA_CHCR(2) & (1 << 24)) {
- if (_queue_length < QUEUE_LENGTH) {
- _draw_queue[_queue_tail++] = ot;
-
- _queue_length++;
- _queue_tail %= QUEUE_LENGTH;
-
- IRQ_MASK = mask;
- return;
- }
-
- IRQ_MASK = mask;
- _LOG("psxgpu: DrawOTag() failed, draw queue full\n");
- return;
- }
-
- IRQ_MASK = mask;
- DrawOTag2(ot);
-}
-
-void DrawOTag2(const uint32_t *ot) {
- GPU_GP1 = 0x04000002;
-
- while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24)))
- __asm__ volatile("");
-
- DMA_MADR(2) = (uint32_t) ot;
- DMA_BCR(2) = 0;
- DMA_CHCR(2) = 0x01000401;
+void AddPrim(uint32_t *ot, const void *pri) { // Thin wrapper that forwards both arguments to addPrim()
+ addPrim(ot, pri);
}
void DrawPrim(const uint32_t *pri) {
@@ -273,8 +296,19 @@ void DrawPrim(const uint32_t *pri) {
DMA_CHCR(2) = 0x01000201;
}
-void AddPrim(uint32_t *ot, const void *pri) {
- addPrim(ot, pri);
+// Adapter with the exact callback signature EnqueueDrawOp() expects. Only the
+// first argument (the address of the OT) is used; DrawOTag2() cannot be passed
+// directly because its function type is not compatible with the callback type.
+static void _draw_otag_op(uint32_t ot, uint32_t arg2, uint32_t arg3) {
+	(void) arg2;
+	(void) arg3;
+
+	DrawOTag2((const uint32_t *) ot);
+}
+
+// Draws the given OT through the draw queue: DrawOTag2() is called right away
+// if no operation is pending, or later once the preceding operations have
+// completed. Returns the value returned by EnqueueDrawOp() (0 if started
+// immediately, the number of operations already pending if queued, -1 if the
+// queue is full). Only the address of ot is stored, so the OT must remain
+// valid until the operation has actually run.
+int DrawOTag(const uint32_t *ot) {
+	return EnqueueDrawOp(&_draw_otag_op, (uint32_t) ot, 0, 0);
+}
+
+void DrawOTag2(const uint32_t *ot) { // Starts the DMA transfer for ot directly, without going through the draw queue
+ GPU_GP1 = 0x04000002; // Enable DMA request, route to GP0
+
+ while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) // Spin while GPU status bit 26 is clear or DMA channel 2 is busy
+ __asm__ volatile("");
+
+ DMA_MADR(2) = (uint32_t) ot; // Transfer starts at the address of ot
+ DMA_BCR(2) = 0;
+ DMA_CHCR(2) = 0x01000401; // Writing the channel control register starts the transfer; the function does not wait for it
}
/* Misc. functions */
diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c
index 5642ad4..1b97026 100644
--- a/libpsn00b/psxgpu/env.c
+++ b/libpsn00b/psxgpu/env.c
@@ -37,7 +37,7 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) {
return env;
}
-void DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
+int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
DR_ENV *prim = &(env->dr_env);
// All commands are grouped into a single display list packet for
@@ -85,7 +85,7 @@ void DrawOTagEnv(const uint32_t *ot, DRAWENV *env) {
//while (!(GPU_GP1 & (1 << 26)))
//__asm__ volatile("");
- DrawOTag((const uint32_t *) prim);
+ return EnqueueDrawOp(&DrawOTag2, (uint32_t) prim, 0, 0);
}
void PutDrawEnv(DRAWENV *env) {
diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c
index a0d7065..c09a59d 100644
--- a/libpsn00b/psxgpu/image.c
+++ b/libpsn00b/psxgpu/image.c
@@ -18,12 +18,7 @@
#define _LOG(...) printf(__VA_ARGS__)
#endif
-static void _load_store_image(
- uint32_t command,
- int mode,
- const RECT *rect,
- uint32_t *data
-) {
+static void _dma_transfer(const RECT *rect, uint32_t *data, int write) {
size_t length = rect->w * rect->h;
if (length % 2)
_LOG("psxgpu: can't transfer an odd number of pixels\n");
@@ -34,18 +29,17 @@ static void _load_store_image(
length += DMA_CHUNK_LENGTH - 1;
}
- DrawSync(0);
GPU_GP1 = 0x04000000; // Disable DMA request
GPU_GP0 = 0x01000000; // Flush cache
- GPU_GP0 = command;
+ GPU_GP0 = write ? 0xa0000000 : 0xc0000000;
//GPU_GP0 = rect->x | (rect->y << 16);
GPU_GP0 = *((const uint32_t *) &(rect->x));
//GPU_GP0 = rect->w | (rect->h << 16);
GPU_GP0 = *((const uint32_t *) &(rect->w));
// Enable DMA request, route to GP0 (2) or from GPU_READ (3)
- GPU_GP1 = 0x04000000 | mode;
+ GPU_GP1 = 0x04000002 | (write ^ 1);
DMA_MADR(2) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
@@ -53,17 +47,38 @@ static void _load_store_image(
else
DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
- DMA_CHCR(2) = 0x01000200 | ((mode & 1) ^ 1);
+ DMA_CHCR(2) = 0x01000200 | write;
}
/* VRAM transfer API */
-void LoadImage(const RECT *rect, const uint32_t *data) {
- _load_store_image(0xa0000000, 2, rect, (uint32_t *) data);
+// Adapter with the exact callback signature EnqueueDrawOp() expects; it
+// converts the queued 32-bit arguments back to the types _dma_transfer() takes.
+// _dma_transfer() cannot be passed directly because its function type is not
+// compatible with the callback type.
+static void _load_image_op(uint32_t rect, uint32_t data, uint32_t write) {
+	_dma_transfer((const RECT *) rect, (uint32_t *) data, (int) write);
+}
+
+// Queues a transfer of data to the VRAM area described by rect (write = 1).
+// Returns the value returned by EnqueueDrawOp() (0 if started immediately, the
+// number of operations already pending if queued, -1 if the queue is full).
+// Only the addresses of rect and data are stored: both must remain valid until
+// the operation has actually run, which may be after this function returns.
+int LoadImage(const RECT *rect, const uint32_t *data) {
+	return EnqueueDrawOp(&_load_image_op, (uint32_t) rect, (uint32_t) data, 1);
}
-void StoreImage(const RECT *rect, uint32_t *data) {
- _load_store_image(0xc0000000, 3, rect, data);
+// Adapter with the exact callback signature EnqueueDrawOp() expects; it
+// converts the queued 32-bit arguments back to the types _dma_transfer() takes.
+// _dma_transfer() cannot be passed directly because its function type is not
+// compatible with the callback type.
+static void _store_image_op(uint32_t rect, uint32_t data, uint32_t write) {
+	_dma_transfer((const RECT *) rect, (uint32_t *) data, (int) write);
+}
+
+// Queues a transfer from the VRAM area described by rect into data (write = 0).
+// Returns the value returned by EnqueueDrawOp() (0 if started immediately, the
+// number of operations already pending if queued, -1 if the queue is full).
+// Only the addresses of rect and data are stored: both must remain valid until
+// the operation has actually run, which may be after this function returns.
+int StoreImage(const RECT *rect, uint32_t *data) {
+	return EnqueueDrawOp(&_store_image_op, (uint32_t) rect, (uint32_t) data, 0);
+}
+
+// Adapter with the exact callback signature EnqueueDrawOp() expects; it
+// converts the queued 32-bit arguments back to the types MoveImage2() takes.
+// MoveImage2() cannot be passed directly because its function type is not
+// compatible with the callback type.
+static void _move_image_op(uint32_t rect, uint32_t x, uint32_t y) {
+	MoveImage2((const RECT *) rect, (int) x, (int) y);
+}
+
+// Queues a call to MoveImage2(rect, x, y). Returns the value returned by
+// EnqueueDrawOp() (0 if started immediately, the number of operations already
+// pending if queued, -1 if the queue is full). Only the address of rect is
+// stored, so it must remain valid until the operation has actually run.
+//
+// NOTE(review): MoveImage2() only writes to GPU_GP0 and starts no DMA
+// transfer, while the only visible code that retires a queue entry is the GPU
+// DMA handler. Confirm that completion of this operation is signalled
+// somewhere, otherwise the pending count may never drop after this call.
+int MoveImage(const RECT *rect, int x, int y) {
+	return EnqueueDrawOp(&_move_image_op, (uint32_t) rect, (uint32_t) x, (uint32_t) y);
+}
+
+void LoadImage2(const RECT *rect, const uint32_t *data) { // Variant of LoadImage() that bypasses the draw queue
+ _dma_transfer(rect, (uint32_t *) data, 1); // write = 1; the const qualifier is cast away only to match the helper's signature
+}
+
+void StoreImage2(const RECT *rect, uint32_t *data) { // Variant of StoreImage() that bypasses the draw queue
+ _dma_transfer(rect, data, 0); // write = 0
+}
+
+void MoveImage2(const RECT *rect, int x, int y) { // Variant of MoveImage() that bypasses the draw queue; writes four words to GPU_GP0 and starts no DMA transfer
+ GPU_GP0 = 0x80000000; // Command word; NOTE(review): presumably the VRAM-to-VRAM copy command - confirm against the GPU documentation
+ //GPU_GP0 = rect->x | (rect->y << 16);
+ GPU_GP0 = *((const uint32_t *) &(rect->x)); // Reads x and y as a single 32-bit word (relies on them being adjacent 16-bit fields)
+ GPU_GP0 = (x & 0xffff) | (y << 16); // x in the low halfword, y in the high halfword
+ //GPU_GP0 = rect->w | (rect->h << 16);
+ GPU_GP0 = *((const uint32_t *) &(rect->w)); // Reads w and h as a single 32-bit word, same layout assumption as above
}
/* .TIM image parsers */
diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c
index d6508c7..380bd3d 100644
--- a/libpsn00b/psxspu/common.c
+++ b/libpsn00b/psxspu/common.c
@@ -34,6 +34,33 @@ static void _wait_status(uint16_t mask, uint16_t value) {
_LOG("psxspu: status register timeout (0x%04x)\n", SPU_STAT);
}
+// Starts a DMA transfer on channel 4 between the buffer at data and the SPU,
+// at the SPU address held in _transfer_addr.
+//
+// length is the size of the buffer in bytes; it is converted to 32-bit words
+// and, if it spans at least one chunk but is not a whole number of chunks,
+// rounded up to the next chunk. write must be 1 to write to the SPU or 0 to
+// read from it (it is OR-ed directly into the channel control register). The
+// function returns as soon as the transfer has been started.
+static void _dma_transfer(uint32_t *data, size_t length, int write) {
+	if (length % 4)
+		_LOG("psxspu: can't transfer a number of bytes that isn't multiple of 4\n");
+
+	length /= 4; // Bytes to 32-bit words
+	if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
+		// length is a size_t, so it is cast to match the %d conversion.
+		_LOG("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", (int) length, DMA_CHUNK_LENGTH);
+		length += DMA_CHUNK_LENGTH - 1;
+	}
+
+	SPU_CTRL &= 0xffcf; // Disable DMA request
+	_wait_status(0x0030, 0x0000);
+
+	// Enable DMA request for writing (2) or reading (3)
+	SPU_ADDR = _transfer_addr;
+	SPU_CTRL |= write ? 0x0020 : 0x0030;
+	_wait_status(0x0400, 0x0000);
+
+	DMA_MADR(4) = (uint32_t) data;
+	if (length < DMA_CHUNK_LENGTH)
+		DMA_BCR(4) = 0x00010000 | length; // One block of `length` words
+	else
+		DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
+
+	DMA_CHCR(4) = 0x01000200 | write;
+}
+
/* Public API */
void SpuInit(void) {
@@ -87,35 +114,8 @@ void SpuInit(void) {
SPU_CD_VOL_R = 0x7fff;
}
-static void _load_store_data(uint32_t *data, size_t length, int mode) {
- if (length % 4)
- _LOG("psxspu: can't transfer a number of bytes that isn't multiple of 4\n");
-
- length /= 4;
- if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
- _LOG("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
- length += DMA_CHUNK_LENGTH - 1;
- }
-
- SPU_CTRL &= 0xffcf; // Disable DMA request
- _wait_status(0x0030, 0x0000);
-
- // Enable DMA request for writing (2) or reading (3)
- SPU_ADDR = _transfer_addr;
- SPU_CTRL |= mode << 4;
- _wait_status(0x0400, 0x0000);
-
- DMA_MADR(4) = (uint32_t) data;
- if (length < DMA_CHUNK_LENGTH)
- DMA_BCR(4) = 0x00010000 | length;
- else
- DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
-
- DMA_CHCR(4) = 0x01000200 | ((mode & 1) ^ 1);
-}
-
void SpuRead(uint32_t *data, size_t size) {
- _load_store_data(data, size, 3);
+ _dma_transfer(data, size, 0);
}
void SpuWrite(const uint32_t *data, size_t size) {
@@ -138,7 +138,7 @@ void SpuWrite(const uint32_t *data, size_t size) {
return;
}
- _load_store_data((uint32_t *) data, size, 2);
+ _dma_transfer((uint32_t *) data, size, 1);
}
SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) {