diff options
| author | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-10-18 15:51:52 +0200 |
|---|---|---|
| committer | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-10-18 15:51:52 +0200 |
| commit | b71a55bc489db6bc9beca5cee9cd584e82846ac8 (patch) | |
| tree | 11b668df8f90b92451ef468fa5f01d54c8204e38 | |
| parent | 2f100c78c0f12b56bcd73c203e6216d415d9f772 (diff) | |
| download | psn00bsdk-b71a55bc489db6bc9beca5cee9cd584e82846ac8.tar.gz | |
Add MoveImage(), use draw queue for psxgpu VRAM APIs
| -rw-r--r-- | libpsn00b/include/psxgpu.h | 22 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/common.c | 176 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/env.c | 4 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/image.c | 43 | ||||
| -rw-r--r-- | libpsn00b/psxspu/common.c | 58 |
5 files changed, 181 insertions, 122 deletions
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 0e7ec00..f2568b0 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -488,20 +488,30 @@ void PutDrawEnvFast(DRAWENV *env); int GetODE(void); int VSync(int mode); -int DrawSync(int mode); - void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); + +int EnqueueDrawOp( + void (*func)(uint32_t, uint32_t, uint32_t), + uint32_t arg1, + uint32_t arg2, + uint32_t arg3 +); +int DrawSync(int mode); void *DrawSyncCallback(void (*func)(void)); -void LoadImage(const RECT *rect, const uint32_t *data); -void StoreImage(const RECT *rect, uint32_t *data); +int LoadImage(const RECT *rect, const uint32_t *data); +int StoreImage(const RECT *rect, uint32_t *data); +int MoveImage(const RECT *rect, int x, int y); +void LoadImage2(const RECT *rect, const uint32_t *data); +void StoreImage2(const RECT *rect, uint32_t *data); +void MoveImage2(const RECT *rect, int x, int y); void ClearOTagR(uint32_t *ot, size_t length); void ClearOTag(uint32_t *ot, size_t length); -void DrawOTag(const uint32_t *ot); +int DrawOTag(const uint32_t *ot); +int DrawOTagEnv(const uint32_t *ot, DRAWENV *env); void DrawOTag2(const uint32_t *ot); -void DrawOTagEnv(const uint32_t *ot, DRAWENV *env); void DrawPrim(const uint32_t *pri); void AddPrim(uint32_t *ot, const void *pri); diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index 1e3d9e5..bf70b72 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -16,6 +16,13 @@ static void _default_vsync_halt(void); +/* Private types */ + +typedef struct { + void (*func)(uint32_t, uint32_t, uint32_t); + uint32_t arg1, arg2, arg3; +} QueueEntry; + /* Internal globals */ GPU_VideoMode _gpu_video_mode; @@ -24,10 +31,10 @@ static void (*_vsync_halt_func)(void) = &_default_vsync_halt; static void (*_vsync_callback)(void) = (void *) 0; static void (*_drawsync_callback)(void) = (void *) 0; -static const uint32_t *volatile _draw_queue[QUEUE_LENGTH]; -static volatile uint8_t _queue_head, _queue_tail, _queue_length; -static volatile uint32_t _vblank_counter; -static volatile uint16_t _last_hblank; +static volatile QueueEntry _draw_queue[QUEUE_LENGTH]; +static volatile uint8_t _queue_head, _queue_tail, _queue_length; +static volatile uint32_t _vblank_counter; +static volatile uint16_t _last_hblank; /* Private utilities and interrupt handlers */ @@ -49,11 +56,11 @@ static void _gpu_dma_handler(void) { while (!(GPU_GP1 & (1 << 26))) __asm__ volatile(""); - if (_queue_length) { - DrawOTag2(_draw_queue[_queue_head++]); + if (--_queue_length) { + QueueEntry *entry = &_draw_queue[_queue_head++]; + _queue_head %= QUEUE_LENGTH; - _queue_length--; - _queue_head %= QUEUE_LENGTH; + entry->func(entry->arg1, entry->arg2, entry->arg3); } else { GPU_GP1 = 0x04000000; // Disable DMA request @@ -103,7 +110,7 @@ void ResetGraph(int mode) { _last_hblank = 0; } -/* Syncing API */ +/* VSync() API */ // TODO: add support for no$psx's "halt" register static void _default_vsync_halt(void) { @@ -144,27 +151,6 @@ int VSync(int mode) { return delta; } -int DrawSync(int mode) { - if (mode) - return (DMA_BCR(2) >> 16); - - // Wait for the queue to become empty. - // TODO: add a timeout - while (_queue_length) - __asm__ volatile(""); - - // Wait for any DMA transfer to finish if DMA is enabled. - if (GPU_GP1 & (3 << 29)) { - while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24))) - __asm__ volatile(""); - } - - while (!(GPU_GP1 & (1 << 26))) - __asm__ volatile(""); - - return 0; -} - void *VSyncHaltFunction(void (*func)(void)) { void *old_callback = _vsync_halt_func; _vsync_halt_func = func; @@ -182,6 +168,80 @@ void *VSyncCallback(void (*func)(void)) { return old_callback; } +/* Command queue API */ + +// This function is normally only used internally, but it is exposed for +// advanced use cases. +int EnqueueDrawOp( + void (*func)(uint32_t, uint32_t, uint32_t), + uint32_t arg1, + uint32_t arg2, + uint32_t arg3 +) { + // If GPU DMA is currently busy, append the command to the queue instead of + // executing it immediately. Note that interrupts must be disabled *prior* + // to checking if DMA is busy; disabling them afterwards would create a + // race condition where the DMA transfer could end while interrupts are + // being disabled. Interrupts are disabled through the IRQ_MASK register + // rather than by calling EnterCriticalSection() for performance reasons. + uint16_t mask = IRQ_MASK; + IRQ_MASK = 0; + + if (_queue_length) { + if (_queue_length >= QUEUE_LENGTH) { + IRQ_MASK = mask; + _LOG("psxgpu: draw queue overflow, dropping commands\n"); + return -1; + } + + int length = _queue_length; + _queue_length = length + 1; + + QueueEntry *entry = &_draw_queue[_queue_tail++]; + _queue_tail %= QUEUE_LENGTH; + + entry->func = func; + entry->arg1 = arg1; + entry->arg2 = arg2; + entry->arg3 = arg3; + + IRQ_MASK = mask; + return length; + } + + _queue_length = 1; + + IRQ_MASK = mask; + func(arg1, arg2, arg3); + return 0; +} + +int DrawSync(int mode) { + if (mode) + return _queue_length; + + // Wait for the queue to become empty. + for (int i = VSYNC_TIMEOUT; i; i--) { + if (!_queue_length) + break; + } + + if (!_queue_length) { + // Wait for any DMA transfer to finish if DMA is enabled. + if (GPU_GP1 & (3 << 29)) { + while (!(GPU_GP1 & (1 << 28)) || (DMA_CHCR(2) & (1 << 24))) + __asm__ volatile(""); + } + + while (!(GPU_GP1 & (1 << 26))) + __asm__ volatile(""); + } else { + printf("psxgpu: DrawSync() timeout\n"); + } + + return _queue_length; +} + void *DrawSyncCallback(void (*func)(void)) { EnterCriticalSection(); @@ -214,45 +274,8 @@ void ClearOTag(uint32_t *ot, size_t length) { ot[length - 1] = 0x00ffffff; } -void DrawOTag(const uint32_t *ot) { - // If GPU DMA is currently busy, append the OT to the queue instead of - // drawing it immediately. Note that interrupts must be disabled *prior* to - // checking if DMA is busy; disabling them afterwards would create a race - // condition where the DMA transfer could end while interrupts are being - // disabled. Interrupts are disabled through the IRQ_MASK register rather - // than by calling EnterCriticalSection() for performance reasons. - uint16_t mask = IRQ_MASK; - IRQ_MASK = 0; - - if (DMA_CHCR(2) & (1 << 24)) { - if (_queue_length < QUEUE_LENGTH) { - _draw_queue[_queue_tail++] = ot; - - _queue_length++; - _queue_tail %= QUEUE_LENGTH; - - IRQ_MASK = mask; - return; - } - - IRQ_MASK = mask; - _LOG("psxgpu: DrawOTag() failed, draw queue full\n"); - return; - } - - IRQ_MASK = mask; - DrawOTag2(ot); -} - -void DrawOTag2(const uint32_t *ot) { - GPU_GP1 = 0x04000002; - - while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) - __asm__ volatile(""); - - DMA_MADR(2) = (uint32_t) ot; - DMA_BCR(2) = 0; - DMA_CHCR(2) = 0x01000401; +void AddPrim(uint32_t *ot, const void *pri) { + addPrim(ot, pri); } void DrawPrim(const uint32_t *pri) { @@ -273,8 +296,19 @@ void DrawPrim(const uint32_t *pri) { DMA_CHCR(2) = 0x01000201; } -void AddPrim(uint32_t *ot, const void *pri) { - addPrim(ot, pri); +int DrawOTag(const uint32_t *ot) { + return EnqueueDrawOp(&DrawOTag2, (uint32_t) ot, 0, 0); +} + +void DrawOTag2(const uint32_t *ot) { + GPU_GP1 = 0x04000002; + + while (!(GPU_GP1 & (1 << 26)) || (DMA_CHCR(2) & (1 << 24))) + __asm__ volatile(""); + + DMA_MADR(2) = (uint32_t) ot; + DMA_BCR(2) = 0; + DMA_CHCR(2) = 0x01000401; } /* Misc. functions */ diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c index 5642ad4..1b97026 100644 --- a/libpsn00b/psxgpu/env.c +++ b/libpsn00b/psxgpu/env.c @@ -37,7 +37,7 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { return env; } -void DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { +int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { DR_ENV *prim = &(env->dr_env); // All commands are grouped into a single display list packet for @@ -85,7 +85,7 @@ void DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { //while (!(GPU_GP1 & (1 << 26))) //__asm__ volatile(""); - DrawOTag((const uint32_t *) prim); + return EnqueueDrawOp(&DrawOTag2, (uint32_t) prim, 0, 0); } void PutDrawEnv(DRAWENV *env) { diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index a0d7065..c09a59d 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -18,12 +18,7 @@ #define _LOG(...) printf(__VA_ARGS__) #endif -static void _load_store_image( - uint32_t command, - int mode, - const RECT *rect, - uint32_t *data -) { +static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { size_t length = rect->w * rect->h; if (length % 2) _LOG("psxgpu: can't transfer an odd number of pixels\n"); @@ -34,18 +29,17 @@ static void _load_store_image( length += DMA_CHUNK_LENGTH - 1; } - DrawSync(0); GPU_GP1 = 0x04000000; // Disable DMA request GPU_GP0 = 0x01000000; // Flush cache - GPU_GP0 = command; + GPU_GP0 = write ? 0xa0000000 : 0xc0000000; //GPU_GP0 = rect->x | (rect->y << 16); GPU_GP0 = *((const uint32_t *) &(rect->x)); //GPU_GP0 = rect->w | (rect->h << 16); GPU_GP0 = *((const uint32_t *) &(rect->w)); // Enable DMA request, route to GP0 (2) or from GPU_READ (3) - GPU_GP1 = 0x04000000 | mode; + GPU_GP1 = 0x04000002 | (write ^ 1); DMA_MADR(2) = (uint32_t) data; if (length < DMA_CHUNK_LENGTH) @@ -53,17 +47,38 @@ static void _load_store_image( else DMA_BCR(2) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); - DMA_CHCR(2) = 0x01000200 | ((mode & 1) ^ 1); + DMA_CHCR(2) = 0x01000200 | write; } /* VRAM transfer API */ -void LoadImage(const RECT *rect, const uint32_t *data) { - _load_store_image(0xa0000000, 2, rect, (uint32_t *) data); +int LoadImage(const RECT *rect, const uint32_t *data) { + return EnqueueDrawOp(&_dma_transfer, (uint32_t) rect, (uint32_t) data, 1); } -void StoreImage(const RECT *rect, uint32_t *data) { - _load_store_image(0xc0000000, 3, rect, data); +int StoreImage(const RECT *rect, uint32_t *data) { + return EnqueueDrawOp(&_dma_transfer, (uint32_t) rect, (uint32_t) data, 0); +} + +int MoveImage(const RECT *rect, int x, int y) { + return EnqueueDrawOp(&MoveImage2, (uint32_t) rect, x, y); +} + +void LoadImage2(const RECT *rect, const uint32_t *data) { + _dma_transfer(rect, (uint32_t *) data, 1); +} + +void StoreImage2(const RECT *rect, uint32_t *data) { + _dma_transfer(rect, data, 0); +} + +void MoveImage2(const RECT *rect, int x, int y) { + GPU_GP0 = 0x80000000; + //GPU_GP0 = rect->x | (rect->y << 16); + GPU_GP0 = *((const uint32_t *) &(rect->x)); + GPU_GP0 = (x & 0xffff) | (y << 16); + //GPU_GP0 = rect->w | (rect->h << 16); + GPU_GP0 = *((const uint32_t *) &(rect->w)); } /* .TIM image parsers */ diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c index d6508c7..380bd3d 100644 --- a/libpsn00b/psxspu/common.c +++ b/libpsn00b/psxspu/common.c @@ -34,6 +34,33 @@ static void _wait_status(uint16_t mask, uint16_t value) { _LOG("psxspu: status register timeout (0x%04x)\n", SPU_STAT); } +static void _dma_transfer(uint32_t *data, size_t length, int write) { + if (length % 4) + _LOG("psxspu: can't transfer a number of bytes that isn't multiple of 4\n"); + + length /= 4; + if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { + _LOG("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + length += DMA_CHUNK_LENGTH - 1; + } + + SPU_CTRL &= 0xffcf; // Disable DMA request + _wait_status(0x0030, 0x0000); + + // Enable DMA request for writing (2) or reading (3) + SPU_ADDR = _transfer_addr; + SPU_CTRL |= write ? 0x0020 : 0x0030; + _wait_status(0x0400, 0x0000); + + DMA_MADR(4) = (uint32_t) data; + if (length < DMA_CHUNK_LENGTH) + DMA_BCR(4) = 0x00010000 | length; + else + DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); + + DMA_CHCR(4) = 0x01000200 | write; +} + /* Public API */ void SpuInit(void) { @@ -87,35 +114,8 @@ void SpuInit(void) { SPU_CD_VOL_R = 0x7fff; } -static void _load_store_data(uint32_t *data, size_t length, int mode) { - if (length % 4) - _LOG("psxspu: can't transfer a number of bytes that isn't multiple of 4\n"); - - length /= 4; - if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _LOG("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); - length += DMA_CHUNK_LENGTH - 1; - } - - SPU_CTRL &= 0xffcf; // Disable DMA request - _wait_status(0x0030, 0x0000); - - // Enable DMA request for writing (2) or reading (3) - SPU_ADDR = _transfer_addr; - SPU_CTRL |= mode << 4; - _wait_status(0x0400, 0x0000); - - DMA_MADR(4) = (uint32_t) data; - if (length < DMA_CHUNK_LENGTH) - DMA_BCR(4) = 0x00010000 | length; - else - DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16); - - DMA_CHCR(4) = 0x01000200 | ((mode & 1) ^ 1); -} - void SpuRead(uint32_t *data, size_t size) { - _load_store_data(data, size, 3); + _dma_transfer(data, size, 0); } void SpuWrite(const uint32_t *data, size_t size) { @@ -138,7 +138,7 @@ void SpuWrite(const uint32_t *data, size_t size) { return; } - _load_store_data((uint32_t *) data, size, 2); + _dma_transfer((uint32_t *) data, size, 1); } SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) { |
