diff options
| author | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-12-18 14:00:52 +0100 |
|---|---|---|
| committer | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-12-18 14:00:52 +0100 |
| commit | 3b7c46ab74548a9a79bfb867551c51dd877c8f4d (patch) | |
| tree | b001e9875385df3a917e51399e2d20ee559d65b0 | |
| parent | 70833192a803061008d2221b27e9baada6042c90 (diff) | |
| download | psn00bsdk-3b7c46ab74548a9a79bfb867551c51dd877c8f4d.tar.gz | |
Misc. bugfixes, add support for DRAWENV texture windows
| -rw-r--r-- | libpsn00b/include/psxgpu.h | 15 | ||||
| -rw-r--r-- | libpsn00b/lzp/lzp.c | 1 | ||||
| -rw-r--r-- | libpsn00b/lzp/qlp.c | 1 | ||||
| -rw-r--r-- | libpsn00b/psxetc/dl.c | 10 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/common.c | 4 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/env.c | 99 | ||||
| -rw-r--r-- | libpsn00b/psxgpu/image.c | 36 | ||||
| -rw-r--r-- | libpsn00b/psxpress/mdec.c | 4 |
8 files changed, 115 insertions, 55 deletions
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index b0c5302..26e560f 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -39,6 +39,11 @@ typedef enum _GPU_VideoMode { #define setTPage(p, tp, abr, x, y) ((p)->tpage = getTPage(tp, abr, x, y)) #define setClut(p, x, y) ((p)->clut = getClut(x, y)) +#define setColor0(p, rgb) (((P_COLOR *) &((p)->r0))->color = (rgb)) +#define setColor1(p, rgb) (((P_COLOR *) &((p)->r1))->color = (rgb)) +#define setColor2(p, rgb) (((P_COLOR *) &((p)->r2))->color = (rgb)) +#define setColor3(p, rgb) (((P_COLOR *) &((p)->r3))->color = (rgb)) + #define setRGB0(p, r, g, b) ((p)->r0 = (r), (p)->g0 = (g), (p)->b0 = (b)) #define setRGB1(p, r, g, b) ((p)->r1 = (r), (p)->g1 = (g), (p)->b1 = (b)) #define setRGB2(p, r, g, b) ((p)->r2 = (r), (p)->g2 = (g), (p)->b2 = (b)) @@ -198,9 +203,15 @@ typedef enum _GPU_VideoMode { typedef struct _P_TAG { uint32_t addr:24; uint32_t len:8; - uint8_t r, g, b, code; + uint32_t color:24; + uint32_t code:8; } P_TAG; +typedef struct _P_COLOR { + uint32_t color:24; + uint32_t pad:8; +} P_COLOR; + typedef struct _POLY_F3 { uint32_t tag; uint8_t r0, g0, b0, code; @@ -402,7 +413,7 @@ typedef struct _SPRT_FIXED SPRT_16; typedef struct _DR_ENV { uint32_t tag; - uint32_t code[15]; + uint32_t code[8]; } DR_ENV; typedef struct _DR_AREA { diff --git a/libpsn00b/lzp/lzp.c b/libpsn00b/lzp/lzp.c index 9f2da48..3ef5b24 100644 --- a/libpsn00b/lzp/lzp.c +++ b/libpsn00b/lzp/lzp.c @@ -1,3 +1,4 @@ +#include <stddef.h> #include <stdio.h> #include <string.h> #include <ctype.h> diff --git a/libpsn00b/lzp/qlp.c b/libpsn00b/lzp/qlp.c index e54f99f..1154a65 100644 --- a/libpsn00b/lzp/qlp.c +++ b/libpsn00b/lzp/qlp.c @@ -1,3 +1,4 @@ +#include <stddef.h> #include <stdio.h> #include <string.h> #include <ctype.h> diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c index ec1e0c4..ff712eb 100644 --- a/libpsn00b/psxetc/dl.c +++ b/libpsn00b/psxetc/dl.c @@ -242,7 +242,7 @@ void *DL_GetMapSymbol(const char *name) { // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html uint32_t hash = _elf_hash(name); - for (int i = _symbol_map.bucket[hash % _symbol_map.nbucket]; i > 0;) { + for (int i = _symbol_map.bucket[hash % _symbol_map.nbucket]; i >= 0;) { if (i >= _symbol_map.nchain) { _sdk_log( "DL_GetMapSymbol() index out of bounds (%d >= %d)\n", @@ -426,7 +426,8 @@ DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) { const uint32_t *ctor_list = DL_GetDLLSymbol(dll, "__CTOR_LIST__"); if (ctor_list) { for (int i = ((int) ctor_list[0]); i >= 1; i--) { - void (*ctor)(void) = (void (*)(void)) ctor_list[i]; + void (*ctor)(void) = (void (*)(void)) + ((uint8_t *) ptr + ctor_list[i + 1]); DL_PRE_CALL(ctor); ctor(); } @@ -444,7 +445,8 @@ void DL_DestroyDLL(DLL *dll) { const uint32_t *dtor_list = DL_GetDLLSymbol(dll, "__DTOR_LIST__"); if (dtor_list) { for (int i = 0; i < ((int) dtor_list[0]); i++) { - void (*dtor)(void) = (void (*)(void)) dtor_list[i + 1]; + void (*dtor)(void) = (void (*)(void)) + ((uint8_t *) dll->ptr + dtor_list[i + 1]); DL_PRE_CALL(dtor); dtor(); } @@ -472,7 +474,7 @@ void *DL_GetDLLSymbol(const DLL *dll, const char *name) { // Go through the hash table's chain until the symbol name matches the one // provided. - for (int i = bucket[_elf_hash(name) % nbucket]; i > 0;) { + for (int i = bucket[_elf_hash(name) % nbucket]; i >= 0;) { if (i >= nchain) { _sdk_log("DL_GetDLLSymbol() index out of bounds (%d >= %d)\n", i, nchain); return 0; diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index 93fdb52..e41bd31 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -255,8 +255,8 @@ void ClearOTagR(uint32_t *ot, size_t length) { DMA_BCR(6) = length & 0xffff; DMA_CHCR(6) = 0x11000002; - //while (DMA_CHCR(6) & (1 << 24)) - //__asm__ volatile(""); + while (DMA_CHCR(6) & (1 << 24)) + __asm__ volatile(""); } void ClearOTag(uint32_t *ot, size_t length) { diff --git a/libpsn00b/psxgpu/env.c b/libpsn00b/psxgpu/env.c index 07edacf..8784947 100644 --- a/libpsn00b/psxgpu/env.c +++ b/libpsn00b/psxgpu/env.c @@ -11,6 +11,28 @@ extern GPU_VideoMode _gpu_video_mode; +/* Private utilities */ + +// Converts a texture window size value (a power of two in 8-128 range) into a +// bit mask by setting the leading zeroes of the value: +// 0 = 0b00000000 -> 0b00000 +// 8 = 0b00001000 -> 0b11111 +// 16 = 0b00010000 -> 0b11110 +// 32 = 0b00100000 -> 0b11100 +// 64 = 0b01000000 -> 0b11000 +// 128 = 0b10000000 -> 0b10000 +// The GPU uses the mask to process texture coordinates as follows: +// x &= ~(mask << 3) +// x |= (offset << 3) & (mask << 3) +static inline uint32_t _get_window_mask(int size) { + uint32_t mask = size >> 3; + + mask |= mask << 1; + mask |= mask << 2; + mask |= mask << 4; + return mask & 0x1f; +} + /* Drawing API */ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { @@ -24,8 +46,8 @@ DRAWENV *SetDefDrawEnv(DRAWENV *env, int x, int y, int w, int h) { env->tw.x = 0; env->tw.y = 0; - env->tw.w = 0; - env->tw.h = 0; + env->tw.w = 256; + env->tw.h = 256; env->tpage = 0x0a; env->dtd = 1; @@ -41,50 +63,53 @@ int DrawOTagEnv(const uint32_t *ot, DRAWENV *env) { DR_ENV *prim = &(env->dr_env); // All commands are grouped into a single display list packet for - // performance reasons (keep in mind that the GPU doesn't care about this - // as the display list is parsed by the DMA unit in the CPU and only the - // payload is sent to the GPU). + // performance reasons (the GPU does not care about the grouping as the + // display list is parsed by the DMA unit in the CPU). setaddr(prim, ot); - setlen(prim, 4); + setlen(prim, 5); + + // Texture page (reset active page and set dither/mask bits) + prim->code[0] = 0xe1000000 | env->tpage; + prim->code[0] |= (env->dtd & 1) << 9; + prim->code[0] |= (env->dfe & 1) << 10; + + // Texture window + prim->code[1] = 0xe2000000; + prim->code[1] |= _get_window_mask(env->tw.w); + prim->code[1] |= _get_window_mask(env->tw.h) << 5; + prim->code[1] |= (env->tw.x & 0xf8) << 7; // ((tw.x / 8) & 0x1f) << 10 + prim->code[1] |= (env->tw.y & 0xf8) << 12; // ((tw.y / 8) & 0x1f) << 15 // Set drawing area top left - prim->code[0] = 0xe3000000; - prim->code[0] |= env->clip.x & 0x3ff; - prim->code[0] |= (env->clip.y & 0x3ff) << 10; + prim->code[2] = 0xe3000000; + prim->code[2] |= env->clip.x & 0x3ff; + prim->code[2] |= (env->clip.y & 0x3ff) << 10; // Set drawing area bottom right - prim->code[1] = 0xe4000000; - prim->code[1] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff; - prim->code[1] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10; + prim->code[3] = 0xe4000000; + prim->code[3] |= (env->clip.x + (env->clip.w - 1)) & 0x3ff; + prim->code[3] |= ((env->clip.y + (env->clip.h - 1)) & 0x3ff) << 10; // Set drawing offset - prim->code[2] = 0xe5000000; - prim->code[2] |= (env->clip.x + env->ofs[0]) & 0x7ff; - prim->code[2] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11; - - // Texture page (reset active page and set dither/mask bits) - prim->code[3] = 0xe1000000 | env->tpage; - prim->code[3] |= (env->dtd & 1) << 9; - prim->code[3] |= (env->dfe & 1) << 10; + prim->code[4] = 0xe5000000; + prim->code[4] |= (env->clip.x + env->ofs[0]) & 0x7ff; + prim->code[4] |= ((env->clip.y + env->ofs[1]) & 0x7ff) << 11; if (env->isbg) { - setlen(prim, 7); + setlen(prim, 8); // Rectangle fill // FIXME: reportedly this command doesn't accept height values >511... - prim->code[4] = 0x02000000; - //prim->code[4] |= env->r0 | (env->g0 << 8) | (env->b0 << 16); - prim->code[4] |= *((const uint32_t *) &(env->isbg)) >> 8; - //prim->code[5] = env->clip.x; - //prim->code[5] |= env->clip.y << 16; - prim->code[5] = *((const uint32_t *) &(env->clip.x)); - prim->code[6] = env->clip.w; - prim->code[6] |= _min(env->clip.h, 0x1ff) << 16; + prim->code[5] = 0x02000000; + //prim->code[5] |= env->r0 | (env->g0 << 8) | (env->b0 << 16); + //prim->code[6] = env->clip.x; + //prim->code[6] |= env->clip.y << 16; + prim->code[5] |= *((const uint32_t *) &(env->isbg)) >> 8; + prim->code[6] = *((const uint32_t *) &(env->clip.x)); + prim->code[7] = env->clip.w; + prim->code[7] |= _min(env->clip.h, 0x1ff) << 16; } - //while (!(GPU_GP1 & (1 << 26))) - //__asm__ volatile(""); - return EnqueueDrawOp((void *) &DrawOTag2, (uint32_t) prim, 0, 0); } @@ -96,12 +121,10 @@ void PutDrawEnv(DRAWENV *env) { // useful if the DRAWENV structure is never modified (which is the case most of // the time). void PutDrawEnvFast(DRAWENV *env) { - if (!(env->dr_env.tag)) { + if (!(env->dr_env.tag)) DrawOTagEnv((const uint32_t *) 0x00ffffff, env); - return; - } - - DrawOTag((const uint32_t *) &(env->dr_env)); + else + DrawOTag((const uint32_t *) &(env->dr_env)); } /* Display API */ @@ -132,7 +155,7 @@ void PutDispEnv(const DISPENV *env) { mode |= (env->isinter & 1) << 5; mode |= (env->reverse & 1) << 7; - if (env->disp.h >= 256) + if (env->disp.h > 256) mode |= 1 << 2; // Calculate the horizontal display range values. The original code was diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c index e73505f..fc018a4 100644 --- a/libpsn00b/psxgpu/image.c +++ b/libpsn00b/psxgpu/image.c @@ -12,7 +12,17 @@ #include <psxgpu.h> #include <hwregs_c.h> -#define DMA_CHUNK_LENGTH 8 +#define QUEUE_LENGTH 16 +#define DMA_CHUNK_LENGTH 8 + +/* Internal globals */ + +// LoadImage() and StoreImage() run asynchronously but may be called with a +// pointer to a RECT struct in the stack, which might no longer be valid by the +// time the transfer is actually started. This buffer is used to store a copy +// of all RECTs passed to LoadImage()/StoreImage() as a workaround. +static RECT _saved_rects[QUEUE_LENGTH]; +static int _next_saved_rect = 0; /* Private utilities */ @@ -51,21 +61,33 @@ static void _dma_transfer(const RECT *rect, uint32_t *data, int write) { /* VRAM transfer API */ int LoadImage(const RECT *rect, const uint32_t *data) { + int index = _next_saved_rect; + + _saved_rects[index] = *rect; + _next_saved_rect = (index + 1) % QUEUE_LENGTH; + return EnqueueDrawOp( - (void *) &_dma_transfer, (uint32_t) rect, (uint32_t) data, 1 + (void *) &_dma_transfer, + (uint32_t) &_saved_rects[index], + (uint32_t) data, + 1 ); } int StoreImage(const RECT *rect, uint32_t *data) { + int index = _next_saved_rect; + + _saved_rects[index] = *rect; + _next_saved_rect = (index + 1) % QUEUE_LENGTH; + return EnqueueDrawOp( - (void *) &_dma_transfer, (uint32_t) rect, (uint32_t) data, 0 + (void *) &_dma_transfer, + (uint32_t) &_saved_rects[index], + (uint32_t) data, + 0 ); } -/*int MoveImage(const RECT *rect, int x, int y) { - return EnqueueDrawOp((void *) &MoveImage2, (uint32_t) rect, x, y); -}*/ - void LoadImage2(const RECT *rect, const uint32_t *data) { _dma_transfer(rect, (uint32_t *) data, 1); } diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index b430f2c..d15a4db 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -127,7 +127,7 @@ void DecDCTin(const uint32_t *data, int mode) { // the stream. void DecDCTinRaw(const uint32_t *data, size_t length) { if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("input data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } @@ -157,7 +157,7 @@ void DecDCTout(uint32_t *data, size_t length) { DecDCToutSync(0); if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) { - _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); + _sdk_log("output data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH); length += DMA_CHUNK_LENGTH - 1; } |
