From 09f321e37fc187affa664d32e36e32c0533a7e8e Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Mon, 23 Jan 2023 09:36:22 +0100 Subject: Add BS v3 decoding, fix MDEC API and strvideo example --- examples/mdec/strvideo/main.c | 74 ++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 29 deletions(-) (limited to 'examples/mdec') diff --git a/examples/mdec/strvideo/main.c b/examples/mdec/strvideo/main.c index 28d39b2..57cb6ef 100644 --- a/examples/mdec/strvideo/main.c +++ b/examples/mdec/strvideo/main.c @@ -1,6 +1,6 @@ /* * PSn00bSDK .STR FMV playback example - * (C) 2022 spicyjpeg - MPL licensed + * (C) 2022-2023 spicyjpeg - MPL licensed * * This example demonstrates playback of full-motion video in the standard .STR * format, using the MDEC for frame decoding and XA for audio. Decoded frames @@ -34,9 +34,10 @@ * Playback is stopped once the .STR header is no longer present in sectors * read. * - * Note that PSn00bSDK's bitstream decoding API only supports version 1 and 2 - * bitstreams currently, so make sure your .STR files are encoded as v2 and not - * v3. + * PSn00bSDK's bitstream decoding API supports both version 2 and 3 bitstreams. + * Encoding your .STR files as v3 may result in slightly higher quality + * depending on the encoder, but also higher CPU usage during playback compared + * to the older v2. */ #include @@ -102,13 +103,12 @@ void init_context(RenderContext *ctx) { FntOpen(4, 12, 312, 16, 2, 256); } -void display(RenderContext *ctx, int sync) { +void display(RenderContext *ctx) { Framebuffer *db; ctx->db_active ^= 1; DrawSync(0); - if (sync) - VSync(0); + //VSync(0); db = &(ctx->db[ctx->db_active]); PutDrawEnv(&(db->draw)); @@ -163,13 +163,13 @@ typedef struct { volatile int8_t cur_frame, cur_slice; } StreamContext; -StreamContext str_ctx; +static StreamContext str_ctx; // This buffer is used by cd_sector_handler() as a temporary area for sectors // read from the CD. Due to DMA limitations it can't be allocated on the stack // (especially not in the interrupt callbacks' stack, whose size is very // limited). -STR_Header sector_header; +static STR_Header sector_header; void cd_sector_handler(void) { StreamBuffer *frame = &str_ctx.frames[str_ctx.cur_frame]; @@ -268,7 +268,7 @@ void init_stream(void) { // optional but makes the decompressor slightly faster. See the libpsxpress // documentation for more details. DecDCTvlcSize(0x8000); - DecDCTvlcCopyTable((DECDCTTAB *) 0x1f800000); + DecDCTvlcCopyTableV3((VLC_TableV3 *) 0x1f800000); str_ctx.cur_frame = 0; str_ctx.cur_slice = 0; @@ -309,7 +309,7 @@ void start_stream(CdlFILE *file) { static RenderContext ctx; -#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx, 1); } +#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx); } #define SHOW_ERROR(...) { SHOW_STATUS(__VA_ARGS__); while (1) __asm__("nop"); } int main(int argc, const char* argv[]) { @@ -318,7 +318,7 @@ int main(int argc, const char* argv[]) { SHOW_STATUS("INITIALIZING\n"); SpuInit(); CdInit(); - InitGeom(); // Required for PSn00bSDK's DecDCTvlc() + InitGeom(); // GTE initialization required by the VLC decompressor DecDCTReset(0); SHOW_STATUS("OPENING VIDEO FILE\n"); @@ -330,8 +330,9 @@ int main(int argc, const char* argv[]) { init_stream(); start_stream(&file); - // Disable framebuffer clearing to get rid of flickering during playback. - display(&ctx, 1); + // Clear the screen, then disable framebuffer clearing to get rid of + // flickering during playback. + display(&ctx); ctx.db[0].draw.isbg = 0; ctx.db[1].draw.isbg = 0; #ifdef DISP_24BPP @@ -339,9 +340,13 @@ int main(int argc, const char* argv[]) { ctx.db[1].disp.isrgb24 = 1; #endif - int decode_errors = 0; + int frame_time = 1, decode_errors = 0; while (1) { +#ifdef DRAW_OVERLAY + int frame_start = TIMER_VALUE(1); +#endif + // Wait for a full frame to be read from the disc and decompress the // bitstream into the format expected by the MDEC. If the video has // ended, restart playback from the beginning. @@ -355,38 +360,45 @@ int main(int argc, const char* argv[]) { } #ifdef DRAW_OVERLAY - // Measure CPU usage of the decompressor using the hblank counter. - int total_time = TIMER_VALUE(1) + 1; - TIMER_VALUE(1) = 0; + int decode_time = TIMER_VALUE(1); #endif - if (DecDCTvlc(frame->bs_data, frame->mdec_data)) { + VLC_Context vlc_ctx; + if (DecDCTvlcStart( + &vlc_ctx, + frame->mdec_data, + sizeof(frame->mdec_data) / 4, + frame->bs_data + )) { decode_errors++; continue; } #ifdef DRAW_OVERLAY - int cpu_usage = TIMER_VALUE(1) * 100 / total_time; + // Calculate CPU usage of the decompressor. + decode_time = (TIMER_VALUE(1) - decode_time) & 0xffff; + int cpu_usage = decode_time * 100 / frame_time; #endif // Wait for the MDEC to finish decoding the previous frame, then flip // the framebuffers to display it and prepare the buffer for the next // frame. - // NOTE: you should *not* call VSync(0) during playback, as the refresh - // rate of the GPU is not synced to the video's frame rate. If you want - // to minimize screen tearing, consider triple buffering instead (i.e. - // always keep 2 fully decoded frames in VRAM and use VSyncCallback() - // to register a function that displays the next decoded frame whenever - // vblank occurs). + // NOTE: as the refresh rate of the GPU is not synced to the video's + // frame rate, this VSync(0) call may potentially end up waiting too + // long and desynchronizing playback. A better solution would be to + // implement triple buffering (i.e. always keep 2 fully decoded frames + // in VRAM and use VSyncCallback() to register a function that displays + // the next decoded frame if available whenever vblank occurs). + VSync(0); DecDCTinSync(0); DecDCToutSync(0); #ifdef DRAW_OVERLAY - FntPrint(-1, "FRAME:%5d READ ERRORS: %5d\n", str_ctx.frame_id, str_ctx.dropped_frames); - FntPrint(-1, "CPU: %5d%% DECODE ERRORS:%5d\n", cpu_usage, decode_errors); + FntPrint(-1, "FRAME:%6d READ ERRORS: %6d\n", str_ctx.frame_id, str_ctx.dropped_frames); + FntPrint(-1, "CPU: %6d%% DECODE ERRORS:%6d\n", cpu_usage, decode_errors); FntFlush(-1); #endif - display(&ctx, 0); + display(&ctx); // Feed the newly decompressed frame to the MDEC. The MDEC will not // actually start decoding it until an output buffer is also configured @@ -414,6 +426,10 @@ int main(int argc, const char* argv[]) { str_ctx.slices[str_ctx.cur_slice], BLOCK_SIZE * str_ctx.slice_pos.h / 2 ); + +#ifdef DRAW_OVERLAY + frame_time = (TIMER_VALUE(1) - frame_start) & 0xffff; +#endif } return 0; -- cgit v1.2.3 From 8e3a757d4d7d5dfc62f69ce4ede08f1cf79e3461 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Tue, 4 Apr 2023 00:46:20 +0200 Subject: Add IsIdleGPU(), tweak psxgpu.h, fix mdec/strvideo --- examples/demos/n00bdemo/main.c | 29 ++++++++++++------------- examples/mdec/strvideo/main.c | 19 +++++++++++------ libpsn00b/include/psxgpu.h | 48 +++++++++++++++++++++++++++--------------- libpsn00b/include/psxpress.h | 5 ++--- libpsn00b/psxgpu/common.c | 15 +++++++++++++ libpsn00b/psxpress/vlc2.c | 11 ++++------ 6 files changed, 78 insertions(+), 49 deletions(-) (limited to 'examples/mdec') diff --git a/examples/demos/n00bdemo/main.c b/examples/demos/n00bdemo/main.c index c9ca3ce..55dbbc4 100644 --- a/examples/demos/n00bdemo/main.c +++ b/examples/demos/n00bdemo/main.c @@ -358,10 +358,9 @@ void stencilstuff() { /* The stencil demo is achieved by utilizing the mask bit setting primitive GP0(E6h). The structure of this primitive is defined as - DR_MASK initialized and set by setDrawMask(). These are not available - in Sony's SDK by default. + DR_STP initialized and set by setDrawStp(). - The DR_MASK primitive controls mask bit operations for drawing + The DR_STP primitive controls mask bit operations for drawing primitives such as setting mask bits on every pixel drawn or mask bit test where pixels won't be drawn on pixels with the mask bit set. It applies to most graphics drawing primitives except VRAM fill. @@ -373,10 +372,10 @@ void stencilstuff() { bit operation disabled. The stencil effect featured in this demo is achieved by enabling set - mask bit with DR_MASK, drawing semi-transparent primitives using + mask bit with DR_STP, drawing semi-transparent primitives using additive blending but color is all zero to make it completely invisible but is enough to update the mask bits, disable mask set bit but enable - mask test with DR_MASK and then drawing a rectangle that fills the + mask test with DR_STP and then drawing a rectangle that fills the entire screen. Semi-transparency mask in textures must not be used when drawing the scene that will be 'below' the mask layer. */ @@ -384,7 +383,7 @@ void stencilstuff() { int spin=0; - DR_MASK *mask; + DR_STP *mask; TILE *rect; SC_OT s_ot; @@ -430,10 +429,10 @@ void stencilstuff() { // Sort mask primitive that enables setting mask bits - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 1, 0 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 1, 0 ); addPrim( ot[db]+20, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort the stars @@ -465,10 +464,10 @@ void stencilstuff() { // Sort mask primitive that enables mask bit test - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 0, 1 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 0, 1 ); addPrim( ot[db]+18, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort rectangle that fills the screen @@ -482,10 +481,10 @@ void stencilstuff() { // Clear all mask settings - mask = (DR_MASK*)nextpri; - setDrawMask( mask, 0, 0 ); + mask = (DR_STP*)nextpri; + setDrawStp( mask, 0, 0 ); addPrim( ot[db]+15, mask ); - nextpri += sizeof(DR_MASK); + nextpri += sizeof(DR_STP); // Sort overlay then display diff --git a/examples/mdec/strvideo/main.c b/examples/mdec/strvideo/main.c index 57cb6ef..853e0c2 100644 --- a/examples/mdec/strvideo/main.c +++ b/examples/mdec/strvideo/main.c @@ -189,8 +189,15 @@ void cd_sector_handler(void) { return; // If this sector is actually part of a new frame, validate the sectors - // that have been read so far and flip the bitstream data buffers. - if (sector_header.frame_id != str_ctx.frame_id) { + // that have been read so far and flip the bitstream data buffers. If the + // frame number is actually lower than the current one, assume the drive + // has started reading another .STR file and stop playback. + if ((int) sector_header.frame_id < str_ctx.frame_id) { + str_ctx.frame_ready = -1; + return; + } + + if ((int) sector_header.frame_id > str_ctx.frame_id) { // Do not set the ready flag if any sector has been missed. if (str_ctx.sector_count) str_ctx.dropped_frames++; @@ -263,11 +270,9 @@ void init_stream(void) { CdReadyCallback(&cd_event_handler); ExitCriticalSection(); - // Set the maximum amount of data DecDCTvlc() can output and copy the - // lookup table used for decompression to the scratchpad area. This is - // optional but makes the decompressor slightly faster. See the libpsxpress - // documentation for more details. - DecDCTvlcSize(0x8000); + // Copy the lookup table used for frame decompression to the scratchpad + // area. This is optional but makes the decompressor slightly faster. See + // the libpsxpress documentation for more details. DecDCTvlcCopyTableV3((VLC_TableV3 *) 0x1f800000); str_ctx.cur_frame = 0; diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h index 2329908..78d8342 100644 --- a/libpsn00b/include/psxgpu.h +++ b/libpsn00b/include/psxgpu.h @@ -83,7 +83,7 @@ typedef enum _GPU_VideoMode { (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ (p)->u2 = (_u2), (p)->v2 = (_v2) - + #define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \ (p)->u0 = (_u0), (p)->v0 = (_v0), \ (p)->u1 = (_u1), (p)->v1 = (_v1), \ @@ -202,38 +202,48 @@ typedef enum _GPU_VideoMode { #define setTexWindow_T(p, r) \ (p)->code[0] = (0xe2000000 | \ - ((r)->w % 32) | \ - (((r)->h % 32) << 5) | \ - (((r)->x % 32) << 10) | \ - (((r)->y % 32) << 15) \ + ((r)->w & 0x1f) | \ + (((r)->h & 0x1f) << 5) | \ + (((r)->x & 0x1f) << 10) | \ + (((r)->y & 0x1f) << 15) \ ) #define setTexWindow(p, r) \ setlen(p, 1), setTexWindow_T(p, r) -#define setDrawArea_T(p, r) \ +#define setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) \ (p)->code[0] = (0xe3000000 | \ - ((r)->x % 1024) | \ - (((r)->y % 1024) << 10) \ + ((_x0) & 0x3ff) | \ + (((_y0) & 0x3ff) << 10) \ ), \ (p)->code[1] = (0xe4000000 | \ - (((r)->x + (r)->w - 1) % 1024) | \ - ((((r)->y + (r)->h - 1) % 1024) << 10) \ + ((_x1) & 0x3ff) | \ + (((_y1) & 0x3ff) << 10) \ + ) +#define setDrawAreaXY(p, _x0, _y0, _x1, _y1) \ + setlen(p, 2), setDrawAreaXY_T(p, _x0, _y0, _x1, _y1) + +#define setDrawArea_T(p, r) \ + setDrawAreaXY_T(p, \ + (r)->x, \ + (r)->y, \ + (r)->x + (r)->w - 1, \ + (r)->y + (r)->h - 1 \ ) #define setDrawArea(p, r) \ setlen(p, 2), setDrawArea_T(p, r) #define setDrawOffset_T(p, _x, _y) \ (p)->code[0] = (0xe5000000 | \ - ((_x) % 1024) | \ - (((_y) % 1024) << 11) \ + ((_x) & 0x7ff) | \ + (((_y) & 0x7ff) << 11) \ ) #define setDrawOffset(p, _x, _y) \ setlen(p, 1), setDrawOffset_T(p, _x, _y) -#define setDrawMask_T(p, sb, mt) \ - (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1)) -#define setDrawMask(p, sb, mt) \ - setlen(p, 1), setDrawMask_T(p, sb, mt) +#define setDrawStp_T(p, pbw, mt) \ + (p)->code[0] = (0xe6000000 | (pbw) | ((mt) << 1)) +#define setDrawStp(p, pbw, mt) \ + setlen(p, 1), setDrawStp_T(p, pbw, mt) /* Primitive structure definitions */ @@ -469,7 +479,7 @@ _DEF_PRIM(DR_TWIN, _DEF_PRIM(DR_TPAGE, uint32_t code[1]; ) -_DEF_PRIM(DR_MASK, +_DEF_PRIM(DR_STP, uint32_t code[1]; ) @@ -481,6 +491,9 @@ _DEF_PRIM(DR_ENV, FILL_T fill; ) +#undef _DEF_PRIM +#undef _DEF_ALIAS + /* Structure definitions */ typedef struct _RECT { @@ -545,6 +558,7 @@ void PutDrawEnv(DRAWENV *env); void PutDrawEnvFast(DRAWENV *env); int GetODE(void); +int IsIdleGPU(int timeout); int VSync(int mode); void *VSyncHaltFunction(void (*func)(void)); void *VSyncCallback(void (*func)(void)); diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index c3b13f4..ea0c2ec 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -246,8 +246,6 @@ int DecDCToutSync(int mode); * can be different). If max_size = 0, the entire frame will always be decoded * in one shot. * - * Only bitstream version 2 is currently supported. - * * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the * first time. Attempting to call this function with the GTE disabled will * result in a crash. @@ -411,7 +409,8 @@ void DecDCTvlcCopyTableV3(VLC_TableV3 *addr); * buffer can be different). If max_size = 0, the entire frame will always be * decoded in one shot. * - * Only bitstream version 2 is currently supported. + * This function only supports decoding version 1 and 2 bitstreams. Use + * DecDCTvlcStart() to decode a version 3 bitstream. * * @param ctx Pointer to VLC_Context structure (which will be initialized) * @param buf diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c index 537f672..e354261 100644 --- a/libpsn00b/psxgpu/common.c +++ b/libpsn00b/psxgpu/common.c @@ -321,3 +321,18 @@ void DrawOTag2(const uint32_t *ot) { DMA_BCR(DMA_GPU) = 0; DMA_CHCR(DMA_GPU) = 0x01000401; } + +/* Queue pause/resume API */ + +int IsIdleGPU(int timeout) { + if (timeout <= 0) + timeout = 1; + + for (; timeout; timeout--) { + if (GPU_GP1 & (1 << 26)) + return 0; + } + + //_sdk_log("IsIdleGPU() timeout\n"); + return -1; +} diff --git a/libpsn00b/psxpress/vlc2.c b/libpsn00b/psxpress/vlc2.c index 24c54ce..7d9d9f3 100644 --- a/libpsn00b/psxpress/vlc2.c +++ b/libpsn00b/psxpress/vlc2.c @@ -123,7 +123,7 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( *output = (uint16_t) _get_bits_unsigned(22); _advance_window(22); } else if (window >> 24) { - // The first lookup table is for codes that not start with + // The first lookup table is for codes that do not start with // 00000000. value = _vlc_huffman_table2->ac[_get_bits_unsigned(13)]; _advance_window(value >> 16); @@ -136,12 +136,9 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( *output = (uint16_t) value; } } else { - // Parse the DC (first) coefficient for this block. Version 2 - // simply stores the signed 10-bit value as-is, while version 3 - // uses a delta encoding combined with a compression method similar - // to exp-Golomb. + // Parse the DC (first) coefficient for this block. if (is_v3) { - // TODO: version 3 is currently not supported. + // This implementation does not support version 3. return -1; } else { value = _get_bits_unsigned(10); @@ -161,7 +158,7 @@ int __attribute__((optimize(3))) DecDCTvlcContinue2( // time and processes each 16-bit word starting from the the MSB, so an // endianness conversion is necessary to preserve bit order when // reading 32 bits at a time. Also note that the PS1 CPU is not capable - // of shifting by more than 31 bits - it will shift by 0 bits instead! + // of shifting by >=31 bits - it will shift by (N % 32) bits instead! if (bit_offset < 0) { window = next_window << (-bit_offset); bit_offset += 32; -- cgit v1.2.3 From fd846206ae9419af5ed227989b3ad49b541a823c Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Tue, 4 Apr 2023 01:09:50 +0200 Subject: Add missing CD image dependencies to CMake scripts --- examples/cdrom/cdbrowse/CMakeLists.txt | 5 ++++- examples/cdrom/cdbrowse/iso.xml | 5 +---- examples/cdrom/cdxa/CMakeLists.txt | 6 ++++-- examples/cdrom/cdxa/iso.xml | 5 +---- examples/io/system573/iso.xml | 5 +---- examples/mdec/strvideo/CMakeLists.txt | 5 ++++- examples/mdec/strvideo/iso.xml | 5 +---- examples/sound/cdstream/CMakeLists.txt | 5 ++++- examples/sound/cdstream/iso.xml | 5 +---- examples/system/dynlink/CMakeLists.txt | 2 +- examples/system/dynlink/iso.xml | 5 +---- libpsn00b/cmake/internal_setup.cmake | 15 +++++++-------- template/CMakeLists.txt | 2 +- template/iso.xml | 5 +---- 14 files changed, 32 insertions(+), 43 deletions(-) (limited to 'examples/mdec') diff --git a/examples/cdrom/cdbrowse/CMakeLists.txt b/examples/cdrom/cdbrowse/CMakeLists.txt index 0cc091f..70a4585 100644 --- a/examples/cdrom/cdbrowse/CMakeLists.txt +++ b/examples/cdrom/cdbrowse/CMakeLists.txt @@ -13,7 +13,10 @@ project( file(GLOB _sources *.c) psn00bsdk_add_executable(cdbrowse GPREL ${_sources}) -psn00bsdk_add_cd_image(cdbrowse_iso cdbrowse iso.xml DEPENDS cdbrowse) +psn00bsdk_add_cd_image( + cdbrowse_iso cdbrowse iso.xml + DEPENDS cdbrowse system.cnf +) psn00bsdk_target_incbin(cdbrowse PRIVATE ball16c ball16c.tim) diff --git a/examples/cdrom/cdbrowse/iso.xml b/examples/cdrom/cdbrowse/iso.xml index 771b0e9..f1c00f7 100644 --- a/examples/cdrom/cdbrowse/iso.xml +++ b/examples/cdrom/cdbrowse/iso.xml @@ -1,8 +1,5 @@ - + - + - + - + - + - + - +