/*
 * PSn00bSDK MDEC library
 * (C) 2022-2023 spicyjpeg - MPL licensed
 */

/**
 * @file psxpress.h
 * @brief MDEC library header
 *
 * @details This is a fully original reimplementation of the official SDK's
 * "data compression" library. This library is made up of two parts, the MDEC
 * API and functions to decompress Huffman-encoded bitstreams (.BS files, or
 * frames in .STR files) into data to be fed to the MDEC. Two different
 * implementations of the latter are provided, one using the GTE and scratchpad
 * region and an older one using a large lookup table in main RAM.
 *
 * FMV playback is not part of this library per se, but can be implemented
 * using the APIs defined here alongside some code to stream data from the CD
 * drive.
 *
 * Currently bitstream versions 1, 2 and 3 are supported. Version 0 and .IKI
 * bitstreams are not supported, but no encoder is publicly available for those
 * anyway.
 */

#pragma once

#include <stdint.h>
#include <stddef.h>

/* Structure definitions */

/**
 * @brief MDEC decoding environment (quantization tables and IDCT matrix), as
 * uploaded by DecDCTPutEnv().
 */
typedef struct {
	uint8_t iq_y[64]; // Luma quantization table, stored in zigzag order
	uint8_t iq_c[64]; // Chroma quantization table, stored in zigzag order
	int16_t dct[64];  // Inverse DCT matrix (2.14 fixed-point)
} DECDCTENV;

/**
 * @brief 676-byte lookup table used by the GTE-based .BS decompressor,
 * containing only the data needed for version 1 and 2 bitstreams. See
 * DecDCTvlcCopyTableV2().
 */
typedef struct {
	uint16_t ac0[2];
	uint32_t ac2[8], ac3[64];
	uint16_t ac4[8], ac5[8], ac7[16], ac8[32];
	uint16_t ac9[32], ac10[32], ac11[32], ac12[32];
} VLC_TableV2;

/**
 * @brief Full 816-byte lookup table used by the GTE-based .BS decompressor.
 * Its first 676 bytes have the same layout as VLC_TableV2; the remaining
 * fields hold the data used to decode version 3 bitstreams. See
 * DecDCTvlcCopyTableV3().
 */
typedef struct {
	uint16_t ac0[2];
	uint32_t ac2[8], ac3[64];
	uint16_t ac4[8], ac5[8], ac7[16], ac8[32];
	uint16_t ac9[32], ac10[32], ac11[32], ac12[32];
	uint8_t  dc[128], dc_len[9];
	uint8_t  _reserved[3];
} VLC_TableV3;

/**
 * @brief Large (34 KB) lookup table used by the alternate .BS decompressor
 * (DecDCTvlcStart2() and friends), generated by DecDCTvlcBuild().
 */
typedef struct {
	uint32_t ac[8192], ac00[512];
} DECDCTTAB;

/**
 * @brief Values accepted by the mode argument of DecDCTin().
 */
typedef enum {
	DECDCT_MODE_24BPP       =  1, // 24bpp output
	DECDCT_MODE_16BPP       =  0, // 16bpp output
	DECDCT_MODE_16BPP_BIT15 =  2, // NOTE(review): presumably 16bpp with bit 15 set in each pixel - confirm
	DECDCT_MODE_RAW         = -1  // First word is copied to MDEC0 as-is
} DECDCTMODE;

/**
 * @brief State of a .BS decompressor. Initialized by DecDCTvlcStart() or
 * DecDCTvlcStart2() and then passed to the matching "continue" function;
 * callers are not expected to fill in the fields themselves.
 */
typedef struct {
	const uint32_t *input;
	uint32_t       window, next_window, remaining;
	int8_t         is_v3, bit_offset, block_index, coeff_index;
	uint16_t       quant_scale;
	int16_t        last_y, last_cr, last_cb;
} VLC_Context;

/**
 * @brief NOTE(review): presumably mirrors the header found at the beginning of
 * a .BS file or STR frame; it is not referenced by any prototype in this
 * header, so confirm against the decompressor's implementation.
 */
typedef struct {
	uint32_t mdec0_header;
	uint16_t quant_scale;
	uint16_t version;
} BS_Header;

/* Public API */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Resets and optionally initializes the MDEC.
 *
 * @details Resets the MDEC and aborts any MDEC DMA transfers. If mode = 0, the
 * default IDCT matrix and quantization tables are also loaded and the MDEC is
 * put into color output mode, discarding any custom environment previously set
 * with DecDCTPutEnv().
 *
 * DecDCTReset(0) must be called at least once prior to using the MDEC.
 *
 * @param mode 0 to additionally load the default tables and enable color
 * output mode; otherwise only the reset is performed
 *
 * @see DecDCTPutEnv()
 */
void DecDCTReset(int mode);

/**
 * @brief Loads default or custom quantization and IDCT tables into the MDEC.
 *
 * @details Uploads the specified decoding environment's quantization tables
 * and IDCT matrix to the MDEC, or restores the default tables if a null
 * pointer is passed. Calling this function is normally not required as
 * DecDCTReset(0) initializes the MDEC with the default tables, but it may be
 * useful for e.g. decoding JPEG or a format with custom quantization tables.
 *
 * The second argument, not present in the official SDK, specifies whether the
 * MDEC shall be put into color (0) or monochrome (1) output mode. In
 * monochrome mode each DCT block decoded from the input stream is transformed
 * into an 8x8x8bpp bitmap, while in color mode each group of 6 DCT blocks (Cr,
 * Cb, Y1-4) is used to form a 16x16 RGB bitmap.
 *
 * This function uses DecDCTinSync() to wait for the MDEC to become ready and
 * should not be called during decoding or after calling DecDCTin().
 *
 * @param env  Pointer to DECDCTENV or 0 for default tables
 * @param mono 0 for color (normal), 1 for monochrome
 */
void DecDCTPutEnv(const DECDCTENV *env, int mono);

/**
 * @brief Feeds the MDEC with a run-length code buffer from the specified
 * location.
 *
 * @details Sets up the MDEC to start fetching and decoding the given buffer.
 * This function is meant to be used with buffers generated by DecDCTvlc(),
 * DecDCTvlc2() or their variants: the first 32-bit word of the buffer is
 * initially copied to the MDEC0 register, then all subsequent data is read in
 * 128-byte (32-word) chunks. The length of the stream (in 32-bit units, minus
 * the first word) is encoded by DecDCTvlc() in the lower 16 bits of the first
 * word.
 *
 * The mode argument optionally specifies the output color depth (0 for 16bpp,
 * 1 for 24bpp) if not already set in the first word. Passing -1 will result in
 * DecDCTin() copying the first word as-is to MDEC0 without manipulating any of
 * its bits.
 *
 * @param data Pointer to the run-length code buffer (header word first)
 * @param mode DECDCT_MODE_* or -1
 *
 * @see DecDCTinRaw(), DecDCTinSync()
 */
void DecDCTin(const uint32_t *data, int mode);

/**
 * @brief Feeds the MDEC with raw data from the specified location.
 *
 * @details Configures the MDEC to automatically fetch data (the input stream,
 * IDCT matrix or quantization tables) in 128-byte (32-word) chunks from the
 * specified address in main RAM. The transfer is stopped, and any callback
 * registered with DMACallback(0) is fired, once a certain number of 32-bit
 * words have been read; usually the length should match the number of input
 * words expected by the MDEC. If the MDEC expects more data its operation will
 * be paused and can be resumed by calling DecDCTinRaw() again.
 *
 * This is a low-level variant of DecDCTin() that only sets up the DMA transfer
 * and does not write anything to the MDEC0 register. The actual transfer won't
 * start until the MDEC is given a valid command.
 *
 * @param data   Address in main RAM to fetch data from
 * @param length Number of 32-bit words to read (must be multiple of 32)
 *
 * @see DecDCTin(), DecDCTinSync()
 */
void DecDCTinRaw(const uint32_t *data, size_t length);

/**
 * @brief Waits for an MDEC input transfer to finish or returns its status.
 *
 * @details Waits for the MDEC to finish decoding the input stream (if
 * mode = 0) or returns whether it is busy (if mode = 1). MDEC commands can be
 * issued only when the MDEC isn't busy.
 *
 * WARNING: DecDCTinSync(0) might time out and return -1 if the MDEC can't
 * output decoded data, e.g. if the length passed to DecDCTout() was too small
 * and no callback is registered to set up further transfers. DecDCTinSync(0)
 * shall only be used alongside DMACallback(1) or if the entirety of the
 * decoded stream (usually a whole frame) is being written to main RAM.
 *
 * @param mode 0 to wait for the MDEC, 1 to query the busy flag
 * @return 0 or -1 in case of a timeout (mode = 0), MDEC busy flag (mode = 1)
 */
int DecDCTinSync(int mode);

/**
 * @brief Writes image data decoded by the MDEC to the specified location.
 *
 * @details Configures the MDEC to automatically transfer decoded image data in
 * 128-byte (32-word) chunks to the specified address in main RAM. MDEC
 * operation is paused once a certain number of 32-bit words have been output
 * and can be resumed by calling DecDCTout() again: the MDEC will continue
 * decoding the input stream from where it left off. Any callback registered
 * with DMACallback(1) is also fired whenever the transfer ends.
 *
 * This behavior allows the MDEC's output to be buffered into 16-pixel-wide
 * vertical strips in main RAM, which can then be uploaded to VRAM using
 * LoadImage().
 *
 * @param data   Destination address in main RAM for decoded image data
 * @param length Number of 32-bit words to output (must be multiple of 32)
 *
 * @see DecDCToutSync()
 */
void DecDCTout(uint32_t *data, size_t length);

/**
 * @brief Waits for an MDEC output transfer to finish or returns its status.
 *
 * @details Waits until the transfer set up by DecDCTout() finishes (if
 * mode = 0) or returns whether it is still in progress (if mode = 1).
 *
 * WARNING: DecDCToutSync(0) might time out and return -1 if the MDEC is unable
 * to consume enough input data in order to produce the desired amount of data.
 * If the input stream isn't contiguous in memory, DMACallback(0) shall be used
 * to register a callback that calls DecDCTin() or DecDCTinRaw() to feed the
 * MDEC.
 *
 * @param mode 0 to wait for the transfer, 1 to query the busy flag
 * @return 0 or -1 in case of a timeout (mode = 0), DMA busy flag (mode = 1)
 */
int DecDCToutSync(int mode);

/**
 * @brief Decompresses or begins decompressing a .BS file into MDEC codes.
 *
 * @details Begins decompressing the contents of a .BS file (or of a single STR
 * frame) into a buffer that can be passed to DecDCTin(). This function uses a
 * small (<1 KB) lookup table combined with the GTE to accelerate the process;
 * performance is roughly on par with DecDCTvlcStart2() if the lookup table
 * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTableV2() or
 * DecDCTvlcCopyTableV3(). The contents of the GTE's LZCS and LZCR registers,
 * if any, will be destroyed.
 *
 * A VLC_Context object must be created and passed to this function, which will
 * then proceed to initialize its fields. The max_size argument sets the
 * maximum number of words that will be written to the output buffer; if more
 * data needs to be written, this function will return 1. To continue decoding
 * call DecDCTvlcContinue() with the same VLC_Context object (the output buffer
 * can be different). If max_size = 0, the entire frame will always be decoded
 * in one shot.
 *
 * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the
 * first time. Attempting to call this function with the GTE disabled will
 * result in a crash.
 *
 * @param ctx      Pointer to new VLC_Context structure
 * @param buf      Output buffer the MDEC codes are written to
 * @param max_size Maximum number of 32-bit words to output
 * @param bs       Pointer to bitstream data (.BS file or STR frame)
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcContinue(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3()
 */
int DecDCTvlcStart(
	VLC_Context    *ctx,
	uint32_t       *buf,
	size_t         max_size,
	const uint32_t *bs
);

/**
 * @brief Resumes or finishes decompressing a .BS file into MDEC codes.
 *
 * @details Resumes the decompression process started by DecDCTvlcStart(). The
 * state of the decompressor is contained entirely in the VLC_Context structure
 * so an arbitrary number of bitstreams can be decoded concurrently (although
 * the limited CPU power makes it impractical to do so) by keeping a separate
 * context for each bitstream.
 *
 * This function behaves like DecDCTvlcStart(), returning 1 if more data has to
 * be written or 0 otherwise. DecDCTvlcContinue() shall not be called after a
 * previous call to DecDCTvlcStart() or DecDCTvlcContinue() with the same
 * context returned 0; in that case the context shall be discarded or reused to
 * decode another bitstream.
 *
 * The contents of the GTE's LZCS and LZCR registers, if any, will be
 * destroyed.
 *
 * See DecDCTvlcStart() for more details.
 *
 * @param ctx      Pointer to already initialized VLC_Context structure
 * @param buf      Output buffer the MDEC codes are written to
 * @param max_size Maximum number of 32-bit words to output
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcStart()
 */
int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size);

/**
 * @brief Decompresses a .BS file into MDEC codes.
 *
 * @details A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for
 * compatibility with the official SDK. This function uses an internal context;
 * additionally, the maximum output buffer size is not passed as an argument
 * but is instead set by calling DecDCTvlcSize().
 *
 * This function behaves identically to DecDCTvlcContinue() if bs = 0 and
 * DecDCTvlcStart() otherwise.
 *
 * See DecDCTvlcStart() for more details.
 *
 * WARNING: InitGeom() must be called prior to using DecDCTvlc() for the first
 * time. Attempting to call this function with the GTE disabled will result in
 * a crash.
 *
 * @param bs  Pointer to bitstream data or 0 to resume decoding
 * @param buf Output buffer the MDEC codes are written to
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcSize(), DecDCTvlcCopyTableV2(), DecDCTvlcCopyTableV3()
 */
int DecDCTvlc(const uint32_t *bs, uint32_t *buf);

/**
 * @brief Sets the maximum amount of data to be decompressed.
 *
 * @details Sets the maximum number of 32-bit words that a single call to
 * DecDCTvlc() will output. If size = 0, the entire frame will always be
 * decoded in one shot.
 *
 * Note that DecDCTvlcStart() and DecDCTvlcContinue() do not use the value set
 * by this function and instead expect the maximum size to be passed as an
 * argument.
 *
 * @param size Maximum number of 32-bit words to output
 * @return Previously set value
 *
 * @see DecDCTvlc()
 */
size_t DecDCTvlcSize(size_t size);

/**
 * @brief Copies the lookup tables used by the .BS decompressor (v1/v2) to the
 * scratchpad region.
 *
 * @details Copies the lookup table used by DecDCTvlcContinue(),
 * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this
 * table is always present in main RAM, however this function can be used to
 * copy it to the scratchpad region to boost decompression performance.
 *
 * This function copies a 676-byte table (VLC_TableV2 structure) containing
 * only the data necessary for decoding version 1 and 2 bitstreams, to help
 * save scratchpad space. If support for version 3 is required,
 * DecDCTvlcCopyTableV3() can be used instead to copy the full 816-byte table.
 *
 * The address passed to this function is saved. Calls to DecDCTvlcStart(),
 * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table
 * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to
 * using the library's internal table in main RAM.
 *
 * WARNING: attempting to decode a version 3 .BS file or .STR frame after
 * calling this function will result in undefined behavior and potentially a
 * crash. To re-enable version 3 decoding, use DecDCTvlcCopyTableV3() to copy
 * the full table to the scratchpad or revert to using the built-in table in
 * main RAM.
 *
 * @param addr Pointer to free 676-byte area in scratchpad region or 0 to reset
 *
 * @see DecDCTvlcCopyTableV3()
 */
void DecDCTvlcCopyTableV2(VLC_TableV2 *addr);

/**
 * @brief Copies the lookup tables used by the .BS decompressor (v1/v2/v3) to
 * the scratchpad region.
 *
 * @details Copies the lookup table used by DecDCTvlcContinue(),
 * DecDCTvlcStart() and DecDCTvlc() to the specified address. A copy of this
 * table is always present in main RAM, however this function can be used to
 * copy it to the scratchpad region to boost decompression performance.
 *
 * This function copies the full 816-byte table (VLC_TableV3 structure),
 * including the data used to decode version 3 bitstreams. If support for
 * version 3 is not required, DecDCTvlcCopyTableV2() can be used instead to
 * save scratchpad space by only copying the first 676 bytes of the table.
 *
 * The address passed to this function is saved. Calls to DecDCTvlcStart(),
 * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table
 * copied. Call DecDCTvlcCopyTableV2(0) or DecDCTvlcCopyTableV3(0) to revert to
 * using the library's internal table in main RAM.
 *
 * @param addr Pointer to free 816-byte area in scratchpad region or 0 to reset
 *
 * @see DecDCTvlcCopyTableV2()
 */
void DecDCTvlcCopyTableV3(VLC_TableV3 *addr);

/**
 * @brief Decompresses or begins decompressing a .BS file into MDEC codes
 * (alternate implementation).
 *
 * @details Begins decompressing the contents of a .BS file (or of a single STR
 * frame) into a buffer that can be passed to DecDCTin(). This function uses a
 * large (34 KB) lookup table that must be loaded into main RAM beforehand by
 * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad.
 * Depending on the specific bitstream being decoded DecDCTvlcStart2() might be
 * slightly faster or slower than DecDCTvlcStart() with its lookup table copied
 * to the scratchpad (see DecDCTvlcCopyTableV2() and DecDCTvlcCopyTableV3()).
 * DecDCTvlcStart() with the table in main RAM tends to be much slower.
 *
 * A VLC_Context object must be created and passed to this function, which will
 * then proceed to initialize its fields. The max_size argument sets the
 * maximum number of words that will be written to the output buffer; if more
 * data needs to be written, this function will return 1. To continue decoding
 * call DecDCTvlcContinue2() with the same VLC_Context object (the output
 * buffer can be different). If max_size = 0, the entire frame will always be
 * decoded in one shot.
 *
 * This function only supports decoding version 1 and 2 bitstreams. Use
 * DecDCTvlcStart() to decode a version 3 bitstream.
 *
 * @param ctx      Pointer to VLC_Context structure (which will be initialized)
 * @param buf      Output buffer the MDEC codes are written to
 * @param max_size Maximum number of 32-bit words to output
 * @param bs       Pointer to bitstream data (.BS file or STR frame)
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcContinue2(), DecDCTvlcBuild()
 */
int DecDCTvlcStart2(
	VLC_Context    *ctx,
	uint32_t       *buf,
	size_t         max_size,
	const uint32_t *bs
);

/**
 * @brief Resumes or finishes decompressing a .BS file into MDEC codes
 * (alternate implementation).
 *
 * @details Resumes the decompression process started by DecDCTvlcStart2(). The
 * state of the decompressor is contained entirely in the VLC_Context structure
 * so an arbitrary number of bitstreams can be decoded concurrently (although
 * the limited CPU power makes it impractical to do so) by keeping a separate
 * context for each bitstream.
 *
 * This function behaves like DecDCTvlcStart2(), returning 1 if more data has
 * to be written or 0 otherwise. DecDCTvlcContinue2() shall not be called after
 * a previous call to DecDCTvlcStart2() or DecDCTvlcContinue2() with the same
 * context returned 0; in that case the context shall be discarded or reused to
 * decode another bitstream.
 *
 * See DecDCTvlcStart2() for more details.
 *
 * @param ctx      Pointer to already initialized VLC_Context structure
 * @param buf      Output buffer the MDEC codes are written to
 * @param max_size Maximum number of 32-bit words to output
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcStart2()
 */
int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size);

/**
 * @brief Decompresses a .BS file into MDEC codes (alternate implementation).
 *
 * @details A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for
 * compatibility with the official SDK. This function uses an internal context;
 * additionally, the maximum output buffer size is not passed as an argument
 * but is instead set by calling DecDCTvlcSize2().
 *
 * This function behaves identically to DecDCTvlcContinue2() if bs = 0 and
 * DecDCTvlcStart2() otherwise. The table argument can optionally be passed to
 * use a custom lookup table. If zero, the last pointer passed to
 * DecDCTvlcBuild() will be used.
 *
 * See DecDCTvlcStart2() for more details.
 *
 * @param bs    Pointer to bitstream data or 0 to resume decoding
 * @param buf   Output buffer the MDEC codes are written to
 * @param table Pointer to decompressed table or 0 to use last table used
 * @return 0, 1 if more data needs to be output or -1 in case of failure
 *
 * @see DecDCTvlcSize2(), DecDCTvlcBuild()
 */
int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB *table);

/**
 * @brief Sets the maximum amount of data to be decompressed (alternate
 * implementation).
 *
 * @details Sets the maximum number of 32-bit words that a single call to
 * DecDCTvlc2() will output. If size = 0, the entire frame will always be
 * decoded in one shot.
 *
 * Note that DecDCTvlcStart2() and DecDCTvlcContinue2() do not use the value
 * set by this function and instead expect the maximum size to be passed as an
 * argument.
 *
 * @param size Maximum number of 32-bit words to output
 * @return Previously set value
 *
 * @see DecDCTvlc2()
 */
size_t DecDCTvlcSize2(size_t size);

/**
 * @brief Generates the lookup table used by the alternate implementation of
 * the .BS decompressor.
 *
 * @details Generates the lookup table required by DecDCTvlcStart2(),
 * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB structure) into the
 * specified buffer. Since the table is relatively large (34 KB), it is
 * recommended to only generate it in a dynamically-allocated buffer when
 * needed and deallocate the buffer afterwards.
 *
 * The address passed to this function is saved. Calls to DecDCTvlcStart2() and
 * DecDCTvlcContinue2() will automatically use the last table decompressed.
 *
 * @param table Buffer the DECDCTTAB lookup table is generated into
 */
void DecDCTvlcBuild(DECDCTTAB *table);

#ifdef __cplusplus
}
#endif