diff options
| author | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-07-17 21:33:35 +0200 |
|---|---|---|
| committer | spicyjpeg <thatspicyjpeg@gmail.com> | 2022-07-17 21:33:35 +0200 |
| commit | 5b63607ba4ca12c2a2935ea9618b3ffe6a6d3ab3 (patch) | |
| tree | f7ec51b65eb148d4fbddca5127589592e57d219c /libpsn00b/include | |
| parent | c800972bc13ad0c7015b7d44fe9f124b719e792e (diff) | |
| download | psn00bsdk-5b63607ba4ca12c2a2935ea9618b3ffe6a6d3ab3.tar.gz | |
Add experimental psxpress Huffman decoding API
Diffstat (limited to 'libpsn00b/include')
| -rw-r--r-- | libpsn00b/include/psxpress.h | 250 |
1 files changed, 242 insertions, 8 deletions
diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index 6203c2a..2106a53 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -17,6 +17,28 @@ typedef struct _DECDCTENV { int16_t dct[64]; // Inverse DCT matrix (2.14 fixed-point) } DECDCTENV; +// This is the "small" lookup table used by DecDCTvlc(). It can be copied to +// the scratchpad. +typedef struct _DECDCTTAB { + uint16_t lut0[2]; + uint32_t lut2[8]; + uint32_t lut3[64]; + uint16_t lut4[8]; + uint16_t lut5[8]; + uint16_t lut7[16]; + uint16_t lut8[32]; + uint16_t lut9[32]; + uint16_t lut10[32]; + uint16_t lut11[32]; + uint16_t lut12[32]; +} DECDCTTAB; + +// This is the "large" table used by DecDCTvlc2(). +typedef struct _DECDCTTAB2 { + uint32_t lut[8192]; + uint32_t lut00[512]; +} DECDCTTAB2; + typedef enum _DECDCTMODE { DECDCT_MODE_24BPP = 1, DECDCT_MODE_16BPP = 0, @@ -24,6 +46,23 @@ typedef enum _DECDCTMODE { DECDCT_MODE_RAW = -1 } DECDCTMODE; +typedef struct _VLC_Context { + const uint32_t *input; + uint32_t window, next_window, remaining; + uint16_t quant_scale; + int8_t is_v3, bit_offset, block_index, coeff_index; +} VLC_Context; + +// Despite what some docs claim, the "number of 32-byte blocks" and "always +// 0x3800" fields are actually a single 32-bit field which is copied over to +// the output buffer, then parsed by DecDCTin() and written to the MDEC0 +// register. +typedef struct { + uint32_t mdec0_header; + uint16_t quant_scale; + uint16_t version; +} BS_Header; + /* Public API */ #ifdef __cplusplus @@ -64,12 +103,12 @@ void DecDCTReset(int mode); void DecDCTPutEnv(const DECDCTENV *env, int mono); /** - * @brief Sets up the MDEC to start fetching and decoding a stream from the - * given address in main RAM. The first 32-bit word is initially copied to the - * MDEC0 register, then all subsequent data is read in 128-byte (32-word) - * chunks. 
The length of the stream (in 32-bit units, minus the first word) - * must be encoded in the lower 16 bits of the first word, as expected by the - * MDEC. + * @brief Sets up the MDEC to start fetching and decoding the given buffer. + * This function is meant to be used with buffers generated by DecDCTvlc(): the + * first 32-bit word of the buffer is initially copied to the MDEC0 register, + * then all subsequent data is read in 128-byte (32-word) chunks. The length of + * the stream (in 32-bit units, minus the first word) is encoded by DecDCTvlc() + * in the lower 16 bits of the first word. * * The mode argument optionally specifies the output color depth (0 for 16bpp, * 1 for 24bpp) if not already set in the first word. Passing -1 will result in @@ -111,7 +150,7 @@ void DecDCTinRaw(const uint32_t *data, size_t length); * stream (usually a whole frame) is being written to main RAM. * * @param mode - * @return 0 or -1 in case of a timeout (mode = 0) / MDEC busy flag (mode = 1) + * @return 0 or -1 in case of a timeout (mode = 0), MDEC busy flag (mode = 1) */ int DecDCTinSync(int mode); @@ -142,10 +181,205 @@ void DecDCTout(uint32_t *data, size_t length); * to register a callback that calls DecDCTin() to feed the MDEC. * * @param mode - * @return 0 or -1 in case of a timeout (mode = 0) / DMA busy flag (mode = 1) + * @return 0 or -1 in case of a timeout (mode = 0), DMA busy flag (mode = 1) */ int DecDCToutSync(int mode); +/** + * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * frame) into a buffer that can be passed to DecDCTin(). This function uses a + * small (<1 KB) lookup table combined with the GTE to accelerate the process; + * performance is roughly on par with DecDCTvlcStart2() if the lookup table + * is copied to the scratchpad beforehand by calling DecDCTvlcCopyTable(). The + * contents of the GTE's LZCR register, if any, will be destroyed. 
+ * + * A VLC_Context object must be created and passed to this function, which will + * then proceed to initialize its fields. The max_size argument sets the + * maximum number of words that will be written to the output buffer; if more + * data needs to be written, this function will return 1. To continue decoding + * call DecDCTvlcContinue() with the same VLC_Context object (the output buffer + * can be different). If max_size = 0, the entire frame will always be decoded + * in one shot. + * + * Only bitstream version 2 is currently supported. + * + * WARNING: InitGeom() must be called prior to using DecDCTvlcStart() for the + * first time. Attempting to call this function with the GTE disabled will + * result in a crash. + * + * @param ctx Pointer to VLC_Context structure (which will be initialized) + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @param bs + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); + +/** + * @brief Resumes the decompression process started by DecDCTvlcStart(). The + * state of the decompressor is contained entirely in the VLC_Context structure + * so an arbitrary number of bitstreams can be decoded concurrently (although + * the limited CPU power makes it impractical to do so) by keeping a separate + * context for each bitstream. + * + * This function behaves like DecDCTvlcStart(), returning 1 if more data has to + * be written or 0 otherwise. DecDCTvlcContinue() shall not be called after a + * previous call to DecDCTvlcStart() or DecDCTvlcContinue() with the same + * context returned 0; in that case the context shall be discarded or reused to + * decode another bitstream. + * + * The contents of the GTE's LZCR register, if any, will be destroyed. + * + * See DecDCTvlcStart() for more details. 
+ * + * @param ctx Pointer to already initialized VLC_Context structure + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size); + +/** + * A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for compatibility + * with the official SDK. This function uses an internal context; additionally, + * the maximum output buffer size is not passed as an argument but is instead + * set by calling DecDCTvlcSize(). + * + * This function behaves identically to DecDCTvlcContinue() if bs = 0 and + * DecDCTvlcStart() otherwise. + * + * See DecDCTvlcStart() for more details. + * + * WARNING: InitGeom() must be called prior to using DecDCTvlc() for the first + * time. Attempting to call this function with the GTE disabled will result in + * a crash. + * + * @param bs Pointer to bitstream data or 0 to resume decoding + * @param buf + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlc(const uint32_t *bs, uint32_t *buf); + +/** + * @brief Sets the maximum number of 32-bit words that a single call to + * DecDCTvlc() will output. If size = 0, the entire frame will always be + * decoded in one shot. + * + * @param size Maximum number of 32-bit words to output + * @return Previously set value + */ +size_t DecDCTvlcSize(size_t size); + +/** + * @brief Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(), + * DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified + * address. A copy of this table is always present in main RAM, however this + * function can be used to copy it to the scratchpad region to boost + * decompression performance. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart(), + * DecDCTvlcContinue() and DecDCTvlc() will automatically use the last table + * copied. 
Call DecDCTvlcCopyTable(0) to revert to using the library's internal + * table in main RAM. + * + * @param addr Pointer to free area in scratchpad region or 0 to reset + */ +void DecDCTvlcCopyTable(DECDCTTAB *addr); + +/** + * @brief Begins decompressing the contents of a .BS file (or of a single .STR + * frame) into a buffer that can be passed to DecDCTin(). This function uses a + * large (34 KB) lookup table that must be loaded into main RAM beforehand by + * calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad. + * Depending on the specific bitstream being decoded DecDCTvlcStart2() might be + * slightly faster or slower than DecDCTvlcStart() with its lookup table copied + * to the scratchpad (see DecDCTvlcCopyTable()). DecDCTvlcStart() with the + * table in main RAM tends to be much slower. + * + * A VLC_Context object must be created and passed to this function, which will + * then proceed to initialize its fields. The max_size argument sets the + * maximum number of words that will be written to the output buffer; if more + * data needs to be written, this function will return 1. To continue decoding + * call DecDCTvlcContinue2() with the same VLC_Context object (the output + * buffer can be different). If max_size = 0, the entire frame will always be + * decoded in one shot. + * + * Only bitstream version 2 is currently supported. + * + * @param ctx Pointer to VLC_Context structure (which will be initialized) + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @param bs + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs); + +/** + * @brief Resumes the decompression process started by DecDCTvlcStart2(). 
The + * state of the decompressor is contained entirely in the VLC_Context structure + * so an arbitrary number of bitstreams can be decoded concurrently (although + * the limited CPU power makes it impractical to do so) by keeping a separate + * context for each bitstream. + * + * This function behaves like DecDCTvlcStart2(), returning 1 if more data has + * to be written or 0 otherwise. DecDCTvlcContinue2() shall not be called after + * a previous call to DecDCTvlcStart2() or DecDCTvlcContinue2() with the same + * context returned 0; in that case the context shall be discarded or reused to + * decode another bitstream. + * + * See DecDCTvlcStart2() for more details. + * + * @param ctx Pointer to already initialized VLC_Context structure + * @param buf + * @param max_size Maximum number of 32-bit words to output + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size); + +/** + * A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for + * compatibility with the official SDK. This function uses an internal context; + * additionally, the maximum output buffer size is not passed as an argument + * but is instead set by calling DecDCTvlcSize2(). + * + * This function behaves identically to DecDCTvlcContinue2() if bs = 0 and + * DecDCTvlcStart2() otherwise. The table argument can optionally be passed to + * use a custom lookup table. If zero, the last pointer passed to + * DecDCTvlcBuild() will be used. + * + * See DecDCTvlcStart2() for more details. + * + * @param bs Pointer to bitstream data or 0 to resume decoding + * @param buf + * @param table Pointer to decompressed table or 0 to use last table used + * @return 0, 1 if more data needs to be output or -1 in case of failure + */ +int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table); + +/** + * @brief Sets the maximum number of 32-bit words that a single call to + * DecDCTvlc2() will output. 
If size = 0, the entire frame will always be + * decoded in one shot. + * + * @param size Maximum number of 32-bit words to output + * @return Previously set value + */ +size_t DecDCTvlcSize2(size_t size); + +/** + * @brief Generates the lookup table required by DecDCTvlcStart2(), + * DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the + * specified buffer. Since the table is relatively large (34 KB), it is + * recommended to only generate it in a dynamically-allocated buffer when + * needed and deallocate the buffer afterwards. + * + * The address passed to this function is saved. Calls to DecDCTvlcStart2() and + * DecDCTvlcContinue2() will automatically use the last table decompressed. + * + * @param table + */ +void DecDCTvlcBuild(DECDCTTAB2 *table); + #ifdef __cplusplus } #endif |
