diff options
| author | spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> | 2022-03-20 14:02:42 +0100 |
|---|---|---|
| committer | spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> | 2022-03-20 14:02:42 +0100 |
| commit | 6c19e712e2588b52791f604feb31273acb074d41 (patch) | |
| tree | f864f22e4eb10b15e8c247d9eccf0113f54a62aa | |
| parent | 4bbfe640a8c357137524e797a8d2bd0a94d3abfa (diff) | |
| download | psn00bsdk-6c19e712e2588b52791f604feb31273acb074d41.tar.gz | |
Add mdec/mdecimage example, psxpress fixes
| -rw-r--r-- | examples/mdec/mdecimage/CMakeLists.txt | 20 | ||||
| -rw-r--r-- | examples/mdec/mdecimage/bunpattern.png | bin | 0 -> 59932 bytes | |||
| -rw-r--r-- | examples/mdec/mdecimage/encode_image.py | 218 | ||||
| -rw-r--r-- | examples/mdec/mdecimage/image.bin | bin | 0 -> 163072 bytes | |||
| -rw-r--r-- | examples/mdec/mdecimage/main.c | 82 | ||||
| -rw-r--r-- | libpsn00b/include/psxpress.h | 1 | ||||
| -rw-r--r-- | libpsn00b/psxpress/mdec.c | 119 |
7 files changed, 381 insertions, 59 deletions
diff --git a/examples/mdec/mdecimage/CMakeLists.txt b/examples/mdec/mdecimage/CMakeLists.txt new file mode 100644 index 0000000..b76adb4 --- /dev/null +++ b/examples/mdec/mdecimage/CMakeLists.txt @@ -0,0 +1,20 @@ +# PSn00bSDK example CMake script +# (C) 2021 spicyjpeg - MPL licensed + +cmake_minimum_required(VERSION 3.20) + +project( + mdecimage + LANGUAGES C ASM + VERSION 1.0.0 + DESCRIPTION "PSn00bSDK MDEC static image example" + HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk" +) + +file(GLOB _sources *.c) +psn00bsdk_add_executable(mdecimage STATIC ${_sources}) +#psn00bsdk_add_cd_image(mdecimage_iso mdecimage iso.xml DEPENDS mdecimage) + +psn00bsdk_target_incbin(mdecimage PRIVATE mdec_image image.bin) + +install(FILES ${PROJECT_BINARY_DIR}/mdecimage.exe TYPE BIN) diff --git a/examples/mdec/mdecimage/bunpattern.png b/examples/mdec/mdecimage/bunpattern.png Binary files differnew file mode 100644 index 0000000..61524f8 --- /dev/null +++ b/examples/mdec/mdecimage/bunpattern.png diff --git a/examples/mdec/mdecimage/encode_image.py b/examples/mdec/mdecimage/encode_image.py new file mode 100644 index 0000000..3a5bcea --- /dev/null +++ b/examples/mdec/mdecimage/encode_image.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +# Simple MDEC image encoder (requires PIL/Pillow and NumPy to be installed) +# (C) 2022 spicyjpeg - MPL licensed + +import math +from warnings import warn +from argparse import ArgumentParser, FileType + +import numpy +from PIL import Image + +LUMA_SCALE = 8 +CHROMA_SCALE = 16 + +## Tables + +ZIGZAG_TABLE = numpy.array(( + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +), numpy.uint8).argsort() + +# The default luma and chroma quantization table is based on the MPEG-1 +# quantization table, with the only difference being the first value (2 instead +# of 8). +QUANT_TABLE = numpy.array(( + 2, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +), numpy.uint8).reshape(( 8, 8 )) + +S = [ math.cos((i or 4) / 16 * math.pi) / 2 for i in range(8) ] + +DCT_MATRIX = numpy.array(( + S[0], S[0], S[0], S[0], S[0], S[0], S[0], S[0], + S[1], S[3], S[5], S[7], -S[7], -S[5], -S[3], -S[1], + S[2], S[6], -S[6], -S[2], -S[2], -S[6], S[6], S[2], + S[3], -S[7], -S[1], -S[5], S[5], S[1], S[7], -S[3], + S[4], -S[4], -S[4], S[4], S[4], -S[4], -S[4], S[4], + S[5], -S[1], S[7], S[3], -S[3], -S[7], S[1], -S[5], + S[6], -S[2], S[2], -S[6], -S[6], S[2], -S[2], S[6], + S[7], -S[5], S[3], -S[1], S[1], -S[3], S[5], -S[7] +), numpy.float32).reshape(( 8, 8 )) + +## Helpers + +def to_int10(value): + clamped = min(max(int(value), -0x200), 0x1ff) + + return clamped + (0 if clamped >= 0 else 0x400) + +def rgb_to_ycbcr_planar(image): + scaled = image.astype(numpy.float32) / 255.0 + r, g, b = scaled.transpose(( 2, 0, 1 )) + + # https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion + y = 16 + r * 65.481 + g * 128.553 + b * 24.966 + cb = 128 - r * 37.797 - g * 74.203 + b * 112.000 + cr = 128 + r * 112.000 - g * 93.786 - b * 18.214 + + return y, cb, cr + +## Block encoder + +def encode_block(buffer, block, scale): + # Perform discrete cosine transform on the block, divide the coefficients by + # the quantization table and reorder them in zigzag order. + _block = block.astype(numpy.float32) - 128.0 + coeffs = (DCT_MATRIX @ _block @ DCT_MATRIX.T) / QUANT_TABLE + coeffs = coeffs.reshape(( 64, ))[ZIGZAG_TABLE] + + buffer[0] = (scale << 10) | to_int10(round(coeffs[0])) + offset = 1 + + # Divide the AC coefficients by the given quantization scale and encode them + # as run-length pairs by counting how many zeroes there are between each + # non-zero value. + ac_values = coeffs[1:] * 8.0 / scale + encoded = [] + run_length = 0 + + for ac in ac_values.round().astype(numpy.int32): + if ac: + buffer[offset] = (run_length << 10) | to_int10(ac) + offset += 1 + + run_length = 0 + else: + run_length += 1 + + # Flush any remaining zeroes. + if run_length: + buffer[offset] = (run_length - 1) << 10 + offset += 1 + + # Add 1 or 2 end-of-block codes depending on whether the number of 16-bit + # values output so far is odd or even. Some emulators will break if blocks + # are not 32-bit aligned. + buffer[offset] = 0xfe00 + offset += 1 + if offset % 2: + buffer[offset] = 0xfe00 + offset += 1 + + return offset + +def encode_macroblock(buffer, block, y_scale, c_scale): + #y, cb, cr = rgb_to_ycbcr_planar(block) + y, cb, cr = block.transpose(( 2, 0, 1 )) + offset = 0 + + # Split the macroblock into 6 monochrome 8x8 blocks (Cr, Cb at half + # resolution + Y1-4). The MDEC uses 4:2:0 chroma subsampling. + # TODO: use bilinear sampling instead of nearest-neighbor for chroma + offset += encode_block(buffer[offset:], cr[0:16:2, 0:16:2], c_scale) + offset += encode_block(buffer[offset:], cb[0:16:2, 0:16:2], c_scale) + offset += encode_block(buffer[offset:], y[0: 8, 0: 8], y_scale) + offset += encode_block(buffer[offset:], y[0: 8, 8:16], y_scale) + offset += encode_block(buffer[offset:], y[8:16, 0: 8], y_scale) + offset += encode_block(buffer[offset:], y[8:16, 8:16], y_scale) + + return offset + +## Main + +def get_args(): + parser = ArgumentParser( + description = "Generates uncompressed MDEC bitstream data from an image." + ) + parser.add_argument( + "input_file", + type = FileType("rb"), + help = "input image file" + ) + parser.add_argument( + "-o", "--output", + type = FileType("wb"), + default = "image.bin", + help = "where to output converted image data (image.bin by default)", + metavar = "file" + ) + parser.add_argument( + "-m", "--monochrome", + action = "store_true", + help = "encode image as monochrome (8x8 blocks) instead of color (16x16 macroblocks)" + ) + parser.add_argument( + "-y", "--luma", + type = int, + default = LUMA_SCALE, + help = f"quantization scale for luma/monochrome blocks (0-63, default {LUMA_SCALE})", + metavar = "scale" + ) + parser.add_argument( + "-c", "--chroma", + type = int, + default = CHROMA_SCALE, + help = f"quantization scale for chroma blocks (0-63, default {CHROMA_SCALE})", + metavar = "scale" + ) + + return parser.parse_args() + +def main(): + args = get_args() + if args.luma < 0 or args.luma > 63: + raise ValueError("luma quantization scale must be in 0-63 range") + if args.chroma < 0 or args.chroma > 63: + raise ValueError("chroma quantization scale must be in 0-63 range") + + image = Image.open(args.input_file, "r") + data = numpy.array(image.convert("YCbCr"), numpy.uint8) + size = 8 if args.monochrome else 16 + + if image.width % size: + warn(RuntimeWarning(f"image width is not a multiple of {size}, trimming")) + if image.height % size: + warn(RuntimeWarning(f"image height is not a multiple of {size}, trimming")) + + # Preallocate 1 MB for the converted image data (faster than expanding an + # array dynamically -- this script is too slow already). + buffer = numpy.empty(0x80000, numpy.uint16) + offset = 0 + + # Split the image into 8x8 or 16x16 blocks and encode them in column-major + # order. + for x in range(0, image.width, size): + for y in range(0, image.height, size): + block = data[y:(y + size), x:(x + size)] + + if args.monochrome: + offset += encode_block(buffer[offset:], block[:, :, 0], args.luma) + else: + offset += encode_macroblock(buffer[offset:], block, args.luma, args.chroma) + + # Pad the generated data to the size of a DMA chunk (32x 32-bit words or + # 128 bytes). + length = (offset + 63) & 0xffffffc0 + buffer[offset:length] = 0xfe00 + + if length > (0xffff * 2): + warn(RuntimeWarning("image is too large to be decoded with a single DecDCTin() call")) + + with args.output as _file: + buffer[0:length].tofile(_file) + +if __name__ == "__main__": + main() diff --git a/examples/mdec/mdecimage/image.bin b/examples/mdec/mdecimage/image.bin Binary files differnew file mode 100644 index 0000000..976b4b6 --- /dev/null +++ b/examples/mdec/mdecimage/image.bin diff --git a/examples/mdec/mdecimage/main.c b/examples/mdec/mdecimage/main.c new file mode 100644 index 0000000..b59fdaf --- /dev/null +++ b/examples/mdec/mdecimage/main.c @@ -0,0 +1,82 @@ +/* + * PSn00bSDK MDEC static image example + * (C) 2022 spicyjpeg - MPL licensed + * + * This is a modified version of the graphics/rgb24 example showing how to feed + * run-length encoded data into the MDEC and retrieve a decoded 24bpp image. To + * keep the example simple no additional compression is applied (usually MDEC + * data would be Huffman encoded to save more space, with the initial + * decompression being done in software). A Python script is included to encode + * an image into the format expected by the MDEC; quality and file size can be + * tweaked by changing the quantization scales with the -y and -c arguments. + * + * Using the MDEC to decode static images can be useful for e.g. menu + * backgrounds or loading screens, where smaller file sizes are desirable even + * if quality is sacrificed. + */ + +#include <stdint.h> +#include <stddef.h> +#include <psxgpu.h> +#include <psxpress.h> +#include <hwregs_c.h> + +extern const uint32_t mdec_image[]; +extern const size_t mdec_image_size; + +#define SCREEN_XRES 640 +#define SCREEN_YRES 480 + +//#define BLOCK_SIZE 8 // Monochrome (8x8), 15bpp display +//#define BLOCK_SIZE 12 // Monochrome (8x8), 24bpp display +//#define BLOCK_SIZE 16 // Color (16x16), 15bpp display +#define BLOCK_SIZE 24 // Color (16x16), 24bpp display + +int main(int argc, const char* argv[]) { + DISPENV disp; + + ResetGraph(0); + DecDCTReset(0); + + // Set up the GPU for 640x480 interlaced 24bpp output. + SetDefDispEnv(&disp, 0, 0, SCREEN_XRES, SCREEN_YRES); + disp.isrgb24 = 1; + disp.isinter = 1; + + PutDispEnv(&disp); + SetDispMask(1); + + // Start feeding image data to the MDEC. This doesn't immediately start the + // decoding, instead the MDEC will wait until a destination buffer is also + // set up. + MDEC0 = 0x30000000 | (mdec_image_size / 4); // 0x38000000 for 15bpp + DecDCTinRaw(mdec_image, mdec_image_size / 4); + + // Fetch decoded data from the MDEC in vertical 8x480 or 16x480 "slices". + // This is necessary as the MDEC doesn't buffer an entire frame but only + // returns a series of square macroblocks, which can't be placed into VRAM + // with a single LoadImage() call. + //for (uint32_t x = 0; x < SCREEN_XRES; x += BLOCK_SIZE) { // 15bpp + for (uint32_t x = 0; x < (SCREEN_XRES * 3 / 2); x += BLOCK_SIZE) { // 24bpp + RECT rect; + uint32_t slice[BLOCK_SIZE * SCREEN_YRES / 2]; + + rect.x = x; + rect.y = 0; + rect.w = BLOCK_SIZE; + rect.h = SCREEN_YRES; + + // Configure the MDEC to output to the slice buffer and let it finish + // decoding a slice, then upload it to the framebuffer. + DecDCTout(slice, BLOCK_SIZE * SCREEN_YRES / 2); + DecDCToutSync(0); + + LoadImage(&rect, (u_long *) slice); + DrawSync(0); + } + + for (;;) + __asm__ volatile(""); + + return 0; +} diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h index ad5f6a3..b060170 100644 --- a/libpsn00b/include/psxpress.h +++ b/libpsn00b/include/psxpress.h @@ -7,6 +7,7 @@ #define __PSXPRESS_H #include <stdint.h> +#include <stddef.h> /* Structure definitions */ diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c index 82e2465..ca4c75a 100644 --- a/libpsn00b/psxpress/mdec.c +++ b/libpsn00b/psxpress/mdec.c @@ -11,7 +11,7 @@ #define MDEC_SYNC_TIMEOUT 0x1000000 -/* Default IDCT matrix */ +/* Default IDCT matrix and quantization tables */ #define S0 0x5a82 // 0x4000 * cos(0/16 * pi) * sqrt(2) #define S1 0x7d8a // 0x4000 * cos(1/16 * pi) * 2 @@ -22,54 +22,63 @@ #define S6 0x30fb // 0x4000 * cos(6/16 * pi) * 2 #define S7 0x18f8 // 0x4000 * cos(7/16 * pi) * 2 -static const int16_t _default_idct_matrix[] = { - S0, S0, S0, S0, S0, S0, S0, S0, - S1, S3, S5, S7, -S7, -S5, -S3, -S1, - S2, S6, -S6, -S2, -S2, -S6, S6, S2, - S3, -S7, -S1, -S5, S5, S1, S7, -S3, - S4, -S4, -S4, S4, S4, -S4, -S4, S4, - S5, -S1, S7, S3, -S3, -S7, S1, -S5, - S6, -S2, S2, -S6, -S6, S2, -S2, S6, - S7, -S5, S3, -S1, S1, -S3, S5, -S7 -}; - -/* Default quantization tables */ - -// The default luma and chroma quantization table is based on the MPEG-1 -// quantization table, with the only difference being the first value (2 -// instead of 8). Note that quantization tables are stored in zigzag order -// rather than row- or column-major. -// https://problemkaputt.de/psx-spx.htm#mdecdecompression -static const uint8_t _default_quant_table[] = { - 2, 16, 16, 19, 16, 19, 22, 22, - 22, 22, 22, 22, 26, 24, 26, 27, - 27, 27, 26, 26, 26, 26, 27, 27, - 27, 29, 29, 29, 34, 34, 34, 29, - 29, 29, 27, 27, 29, 29, 32, 32, - 34, 34, 37, 38, 37, 35, 35, 34, - 35, 38, 38, 40, 40, 40, 48, 48, - 46, 46, 56, 56, 58, 69, 69, 83 -}; -/*static const uint8_t _jpeg_y_quant_table[] = { - 16, 11, 12, 14, 12, 10, 16, 14, - 13, 14, 18, 17, 16, 19, 24, 40, - 26, 24, 22, 22, 24, 49, 35, 37, - 29, 40, 58, 51, 61, 60, 57, 51, - 56, 55, 64, 72, 92, 78, 64, 68, - 87, 69, 55, 56, 80, 109, 81, 87, - 95, 98, 103, 104, 103, 62, 77, 113, - 121, 112, 100, 120, 92, 101, 103, 99 +static const DECDCTENV _default_mdec_env = { + // The default luma and chroma quantization table is based on the MPEG-1 + // quantization table, with the only difference being the first value (2 + // instead of 8). Note that quantization tables are stored in zigzag order + // rather than row- or column-major. + // https://problemkaputt.de/psx-spx.htm#mdecdecompression + .iq_y = { + 2, 16, 16, 19, 16, 19, 22, 22, + 22, 22, 22, 22, 26, 24, 26, 27, + 27, 27, 26, 26, 26, 26, 27, 27, + 27, 29, 29, 29, 34, 34, 34, 29, + 29, 29, 27, 27, 29, 29, 32, 32, + 34, 34, 37, 38, 37, 35, 35, 34, + 35, 38, 38, 40, 40, 40, 48, 48, + 46, 46, 56, 56, 58, 69, 69, 83 + }, + .iq_c = { + 2, 16, 16, 19, 16, 19, 22, 22, + 22, 22, 22, 22, 26, 24, 26, 27, + 27, 27, 26, 26, 26, 26, 27, 27, + 27, 29, 29, 29, 34, 34, 34, 29, + 29, 29, 27, 27, 29, 29, 32, 32, + 34, 34, 37, 38, 37, 35, 35, 34, + 35, 38, 38, 40, 40, 40, 48, 48, + 46, 46, 56, 56, 58, 69, 69, 83 + }, + /*.iq_y = { + 16, 11, 12, 14, 12, 10, 16, 14, + 13, 14, 18, 17, 16, 19, 24, 40, + 26, 24, 22, 22, 24, 49, 35, 37, + 29, 40, 58, 51, 61, 60, 57, 51, + 56, 55, 64, 72, 92, 78, 64, 68, + 87, 69, 55, 56, 80, 109, 81, 87, + 95, 98, 103, 104, 103, 62, 77, 113, + 121, 112, 100, 120, 92, 101, 103, 99 + }, + .iq_c = { + 17, 18, 18, 24, 21, 24, 47, 26, + 26, 47, 99, 66, 56, 66, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 + },*/ + .dct = { + S0, S0, S0, S0, S0, S0, S0, S0, + S1, S3, S5, S7, -S7, -S5, -S3, -S1, + S2, S6, -S6, -S2, -S2, -S6, S6, S2, + S3, -S7, -S1, -S5, S5, S1, S7, -S3, + S4, -S4, -S4, S4, S4, -S4, -S4, S4, + S5, -S1, S7, S3, -S3, -S7, S1, -S5, + S6, -S2, S2, -S6, -S6, S2, -S2, S6, + S7, -S5, S3, -S1, S1, -S3, S5, -S7 + } }; -static const uint8_t _jpeg_c_quant_table[] = { - 17, 18, 18, 24, 21, 24, 47, 26, - 26, 47, 99, 66, 56, 66, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 -};*/ /* Public API */ @@ -88,24 +97,16 @@ void DecDCTReset(int32_t mode) { } void DecDCTPutEnv(const DECDCTENV *env, int32_t mono) { - const int16_t *dct = env ? env->dct : _default_idct_matrix; - const uint8_t *iq_y = env ? env->iq_y : _default_quant_table; - const uint8_t *iq_c = env ? env->iq_c : _default_quant_table; - + const DECDCTENV *_env = env ? env : &_default_mdec_env; DecDCTinSync(0); MDEC0 = 0x60000000; // Set IDCT matrix - DecDCTinRaw((const uint32_t *) dct, 32); + DecDCTinRaw((const uint32_t *) _env->dct, 32); DecDCTinSync(0); MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s) - DecDCTinRaw((const uint32_t *) iq_y, 16); + DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32); DecDCTinSync(0); - - if (!mono) { - DecDCTinRaw((const uint32_t *) iq_c, 16); - DecDCTinSync(0); - } } void DecDCTin(const uint32_t *data, int32_t mode) { |
