aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> 2022-03-20 14:02:42 +0100
committer: spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> 2022-03-20 14:02:42 +0100
commit: 6c19e712e2588b52791f604feb31273acb074d41 (patch)
tree: f864f22e4eb10b15e8c247d9eccf0113f54a62aa
parent: 4bbfe640a8c357137524e797a8d2bd0a94d3abfa (diff)
download: psn00bsdk-6c19e712e2588b52791f604feb31273acb074d41.tar.gz
Add mdec/mdecimage example, psxpress fixes
-rw-r--r-- examples/mdec/mdecimage/CMakeLists.txt 20
-rw-r--r-- examples/mdec/mdecimage/bunpattern.png bin 0 -> 59932 bytes
-rw-r--r-- examples/mdec/mdecimage/encode_image.py 218
-rw-r--r-- examples/mdec/mdecimage/image.bin bin 0 -> 163072 bytes
-rw-r--r-- examples/mdec/mdecimage/main.c 82
-rw-r--r-- libpsn00b/include/psxpress.h 1
-rw-r--r-- libpsn00b/psxpress/mdec.c 119
7 files changed, 381 insertions, 59 deletions
diff --git a/examples/mdec/mdecimage/CMakeLists.txt b/examples/mdec/mdecimage/CMakeLists.txt
new file mode 100644
index 0000000..b76adb4
--- /dev/null
+++ b/examples/mdec/mdecimage/CMakeLists.txt
@@ -0,0 +1,20 @@
+# PSn00bSDK example CMake script
+# (C) 2021 spicyjpeg - MPL licensed
+
+cmake_minimum_required(VERSION 3.20)
+
+project(
+ mdecimage
+ LANGUAGES C ASM
+ VERSION 1.0.0
+ DESCRIPTION "PSn00bSDK MDEC static image example"
+ HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk"
+)
+
+# Build every C source file found in this directory into one executable.
+file(GLOB _sources *.c)
+psn00bsdk_add_executable(mdecimage STATIC ${_sources})
+# CD image generation is left disabled for this example.
+#psn00bsdk_add_cd_image(mdecimage_iso mdecimage iso.xml DEPENDS mdecimage)
+
+# Embed the pre-encoded image.bin into the executable. main.c declares the
+# mdec_image and mdec_image_size symbols, which this call presumably provides
+# (confirm against the psn00bsdk_target_incbin() documentation).
+psn00bsdk_target_incbin(mdecimage PRIVATE mdec_image image.bin)
+
+install(FILES ${PROJECT_BINARY_DIR}/mdecimage.exe TYPE BIN)
diff --git a/examples/mdec/mdecimage/bunpattern.png b/examples/mdec/mdecimage/bunpattern.png
new file mode 100644
index 0000000..61524f8
--- /dev/null
+++ b/examples/mdec/mdecimage/bunpattern.png
Binary files differ
diff --git a/examples/mdec/mdecimage/encode_image.py b/examples/mdec/mdecimage/encode_image.py
new file mode 100644
index 0000000..3a5bcea
--- /dev/null
+++ b/examples/mdec/mdecimage/encode_image.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+# Simple MDEC image encoder (requires PIL/Pillow and NumPy to be installed)
+# (C) 2022 spicyjpeg - MPL licensed
+
+import math
+from warnings import warn
+from argparse import ArgumentParser, FileType
+
+import numpy
+from PIL import Image
+
+# Default quantization scales, used when -y/-c are not given on the command
+# line.
+LUMA_SCALE = 8
+CHROMA_SCALE = 16
+
+## Tables
+
+# Entry [i] of the literal below is the zigzag position of the coefficient at
+# row-major index i. argsort() inverts that mapping, so indexing a flattened
+# 8x8 block with ZIGZAG_TABLE returns its coefficients in zigzag order.
+ZIGZAG_TABLE = numpy.array((
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+), numpy.uint8).argsort()
+
+# The default luma and chroma quantization table is based on the MPEG-1
+# quantization table, with the only difference being the first value (2 instead
+# of 8).
+# Unlike ZIGZAG_TABLE this one is kept as an 8x8 row-major matrix, as it is
+# applied to the DCT output before the coefficients are reordered.
+QUANT_TABLE = numpy.array((
+ 2, 16, 19, 22, 26, 27, 29, 34,
+ 16, 16, 22, 24, 27, 29, 34, 37,
+ 19, 22, 26, 27, 29, 34, 34, 38,
+ 22, 22, 26, 27, 29, 34, 37, 40,
+ 22, 26, 27, 29, 32, 35, 40, 48,
+ 26, 27, 29, 32, 35, 40, 48, 58,
+ 26, 27, 29, 34, 38, 46, 56, 69,
+ 27, 29, 35, 38, 46, 56, 69, 83
+), numpy.uint8).reshape(( 8, 8 ))
+
+# S[i] = cos(i * pi / 16) / 2 for i = 1..7. "i or 4" makes S[0] equal to S[4]
+# (cos(pi / 4) / 2 = 1 / sqrt(8)), which is the scale the constant first row
+# of the matrix needs for the 8-point DCT matrix below to be orthonormal.
+S = [ math.cos((i or 4) / 16 * math.pi) / 2 for i in range(8) ]
+
+# 8-point DCT basis, one basis function per row. The 2D transform of a block B
+# is computed as DCT_MATRIX @ B @ DCT_MATRIX.T.
+DCT_MATRIX = numpy.array((
+ S[0], S[0], S[0], S[0], S[0], S[0], S[0], S[0],
+ S[1], S[3], S[5], S[7], -S[7], -S[5], -S[3], -S[1],
+ S[2], S[6], -S[6], -S[2], -S[2], -S[6], S[6], S[2],
+ S[3], -S[7], -S[1], -S[5], S[5], S[1], S[7], -S[3],
+ S[4], -S[4], -S[4], S[4], S[4], -S[4], -S[4], S[4],
+ S[5], -S[1], S[7], S[3], -S[3], -S[7], S[1], -S[5],
+ S[6], -S[2], S[2], -S[6], -S[6], S[2], -S[2], S[6],
+ S[7], -S[5], S[3], -S[1], S[1], -S[3], S[5], -S[7]
+), numpy.float32).reshape(( 8, 8 ))
+
+## Helpers
+
+def to_int10(value):
+    """
+    Truncate value to an integer, saturate it to the signed 10-bit range
+    (-0x200 to 0x1ff) and return its 10-bit two's complement encoding as a
+    non-negative integer (0 to 0x3ff).
+    """
+    saturated = int(value)
+    if saturated > 0x1ff:
+        saturated = 0x1ff
+    elif saturated < -0x200:
+        saturated = -0x200
+
+    # Masking a negative Python integer yields its two's complement bits.
+    return saturated & 0x3ff
+
+def rgb_to_ycbcr_planar(image):
+    """
+    Convert an array of 8-bit RGB samples, with the three channels on the last
+    of its three axes, into separate Y, Cb and Cr planes.
+
+    Returns a (y, cb, cr) tuple of float32 arrays, one value per pixel.
+    """
+    # Normalize to 0.0-1.0 and move the channel axis to the front so the three
+    # planes can be unpacked directly.
+    red, green, blue = (image.astype(numpy.float32) / 255.0).transpose(( 2, 0, 1 ))
+
+    # https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+    luma = 16 + red * 65.481 + green * 128.553 + blue * 24.966
+    chroma_b = 128 - red * 37.797 - green * 74.203 + blue * 112.000
+    chroma_r = 128 + red * 112.000 - green * 93.786 - blue * 18.214
+
+    return luma, chroma_b, chroma_r
+
+## Block encoder
+
+def encode_block(buffer, block, scale):
+    """
+    Encode a single 8x8 block of samples as MDEC run-length data.
+
+    buffer: writable sequence of 16-bit values; output is stored starting at
+            index 0 (up to 66 entries may be written).
+    block:  8x8 array of samples (128 is subtracted before the DCT).
+    scale:  quantization scale (1-63), stored in the block's first value.
+
+    Returns the number of 16-bit values written, which is always even.
+    Raises ValueError if scale is outside the 1-63 range.
+    """
+    # A scale of 0 would divide the AC coefficients by zero below and values
+    # above 63 do not fit the 6-bit field, so reject both explicitly rather
+    # than emitting garbage.
+    if not 1 <= scale <= 63:
+        raise ValueError("quantization scale must be in 1-63 range")
+
+    # Perform discrete cosine transform on the block, divide the coefficients by
+    # the quantization table and reorder them in zigzag order.
+    _block = block.astype(numpy.float32) - 128.0
+    coeffs = (DCT_MATRIX @ _block @ DCT_MATRIX.T) / QUANT_TABLE
+    coeffs = coeffs.reshape(( 64, ))[ZIGZAG_TABLE]
+
+    # The first value holds the quantization scale in its upper 6 bits and the
+    # DC coefficient (not divided by the scale) in its lower 10 bits.
+    buffer[0] = (scale << 10) | to_int10(round(coeffs[0]))
+    offset = 1
+
+    # Divide the AC coefficients by the given quantization scale and encode them
+    # as run-length pairs by counting how many zeroes there are between each
+    # non-zero value.
+    ac_values = coeffs[1:] * 8.0 / scale
+    run_length = 0
+
+    for ac in ac_values.round().astype(numpy.int32):
+        if ac:
+            buffer[offset] = (run_length << 10) | to_int10(ac)
+            offset += 1
+
+            run_length = 0
+        else:
+            run_length += 1
+
+    # Flush any remaining zeroes.
+    if run_length:
+        buffer[offset] = (run_length - 1) << 10
+        offset += 1
+
+    # Add 1 or 2 end-of-block codes depending on whether the number of 16-bit
+    # values output so far is odd or even. Some emulators will break if blocks
+    # are not 32-bit aligned.
+    buffer[offset] = 0xfe00
+    offset += 1
+    if offset % 2:
+        buffer[offset] = 0xfe00
+        offset += 1
+
+    return offset
+
+def encode_macroblock(buffer, block, y_scale, c_scale):
+    """
+    Encode a 16x16 color macroblock as six consecutive 8x8 MDEC blocks.
+
+    buffer:  writable sequence of 16-bit values, filled from index 0.
+    block:   16x16x3 array holding the Y, Cb and Cr samples of each pixel on
+             its last axis (no color space conversion is done here).
+    y_scale: quantization scale for the four luma blocks.
+    c_scale: quantization scale for the two chroma blocks.
+
+    Returns the total number of 16-bit values written.
+    """
+    luma, chroma_b, chroma_r = block.transpose(( 2, 0, 1 ))
+
+    # The MDEC uses 4:2:0 chroma subsampling: a macroblock is made up of Cr and
+    # Cb at half resolution followed by the four luma quadrants (Y1-4).
+    # TODO: use bilinear sampling instead of nearest-neighbor for chroma
+    planes = (
+        ( chroma_r[0:16:2, 0:16:2], c_scale ),
+        ( chroma_b[0:16:2, 0:16:2], c_scale ),
+        ( luma[0: 8, 0: 8], y_scale ),
+        ( luma[0: 8, 8:16], y_scale ),
+        ( luma[8:16, 0: 8], y_scale ),
+        ( luma[8:16, 8:16], y_scale )
+    )
+
+    written = 0
+    for plane, scale in planes:
+        written += encode_block(buffer[written:], plane, scale)
+
+    return written
+
+## Main
+
+def get_args():
+    """
+    Define the script's command-line interface and parse the arguments it was
+    invoked with.
+
+    Returns the argparse namespace, with input_file and output already opened
+    as binary file objects and luma/chroma as (unvalidated) integers.
+    """
+    cli = ArgumentParser(
+        description = "Generates uncompressed MDEC bitstream data from an image."
+    )
+
+    # Input and output files are opened by argparse itself through FileType.
+    cli.add_argument(
+        "input_file",
+        help = "input image file",
+        type = FileType("rb")
+    )
+    cli.add_argument(
+        "-o", "--output",
+        metavar = "file",
+        help = "where to output converted image data (image.bin by default)",
+        default = "image.bin",
+        type = FileType("wb")
+    )
+
+    # Encoding mode and quantization scales.
+    cli.add_argument(
+        "-m", "--monochrome",
+        help = "encode image as monochrome (8x8 blocks) instead of color (16x16 macroblocks)",
+        action = "store_true"
+    )
+    cli.add_argument(
+        "-y", "--luma",
+        metavar = "scale",
+        help = f"quantization scale for luma/monochrome blocks (0-63, default {LUMA_SCALE})",
+        default = LUMA_SCALE,
+        type = int
+    )
+    cli.add_argument(
+        "-c", "--chroma",
+        metavar = "scale",
+        help = f"quantization scale for chroma blocks (0-63, default {CHROMA_SCALE})",
+        default = CHROMA_SCALE,
+        type = int
+    )
+
+    return cli.parse_args()
+
+def main():
+    """
+    Script entry point: load the input image, encode it block by block in
+    column-major order and write the padded MDEC data to the output file.
+
+    Raises ValueError if a quantization scale is out of range or if the image
+    is smaller than a single block.
+    """
+    args = get_args()
+    if args.luma < 0 or args.luma > 63:
+        raise ValueError("luma quantization scale must be in 0-63 range")
+    if args.chroma < 0 or args.chroma > 63:
+        raise ValueError("chroma quantization scale must be in 0-63 range")
+
+    image = Image.open(args.input_file, "r")
+    data = numpy.array(image.convert("YCbCr"), numpy.uint8)
+    size = 8 if args.monochrome else 16
+
+    if image.width % size:
+        warn(RuntimeWarning(f"image width is not a multiple of {size}, trimming"))
+    if image.height % size:
+        warn(RuntimeWarning(f"image height is not a multiple of {size}, trimming"))
+
+    # Actually trim the image to a whole number of blocks. Partial blocks at
+    # the right/bottom edges cannot be fed to the 8x8 DCT.
+    width = image.width - (image.width % size)
+    height = image.height - (image.height % size)
+    if not (width and height):
+        raise ValueError(f"image must be at least {size}x{size} pixels")
+
+    # Preallocate room for the worst case (faster than expanding an array
+    # dynamically -- this script is too slow already). An 8x8 block takes at
+    # most 66 values (DC + 63 AC + 2 end-of-block codes) and a color
+    # macroblock is made up of 6 blocks. The capacity is rounded up so the
+    # final padding below always fits.
+    blocks = (width // size) * (height // size) * (1 if args.monochrome else 6)
+    capacity = (blocks * 66 + 63) & ~63
+    buffer = numpy.empty(capacity, numpy.uint16)
+    offset = 0
+
+    # Split the image into 8x8 or 16x16 blocks and encode them in column-major
+    # order.
+    for x in range(0, width, size):
+        for y in range(0, height, size):
+            block = data[y:(y + size), x:(x + size)]
+
+            if args.monochrome:
+                offset += encode_block(buffer[offset:], block[:, :, 0], args.luma)
+            else:
+                offset += encode_macroblock(buffer[offset:], block, args.luma, args.chroma)
+
+    # Pad the generated data to the size of a DMA chunk (32x 32-bit words or
+    # 128 bytes).
+    length = (offset + 63) & 0xffffffc0
+    buffer[offset:length] = 0xfe00
+
+    # length counts 16-bit values, while DecDCTin() takes a 16-bit count of
+    # 32-bit words.
+    if length > (0xffff * 2):
+        warn(RuntimeWarning("image is too large to be decoded with a single DecDCTin() call"))
+
+    with args.output as _file:
+        buffer[0:length].tofile(_file)
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mdec/mdecimage/image.bin b/examples/mdec/mdecimage/image.bin
new file mode 100644
index 0000000..976b4b6
--- /dev/null
+++ b/examples/mdec/mdecimage/image.bin
Binary files differ
diff --git a/examples/mdec/mdecimage/main.c b/examples/mdec/mdecimage/main.c
new file mode 100644
index 0000000..b59fdaf
--- /dev/null
+++ b/examples/mdec/mdecimage/main.c
@@ -0,0 +1,82 @@
+/*
+ * PSn00bSDK MDEC static image example
+ * (C) 2022 spicyjpeg - MPL licensed
+ *
+ * This is a modified version of the graphics/rgb24 example showing how to feed
+ * run-length encoded data into the MDEC and retrieve a decoded 24bpp image. To
+ * keep the example simple no additional compression is applied (usually MDEC
+ * data would be Huffman encoded to save more space, with the initial
+ * decompression being done in software). A Python script is included to encode
+ * an image into the format expected by the MDEC; quality and file size can be
+ * tweaked by changing the quantization scales with the -y and -c arguments.
+ *
+ * Using the MDEC to decode static images can be useful for e.g. menu
+ * backgrounds or loading screens, where smaller file sizes are desirable even
+ * if quality is sacrificed.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <psxgpu.h>
+#include <psxpress.h>
+#include <hwregs_c.h>
+
+extern const uint32_t mdec_image[];
+extern const size_t mdec_image_size;
+
+#define SCREEN_XRES 640
+#define SCREEN_YRES 480
+
+//#define BLOCK_SIZE 8 // Monochrome (8x8), 15bpp display
+//#define BLOCK_SIZE 12 // Monochrome (8x8), 24bpp display
+//#define BLOCK_SIZE 16 // Color (16x16), 15bpp display
+#define BLOCK_SIZE 24 // Color (16x16), 24bpp display
+
+/*
+ * Sets up a 640x480 interlaced 24bpp display, feeds the embedded MDEC data to
+ * the decoder and uploads the decoded image to the framebuffer one vertical
+ * slice at a time, then idles forever.
+ */
+int main(int argc, const char* argv[]) {
+ DISPENV disp;
+
+ ResetGraph(0);
+ DecDCTReset(0);
+
+ // Set up the GPU for 640x480 interlaced 24bpp output.
+ SetDefDispEnv(&disp, 0, 0, SCREEN_XRES, SCREEN_YRES);
+ disp.isrgb24 = 1;
+ disp.isinter = 1;
+
+ PutDispEnv(&disp);
+ SetDispMask(1);
+
+ // Start feeding image data to the MDEC. This doesn't immediately start the
+ // decoding, instead the MDEC will wait until a destination buffer is also
+ // set up.
+ // NOTE(review): the length is passed in 32-bit words, which assumes
+ // mdec_image_size is a byte count -- confirm against the incbin helper.
+ MDEC0 = 0x30000000 | (mdec_image_size / 4); // 0x38000000 for 15bpp
+ DecDCTinRaw(mdec_image, mdec_image_size / 4);
+
+ // Fetch decoded data from the MDEC in vertical 8x480 or 16x480 "slices".
+ // This is necessary as the MDEC doesn't buffer an entire frame but only
+ // returns a series of square macroblocks, which can't be placed into VRAM
+ // with a single LoadImage() call.
+ // In 24bpp mode x advances in 16-bit framebuffer units rather than pixels:
+ // each pixel takes 3 bytes, so a 640-pixel row spans 640 * 3 / 2 = 960
+ // units and a 16-pixel wide slice spans 24 (BLOCK_SIZE).
+ //for (uint32_t x = 0; x < SCREEN_XRES; x += BLOCK_SIZE) { // 15bpp
+ for (uint32_t x = 0; x < (SCREEN_XRES * 3 / 2); x += BLOCK_SIZE) { // 24bpp
+ RECT rect;
+ // A slice is BLOCK_SIZE x SCREEN_YRES 16-bit units, i.e. half as many
+ // 32-bit words. Note that this buffer lives on the stack.
+ uint32_t slice[BLOCK_SIZE * SCREEN_YRES / 2];
+
+ rect.x = x;
+ rect.y = 0;
+ rect.w = BLOCK_SIZE;
+ rect.h = SCREEN_YRES;
+
+ // Configure the MDEC to output to the slice buffer and let it finish
+ // decoding a slice, then upload it to the framebuffer.
+ DecDCTout(slice, BLOCK_SIZE * SCREEN_YRES / 2);
+ DecDCToutSync(0);
+
+ LoadImage(&rect, (u_long *) slice);
+ DrawSync(0);
+ }
+
+ // Nothing left to do once the image is displayed. The empty asm statement
+ // is presumably there to keep the compiler from optimizing the loop away.
+ for (;;)
+ __asm__ volatile("");
+
+ // Never reached.
+ return 0;
+}
diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h
index ad5f6a3..b060170 100644
--- a/libpsn00b/include/psxpress.h
+++ b/libpsn00b/include/psxpress.h
@@ -7,6 +7,7 @@
#define __PSXPRESS_H
#include <stdint.h>
+#include <stddef.h>
/* Structure definitions */
diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c
index 82e2465..ca4c75a 100644
--- a/libpsn00b/psxpress/mdec.c
+++ b/libpsn00b/psxpress/mdec.c
@@ -11,7 +11,7 @@
#define MDEC_SYNC_TIMEOUT 0x1000000
-/* Default IDCT matrix */
+/* Default IDCT matrix and quantization tables */
#define S0 0x5a82 // 0x4000 * cos(0/16 * pi) * sqrt(2)
#define S1 0x7d8a // 0x4000 * cos(1/16 * pi) * 2
@@ -22,54 +22,63 @@
#define S6 0x30fb // 0x4000 * cos(6/16 * pi) * 2
#define S7 0x18f8 // 0x4000 * cos(7/16 * pi) * 2
-static const int16_t _default_idct_matrix[] = {
- S0, S0, S0, S0, S0, S0, S0, S0,
- S1, S3, S5, S7, -S7, -S5, -S3, -S1,
- S2, S6, -S6, -S2, -S2, -S6, S6, S2,
- S3, -S7, -S1, -S5, S5, S1, S7, -S3,
- S4, -S4, -S4, S4, S4, -S4, -S4, S4,
- S5, -S1, S7, S3, -S3, -S7, S1, -S5,
- S6, -S2, S2, -S6, -S6, S2, -S2, S6,
- S7, -S5, S3, -S1, S1, -S3, S5, -S7
-};
-
-/* Default quantization tables */
-
-// The default luma and chroma quantization table is based on the MPEG-1
-// quantization table, with the only difference being the first value (2
-// instead of 8). Note that quantization tables are stored in zigzag order
-// rather than row- or column-major.
-// https://problemkaputt.de/psx-spx.htm#mdecdecompression
-static const uint8_t _default_quant_table[] = {
- 2, 16, 16, 19, 16, 19, 22, 22,
- 22, 22, 22, 22, 26, 24, 26, 27,
- 27, 27, 26, 26, 26, 26, 27, 27,
- 27, 29, 29, 29, 34, 34, 34, 29,
- 29, 29, 27, 27, 29, 29, 32, 32,
- 34, 34, 37, 38, 37, 35, 35, 34,
- 35, 38, 38, 40, 40, 40, 48, 48,
- 46, 46, 56, 56, 58, 69, 69, 83
-};
-/*static const uint8_t _jpeg_y_quant_table[] = {
- 16, 11, 12, 14, 12, 10, 16, 14,
- 13, 14, 18, 17, 16, 19, 24, 40,
- 26, 24, 22, 22, 24, 49, 35, 37,
- 29, 40, 58, 51, 61, 60, 57, 51,
- 56, 55, 64, 72, 92, 78, 64, 68,
- 87, 69, 55, 56, 80, 109, 81, 87,
- 95, 98, 103, 104, 103, 62, 77, 113,
- 121, 112, 100, 120, 92, 101, 103, 99
+static const DECDCTENV _default_mdec_env = {
+ // The default luma and chroma quantization table is based on the MPEG-1
+ // quantization table, with the only difference being the first value (2
+ // instead of 8). Note that quantization tables are stored in zigzag order
+ // rather than row- or column-major.
+ // https://problemkaputt.de/psx-spx.htm#mdecdecompression
+ .iq_y = {
+ 2, 16, 16, 19, 16, 19, 22, 22,
+ 22, 22, 22, 22, 26, 24, 26, 27,
+ 27, 27, 26, 26, 26, 26, 27, 27,
+ 27, 29, 29, 29, 34, 34, 34, 29,
+ 29, 29, 27, 27, 29, 29, 32, 32,
+ 34, 34, 37, 38, 37, 35, 35, 34,
+ 35, 38, 38, 40, 40, 40, 48, 48,
+ 46, 46, 56, 56, 58, 69, 69, 83
+ },
+ .iq_c = {
+ 2, 16, 16, 19, 16, 19, 22, 22,
+ 22, 22, 22, 22, 26, 24, 26, 27,
+ 27, 27, 26, 26, 26, 26, 27, 27,
+ 27, 29, 29, 29, 34, 34, 34, 29,
+ 29, 29, 27, 27, 29, 29, 32, 32,
+ 34, 34, 37, 38, 37, 35, 35, 34,
+ 35, 38, 38, 40, 40, 40, 48, 48,
+ 46, 46, 56, 56, 58, 69, 69, 83
+ },
+ /*.iq_y = {
+ 16, 11, 12, 14, 12, 10, 16, 14,
+ 13, 14, 18, 17, 16, 19, 24, 40,
+ 26, 24, 22, 22, 24, 49, 35, 37,
+ 29, 40, 58, 51, 61, 60, 57, 51,
+ 56, 55, 64, 72, 92, 78, 64, 68,
+ 87, 69, 55, 56, 80, 109, 81, 87,
+ 95, 98, 103, 104, 103, 62, 77, 113,
+ 121, 112, 100, 120, 92, 101, 103, 99
+ },
+ .iq_c = {
+ 17, 18, 18, 24, 21, 24, 47, 26,
+ 26, 47, 99, 66, 56, 66, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99,
+ 99, 99, 99, 99, 99, 99, 99, 99
+ },*/
+ .dct = {
+ S0, S0, S0, S0, S0, S0, S0, S0,
+ S1, S3, S5, S7, -S7, -S5, -S3, -S1,
+ S2, S6, -S6, -S2, -S2, -S6, S6, S2,
+ S3, -S7, -S1, -S5, S5, S1, S7, -S3,
+ S4, -S4, -S4, S4, S4, -S4, -S4, S4,
+ S5, -S1, S7, S3, -S3, -S7, S1, -S5,
+ S6, -S2, S2, -S6, -S6, S2, -S2, S6,
+ S7, -S5, S3, -S1, S1, -S3, S5, -S7
+ }
};
-static const uint8_t _jpeg_c_quant_table[] = {
- 17, 18, 18, 24, 21, 24, 47, 26,
- 26, 47, 99, 66, 56, 66, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99,
- 99, 99, 99, 99, 99, 99, 99, 99
-};*/
/* Public API */
@@ -88,24 +97,16 @@ void DecDCTReset(int32_t mode) {
}
void DecDCTPutEnv(const DECDCTENV *env, int32_t mono) {
- const int16_t *dct = env ? env->dct : _default_idct_matrix;
- const uint8_t *iq_y = env ? env->iq_y : _default_quant_table;
- const uint8_t *iq_c = env ? env->iq_c : _default_quant_table;
-
+ const DECDCTENV *_env = env ? env : &_default_mdec_env;
DecDCTinSync(0);
MDEC0 = 0x60000000; // Set IDCT matrix
- DecDCTinRaw((const uint32_t *) dct, 32);
+ DecDCTinRaw((const uint32_t *) _env->dct, 32);
DecDCTinSync(0);
MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s)
- DecDCTinRaw((const uint32_t *) iq_y, 16);
+ // NOTE(review): in color mode both tables are sent as a single 32-word
+ // transfer starting at iq_y, which assumes iq_c immediately follows iq_y
+ // in DECDCTENV with no padding -- confirm against psxpress.h.
+ DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32);
DecDCTinSync(0);
-
- if (!mono) {
- DecDCTinRaw((const uint32_t *) iq_c, 16);
- DecDCTinSync(0);
- }
}
void DecDCTin(const uint32_t *data, int32_t mode) {