Add mdec/mdecimage example, psxpress fixes

author: spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> 2022-03-20 14:02:42 +0100
committer: spicyjpeg <88942473+spicyjpeg@users.noreply.github.com> 2022-03-20 14:02:42 +0100
commit: 6c19e712e2588b52791f604feb31273acb074d41 (patch)
tree: f864f22e4eb10b15e8c247d9eccf0113f54a62aa
parent: 4bbfe640a8c357137524e797a8d2bd0a94d3abfa (diff)
download: psn00bsdk-6c19e712e2588b52791f604feb31273acb074d41.tar.gz
7 files changed, 381 insertions, 59 deletions
diff --git a/examples/mdec/mdecimage/CMakeLists.txt b/examples/mdec/mdecimage/CMakeLists.txt
new file mode 100644
index 0000000..b76adb4
--- /dev/null
+++ b/examples/mdec/mdecimage/CMakeLists.txt
@@ -0,0 +1,20 @@
+# PSn00bSDK example CMake script
+# (C) 2021 spicyjpeg - MPL licensed
+
+cmake_minimum_required(VERSION 3.20)
+
+project(
+	mdecimage
+	LANGUAGES    C ASM
+	VERSION      1.0.0
+	DESCRIPTION  "PSn00bSDK MDEC static image example"
+	HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk"
+)
+
+file(GLOB _sources *.c)
+psn00bsdk_add_executable(mdecimage STATIC ${_sources})
+#psn00bsdk_add_cd_image(mdecimage_iso mdecimage iso.xml DEPENDS mdecimage)
+
+psn00bsdk_target_incbin(mdecimage PRIVATE mdec_image image.bin)
+
+install(FILES ${PROJECT_BINARY_DIR}/mdecimage.exe TYPE BIN)
diff --git a/examples/mdec/mdecimage/bunpattern.png b/examples/mdec/mdecimage/bunpattern.png
new file mode 100644
index 0000000..61524f8
--- /dev/null
+++ b/examples/mdec/mdecimage/bunpattern.png
diff --git a/examples/mdec/mdecimage/encode_image.py b/examples/mdec/mdecimage/encode_image.py
new file mode 100644
index 0000000..3a5bcea
--- /dev/null
+++ b/examples/mdec/mdecimage/encode_image.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+# Simple MDEC image encoder (requires PIL/Pillow and NumPy to be installed)
+# (C) 2022 spicyjpeg - MPL licensed
+
+import math
+from warnings import warn
+from argparse import ArgumentParser, FileType
+
+import numpy
+from PIL import Image
+
+LUMA_SCALE   = 8
+CHROMA_SCALE = 16
+
+## Tables
+
+ZIGZAG_TABLE = numpy.array((
+	 0,  1,  5,  6, 14, 15, 27, 28,
+	 2,  4,  7, 13, 16, 26, 29, 42,
+	 3,  8, 12, 17, 25, 30, 41, 43,
+	 9, 11, 18, 24, 31, 40, 44, 53,
+	10, 19, 23, 32, 39, 45, 52, 54,
+	20, 22, 33, 38, 46, 51, 55, 60,
+	21, 34, 37, 47, 50, 56, 59, 61,
+	35, 36, 48, 49, 57, 58, 62, 63
+), numpy.uint8).argsort()
+
+# The default luma and chroma quantization table is based on the MPEG-1
+# quantization table, with the only difference being the first value (2 instead
+# of 8).
+QUANT_TABLE = numpy.array((
+	 2, 16, 19, 22, 26, 27, 29, 34,
+	16, 16, 22, 24, 27, 29, 34, 37,
+	19, 22, 26, 27, 29, 34, 34, 38,
+	22, 22, 26, 27, 29, 34, 37, 40,
+	22, 26, 27, 29, 32, 35, 40, 48,
+	26, 27, 29, 32, 35, 40, 48, 58,
+	26, 27, 29, 34, 38, 46, 56, 69,
+	27, 29, 35, 38, 46, 56, 69, 83
+), numpy.uint8).reshape(( 8, 8 ))
+
+S = [ math.cos((i or 4) / 16 * math.pi) / 2 for i in range(8) ]
+
+DCT_MATRIX = numpy.array((
+	 S[0],  S[0],  S[0],  S[0],  S[0],  S[0],  S[0],  S[0],
+	 S[1],  S[3],  S[5],  S[7], -S[7], -S[5], -S[3], -S[1],
+	 S[2],  S[6], -S[6], -S[2], -S[2], -S[6],  S[6],  S[2],
+	 S[3], -S[7], -S[1], -S[5],  S[5],  S[1],  S[7], -S[3],
+	 S[4], -S[4], -S[4],  S[4],  S[4], -S[4], -S[4],  S[4],
+	 S[5], -S[1],  S[7],  S[3], -S[3], -S[7],  S[1], -S[5],
+	 S[6], -S[2],  S[2], -S[6], -S[6],  S[2], -S[2],  S[6],
+	 S[7], -S[5],  S[3], -S[1],  S[1], -S[3],  S[5], -S[7]
+), numpy.float32).reshape(( 8, 8 ))
+
+## Helpers
+
+def to_int10(value):
+	# Saturate to the signed 10-bit range (-0x200 to 0x1ff), then mask to get
+	# the two's complement bit pattern (negative values map to 0x200-0x3ff).
+	return min(max(int(value), -0x200), 0x1ff) & 0x3ff
+
+def rgb_to_ycbcr_planar(image):
+	# Scale a (rows, columns, 3) RGB array to 0.0-1.0 and split it into planes.
+	red, green, blue = (image.astype(numpy.float32) / 255.0).transpose(( 2, 0, 1 ))
+
+	# https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+	luma     =  16 + red *  65.481 + green * 128.553 + blue *  24.966
+	chroma_b = 128 - red *  37.797 - green *  74.203 + blue * 112.000
+	chroma_r = 128 + red * 112.000 - green *  93.786 - blue *  18.214
+
+	return luma, chroma_b, chroma_r
+
+## Block encoder
+
+def encode_block(buffer, block, scale):
+	# Encodes an 8x8 block of samples into buffer as 16-bit MDEC codes, returning
+	# the number of codes written. scale divides the AC values, so it can't be 0.
+	# First, perform discrete cosine transform on the block, divide the
+	# coefficients by the quantization table and reorder them in zigzag order.
+	_block = block.astype(numpy.float32) - 128.0
+	coeffs = (DCT_MATRIX @ _block @ DCT_MATRIX.T) / QUANT_TABLE
+	coeffs = coeffs.reshape(( 64, ))[ZIGZAG_TABLE]
+
+	buffer[0] = (scale << 10) | to_int10(round(coeffs[0]))
+	offset    = 1
+
+	# Divide the AC coefficients by the given quantization scale and encode them
+	# as run-length pairs by counting how many zeroes there are between each
+	# non-zero value.
+	ac_values  = coeffs[1:] * 8.0 / scale
+	run_length = 0
+
+	for ac in ac_values.round().astype(numpy.int32):
+		if ac:
+			buffer[offset] = (run_length << 10) | to_int10(ac)
+			offset    += 1
+			run_length = 0
+		else:
+			run_length += 1
+
+	# Flush any remaining zeroes.
+	if run_length:
+		buffer[offset] = (run_length - 1) << 10
+		offset += 1
+
+	# Add 1 or 2 end-of-block codes depending on whether the number of 16-bit
+	# values output so far is odd or even. Some emulators will break if blocks
+	# are not 32-bit aligned.
+	buffer[offset] = 0xfe00
+	offset += 1
+	if offset % 2:
+		buffer[offset] = 0xfe00
+		offset += 1
+
+	return offset
+
+def encode_macroblock(buffer, block, y_scale, c_scale):
+	# Encodes a 16x16 macroblock (Y, Cb and Cr planes along the last axis) as 6
+	# monochrome 8x8 blocks: Cr and Cb at half resolution followed by Y1-4, as
+	# the MDEC uses 4:2:0 chroma subsampling. Returns the number of 16-bit
+	# values written to the buffer.
+	# TODO: use bilinear sampling instead of nearest-neighbor for chroma
+	luma, chroma_b, chroma_r = block.transpose(( 2, 0, 1 ))
+
+	blocks  = [ ( plane[0:16:2, 0:16:2], c_scale ) for plane in ( chroma_r, chroma_b ) ]
+	blocks += [ ( luma[row:(row + 8), col:(col + 8)], y_scale ) for row in ( 0, 8 ) for col in ( 0, 8 ) ]
+
+	length = 0
+	for pixels, scale in blocks:
+		length += encode_block(buffer[length:], pixels, scale)
+
+	return length
+
+## Main
+
+def get_args():
+	# Declares the command line interface and returns the parsed arguments.
+	cli = ArgumentParser(
+		description = "Generates uncompressed MDEC bitstream data from an image."
+	)
+	cli.add_argument(
+		"input_file",
+		type = FileType("rb"),
+		help = "input image file"
+	)
+	cli.add_argument(
+		"-o", "--output",
+		type    = FileType("wb"),
+		default = "image.bin",
+		help    = "where to output converted image data (image.bin by default)",
+		metavar = "file"
+	)
+	cli.add_argument(
+		"-m", "--monochrome",
+		action = "store_true",
+		help   = "encode image as monochrome (8x8 blocks) instead of color (16x16 macroblocks)"
+	)
+
+	# Both quantization scale options share the same type, range and metavar.
+	for flags, default, target in (
+		( ( "-y", "--luma" ),   LUMA_SCALE,   "luma/monochrome" ),
+		( ( "-c", "--chroma" ), CHROMA_SCALE, "chroma" )
+	):
+		cli.add_argument(
+			*flags,
+			type    = int,
+			default = default,
+			help    = f"quantization scale for {target} blocks (0-63, default {default})",
+			metavar = "scale"
+		)
+
+	return cli.parse_args()
+
+def main():
+	# Encodes the input image and writes the resulting MDEC data to the output.
+	args = get_args()
+	if args.luma < 0 or args.luma > 63:
+		raise ValueError("luma quantization scale must be in 0-63 range")
+	if args.chroma < 0 or args.chroma > 63:
+		raise ValueError("chroma quantization scale must be in 0-63 range")
+
+	image = Image.open(args.input_file, "r")
+	data  = numpy.array(image.convert("YCbCr"), numpy.uint8)
+	size  = 8 if args.monochrome else 16
+
+	if image.width % size:
+		warn(RuntimeWarning(f"image width is not a multiple of {size}, trimming"))
+	if image.height % size:
+		warn(RuntimeWarning(f"image height is not a multiple of {size}, trimming"))
+
+	# Preallocate 1 MB for the converted image data (faster than expanding an
+	# array dynamically -- this script is too slow already).
+	buffer = numpy.empty(0x80000, numpy.uint16)
+	offset = 0
+
+	# Split the image into 8x8 or 16x16 blocks and encode them in column-major
+	# order. Partial blocks at the right/bottom edges are skipped (trimmed).
+	for x in range(0, image.width - size + 1, size):
+		for y in range(0, image.height - size + 1, size):
+			block = data[y:(y + size), x:(x + size)]
+			if args.monochrome:
+				offset += encode_block(buffer[offset:], block[:, :, 0], args.luma)
+			else:
+				offset += encode_macroblock(buffer[offset:], block, args.luma, args.chroma)
+
+	# Pad the generated data to the size of a DMA chunk (32x 32-bit words or
+	# 128 bytes).
+	length = (offset + 63) & 0xffffffc0
+	buffer[offset:length] = 0xfe00
+
+	if length > (0xffff * 2):
+		warn(RuntimeWarning("image is too large to be decoded with a single DecDCTin() call"))
+
+	with args.output as _file:
+		buffer[0:length].tofile(_file)
+
+if __name__ == "__main__":
+	main()
diff --git a/examples/mdec/mdecimage/image.bin b/examples/mdec/mdecimage/image.bin
new file mode 100644
index 0000000..976b4b6
--- /dev/null
+++ b/examples/mdec/mdecimage/image.bin
diff --git a/examples/mdec/mdecimage/main.c b/examples/mdec/mdecimage/main.c
new file mode 100644
index 0000000..b59fdaf
--- /dev/null
+++ b/examples/mdec/mdecimage/main.c
@@ -0,0 +1,82 @@
+/*
+ * PSn00bSDK MDEC static image example
+ * (C) 2022 spicyjpeg - MPL licensed
+ *
+ * This is a modified version of the graphics/rgb24 example showing how to feed
+ * run-length encoded data into the MDEC and retrieve a decoded 24bpp image. To
+ * keep the example simple no additional compression is applied (usually MDEC
+ * data would be Huffman encoded to save more space, with the initial
+ * decompression being done in software). A Python script is included to encode
+ * an image into the format expected by the MDEC; quality and file size can be
+ * tweaked by changing the quantization scales with the -y and -c arguments.
+ *
+ * Using the MDEC to decode static images can be useful for e.g. menu
+ * backgrounds or loading screens, where smaller file sizes are desirable even
+ * if quality is sacrificed.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <psxgpu.h>
+#include <psxpress.h>
+#include <hwregs_c.h>
+
+extern const uint32_t	mdec_image[];
+extern const size_t		mdec_image_size;
+
+#define SCREEN_XRES 640
+#define SCREEN_YRES 480
+
+//#define BLOCK_SIZE 8	// Monochrome (8x8), 15bpp display
+//#define BLOCK_SIZE 12	// Monochrome (8x8), 24bpp display
+//#define BLOCK_SIZE 16	// Color (16x16), 15bpp display
+#define BLOCK_SIZE 24	// Color (16x16), 24bpp display
+
+int main(int argc, const char* argv[]) {
+	const size_t length = mdec_image_size / 4;
+	DISPENV      disp;
+
+	ResetGraph(0);
+	DecDCTReset(0);
+
+	// Configure the GPU for 640x480 interlaced, 24bpp output.
+	SetDefDispEnv(&disp, 0, 0, SCREEN_XRES, SCREEN_YRES);
+	disp.isinter = 1;
+	disp.isrgb24 = 1;
+
+	PutDispEnv(&disp);
+	SetDispMask(1);
+
+	// Queue the entire image for decoding. The MDEC does not start working on
+	// it right away but waits until a destination buffer is set up as well.
+	MDEC0 = 0x30000000 | length; // 0x38000000 for 15bpp
+	DecDCTinRaw(mdec_image, length);
+
+	// The MDEC doesn't buffer an entire frame but only returns a series of
+	// square macroblocks, which can't be placed into VRAM with a single
+	// LoadImage() call. Decoded data is therefore fetched in vertical 8x480 or
+	// 16x480 "slices", each uploaded one slice width to the right of the
+	// previous one.
+	uint32_t slice[BLOCK_SIZE * SCREEN_YRES / 2];
+	RECT     rect = { .x = 0, .y = 0, .w = BLOCK_SIZE, .h = SCREEN_YRES };
+
+	//while (rect.x < SCREEN_XRES) {				// 15bpp
+	while (rect.x < (SCREEN_XRES * 3 / 2)) {	// 24bpp
+		// Point the MDEC at the slice buffer and wait for it to be filled,
+		// then copy its contents to the framebuffer.
+		DecDCTout(slice, sizeof(slice) / sizeof(slice[0]));
+		DecDCToutSync(0);
+
+		LoadImage(&rect, (u_long *) slice);
+		DrawSync(0);
+
+		rect.x += BLOCK_SIZE;
+	}
+
+	// Nothing left to do, idle forever.
+	while (1) {
+		__asm__ volatile("");
+	}
+
+	return 0;
+}
diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h
index ad5f6a3..b060170 100644
--- a/libpsn00b/include/psxpress.h
+++ b/libpsn00b/include/psxpress.h
@@ -7,6 +7,7 @@
 #define __PSXPRESS_H
 
 #include <stdint.h>
+#include <stddef.h>
 
 /* Structure definitions */
 
diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c
index 82e2465..ca4c75a 100644
--- a/libpsn00b/psxpress/mdec.c
+++ b/libpsn00b/psxpress/mdec.c
@@ -11,7 +11,7 @@
 
 #define MDEC_SYNC_TIMEOUT 0x1000000
 
-/* Default IDCT matrix */
+/* Default IDCT matrix and quantization tables */
 
 #define S0 0x5a82	// 0x4000 * cos(0/16 * pi) * sqrt(2)
 #define S1 0x7d8a	// 0x4000 * cos(1/16 * pi) * 2
@@ -22,54 +22,63 @@
 #define S6 0x30fb	// 0x4000 * cos(6/16 * pi) * 2
 #define S7 0x18f8	// 0x4000 * cos(7/16 * pi) * 2
 
-static const int16_t _default_idct_matrix[] = {
-	S0,  S0,  S0,  S0,  S0,  S0,  S0,  S0,
-	S1,  S3,  S5,  S7, -S7, -S5, -S3, -S1,
-	S2,  S6, -S6, -S2, -S2, -S6,  S6,  S2,
-	S3, -S7, -S1, -S5,  S5,  S1,  S7, -S3,
-	S4, -S4, -S4,  S4,  S4, -S4, -S4,  S4,
-	S5, -S1,  S7,  S3, -S3, -S7,  S1, -S5,
-	S6, -S2,  S2, -S6, -S6,  S2, -S2,  S6,
-	S7, -S5,  S3, -S1,  S1, -S3,  S5, -S7
-};
-
-/* Default quantization tables */
-
-// The default luma and chroma quantization table is based on the MPEG-1
-// quantization table, with the only difference being the first value (2
-// instead of 8). Note that quantization tables are stored in zigzag order
-// rather than row- or column-major.
-// https://problemkaputt.de/psx-spx.htm#mdecdecompression
-static const uint8_t _default_quant_table[] = {
-	  2,  16,  16,  19,  16,  19,  22,  22,
-	 22,  22,  22,  22,  26,  24,  26,  27,
-	 27,  27,  26,  26,  26,  26,  27,  27,
-	 27,  29,  29,  29,  34,  34,  34,  29,
-	 29,  29,  27,  27,  29,  29,  32,  32,
-	 34,  34,  37,  38,  37,  35,  35,  34,
-	 35,  38,  38,  40,  40,  40,  48,  48,
-	 46,  46,  56,  56,  58,  69,  69,  83
-};
-/*static const uint8_t _jpeg_y_quant_table[] = {
-	 16,  11,  12,  14,  12,  10,  16,  14,
-	 13,  14,  18,  17,  16,  19,  24,  40,
-	 26,  24,  22,  22,  24,  49,  35,  37,
-	 29,  40,  58,  51,  61,  60,  57,  51,
-	 56,  55,  64,  72,  92,  78,  64,  68,
-	 87,  69,  55,  56,  80, 109,  81,  87,
-	 95,  98, 103, 104, 103,  62,  77, 113,
-	121, 112, 100, 120,  92, 101, 103,  99
+static const DECDCTENV _default_mdec_env = {
+	// The default luma and chroma quantization table is based on the MPEG-1
+	// quantization table, with the only difference being the first value (2
+	// instead of 8). Note that quantization tables are stored in zigzag order
+	// rather than row- or column-major.
+	// https://problemkaputt.de/psx-spx.htm#mdecdecompression
+	.iq_y = {
+		 2, 16, 16, 19, 16, 19, 22, 22,
+		22, 22, 22, 22, 26, 24, 26, 27,
+		27, 27, 26, 26, 26, 26, 27, 27,
+		27, 29, 29, 29, 34, 34, 34, 29,
+		29, 29, 27, 27, 29, 29, 32, 32,
+		34, 34, 37, 38, 37, 35, 35, 34,
+		35, 38, 38, 40, 40, 40, 48, 48,
+		46, 46, 56, 56, 58, 69, 69, 83
+	},
+	.iq_c = {
+		 2, 16, 16, 19, 16, 19, 22, 22,
+		22, 22, 22, 22, 26, 24, 26, 27,
+		27, 27, 26, 26, 26, 26, 27, 27,
+		27, 29, 29, 29, 34, 34, 34, 29,
+		29, 29, 27, 27, 29, 29, 32, 32,
+		34, 34, 37, 38, 37, 35, 35, 34,
+		35, 38, 38, 40, 40, 40, 48, 48,
+		46, 46, 56, 56, 58, 69, 69, 83
+	},
+	/*.iq_y = {
+		 16,  11,  12,  14,  12,  10,  16,  14,
+		 13,  14,  18,  17,  16,  19,  24,  40,
+		 26,  24,  22,  22,  24,  49,  35,  37,
+		 29,  40,  58,  51,  61,  60,  57,  51,
+		 56,  55,  64,  72,  92,  78,  64,  68,
+		 87,  69,  55,  56,  80, 109,  81,  87,
+		 95,  98, 103, 104, 103,  62,  77, 113,
+		121, 112, 100, 120,  92, 101, 103,  99
+	},
+	.iq_c = {
+		 17,  18,  18,  24,  21,  24,  47,  26,
+		 26,  47,  99,  66,  56,  66,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99,
+		 99,  99,  99,  99,  99,  99,  99,  99
+	},*/
+	.dct = {
+		S0,  S0,  S0,  S0,  S0,  S0,  S0,  S0,
+		S1,  S3,  S5,  S7, -S7, -S5, -S3, -S1,
+		S2,  S6, -S6, -S2, -S2, -S6,  S6,  S2,
+		S3, -S7, -S1, -S5,  S5,  S1,  S7, -S3,
+		S4, -S4, -S4,  S4,  S4, -S4, -S4,  S4,
+		S5, -S1,  S7,  S3, -S3, -S7,  S1, -S5,
+		S6, -S2,  S2, -S6, -S6,  S2, -S2,  S6,
+		S7, -S5,  S3, -S1,  S1, -S3,  S5, -S7
+	}
 };
-static const uint8_t _jpeg_c_quant_table[] = {
-	 17,  18,  18,  24,  21,  24,  47,  26,
-	 26,  47,  99,  66,  56,  66,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99,
-	 99,  99,  99,  99,  99,  99,  99,  99
-};*/
 
 /* Public API */
 
@@ -88,24 +97,16 @@ void DecDCTReset(int32_t mode) {
 }
 
 void DecDCTPutEnv(const DECDCTENV *env, int32_t mono) {
-	const int16_t *dct  = env ? env->dct  : _default_idct_matrix;
-	const uint8_t *iq_y = env ? env->iq_y : _default_quant_table;
-	const uint8_t *iq_c = env ? env->iq_c : _default_quant_table;
-
+	const DECDCTENV *_env = env ? env : &_default_mdec_env;
 	DecDCTinSync(0);
 
 	MDEC0 = 0x60000000; // Set IDCT matrix
-	DecDCTinRaw((const uint32_t *) dct, 32);
+	DecDCTinRaw((const uint32_t *) _env->dct, 32);
 	DecDCTinSync(0);
 
 	MDEC0 = 0x40000000 | (mono ? 0 : 1); // Set table(s)
-	DecDCTinRaw((const uint32_t *) iq_y, 16);
+	DecDCTinRaw((const uint32_t *) _env->iq_y, mono ? 16 : 32);
 	DecDCTinSync(0);
-
-	if (!mono) {
-		DecDCTinRaw((const uint32_t *) iq_c, 16);
-		DecDCTinSync(0);
-	}
 }
 
 void DecDCTin(const uint32_t *data, int32_t mode) {
author	spicyjpeg <88942473+spicyjpeg@users.noreply.github.com>	2022-03-20 14:02:42 +0100
committer	spicyjpeg <88942473+spicyjpeg@users.noreply.github.com>	2022-03-20 14:02:42 +0100
commit	6c19e712e2588b52791f604feb31273acb074d41 (patch)
tree	f864f22e4eb10b15e8c247d9eccf0113f54a62aa
parent	4bbfe640a8c357137524e797a8d2bd0a94d3abfa (diff)
download	psn00bsdk-6c19e712e2588b52791f604feb31273acb074d41.tar.gz