#!/usr/bin/env python3
# examples/mdec/mdecimage/encode_image.py
# Simple MDEC image encoder (requires PIL/Pillow and NumPy to be installed)
# (C) 2022 spicyjpeg - MPL licensed

import math
from warnings import warn
from argparse import ArgumentParser, FileType

import numpy
from PIL import Image

# Default quantization scales, used as defaults for the -y/--luma and
# -c/--chroma command line options.
LUMA_SCALE   = 8
CHROMA_SCALE = 16

## Tables

# The literal below holds, for each coefficient of a flattened (row-major) 8x8
# block, its position in zigzag order. argsort() inverts this permutation, so
# indexing a flattened block with ZIGZAG_TABLE yields its coefficients in
# zigzag order.
ZIGZAG_TABLE = numpy.array((
	 0,  1,  5,  6, 14, 15, 27, 28,
	 2,  4,  7, 13, 16, 26, 29, 42,
	 3,  8, 12, 17, 25, 30, 41, 43,
	 9, 11, 18, 24, 31, 40, 44, 53,
	10, 19, 23, 32, 39, 45, 52, 54,
	20, 22, 33, 38, 46, 51, 55, 60,
	21, 34, 37, 47, 50, 56, 59, 61,
	35, 36, 48, 49, 57, 58, 62, 63
), numpy.uint8).argsort()

# The default luma and chroma quantization table is based on the MPEG-1
# quantization table, with the only difference being the first value (2 instead
# of 8). It is stored as an 8x8 matrix in row-major (not zigzag) order, as
# encode_block() divides the DCT output by it before reordering.
QUANT_TABLE = numpy.array((
	 2, 16, 19, 22, 26, 27, 29, 34,
	16, 16, 22, 24, 27, 29, 34, 37,
	19, 22, 26, 27, 29, 34, 34, 38,
	22, 22, 26, 27, 29, 34, 37, 40,
	22, 26, 27, 29, 32, 35, 40, 48,
	26, 27, 29, 32, 35, 40, 48, 58,
	26, 27, 29, 34, 38, 46, 56, 69,
	27, 29, 35, 38, 46, 56, 69, 83
), numpy.uint8).reshape(( 8, 8 ))

# Cosine factors the DCT matrix is built from: S[i] = cos(i * pi / 16) / 2,
# except for S[0] which uses i = 4 (so S[0] == S[4] == cos(pi / 4) / 2).
S = [ math.cos((i or 4) / 16 * math.pi) / 2 for i in range(8) ]

# 8-point DCT basis matrix, one basis vector per row (the first row is the
# constant/DC one). encode_block() multiplies a block by this matrix on the
# left and by its transpose on the right to obtain the 2D transform.
DCT_MATRIX = numpy.array((
	 S[0],  S[0],  S[0],  S[0],  S[0],  S[0],  S[0],  S[0],
	 S[1],  S[3],  S[5],  S[7], -S[7], -S[5], -S[3], -S[1],
	 S[2],  S[6], -S[6], -S[2], -S[2], -S[6],  S[6],  S[2],
	 S[3], -S[7], -S[1], -S[5],  S[5],  S[1],  S[7], -S[3],
	 S[4], -S[4], -S[4],  S[4],  S[4], -S[4], -S[4],  S[4],
	 S[5], -S[1],  S[7],  S[3], -S[3], -S[7],  S[1], -S[5],
	 S[6], -S[2],  S[2], -S[6], -S[6],  S[2], -S[2],  S[6],
	 S[7], -S[5],  S[3], -S[1],  S[1], -S[3],  S[5], -S[7]
), numpy.float32).reshape(( 8, 8 ))

## Helpers

def to_int10(value):
	"""Pack a number into an unsigned 10-bit two's complement field.

	The value is truncated to an integer and saturated to the signed 10-bit
	range (-0x200 to 0x1ff). The result is returned as a non-negative integer
	in 0-0x3ff range, with negative inputs mapped to 0x200-0x3ff.
	"""
	saturated = int(value)

	if saturated > 0x1ff:
		saturated = 0x1ff
	elif saturated < -0x200:
		saturated = -0x200

	# Masking to 10 bits wraps negative values to their two's complement form
	# and leaves non-negative ones untouched.
	return saturated & 0x3ff

def rgb_to_ycbcr_planar(image):
	"""Convert an interleaved RGB image into separate Y, Cb and Cr planes.

	image must be a 3-dimensional array whose last axis holds exactly three
	components (R, G, B), each in 0-255 range. Returns a (y, cb, cr) tuple of
	floating point arrays, each having the shape of the first two axes of the
	input.
	"""
	normalized       = image.astype(numpy.float32) / 255.0
	red, green, blue = numpy.transpose(normalized, ( 2, 0, 1 ))

	# Each row is (offset, R weight, G weight, B weight) for one output plane.
	# https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
	weights = (
		(  16,  65.481,  128.553,   24.966 ), # Y
		( 128, -37.797,  -74.203,  112.000 ), # Cb
		( 128, 112.000,  -93.786,  -18.214 )  # Cr
	)

	return tuple(
		bias + red * kr + green * kg + blue * kb
		for bias, kr, kg, kb in weights
	)

## Block encoder

def encode_block(buffer, block, scale):
	"""Encode a single 8x8 block of samples into 16-bit MDEC codes.

	buffer: writable array of 16-bit values; codes are stored starting from
	        index 0 (at most 66 entries are written).
	block:  8x8 array of samples. 128 is subtracted from each sample before
	        the transform is applied.
	scale:  quantization scale, stored in the upper 6 bits of the first code
	        and used to divide the AC coefficients.

	Returns the number of 16-bit values written to the buffer, which is always
	even.
	"""
	# Perform discrete cosine transform on the block, divide the coefficients by
	# the quantization table and reorder them in zigzag order.
	_block = block.astype(numpy.float32) - 128.0
	coeffs = (DCT_MATRIX @ _block @ DCT_MATRIX.T) / QUANT_TABLE
	coeffs = coeffs.reshape(( 64, ))[ZIGZAG_TABLE]

	# The first code holds the quantization scale and the DC coefficient (which
	# is not affected by the scale).
	buffer[0] = (scale << 10) | to_int10(round(coeffs[0]))
	offset    = 1

	# Divide the AC coefficients by the given quantization scale and encode them
	# as run-length pairs by counting how many zeroes there are between each
	# non-zero value.
	# NOTE(review): a scale of 0 passes the range check in main() but results
	# in a division by zero here -- confirm how scale 0 should be handled.
	ac_values  = coeffs[1:] * 8.0 / scale
	run_length = 0

	for ac in ac_values.round().astype(numpy.int32):
		if ac:
			buffer[offset] = (run_length << 10) | to_int10(ac)
			offset += 1

			run_length  = 0
		else:
			run_length += 1

	# Flush any remaining zeroes.
	if run_length:
		buffer[offset] = (run_length - 1) << 10
		offset += 1

	# Add 1 or 2 end-of-block codes depending on whether the number of 16-bit
	# values output so far is odd or even. Some emulators will break if blocks
	# are not 32-bit aligned.
	buffer[offset] = 0xfe00
	offset += 1
	if offset % 2:
		buffer[offset] = 0xfe00
		offset += 1

	return offset

def encode_macroblock(buffer, block, y_scale, c_scale):
	"""Encode a 16x16 color macroblock as 6 consecutive 8x8 blocks.

	buffer:  writable array of 16-bit values the codes are stored into.
	block:   array with the Y, Cb and Cr components on its last axis (the
	         input is expected to already be YCbCr, no conversion from RGB is
	         performed here).
	y_scale: quantization scale passed to encode_block() for luma blocks.
	c_scale: quantization scale passed to encode_block() for chroma blocks.

	Returns the total number of 16-bit values written to the buffer.
	"""
	luma, chroma_b, chroma_r = block.transpose(( 2, 0, 1 ))

	# Split the macroblock into 6 monochrome 8x8 blocks (Cr, Cb at half
	# resolution + Y1-4). The MDEC uses 4:2:0 chroma subsampling.
	# TODO: use bilinear sampling instead of nearest-neighbor for chroma
	planes = (
		( chroma_r[0:16:2, 0:16:2], c_scale ),
		( chroma_b[0:16:2, 0:16:2], c_scale ),
		( luma[0: 8, 0: 8],         y_scale ),
		( luma[0: 8, 8:16],         y_scale ),
		( luma[8:16, 0: 8],         y_scale ),
		( luma[8:16, 8:16],         y_scale )
	)

	# Each block is written right after the end of the previous one.
	written = 0
	for plane, scale in planes:
		written += encode_block(buffer[written:], plane, scale)

	return written

## Main

def get_args():
	"""Parse the command line and return the resulting argparse namespace.

	The namespace provides input_file and output (file objects opened in
	binary mode), monochrome (bool) and the luma and chroma quantization
	scales (integers, not range-checked here).
	"""
	cli = ArgumentParser(
		description = "Generates uncompressed MDEC bitstream data from an image."
	)

	cli.add_argument(
		"input_file",
		help = "input image file",
		type = FileType("rb")
	)
	cli.add_argument(
		"-o", "--output",
		metavar = "file",
		help    = "where to output converted image data (image.bin by default)",
		default = "image.bin",
		type    = FileType("wb")
	)
	cli.add_argument(
		"-m", "--monochrome",
		help   = "encode image as monochrome (8x8 blocks) instead of color (16x16 macroblocks)",
		action = "store_true"
	)

	# Both quantization scale options are integers sharing the same metavar.
	scale_option = { "type": int, "metavar": "scale" }

	cli.add_argument(
		"-y", "--luma",
		default = LUMA_SCALE,
		help    = f"quantization scale for luma/monochrome blocks (0-63, default {LUMA_SCALE})",
		**scale_option
	)
	cli.add_argument(
		"-c", "--chroma",
		default = CHROMA_SCALE,
		help    = f"quantization scale for chroma blocks (0-63, default {CHROMA_SCALE})",
		**scale_option
	)

	return cli.parse_args()

def main():
	"""Convert the input image to MDEC bitstream data and write it out.

	The image is converted to YCbCr, split into 8x8 blocks (monochrome mode,
	only the Y component is used) or 16x16 macroblocks (color mode) and
	encoded in column-major order. Partial blocks at the right and bottom
	edges of the image are dropped, with a warning. The output is padded to a
	multiple of 64 16-bit values.

	Raises ValueError if either quantization scale is outside of 0-63 range.
	"""
	args = get_args()
	if args.luma < 0 or args.luma > 63:
		raise ValueError("luma quantization scale must be in 0-63 range")
	if args.chroma < 0 or args.chroma > 63:
		raise ValueError("chroma quantization scale must be in 0-63 range")

	image = Image.open(args.input_file, "r")
	data  = numpy.array(image.convert("YCbCr"), numpy.uint8)
	size  = 8 if args.monochrome else 16

	# Only whole blocks can be encoded (the DCT in encode_block() requires 8x8
	# input), so actually trim the image down to a multiple of the block size
	# rather than just warning about it.
	width  = image.width  - (image.width  % size)
	height = image.height - (image.height % size)

	if width != image.width:
		warn(RuntimeWarning(f"image width is not a multiple of {size}, trimming"))
	if height != image.height:
		warn(RuntimeWarning(f"image height is not a multiple of {size}, trimming"))

	# Preallocate the buffer for the converted image data (faster than
	# expanding an array dynamically -- this script is too slow already). At
	# least 1 MB is allocated; larger images get a buffer sized for the worst
	# case, which is 66 16-bit values per 8x8 block (1 DC + 63 AC + 2
	# end-of-block codes) plus up to 63 values of padding.
	blocks_per_unit = 1 if args.monochrome else 6
	num_blocks      = (width // size) * (height // size) * blocks_per_unit

	buffer = numpy.empty(max(0x80000, num_blocks * 66 + 64), numpy.uint16)
	offset = 0

	# Split the image into 8x8 or 16x16 blocks and encode them in column-major
	# order.
	for x in range(0, width, size):
		for y in range(0, height, size):
			block = data[y:(y + size), x:(x + size)]

			if args.monochrome:
				offset += encode_block(buffer[offset:], block[:, :, 0], args.luma)
			else:
				offset += encode_macroblock(buffer[offset:], block, args.luma, args.chroma)

	# Pad the generated data to the size of a DMA chunk (32x 32-bit words or
	# 128 bytes, i.e. 64 16-bit values).
	length = (offset + 63) & ~63
	buffer[offset:length] = 0xfe00

	if length > (0xffff * 2):
		warn(RuntimeWarning("image is too large to be decoded with a single DecDCTin() call"))

	with args.output as _file:
		buffer[0:length].tofile(_file)

# Only run the encoder when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
	main()