aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn "Lameguy" Wilbert Villamor <lameguy64@gmail.com>2022-11-03 10:14:22 +0800
committerGitHub <noreply@github.com>2022-11-03 10:14:22 +0800
commit4139331d233b7a962e747c5564fa68a285f81cc8 (patch)
treed4d3374afd5e36e8580cc424ab2c63ee9e7d357c
parente08a3d9366f8ca14a76b3dd569dac1fb9f569748 (diff)
parent37d963f724113e45d15aa9b8ee86baa9c4362b8f (diff)
downloadpsn00bsdk-4139331d233b7a962e747c5564fa68a285f81cc8.tar.gz
Merge pull request #60 from spicyjpeg/bugfix
Bugfixes, new serial port API and sound examples
-rw-r--r--CHANGELOG.md32
-rw-r--r--doc/dev_notes.md16
-rw-r--r--doc/known_bugs.md9
-rw-r--r--examples/README.md7
-rw-r--r--examples/graphics/gte/main.c6
-rw-r--r--examples/sound/cdstream/CMakeLists.txt23
-rw-r--r--examples/sound/cdstream/iso.xml (renamed from examples/sound/spustream/iso.xml)8
-rw-r--r--examples/sound/cdstream/main.c439
-rw-r--r--examples/sound/cdstream/stream.vagbin0 -> 4646912 bytes
-rw-r--r--examples/sound/cdstream/system.cnf (renamed from examples/sound/spustream/system.cnf)2
-rw-r--r--examples/sound/spustream/CMakeLists.txt16
-rw-r--r--examples/sound/spustream/convert_stream.py112
-rw-r--r--examples/sound/spustream/interleave.py152
-rw-r--r--examples/sound/spustream/main.c360
-rw-r--r--examples/sound/spustream/stream.binbin4685824 -> 0 bytes
-rw-r--r--examples/sound/spustream/stream.vagbin0 -> 1140736 bytes
-rw-r--r--examples/sound/vagsample/3dfx.vagbin227936 -> 227968 bytes
-rw-r--r--examples/sound/vagsample/main.c455
-rw-r--r--examples/sound/vagsample/proyt.vagbin189264 -> 189248 bytes
-rw-r--r--examples/system/dynlink/main.c44
-rw-r--r--libpsn00b/CMakeLists.txt4
-rw-r--r--libpsn00b/cmake/internal_setup.cmake2
-rw-r--r--libpsn00b/include/assert.h23
-rw-r--r--libpsn00b/include/dlfcn.h237
-rw-r--r--libpsn00b/include/hwregs_a.inc22
-rw-r--r--libpsn00b/include/hwregs_c.h21
-rw-r--r--libpsn00b/include/inline_c.h173
-rw-r--r--libpsn00b/include/psxcd.h828
-rw-r--r--libpsn00b/include/psxetc.h196
-rw-r--r--libpsn00b/include/psxgpu.h105
-rw-r--r--libpsn00b/include/psxgte.h205
-rw-r--r--libpsn00b/include/psxpress.h158
-rw-r--r--libpsn00b/include/psxsio.h319
-rw-r--r--libpsn00b/include/psxspu.h39
-rw-r--r--libpsn00b/include/stdlib.h12
-rw-r--r--libpsn00b/libc/_start.s5
-rw-r--r--libpsn00b/libc/abort.c21
-rw-r--r--libpsn00b/libc/memset.s3
-rw-r--r--libpsn00b/psxcd/getsector.c4
-rw-r--r--libpsn00b/psxcd/isofs.c110
-rw-r--r--libpsn00b/psxcd/psxcd.c12
-rw-r--r--libpsn00b/psxetc/dl.c434
-rw-r--r--libpsn00b/psxetc/interrupts.c8
-rw-r--r--libpsn00b/psxgpu/common.c78
-rw-r--r--libpsn00b/psxgpu/image.c6
-rw-r--r--libpsn00b/psxgte/applymatrixlv.s40
-rw-r--r--libpsn00b/psxgte/compmatrixlv.s100
-rw-r--r--libpsn00b/psxgte/hirotmatrix.c35
-rw-r--r--libpsn00b/psxgte/hisin.c33
-rw-r--r--libpsn00b/psxgte/initgeom.s4
-rw-r--r--libpsn00b/psxgte/isin.c50
-rw-r--r--libpsn00b/psxgte/matrix.c34
-rw-r--r--libpsn00b/psxgte/matrix.s439
-rw-r--r--libpsn00b/psxgte/mulmatrix.s74
-rw-r--r--libpsn00b/psxgte/mulmatrix0.s74
-rw-r--r--libpsn00b/psxgte/pushpopmatrix.s68
-rw-r--r--libpsn00b/psxgte/scalematrix.s68
-rw-r--r--libpsn00b/psxgte/scalematrixl.s68
-rw-r--r--libpsn00b/psxgte/square0.s27
-rw-r--r--libpsn00b/psxgte/squareroot.s75
-rw-r--r--libpsn00b/psxgte/vector.s123
-rw-r--r--libpsn00b/psxgte/vectornormals.s110
-rw-r--r--libpsn00b/psxpress/mdec.c12
-rw-r--r--libpsn00b/psxpress/vlc.s153
-rw-r--r--libpsn00b/psxsio/_sio_control.s184
-rw-r--r--libpsn00b/psxsio/sio.c269
-rw-r--r--libpsn00b/psxsio/siocons.c220
-rw-r--r--libpsn00b/psxsio/tty.c107
-rw-r--r--libpsn00b/psxspu/common.c117
69 files changed, 4415 insertions, 2775 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 461d2b8..d6b49e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,37 @@ to ensure the changelog can be parsed correctly.
-------------------------------------------------------------------------------
-## 2022-10-16: 0.21
+## 2022-10-27
+
+spicyjpeg:
+
+- psxspu: Fixed bugs in `SpuInit()` and in `SpuWrite()` when using manual
+ transfer mode (`SPU_TRANSFER_BY_IO`). Added `SpuWritePartly()`.
+
+- psxetc: Added `IRQ_Channel` and `DMA_Channel` enums.
+
+- examples: Refactored and bugfixed all sound examples. Renamed the old
+ `spustream` example to `cdstream` and added a new `spustream` example
+ demonstrating SPU audio streaming from main RAM. Both streaming examples now
+ make use of the interleaved .VAG file format.
+
+## 2022-10-21: 0.21
+
+spicyjpeg:
+
+- libpsn00b: Cleaned up the internal logging system.
+
+- psxgpu: Added `MoveImage()` as well as `MoveImage2()`, `LoadImage2()` and
+ `StoreImage2()`. `LoadImage()` and `StoreImage()` now make use of the
+ library's internal drawing queue instead of blocking. Added `EnqueueDrawOp()`
+ for more advanced control of the drawing queue. The `getTPage()` macro now
+ supports extended Y coordinates (512-1023) on systems with 2 MB of VRAM.
+
+- psxsio: Removed `_sio_control()` and replaced it with a completely new
+ asynchronous buffered serial port driver. Rewrote the serial TTY driver to
+ make use of the new API.
+
+## 2022-10-16
spicyjpeg:
diff --git a/doc/dev_notes.md b/doc/dev_notes.md
index 3c3aa55..fa01756 100644
--- a/doc/dev_notes.md
+++ b/doc/dev_notes.md
@@ -135,13 +135,19 @@ _- spicyjpeg_
4. If no valid response is received, assume no controller is connected and
reset the port's digital-only flag.
-- I haven't worked on `psxspu` but, for those willing to write some code, this
- is the formula to calculate SPU pitch values for playing musical notes (`^`
- is the power operator, not xor):
+- The SPU *really* doesn't like 32-bit register writes. It is connected to the
+ CPU through a 16-bit bus; 32-bit writes are automatically split into two
+ transactions, however the SPU has a tendency to miss one of them (perhaps due
+ to the bus controller issuing them too quickly). This might be why nocash
+ docs claim that writing to the SPU is unstable when in actual fact 16-bit
+ writes seem to be perfectly stable.
+
+- This is the formula to calculate SPU pitch values for playing musical notes
+ (`^` is the power operator, not xor):
```
frequency = (ref / 32) * (2 ^ ((note - 9) / 12))
- spu_pitch = frequency / 44100 * 4096
+ spu_pitch = (frequency * 4096) / 44100
ref = frequency the sample should be played at to play a middle A (MIDI note 69)
note = MIDI note number (usually 0-127, 60 is middle C)
@@ -280,4 +286,4 @@ _- spicyjpeg_
space.
-----------------------------------------
-_Last updated on 2022-03-25 by lameguy64_
+_Last updated on 2022-10-30 by spicyjpeg_
diff --git a/doc/known_bugs.md b/doc/known_bugs.md
index 620a805..5aabaf1 100644
--- a/doc/known_bugs.md
+++ b/doc/known_bugs.md
@@ -37,16 +37,9 @@ fixed.
due to the SPU status register being emulated incorrectly. They work as
expected on other emulators as well as on real hardware.
-`psxetc`:
-
-- `DL_LoadSymbolMapFromFile()`, `DL_LoadDLLFromFile()` and `dlopen()` have been
- disabled due to bugs in the BIOS file APIs. The dynamic linker can still be
- used by loading DLL binaries into RAM manually and calling `DL_CreateDLL()`
- on them (see the `system/dynlink` example).
-
## Examples
See [README.md in the examples directory](../examples/README.md#examples-summary).
-----------------------------------------
-_Last updated on 2022-10-13 by spicyjpeg_
+_Last updated on 2022-10-30 by spicyjpeg_
diff --git a/examples/README.md b/examples/README.md
index ade94b0..ae601f1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -26,8 +26,9 @@ Additional information may be found in the source code of each example.
| [`lowlevel/cartrom`](./lowlevel/cartrom) | ROM firmware for cheat devices written using GNU GAS | ROM | 4 |
| [`mdec/mdecimage`](./mdec/mdecimage) | Displays a (raw) MDEC format image | EXE | |
| [`mdec/strvideo`](./mdec/strvideo) | Plays a .STR video file using the MDEC | CD | 1 |
-| [`sound/spustream`](./sound/spustream) | Custom (non XA) CD-ROM audio streaming using the SPU | CD | |
-| [`sound/vagsample`](./sound/vagsample) | Demonstrates playing VAG sound files using the SPU | EXE | |
+| [`sound/cdstream`](./sound/cdstream) | Streams an interleaved .VAG file from the CD-ROM | CD | |
+| [`sound/spustream`](./sound/spustream) | Streams an interleaved .VAG file from main RAM | EXE | |
+| [`sound/vagsample`](./sound/vagsample) | Loads and plays .VAG sound files using the SPU | EXE | |
| [`system/childexec`](./system/childexec) | Loading a child program and returning to parent | EXE | |
| [`system/console`](./system/console) | TTY based text console that interrupts gameplay | EXE | |
| [`system/dynlink`](./system/dynlink) | Demonstrates dynamically linked libraries | CD | |
@@ -85,4 +86,4 @@ are for rebuilding the examples *after* the SDK has been installed.
CD images for each example.
-----------------------------------------
-_Last updated on 2022-10-16 by spicyjpeg_
+_Last updated on 2022-10-27 by spicyjpeg_
diff --git a/examples/graphics/gte/main.c b/examples/graphics/gte/main.c
index 6907c84..3c85d84 100644
--- a/examples/graphics/gte/main.c
+++ b/examples/graphics/gte/main.c
@@ -100,9 +100,9 @@ INDEX cube_indices[] = {
/* source color when using gte_nccs(). 4096 is 1.0 in this matrix */
/* A column of zeroes disables the light source. */
MATRIX color_mtx = {
- ONE / 2, 0, 0, /* Red */
- ONE / 2, 0, 0, /* Green */
- ONE / 2, 0, 0 /* Blue */
+ ONE * 3/4, 0, 0, /* Red */
+ ONE * 3/4, 0, 0, /* Green */
+ ONE * 3/4, 0, 0 /* Blue */
};
/* Light matrix */
diff --git a/examples/sound/cdstream/CMakeLists.txt b/examples/sound/cdstream/CMakeLists.txt
new file mode 100644
index 0000000..e569449
--- /dev/null
+++ b/examples/sound/cdstream/CMakeLists.txt
@@ -0,0 +1,23 @@
+# PSn00bSDK example CMake script
+# (C) 2021 spicyjpeg - MPL licensed
+
+cmake_minimum_required(VERSION 3.21)
+
+project(
+ cdstream
+ LANGUAGES C
+ VERSION 1.0.0
+ DESCRIPTION "PSn00bSDK SPU CD audio streaming example"
+ HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk"
+)
+
+file(GLOB _sources *.c)
+psn00bsdk_add_executable(cdstream GPREL ${_sources})
+psn00bsdk_add_cd_image(cdstream_iso cdstream iso.xml DEPENDS cdstream)
+
+install(
+ FILES
+ ${PROJECT_BINARY_DIR}/cdstream.bin
+ ${PROJECT_BINARY_DIR}/cdstream.cue
+ TYPE BIN
+)
diff --git a/examples/sound/spustream/iso.xml b/examples/sound/cdstream/iso.xml
index 050d673..66f1f74 100644
--- a/examples/sound/spustream/iso.xml
+++ b/examples/sound/cdstream/iso.xml
@@ -6,8 +6,8 @@
<track type="data">
<identifiers
system ="PLAYSTATION"
- volume ="SPUSTREAM"
- volume_set ="SPUSTREAM"
+ volume ="CDSTREAM"
+ volume_set ="CDSTREAM"
publisher ="MEIDOTEK"
data_preparer ="PSN00BSDK ${PSN00BSDK_VERSION}"
application ="PLAYSTATION"
@@ -16,9 +16,9 @@
<directory_tree>
<file name="SYSTEM.CNF" type="data" source="${PROJECT_SOURCE_DIR}/system.cnf" />
- <file name="SPUSTRM.EXE" type="data" source="spustream.exe" />
+ <file name="CDSTREAM.EXE" type="data" source="cdstream.exe" />
- <file name="STREAM.BIN" type="data" source="${PROJECT_SOURCE_DIR}/stream.bin" />
+ <file name="STREAM.VAG" type="data" source="${PROJECT_SOURCE_DIR}/stream.vag" />
<dummy sectors="1024"/>
</directory_tree>
diff --git a/examples/sound/cdstream/main.c b/examples/sound/cdstream/main.c
new file mode 100644
index 0000000..636ef10
--- /dev/null
+++ b/examples/sound/cdstream/main.c
@@ -0,0 +1,439 @@
+/*
+ * PSn00bSDK SPU CD-ROM streaming example
+ * (C) 2022 spicyjpeg - MPL licensed
+ *
+ * This is an extended version of the sound/spustream example demonstrating
+ * playback of a large multi-channel audio file from the CD using the SPU,
+ * without having to rely on the CD drive's own ability to play CD-DA or XA
+ * tracks.
+ *
+ * The main difference from spustream is that the SPU IRQ handler does not
+ * upload a chunk from main RAM to SPU RAM immediately, it only sets a flag.
+ * The main loop checks if the flag has been set and starts reading the next
+ * chunk from the CD into a buffer in RAM asynchronously; the chunk is then
+ * uploaded to the SPU and the IRQ is re-enabled.
+ *
+ * Chunks are read once again from an interleaved .VAG file, laid out on the
+ * disc as follows:
+ *
+ * +--Sector--+--Sector--+--Sector--+--Sector--+--Sector--+--Sector--+----
+ * | | +--------------------+---------------------+ |
+ * | .VAG | | Left channel data | Right channel data | Padding | ...
+ * | header | +--------------------+---------------------+ |
+ * +----------+----------+----------+----------+----------+----------+----
+ * \__________________Chunk___________________/
+ *
+ * Note that chunks have to be large enough to give the drive enough time to
+ * seek from one chunk to another. The included .VAG file has been encoded with
+ * a chunk size of 0x7000 bytes, however you might want to try smaller sizes to
+ * reduce SPU RAM usage. Chunk size can be set by passing the -b option to the
+ * .VAG interleaving script included in the spustream directory.
+ *
+ * Implementing SPU streaming might seem pointless, but it actually has a
+ * number of advantages over CD-DA or XA:
+ *
+ * - Any sample rate up to 44.1 kHz can be used. The sample rate can also be
+ * changed on-the-fly to play the stream at different speeds and pitches (as
+ * long as the CD drive can keep up), or even interpolated for effects like
+ * tape stops.
+ * - Manual streaming is not limited to mono or stereo but can be expanded to
+ * as many channels as needed, only limited by the amount of SPU RAM required
+ * for chunks and CD bandwidth. Having more than 2 channels can be useful for
+ * e.g. smoothly crossfading between tracks (not possible with XA) or
+ * controlling volume and panning of each instrument separately.
+ * - XA playback tends to skip on consoles with a worn out drive, as XA sectors
+ * cannot have any error correction data. SPU streaming is not subject to
+ * this limitation since sectors are read and processed in software.
+ * - Depending on how streaming/interleaving is implemented it is possible to
+ * have 500-1000ms idle periods during which the CD drive isn't buffering the
+ * stream, which can be used to read small amounts of other data without ever
+ * interrupting playback. This is different from XA-style interleaving as the
+ * drive is free to seek to *any* region of the disc during these periods (it
+ * must seek back to the stream's next chunk afterwards though).
+ * - It is also possible to seek back to the beginning of the stream and load
+ * the first chunk before the end is reached, allowing for seamless looping
+ * without having to resort to tricks like separate filler samples.
+ * - Finally, SPU streaming can be used on some PS1-based arcade boards that
+ * use IDE/SCSI drives or flash memory for storage and thus lack support for
+ * XA or CD-DA playback.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <psxetc.h>
+#include <psxapi.h>
+#include <psxgpu.h>
+#include <psxpad.h>
+#include <psxspu.h>
+#include <psxcd.h>
+#include <hwregs_c.h>
+
+extern const uint8_t stream_data[];
+
+#define NUM_CHANNELS 2
+
+/* Display/GPU context utilities */
+
+#define SCREEN_XRES 320
+#define SCREEN_YRES 240
+
+#define BGCOLOR_R 48
+#define BGCOLOR_G 24
+#define BGCOLOR_B 0
+
+typedef struct {
+ DISPENV disp;
+ DRAWENV draw;
+} Framebuffer;
+
+typedef struct {
+ Framebuffer db[2];
+ int db_active;
+} RenderContext;
+
+void init_context(RenderContext *ctx) {
+ Framebuffer *db;
+
+ ResetGraph(0);
+ ctx->db_active = 0;
+
+ db = &(ctx->db[0]);
+ SetDefDispEnv(&(db->disp), 0, 0, SCREEN_XRES, SCREEN_YRES);
+ SetDefDrawEnv(&(db->draw), SCREEN_XRES, 0, SCREEN_XRES, SCREEN_YRES);
+ setRGB0(&(db->draw), BGCOLOR_R, BGCOLOR_G, BGCOLOR_B);
+ db->draw.isbg = 1;
+ db->draw.dtd = 1;
+
+ db = &(ctx->db[1]);
+ SetDefDispEnv(&(db->disp), SCREEN_XRES, 0, SCREEN_XRES, SCREEN_YRES);
+ SetDefDrawEnv(&(db->draw), 0, 0, SCREEN_XRES, SCREEN_YRES);
+ setRGB0(&(db->draw), BGCOLOR_R, BGCOLOR_G, BGCOLOR_B);
+ db->draw.isbg = 1;
+ db->draw.dtd = 1;
+
+ PutDrawEnv(&(db->draw));
+ //PutDispEnv(&(db->disp));
+
+ // Create a text stream at the top of the screen.
+ FntLoad(960, 0);
+ FntOpen(8, 16, 304, 208, 2, 512);
+}
+
+void display(RenderContext *ctx) {
+ Framebuffer *db;
+
+ DrawSync(0);
+ VSync(0);
+ ctx->db_active ^= 1;
+
+ db = &(ctx->db[ctx->db_active]);
+ PutDrawEnv(&(db->draw));
+ PutDispEnv(&(db->disp));
+ SetDispMask(1);
+}
+
+/* .VAG header structure */
+
+typedef struct {
+ uint32_t magic; // 0x69474156 ("VAGi") for interleaved files
+ uint32_t version;
+ uint32_t interleave; // Little-endian, size of each channel buffer
+ uint32_t size; // Big-endian, in bytes
+ uint32_t sample_rate; // Big-endian, in Hertz
+ uint32_t _reserved[3];
+ char name[16];
+} VAG_Header;
+
+#define SWAP_ENDIAN(x) ( \
+ (((uint32_t) (x) & 0x000000ff) << 24) | \
+ (((uint32_t) (x) & 0x0000ff00) << 8) | \
+ (((uint32_t) (x) & 0x00ff0000) >> 8) | \
+ (((uint32_t) (x) & 0xff000000) >> 24) \
+)
+
+/* Interrupt callbacks */
+
+// The first 4 KB of SPU RAM are reserved for capture buffers and psxspu
+// additionally uploads a dummy sample (16 bytes) at 0x1000 by default, so the
+// chunks must be placed after those. The dummy sample is going to be used to
+// keep unused SPU channels busy, preventing them from accidentally triggering
+// the SPU IRQ and throwing off the timing (all channels are always reading
+// from SPU RAM, even when "stopped").
+// https://problemkaputt.de/psx-spx.htm#spuinterrupt
+#define DUMMY_BLOCK_ADDR 0x1000
+#define BUFFER_START_ADDR 0x1010
+
+typedef enum {
+ STATE_IDLE,
+ STATE_DATA_NEEDED,
+ STATE_READING,
+ STATE_BUFFERING
+} StreamState;
+
+typedef struct {
+ uint32_t *read_buffer;
+ int lba, chunk_secs;
+ int buffer_size, num_chunks, sample_rate;
+
+ volatile int next_chunk, spu_addr;
+ volatile int8_t db_active, state;
+} StreamContext;
+
+static StreamContext str_ctx;
+
+void spu_irq_handler(void) {
+ // Acknowledge the interrupt to ensure it can be triggered again. The only
+ // way to do this is actually to disable the interrupt entirely; we'll
+ // enable it again once the chunk is ready.
+ SPU_CTRL &= 0xffbf;
+
+ int chunk_size = str_ctx.buffer_size * NUM_CHANNELS;
+ int chunk = (str_ctx.next_chunk + 1) % (uint32_t) str_ctx.num_chunks;
+
+ str_ctx.db_active ^= 1;
+ str_ctx.state = STATE_DATA_NEEDED;
+ str_ctx.next_chunk = chunk;
+
+ // Configure the SPU to trigger an IRQ once the chunk that is going to be
+ // filled now starts playing (so the next buffer can be loaded) and
+ // override both channels' loop addresses to make them "jump" to the new
+ // buffers, rather than actually looping when they encounter the loop flag
+ // at the end of the currently playing buffers.
+ int addr = BUFFER_START_ADDR + (str_ctx.db_active ? chunk_size : 0);
+ str_ctx.spu_addr = addr;
+
+ SPU_IRQ_ADDR = getSPUAddr(addr);
+ for (int i = 0; i < NUM_CHANNELS; i++)
+ SPU_CH_LOOP_ADDR(i) = getSPUAddr(addr + str_ctx.buffer_size * i);
+
+ // Note that we can't call CdRead() here as it requires interrupts to be
+ // enabled. Instead, feed_stream() (called from the main loop) will check
+ // if str_ctx.state is set to STATE_DATA_NEEDED and fetch the next chunk.
+}
+
+void cd_read_handler(int event, uint8_t *payload) {
+ // Attempt to read the chunk again if an error has occurred, otherwise
+ // start uploading it to SPU RAM.
+ if (event == CdlDiskError) {
+ str_ctx.state = STATE_DATA_NEEDED;
+ return;
+ }
+
+ SpuSetTransferStartAddr(str_ctx.spu_addr);
+ SpuWrite(str_ctx.read_buffer, str_ctx.buffer_size * NUM_CHANNELS);
+
+ str_ctx.state = STATE_BUFFERING;
+}
+
+void spu_dma_handler(void) {
+ // Re-enable the SPU IRQ once the new chunk has been fully uploaded.
+ SPU_CTRL |= 0x0040;
+
+ str_ctx.state = STATE_IDLE;
+}
+
+/* Helper functions */
+
+// This isn't actually required for this example, however it is necessary if
+// you want to allocate the stream buffers into a region of SPU RAM that was
+// previously used (to make sure the IRQ isn't going to be triggered by any
+// inactive channels).
+void reset_spu_channels(void) {
+ SpuSetKey(0, 0x00ffffff);
+
+ for (int i = 0; i < 24; i++) {
+ SPU_CH_ADDR(i) = getSPUAddr(DUMMY_BLOCK_ADDR);
+ SPU_CH_FREQ(i) = 0x1000;
+ }
+
+ SpuSetKey(1, 0x00ffffff);
+}
+
+void feed_stream(void) {
+ if (str_ctx.state != STATE_DATA_NEEDED)
+ return;
+
+ // Start reading the next chunk from the CD.
+ int lba = str_ctx.lba + str_ctx.next_chunk * str_ctx.chunk_secs;
+
+ CdlLOC pos;
+ CdIntToPos(lba, &pos);
+ CdControl(CdlSetloc, &pos, 0);
+
+ CdReadCallback(&cd_read_handler);
+ CdRead(str_ctx.chunk_secs, str_ctx.read_buffer, CdlModeSpeed);
+
+ str_ctx.state = STATE_READING;
+}
+
+void init_stream(const CdlLOC *pos) {
+ EnterCriticalSection();
+ InterruptCallback(IRQ_SPU, &spu_irq_handler);
+ DMACallback(DMA_SPU, &spu_dma_handler);
+ ExitCriticalSection();
+
+ // Read the header. In interleaved .VAG files it occupies the whole first sector.
+ uint32_t header[512];
+ CdControl(CdlSetloc, pos, 0);
+
+ CdReadCallback(0);
+ CdRead(1, header, CdlModeSpeed);
+ CdReadSync(0, 0);
+
+ VAG_Header *vag = (VAG_Header *) header;
+ int buf_size = vag->interleave;
+ int chunk_secs = ((buf_size * NUM_CHANNELS) + 2047) / 2048;
+
+ str_ctx.read_buffer = malloc(chunk_secs * 2048);
+ str_ctx.lba = CdPosToInt(pos) + 1;
+ str_ctx.chunk_secs = chunk_secs;
+ str_ctx.buffer_size = buf_size;
+ str_ctx.num_chunks = (SWAP_ENDIAN(vag->size) + buf_size - 1) / buf_size;
+ str_ctx.sample_rate = SWAP_ENDIAN(vag->sample_rate);
+
+ str_ctx.db_active = 1;
+ str_ctx.next_chunk = -1;
+
+ // Ensure at least one chunk is in SPU RAM by invoking the IRQ handler
+ // manually and blocking until the chunk has loaded.
+ spu_irq_handler();
+ while (str_ctx.state != STATE_IDLE)
+ feed_stream();
+}
+
+void start_stream(void) {
+ int bits = 0x00ffffff >> (24 - NUM_CHANNELS);
+
+ for (int i = 0; i < NUM_CHANNELS; i++) {
+ SPU_CH_ADDR(i) = getSPUAddr(str_ctx.spu_addr + str_ctx.buffer_size * i);
+ SPU_CH_FREQ(i) = getSPUSampleRate(str_ctx.sample_rate);
+ SPU_CH_ADSR1(i) = 0x80ff;
+ SPU_CH_ADSR2(i) = 0x1fee;
+ }
+
+ // Unmute the channels and route them for stereo output. You'll want to
+ // edit this if you are using more than 2 channels, and/or if you want to
+ // provide an option to output mono audio instead of stereo.
+ SPU_CH_VOL_L(0) = 0x3fff;
+ SPU_CH_VOL_R(0) = 0x0000;
+ SPU_CH_VOL_L(1) = 0x0000;
+ SPU_CH_VOL_R(1) = 0x3fff;
+
+ spu_irq_handler();
+ SpuSetKey(1, bits);
+}
+
+// This is basically a variant of reset_spu_channels() that only resets the
+// channels used to play the stream, to (again) prevent them from triggering
+// the SPU IRQ while the stream is paused.
+void stop_stream(void) {
+ int bits = 0x00ffffff >> (24 - NUM_CHANNELS);
+
+ SpuSetKey(0, bits);
+
+ for (int i = 0; i < NUM_CHANNELS; i++)
+ SPU_CH_ADDR(i) = getSPUAddr(DUMMY_BLOCK_ADDR);
+
+ SpuSetKey(1, bits);
+}
+
+/* Main */
+
+static RenderContext ctx;
+
+#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx); }
+#define SHOW_ERROR(...) { SHOW_STATUS(__VA_ARGS__); while (1) __asm__("nop"); }
+
+static const char *state_strings[] = { "IDLE", "DATA NEEDED", "READING", "BUFFERING" };
+
+int main(int argc, const char* argv[]) {
+ init_context(&ctx);
+ SpuInit();
+ CdInit();
+ reset_spu_channels();
+ SHOW_STATUS("");
+
+ // Set up controller polling.
+ uint8_t pad_buff[2][34];
+ InitPAD(pad_buff[0], 34, pad_buff[1], 34);
+ StartPAD();
+ ChangeClearPAD(0);
+
+ CdlFILE file;
+ SHOW_STATUS("OPENING STREAM FILE\n");
+ if (!CdSearchFile(&file, "\\STREAM.VAG"))
+ SHOW_ERROR("FAILED TO FIND STREAM.VAG\n");
+
+ SHOW_STATUS("BUFFERING STREAM\n");
+ init_stream(&file.pos);
+ start_stream();
+
+ int paused = 0, sample_rate = getSPUSampleRate(str_ctx.sample_rate);
+
+ uint16_t last_buttons = 0xffff;
+
+ while (1) {
+ feed_stream();
+
+ FntPrint(-1, "PLAYING SPU STREAM\n\n");
+ FntPrint(-1, "BUFFER: %d\n", str_ctx.db_active);
+ FntPrint(-1, "STATUS: %s\n\n", state_strings[str_ctx.state]);
+
+ FntPrint(-1, "POSITION: %d/%d\n", str_ctx.next_chunk, str_ctx.num_chunks);
+ FntPrint(-1, "SMP RATE: %5d HZ\n\n", (sample_rate * 44100) >> 12);
+
+ FntPrint(-1, "[START] %s\n", paused ? "RESUME" : "PAUSE");
+ FntPrint(-1, "[LEFT/RIGHT] SEEK\n");
+ FntPrint(-1, "[O] RESET POSITION\n");
+ FntPrint(-1, "[UP/DOWN] CHANGE SAMPLE RATE\n");
+ FntPrint(-1, "[X] RESET SAMPLE RATE\n");
+
+ FntFlush(-1);
+ display(&ctx);
+
+ // Check if a compatible controller is connected and handle button
+ // presses.
+ PADTYPE *pad = (PADTYPE *) pad_buff[0];
+ if (pad->stat)
+ continue;
+ if (
+ (pad->type != PAD_ID_DIGITAL) &&
+ (pad->type != PAD_ID_ANALOG_STICK) &&
+ (pad->type != PAD_ID_ANALOG)
+ )
+ continue;
+
+ if ((last_buttons & PAD_START) && !(pad->btn & PAD_START)) {
+ paused ^= 1;
+ if (paused)
+ stop_stream();
+ else
+ start_stream();
+ }
+
+ if (!(pad->btn & PAD_LEFT))
+ str_ctx.next_chunk--;
+ if (!(pad->btn & PAD_RIGHT))
+ str_ctx.next_chunk++;
+ if ((last_buttons & PAD_CIRCLE) && !(pad->btn & PAD_CIRCLE))
+ str_ctx.next_chunk = -1;
+
+ if (!(pad->btn & PAD_DOWN) && (sample_rate > 0x400))
+ sample_rate -= 0x40;
+ if (!(pad->btn & PAD_UP) && (sample_rate < 0x2000))
+ sample_rate += 0x40;
+ if ((last_buttons & PAD_CROSS) && !(pad->btn & PAD_CROSS))
+ sample_rate = getSPUSampleRate(str_ctx.sample_rate);
+
+ // Only set the sample rate registers if necessary.
+ if (pad->btn != 0xffff) {
+ for (int i = 0; i < NUM_CHANNELS; i++)
+ SPU_CH_FREQ(i) = sample_rate;
+ }
+
+ last_buttons = pad->btn;
+ }
+
+ return 0;
+}
diff --git a/examples/sound/cdstream/stream.vag b/examples/sound/cdstream/stream.vag
new file mode 100644
index 0000000..a6faf74
--- /dev/null
+++ b/examples/sound/cdstream/stream.vag
Binary files differ
diff --git a/examples/sound/spustream/system.cnf b/examples/sound/cdstream/system.cnf
index 0c4561a..11ca055 100644
--- a/examples/sound/spustream/system.cnf
+++ b/examples/sound/cdstream/system.cnf
@@ -1,4 +1,4 @@
-BOOT=cdrom:\spustrm.exe;1
+BOOT=cdrom:\cdstream.exe;1
TCB=4
EVENT=10
STACK=801FFFF0
diff --git a/examples/sound/spustream/CMakeLists.txt b/examples/sound/spustream/CMakeLists.txt
index 63d113b..465e291 100644
--- a/examples/sound/spustream/CMakeLists.txt
+++ b/examples/sound/spustream/CMakeLists.txt
@@ -5,20 +5,16 @@ cmake_minimum_required(VERSION 3.21)
project(
spustream
- LANGUAGES C
+ LANGUAGES C ASM
VERSION 1.0.0
- DESCRIPTION "PSn00bSDK SPU custom streaming example"
+ DESCRIPTION "PSn00bSDK SPU audio streaming example"
HOMEPAGE_URL "http://lameguy64.net/?page=psn00bsdk"
)
-# TODO: add rules to actually generate a valid STREAM.BIN file
file(GLOB _sources *.c)
psn00bsdk_add_executable(spustream GPREL ${_sources})
-psn00bsdk_add_cd_image(spustream_iso spustream iso.xml DEPENDS spustream)
+#psn00bsdk_add_cd_image(spustream_iso spustream iso.xml DEPENDS spustream)
-install(
- FILES
- ${PROJECT_BINARY_DIR}/spustream.bin
- ${PROJECT_BINARY_DIR}/spustream.cue
- TYPE BIN
-)
+psn00bsdk_target_incbin(spustream PRIVATE stream_data stream.vag)
+
+install(FILES ${PROJECT_BINARY_DIR}/spustream.exe TYPE BIN)
diff --git a/examples/sound/spustream/convert_stream.py b/examples/sound/spustream/convert_stream.py
deleted file mode 100644
index 1b1696f..0000000
--- a/examples/sound/spustream/convert_stream.py
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python3
-# Simple .VAG to STREAM.BIN interleaving tool
-# (C) 2021 spicyjpeg - MPL licensed
-
-import sys
-from warnings import warn
-from struct import Struct
-from itertools import zip_longest
-from argparse import ArgumentParser, FileType
-
-VAG_HEADER = Struct("> 4s I 4x 2I 12x 16s")
-VAG_MAGIC = b"VAGp"
-SAMPLE_RATE = 44100
-BUFFER_SIZE = 26624 # (26624 / 16 * 28) / 44100 = 1.05 seconds
-ALIGN_SIZE = 2048
-
-## Helpers
-
-def align(data, size):
- chunks = (len(data) + size - 1) // size
-
- return data.ljust(chunks * size, b"\x00")
-
-def set_loop_flag(data):
- last_block = bytearray(data[-16:])
- last_block[1] = 0x03 # Jump to loop point + sustain
-
- return data[:-16] + last_block
-
-## .VAG file reader
-
-def read_vag(_file, chunk_size):
- with _file:
- header = _file.read(VAG_HEADER.size)
- (
- magic,
- version,
- size,
- sample_rate,
- name
- ) = VAG_HEADER.unpack(header)
-
- #if magic != VAG_MAGIC:
- #raise RuntimeError(f"{_file.name} is not a valid .VAG file")
- if sample_rate != SAMPLE_RATE:
- warn(RuntimeWarning(f"{_file.name} sample rate is not {SAMPLE_RATE} Hz"))
-
- for i in range(0, size, chunk_size):
- chunk = _file.read(chunk_size)
-
- if len(chunk) % 16:
- warn(RuntimeWarning(f"{_file.name} is not 16-byte aligned, trimming"))
- chunk = chunk[0:len(chunk) // 16 * 16]
-
- chunk = set_loop_flag(chunk)
-
- yield chunk.ljust(chunk_size, b"\x00")
-
-## Main
-
-def get_args():
- parser = ArgumentParser(
- description = "Generates interleaved stream data from one or more .VAG files."
- )
- parser.add_argument(
- "input_file",
- nargs = "+",
- type = FileType("rb"),
- help = f"mono input files for each channel (must be {SAMPLE_RATE} Hz .VAG)"
- )
- parser.add_argument(
- "-o", "--output",
- type = FileType("wb"),
- default = "stream.bin",
- help = "where to output converted stream data (stream.bin by default)",
- metavar = "file"
- )
- parser.add_argument(
- "-b", "--buffer-size",
- type = int,
- default = BUFFER_SIZE,
- help = f"size of each interleaved chunk (one per channel, default {BUFFER_SIZE})",
- metavar = "bytes"
- )
- parser.add_argument(
- "-a", "--align",
- type = int,
- default = ALIGN_SIZE,
- help = f"align each group of chunks to N bytes (default {ALIGN_SIZE})",
- metavar = "bytes"
- )
-
- return parser.parse_args()
-
-def main():
- args = get_args()
- if args.buffer_size % 16:
- raise ValueError("buffer size must be a multiple of 16 bytes")
-
- interleave = zip_longest(
- *( read_vag(_file, args.buffer_size) for _file in args.input_file ),
- fillvalue = b"\x00" * args.buffer_size
- )
-
- with args.output as _file:
- for chunks in interleave:
- data = b"".join(chunks)
-
- _file.write(align(data, args.align))
-
-if __name__ == "__main__":
- main()
diff --git a/examples/sound/spustream/interleave.py b/examples/sound/spustream/interleave.py
new file mode 100644
index 0000000..4e68974
--- /dev/null
+++ b/examples/sound/spustream/interleave.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# Simple .VAG interleaving tool
+# (C) 2021-2022 spicyjpeg - MPL licensed
+
+import os, sys
+from warnings import warn
+from struct import Struct
+from itertools import zip_longest
+from argparse import ArgumentParser, FileType
+
+VAG_HEADER = Struct("> 4s I 4s 2I 12x 16s")
+VAG_MAGIC = b"VAGp"
+VAGI_MAGIC = b"VAGi"
+VAG_VERSION = 0x20
+BUFFER_SIZE = 0x1000
+CHUNK_ALIGN = 0x800
+
+## Helpers
+
+def align(data, size):
+ chunks = (len(data) + size - 1) // size
+
+ return data.ljust(chunks * size, b"\x00")
+
+def get_loop_offset(data):
+ for index, flag in enumerate(data[1::16]):
+ if flag & 0x01:
+ return index * 16
+
+ return len(data) - 16
+
+## .VAG file reader
+
+class VAGReader:
+ def __init__(self, _file):
+ self.file = _file
+ header = _file.read(VAG_HEADER.size)
+
+ (
+ magic, _, _,
+ self.size,
+ self.sample_rate,
+ self.name
+ ) = VAG_HEADER.unpack(header)
+
+ if magic == VAGI_MAGIC:
+ raise RuntimeError(f"{_file.name} is an interleaved .VAG file (must be mono)")
+ if magic != VAG_MAGIC:
+ raise RuntimeError(f"{_file.name} is not a valid .VAG file")
+
+ def read(self, chunk_size):
+ for _ in range(0, self.size, chunk_size):
+ chunk = self.file.read(chunk_size)
+
+ if len(chunk) < 16:
+ break
+ if len(chunk) % 16:
+ warn(RuntimeWarning(f"{self.file.name} is not 16-byte aligned, trimming"))
+ chunk = chunk[0:(len(chunk) // 16) * 16]
+
+ # If there is already an end flag in the chunk, replace it with a
+ # loop flag, otherwise add a new loop flag at the end.
+ end = get_loop_offset(chunk)
+ chunk = bytearray(chunk)
+
+ chunk[end + 1] = 0x03 # Jump to loop point + sustain
+ yield chunk.ljust(chunk_size, b"\x00")
+
+## Main
+
+def get_args():
+ parser = ArgumentParser(
+ description = "Generates interleaved audio stream data from one or more .VAG files."
+ )
+ parser.add_argument(
+ "input_file",
+ nargs = "+",
+ type = FileType("rb"),
+ help = "mono input files for each channel in .VAG format"
+ )
+ parser.add_argument(
+ "output_file",
+ type = FileType("wb"),
+ help = "where to output converted stream data"
+ )
+ parser.add_argument(
+ "-b", "--buffer-size",
+ type = int,
+ default = BUFFER_SIZE,
+ help = f"size of each channel buffer in each chunk (default {BUFFER_SIZE})",
+ metavar = "bytes"
+ )
+ parser.add_argument(
+ "-a", "--align",
+ type = int,
+ default = CHUNK_ALIGN,
+ help = f"pad each chunk to a multiple of the given size (default {CHUNK_ALIGN})",
+ metavar = "bytes"
+ )
+ parser.add_argument(
+ "-r", "--raw",
+ action = "store_true",
+ help = "do not add an interleaved .VAG header to the output file"
+ )
+
+ return parser.parse_args()
+
+def main():
+ args = get_args()
+ if args.buffer_size % 16:
+ raise ValueError("buffer size must be a multiple of 16 bytes")
+ if args.buffer_size % args.align:
+ warn(RuntimeWarning(f"buffer size should be a multiple of {args.align}"))
+
+ input_files = tuple(map(VAGReader, args.input_file))
+ size = input_files[0].size
+ sample_rate = input_files[0].sample_rate
+
+ if (not args.raw) and (len(input_files) != 2):
+ warn(RuntimeWarning("interleaved .VAG only supports stereo (2 input files)"))
+
+ for vag in input_files[1:]:
+ if vag.size != size:
+ warn(RuntimeWarning(f"{vag.file.name} has a different file size"))
+ if vag.sample_rate != sample_rate:
+ warn(RuntimeWarning(f"{vag.file.name} has a different sample rate"))
+
+ interleave = zip_longest(
+ *( vag.read(args.buffer_size) for vag in input_files ),
+ fillvalue = b"\x00" * args.buffer_size
+ )
+
+ with args.output_file as _file:
+ if not args.raw:
+ header = VAG_HEADER.pack(
+ VAGI_MAGIC,
+ VAG_VERSION,
+ args.buffer_size.to_bytes(4, "little"),
+ size,
+ sample_rate,
+ os.path.basename(_file.name).encode()[0:16]
+ )
+
+ _file.write(align(header, args.align))
+
+ for chunks in interleave:
+ data = b"".join(chunks)
+
+ _file.write(align(data, args.align))
+
+if __name__ == "__main__":
+ main()
diff --git a/examples/sound/spustream/main.c b/examples/sound/spustream/main.c
index 1fee883..d240433 100644
--- a/examples/sound/spustream/main.c
+++ b/examples/sound/spustream/main.c
@@ -1,111 +1,60 @@
/*
- * PSn00bSDK SPU audio streaming example
- * (C) 2021 spicyjpeg - MPL licensed
+ * PSn00bSDK SPU .VAG streaming example
+ * (C) 2022 spicyjpeg - MPL licensed
*
- * This example demonstrates how to play a large multi-channel audio file
- * "manually" by streaming it through the SPU, without having to rely on the CD
- * drive's ability to play audio tracks or XA files.
+ * This example shows how to play arbitrarily long sounds, which normally would
+ * not fit into SPU RAM in their entirety, by streaming them to the SPU from
+ * main RAM. In this example audio data is streamed from an in-memory file,
+ * however the code can easily be modified to stream from the CD instead (see
+ * the cdstream example).
*
- * The way this works is by splitting the audio file into a series of ~1 second
- * "chunks", each of which in turn is an array of concatenated buffers holding
- * SPU ADPCM data (one for each channel, so a stereo stream would have 2
- * buffers per chunk). All buffers in a chunk are played simultaneously using
- * multiple SPU channels; each buffer has the loop flag set at the end, so each
- * channel will jump to its loop address (SPU_CH_LOOP_ADDR(n)) once the chunk
- * is played.
+ * The way SPU streaming works is by splitting the audio data into a series of
+ * small "chunks", each of which in turn is an array of concatenated buffers
+ * holding SPU ADPCM data (one for each channel, so a stereo stream would have
+ * 2 buffers per chunk). All buffers in a chunk are played simultaneously using
+ * multiple SPU channels; each buffer has the loop flag set at the end, so the
+ * SPU will jump to the loop point set in the SPU_CH_LOOP_ADDR registers after
+ * the chunk is played.
*
- * Since the loop point doesn't necessarily have to be within the chunk itself,
- * we can abuse it to "queue" another set of buffers to be played immediately
- * after the currently playing chunk. This allows us to fetch a chunk from the
- * CD, upload it to SPU RAM (2048 bytes at a time to avoid having to keep
- * another large buffer in main RAM) and queue it for playback while a
- * previously buffered chunk is playing in the background. SPU RAM always holds
- * two chunks, one of which is played while the other one is buffered. This is
- * the layout used in this example:
+ * As the loop point doesn't necessarily have to be within the chunk itself, it
+ * can be used to "queue" another chunk to be played immediately after the
+ * current one. This allows for double buffering: two chunks are always kept in
+ * SPU RAM and one is overwritten with a new chunk while the other is playing.
+ * Chunks are laid out in SPU RAM as follows:
*
- * /================================================\
- * | /==================\ |
- * v Loop point | v Loop point |
+ * ________________________________________________
+ * / __________________ \
+ * | / \ |
+ * v Loop point | Loop flag v Loop point | Loop flag
* +-------+----------------+----------------+----------------+----------------+
* | Dummy | Left buffer 0 | Right buffer 0 | Left buffer 1 | Right buffer 1 |
* +-------+----------------+----------------+----------------+----------------+
* \____________Chunk 0____________/ \____________Chunk 1____________/
*
- * It's pretty much the same thing as GPU double buffering (aka page flipping),
- * just with chunks instead of framebuffers.
+ * In order to keep streaming continuously we need to know when each chunk
+ * actually starts playing. The SPU can be configured to trigger an interrupt
+ * whenever a specific address in SPU RAM is read by a channel, so we can just
+ * point it to the beginning of the buffered chunk's first buffer and wait
+ * until the IRQ is fired before loading the next chunk.
*
- * We need to know when the chunk we've buffered actually starts playing in
- * order to start buffering the next one. The SPU can be configured to trigger
- * an interrupt whenever a specific address in SPU RAM is read by a channel, so
- * we can just point it to the beginning of the buffered chunk's first buffer.
- * The interrupt callback will then kick off CD reading and adjust the loop/IRQ
- * addresses to the ones of the chunk that is going to be buffered next.
- *
- * Chunks are read from a STREAM.BIN file which is just a series of sector
- * aligned chunks, arranged as follows:
- *
- * +--Sector--+--Sector--+--Sector--+--Sector--+--Sector--+--Sector--+----
- * | +--------------------------+--------------------------+ |
- * | | Left channel data | Right channel data | Padding | ...
- * | +--------------------------+--------------------------+ |
- * +----------+----------+----------+----------+----------+----------+----
- * \________________________Chunk________________________/
- *
- * A Python script is included to generate STREAM.BIN from one or more SPU
- * ADPCM (.VAG) files, one for each channel (the .VAG format only supports
- * mono).
- *
- * Of course SPU streaming isn't the only way to play music, as the CD drive
- * can play CD-DA tracks and XA files natively with zero CPU overhead. However
- * streaming has a number of advantages over CD audio or XA:
- *
- * - Any sample rate up to 44.1 kHz can be used. The sample rate can also be
- * changed on-the-fly to play the stream at different speeds and pitches (as
- * long as the CD drive can keep up of course), or even interpolated for
- * effects like tape stops or DJ scratches.
- * - Manual streaming is not limited to mono or stereo but can be expanded to
- * as many channels as needed, only limited by the amount of SPU RAM required
- * for chunks and CD bandwidth. Having more than 2 channels can be useful for
- * e.g. crossfading between tracks (not possible with XA) or controlling
- * volume and panning of each individual instrument.
- * - Depending on how streaming/interleaving is implemented it is possible to
- * have 500-1000ms idle periods during which the CD drive isn't buffering the
- * stream, that can be used to read small amounts of other data without ever
- * interrupting playback. This is different from XA-style interleaving as the
- * drive is free to seek to *any* region of the disc during these periods
- * (it must seek back to the stream's next chunk afterwards though).
- * - Thanks to the idle periods it is possible to seek back to the beginning of
- * the stream and preload the first chunk before the end is reached, allowing
- * the track to be looped seamlessly without having to resort to tricks like
- * filler samples.
- * - Unlike XA, SPU streaming can be used on some PS1-based arcade boards such
- * as the Konami System 573. These systems usually use IDE/SCSI CD drives or
- * flash memory, neither of which supports XA playback.
+ * Chunks are read from a special type of .VAG file which has been interleaved
+ * ahead-of-time and already contains the loop flags required to make streaming
+ * work. A Python script is provided to generate such a file from one or more
+ * mono .VAG files.
*/
#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include <stddef.h>
#include <psxetc.h>
#include <psxapi.h>
#include <psxgpu.h>
#include <psxpad.h>
#include <psxspu.h>
-#include <psxcd.h>
#include <hwregs_c.h>
-// To maximize STREAM.BIN packing efficiency and get rid of padding between
-// chunks, buffer size should be a multiple of sector size (2048 bytes). Buffer
-// size can be increased to get more idle time between CD reads, however it is
-// usually best to keep it to 1-2 seconds as SPU RAM is only 512 KB.
-#define SAMPLE_RATE 0x1000 // 44100 Hz
-#define BUFFER_SIZE 0x6800 // (0x6800 / 16 * 28) / 44100 = 1.05 seconds
-
-#define NUM_CHANNELS 2
-#define CHANNEL_MASK 0x03
+extern const uint8_t stream_data[];
-#define SPU_RAM_ADDR(x) ((uint16_t) (((uint32_t) (x)) >> 3))
+#define NUM_CHANNELS 2
/* Display/GPU context utilities */
@@ -167,159 +116,137 @@ void display(RenderContext *ctx) {
SetDispMask(1);
}
-/* Stream interrupt handlers */
+/* .VAG header structure */
-// The first 4 KB of SPU RAM are reserved for capture buffers, so we have to
-// place stream buffers after those. A dummy sample is additionally placed by
-// default by the SPU library at 0x1000; it is going to be used here to keep
-// unused SPU channels busy, preventing them from accidentally triggering the
-// SPU RAM interrupt and throwing off the timing (all channels are always
-// reading sample data, even when "stopped").
+typedef struct {
+ uint32_t magic; // 0x69474156 ("VAGi") for interleaved files
+ uint32_t version;
+ uint32_t interleave; // Little-endian, size of each channel buffer
+ uint32_t size; // Big-endian, in bytes
+ uint32_t sample_rate; // Big-endian, in Hertz
+ uint32_t _reserved[3];
+ char name[16];
+} VAG_Header;
+
+#define SWAP_ENDIAN(x) ( \
+ (((uint32_t) (x) & 0x000000ff) << 24) | \
+ (((uint32_t) (x) & 0x0000ff00) << 8) | \
+ (((uint32_t) (x) & 0x00ff0000) >> 8) | \
+ (((uint32_t) (x) & 0xff000000) >> 24) \
+)
+
+/* Interrupt callbacks */
+
+// The first 4 KB of SPU RAM are reserved for capture buffers and psxspu
+// additionally uploads a dummy sample (16 bytes) at 0x1000 by default, so the
+// chunks must be placed after those. The dummy sample is going to be used to
+// keep unused SPU channels busy, preventing them from accidentally triggering
+// the SPU IRQ and throwing off the timing (all channels are always reading
+// from SPU RAM, even when "stopped").
// https://problemkaputt.de/psx-spx.htm#spuinterrupt
-#define DUMMY_BLOCK_ADDR 0x1000
-#define BUFFER_START_ADDR 0x1010
-#define CHUNK_SIZE (BUFFER_SIZE * NUM_CHANNELS)
+#define DUMMY_BLOCK_ADDR 0x1000
+#define BUFFER_START_ADDR 0x1010
+
+typedef enum {
+ STATE_IDLE,
+ STATE_BUFFERING
+} StreamState;
typedef struct {
- int lba, length;
+ const uint8_t *data;
+ int buffer_size, num_chunks, sample_rate;
- volatile int pos;
- volatile int spu_addr, spu_pos;
- volatile int db_active;
+ volatile int next_chunk, spu_addr;
+ volatile int8_t db_active, state;
} StreamContext;
static StreamContext str_ctx;
-// This buffer is used by cd_event_handler() as a temporary area for sectors
-// read from the CD and uploaded to SPU RAM. Due to DMA limitations it can't be
-// allocated on the stack (especially not in the interrupt callbacks' stack,
-// whose size is very limited).
-static uint32_t sector_buffer[512];
-
void spu_irq_handler(void) {
// Acknowledge the interrupt to ensure it can be triggered again. The only
// way to do this is actually to disable the interrupt entirely; we'll
- // enable it again once the buffer is ready.
+ // enable it again once the chunk is ready.
SPU_CTRL &= 0xffbf;
- str_ctx.db_active ^= 1;
- str_ctx.spu_pos = 0;
+ int chunk_size = str_ctx.buffer_size * NUM_CHANNELS;
+ int chunk = (str_ctx.next_chunk + 1) % (uint32_t) str_ctx.num_chunks;
- // Align the sector counter to the size of a chunk (to prevent glitches
- // after seeking) and reset it if it exceeds the stream's length.
- str_ctx.pos %= str_ctx.length;
- str_ctx.pos -= str_ctx.pos % ((CHUNK_SIZE + 2047) / 2048);
+ str_ctx.db_active ^= 1;
+ str_ctx.state = STATE_BUFFERING;
+ str_ctx.next_chunk = chunk;
- // Configure to SPU to trigger an IRQ once the buffer that is going to be
+ // Configure the SPU to trigger an IRQ once the chunk that is going to be
// filled now starts playing (so the next buffer can be loaded) and
// override both channels' loop addresses to make them "jump" to the new
- // buffer rather than actually looping when they encounter the loop flag at
- // the end of the currently playing buffer.
- str_ctx.spu_addr = BUFFER_START_ADDR + CHUNK_SIZE * str_ctx.db_active;
- SPU_IRQ_ADDR = SPU_RAM_ADDR(str_ctx.spu_addr);
+ // buffers, rather than actually looping when they encounter the loop flag
+ // at the end of the currently playing buffers.
+ int addr = BUFFER_START_ADDR + (str_ctx.db_active ? chunk_size : 0);
+ str_ctx.spu_addr = addr;
+ SPU_IRQ_ADDR = getSPUAddr(addr);
for (int i = 0; i < NUM_CHANNELS; i++)
- SPU_CH_LOOP_ADDR(i) = SPU_RAM_ADDR(str_ctx.spu_addr + BUFFER_SIZE * i);
+ SPU_CH_LOOP_ADDR(i) = getSPUAddr(addr + str_ctx.buffer_size * i);
- // Start loading the next chunk. cd_event_handler() will be called
- // repeatedly for each sector until the entire chunk is read.
- CdlLOC pos;
- CdIntToPos(str_ctx.lba + str_ctx.pos, &pos);
- CdControlF(CdlReadN, &pos);
+ // Start uploading the next chunk to the SPU.
+ SpuSetTransferStartAddr(addr);
+ SpuWrite((const uint32_t *) &str_ctx.data[chunk * chunk_size], chunk_size);
}
-void cd_event_handler(int event, uint8_t *payload) {
- // Ignore all events other than a sector being ready.
- // TODO: read errors should be handled properly
- if (event != CdlDataReady)
- return;
-
- // Fetch the sector that has been read from the drive.
- CdGetSector(sector_buffer, 512);
- str_ctx.pos++;
-
- // Set loop flags to make sure the buffer will loop (actually jump to the
- // other buffer, as we're overriding loop addresses) at the end.
- // NOTE: this isn't actually necessary here as the stream converter script
- // already sets these flags in the file.
- /*for (int i = 0; i < NUM_CHANNELS; i++) {
- if (
- str_ctx.spu_pos >= (BUFFER_SIZE * i - 2048) &&
- str_ctx.spu_pos < (BUFFER_SIZE * i)
- )
- sector_buffer[(BUFFER_SIZE * i - str_ctx.spu_pos) - 15] = 0x03;
- }*/
-
- // Copy the sector to SPU RAM, appending it to the buffer that is not
- // playing currently. As the left and right buffers are adjacent, we can
- // just treat the chunk as a single blob of data and copy it as-is; we only
- // have to trim the padding at the end (if any) to avoid overwriting other
- // data in SPU RAM.
- size_t length = CHUNK_SIZE - str_ctx.spu_pos;
- if (length > 2048)
- length = 2048;
-
- SpuSetTransferStartAddr(str_ctx.spu_addr + str_ctx.spu_pos);
- SpuWrite(sector_buffer, length);
- str_ctx.spu_pos += length;
-
- // If the buffer has been filled completely, stop reading and re-enable the
- // SPU IRQ.
- if (str_ctx.spu_pos >= CHUNK_SIZE) {
- CdControlF(CdlPause, 0);
- SPU_CTRL |= 0x0040;
- }
+void spu_dma_handler(void) {
+ // Re-enable the SPU IRQ once the new chunk has been fully uploaded.
+ SPU_CTRL |= 0x0040;
+
+ str_ctx.state = STATE_IDLE;
}
-/* Stream helpers */
+/* Helper functions */
// This isn't actually required for this example, however it is necessary if
// you want to allocate the stream buffers into a region of SPU RAM that was
// previously used (to make sure the IRQ isn't going to be triggered by any
// inactive channels).
void reset_spu_channels(void) {
- SPU_KEY_OFF = 0x00ffffff;
+ SpuSetKey(0, 0x00ffffff);
for (int i = 0; i < 24; i++) {
- SPU_CH_ADDR(i) = SPU_RAM_ADDR(DUMMY_BLOCK_ADDR);
+ SPU_CH_ADDR(i) = getSPUAddr(DUMMY_BLOCK_ADDR);
SPU_CH_FREQ(i) = 0x1000;
}
- SPU_KEY_ON = 0x00ffffff;
+ SpuSetKey(1, 0x00ffffff);
}
-void init_stream(CdlFILE *file) {
+void init_stream(const VAG_Header *vag) {
EnterCriticalSection();
- InterruptCallback(9, &spu_irq_handler);
- CdReadyCallback(&cd_event_handler);
+ InterruptCallback(IRQ_SPU, &spu_irq_handler);
+ DMACallback(DMA_SPU, &spu_dma_handler);
ExitCriticalSection();
- // Configure the CD drive to read 2048-byte sectors at 2x speed.
- uint8_t mode = CdlModeSpeed;
- CdControl(CdlSetmode, (const uint8_t *) &mode, 0);
+ int buf_size = vag->interleave;
+
+ str_ctx.data = &((const uint8_t *) vag)[2048];
+ str_ctx.buffer_size = buf_size;
+ str_ctx.num_chunks = (SWAP_ENDIAN(vag->size) + buf_size - 1) / buf_size;
+ str_ctx.sample_rate = SWAP_ENDIAN(vag->sample_rate);
- // Set the initial LBA of the stream file, which is going to be incremented
- // as the stream is played.
- str_ctx.lba = CdPosToInt(&(file->pos));
- str_ctx.length = file->size / 2048;
- str_ctx.pos = 0;
+ str_ctx.db_active = 1;
+ str_ctx.next_chunk = -1;
- // Ensure at least one chunk is in SPU RAM by invoking the SPU IRQ handler
+ // Ensure at least one chunk is in SPU RAM by invoking the IRQ handler
// manually and blocking until the chunk has loaded.
- str_ctx.db_active = 1;
spu_irq_handler();
-
- while (str_ctx.spu_pos < CHUNK_SIZE)
+ while (str_ctx.state != STATE_IDLE)
__asm__ volatile("");
}
void start_stream(void) {
- uint32_t addr = BUFFER_START_ADDR + CHUNK_SIZE * str_ctx.db_active;
+ int bits = 0x00ffffff >> (24 - NUM_CHANNELS);
for (int i = 0; i < NUM_CHANNELS; i++) {
- SPU_CH_ADDR(i) = SPU_RAM_ADDR(addr + BUFFER_SIZE * i);
- SPU_CH_FREQ(i) = SAMPLE_RATE;
- SPU_CH_ADSR(i) = 0x1fee80ff;
+ SPU_CH_ADDR(i) = getSPUAddr(str_ctx.spu_addr + str_ctx.buffer_size * i);
+ SPU_CH_FREQ(i) = getSPUSampleRate(str_ctx.sample_rate);
+ SPU_CH_ADSR1(i) = 0x80ff;
+ SPU_CH_ADSR2(i) = 0x1fee;
}
// Unmute the channels and route them for stereo output. You'll want to
@@ -330,35 +257,31 @@ void start_stream(void) {
SPU_CH_VOL_L(1) = 0x0000;
SPU_CH_VOL_R(1) = 0x3fff;
- SPU_KEY_ON = CHANNEL_MASK;
spu_irq_handler();
+ SpuSetKey(1, bits);
}
// This is basically a variant of reset_spu_channels() that only resets the
// channels used to play the stream, to (again) prevent them from triggering
// the SPU IRQ while the stream is paused.
void stop_stream(void) {
- SPU_KEY_OFF = CHANNEL_MASK;
+ int bits = 0x00ffffff >> (24 - NUM_CHANNELS);
+
+ SpuSetKey(0, bits);
for (int i = 0; i < NUM_CHANNELS; i++)
- SPU_CH_ADDR(i) = SPU_RAM_ADDR(DUMMY_BLOCK_ADDR);
+ SPU_CH_ADDR(i) = getSPUAddr(DUMMY_BLOCK_ADDR);
- SPU_KEY_ON = CHANNEL_MASK;
+ SpuSetKey(1, bits);
}
/* Main */
static RenderContext ctx;
-#define SHOW_STATUS(...) { FntPrint(-1, __VA_ARGS__); FntFlush(-1); display(&ctx); }
-#define SHOW_ERROR(...) { SHOW_STATUS(__VA_ARGS__); while (1) __asm__("nop"); }
-
int main(int argc, const char* argv[]) {
init_context(&ctx);
-
- SHOW_STATUS("INITIALIZING\n");
SpuInit();
- CdInit();
reset_spu_channels();
// Set up controller polling.
@@ -367,34 +290,19 @@ int main(int argc, const char* argv[]) {
StartPAD();
ChangeClearPAD(0);
- SHOW_STATUS("OPENING STREAM FILE\n");
-
- CdlFILE file;
- if (!CdSearchFile(&file, "\\STREAM.BIN"))
- SHOW_ERROR("FAILED TO FIND STREAM.BIN\n");
-
- SHOW_STATUS("BUFFERING STREAM\n");
- init_stream(&file);
+ init_stream((const VAG_Header *) stream_data);
start_stream();
- int paused = 0;
+ int paused = 0, sample_rate = getSPUSampleRate(str_ctx.sample_rate);
- uint16_t sample_rate = SAMPLE_RATE;
uint16_t last_buttons = 0xffff;
while (1) {
FntPrint(-1, "PLAYING SPU STREAM\n\n");
+ FntPrint(-1, "BUFFER: %d\n", str_ctx.db_active);
+ FntPrint(-1, "STATUS: %s\n\n", str_ctx.state ? "BUFFERING" : "IDLE");
- FntPrint(-1, "BUFFER: %d\nSTATUS: ", str_ctx.db_active);
- if (str_ctx.spu_pos >= CHUNK_SIZE)
- FntPrint(-1, "IDLE\n\n");
- else if (str_ctx.spu_pos)
- FntPrint(-1, "BUFFERING\n\n");
- else
- FntPrint(-1, "SEEKING\n\n");
-
- FntPrint(-1, "POSITION: %5d/%5d\n", str_ctx.pos, str_ctx.length);
- FntPrint(-1, "BUFFERED: %5d/%5d\n", str_ctx.spu_pos, CHUNK_SIZE);
+ FntPrint(-1, "POSITION: %d/%d\n", str_ctx.next_chunk, str_ctx.num_chunks);
FntPrint(-1, "SMP RATE: %5d HZ\n\n", (sample_rate * 44100) >> 12);
FntPrint(-1, "[START] %s\n", paused ? "RESUME" : "PAUSE");
@@ -411,7 +319,11 @@ int main(int argc, const char* argv[]) {
PADTYPE *pad = (PADTYPE *) pad_buff[0];
if (pad->stat)
continue;
- if ((pad->type != 4) && (pad->type != 5) && (pad->type != 7))
+ if (
+ (pad->type != PAD_ID_DIGITAL) &&
+ (pad->type != PAD_ID_ANALOG_STICK) &&
+ (pad->type != PAD_ID_ANALOG)
+ )
continue;
if ((last_buttons & PAD_START) && !(pad->btn & PAD_START)) {
@@ -422,21 +334,19 @@ int main(int argc, const char* argv[]) {
start_stream();
}
- // Seeking by an arbitrary number of sectors isn't a problem as
- // spu_irq_handler() always realigns the counter.
if (!(pad->btn & PAD_LEFT))
- str_ctx.pos -= 16;
+ str_ctx.next_chunk--;
if (!(pad->btn & PAD_RIGHT))
- str_ctx.pos += 16;
+ str_ctx.next_chunk++;
if ((last_buttons & PAD_CIRCLE) && !(pad->btn & PAD_CIRCLE))
- str_ctx.pos = 0;
+ str_ctx.next_chunk = -1;
if (!(pad->btn & PAD_DOWN) && (sample_rate > 0x400))
sample_rate -= 0x40;
if (!(pad->btn & PAD_UP) && (sample_rate < 0x2000))
sample_rate += 0x40;
if ((last_buttons & PAD_CROSS) && !(pad->btn & PAD_CROSS))
- sample_rate = SAMPLE_RATE;
+ sample_rate = getSPUSampleRate(str_ctx.sample_rate);
// Only set the sample rate registers if necessary.
if (pad->btn != 0xffff) {
diff --git a/examples/sound/spustream/stream.bin b/examples/sound/spustream/stream.bin
deleted file mode 100644
index e53b726..0000000
--- a/examples/sound/spustream/stream.bin
+++ /dev/null
Binary files differ
diff --git a/examples/sound/spustream/stream.vag b/examples/sound/spustream/stream.vag
new file mode 100644
index 0000000..e1cb4f4
--- /dev/null
+++ b/examples/sound/spustream/stream.vag
Binary files differ
diff --git a/examples/sound/vagsample/3dfx.vag b/examples/sound/vagsample/3dfx.vag
index 9284a9a..3a006bc 100644
--- a/examples/sound/vagsample/3dfx.vag
+++ b/examples/sound/vagsample/3dfx.vag
Binary files differ
diff --git a/examples/sound/vagsample/main.c b/examples/sound/vagsample/main.c
index c79e68e..6a60c19 100644
--- a/examples/sound/vagsample/main.c
+++ b/examples/sound/vagsample/main.c
@@ -1,280 +1,215 @@
-/*
- * LibPSn00b Example Programs
+/*
+ * PSn00bSDK SPU .VAG playback example
+ * (C) 2021-2022 Lameguy64, spicyjpeg - MPL licensed
*
- * VAG Playback Example
- * 2019-2021 Meido-Tek Productions / PSn00bSDK Project
+ * This example demonstrates basic usage of the SPU. Two mono audio samples (in
+ * the standard PS1 .VAG format) are uploaded from main memory to SPU RAM and
+ * played on one of the 24 channels by manipulating the SPU's registers. The
+ * .VAG header is parsed to obtain the sample rate and data size, while the
+ * actual audio data does not need any processing as it is already encoded in
+ * the ADPCM format expected by the SPU.
*
- * This example program demonstrates the basic use of the SPU; uploading sound
- * clips to SPU RAM and playing it back on one of 24 SPU voices (and possibly
- * leave you with ears ringing from the cacophony).
- *
- * The PS1 SPU only supports playing back of specially encoded ADPCM samples
- * natively and can play them at sample rates of up to 44.1KHz, so sound files
- * will have to be converted to 'VAG' format before it can be used on the PS1.
- * While it is possible to play plain PCM samples on the SPU, this requires
- * some special trickery that involves abusing the echo buffer and is not
- * supported by the (half-baked) SPU library of PSn00bSDK.
- *
- * Additionally, the SPU can only play ADPCM samples from its own local memory
- * called the SPU RAM, so sound samples will have to be uploaded to SPU RAM
- * before it can be played by the SPU.
- *
- * The included sound clips are by HighTreason610 (0proyt) and
- * Lameguy64 (threedeeffeggzz) respectively.
- *
- * Example by Lameguy64
- *
- *
- * Changelog:
- *
- * October 6, 2021 - Initial version
+ * Note that PSn00bSDK does not yet provide any tool for SPU ADPCM encoding, so
+ * you will have to use an external program to convert your samples to .VAG.
*
+ * The included sound clips are by HighTreason610 (proyt.vag) and Lameguy64
+ * (3dfx.vag) respectively.
*/
-
-#include <stdio.h>
+
#include <stdint.h>
-#include <psxetc.h>
-#include <psxgte.h>
#include <psxgpu.h>
-#include <psxpad.h>
#include <psxapi.h>
+#include <psxpad.h>
#include <psxspu.h>
#include <hwregs_c.h>
-extern const unsigned char proyt[];
-extern const int proyt_size;
-extern const unsigned char tdfx[];
-extern const int tdfx_size;
-
-// Define display/draw environments for double buffering
-DISPENV disp[2];
-DRAWENV draw[2];
-int db;
-
-unsigned char pad_buff[2][34];
-
-// SPU addresses of the uploaded sound clips
-int proyt_addr;
-int tdfx_addr;
-
-// Init function
-void init(void)
-{
- int addr_temp;
-
- // This not only resets the GPU but it also installs the library's
- // ISR subsystem to the kernel
+extern const uint8_t proyt[];
+extern const uint8_t tdfx[];
+
+/* Display/GPU context utilities */
+
+#define SCREEN_XRES 320
+#define SCREEN_YRES 240
+
+#define BGCOLOR_R 48
+#define BGCOLOR_G 24
+#define BGCOLOR_B 0
+
+typedef struct {
+ DISPENV disp;
+ DRAWENV draw;
+} Framebuffer;
+
+typedef struct {
+ Framebuffer db[2];
+ int db_active;
+} RenderContext;
+
+void init_context(RenderContext *ctx) {
+ Framebuffer *db;
+
ResetGraph(0);
-
- // Define display environments, first on top and second on bottom
- SetDefDispEnv(&disp[0], 0, 0, 320, 240);
- SetDefDispEnv(&disp[1], 0, 240, 320, 240);
-
- // Define drawing environments, first on bottom and second on top
- SetDefDrawEnv(&draw[0], 0, 240, 320, 240);
- SetDefDrawEnv(&draw[1], 0, 0, 320, 240);
-
- // Set and enable clear color
- setRGB0(&draw[0], 0, 96, 0);
- setRGB0(&draw[1], 0, 96, 0);
- draw[0].isbg = 1;
- draw[1].isbg = 1;
-
- // Clear double buffer counter
- db = 0;
-
- // Apply the GPU environments
- PutDispEnv(&disp[db]);
- PutDrawEnv(&draw[db]);
-
- // Load test font
+ ctx->db_active = 0;
+
+ db = &(ctx->db[0]);
+ SetDefDispEnv(&(db->disp), 0, 0, SCREEN_XRES, SCREEN_YRES);
+ SetDefDrawEnv(&(db->draw), SCREEN_XRES, 0, SCREEN_XRES, SCREEN_YRES);
+ setRGB0(&(db->draw), BGCOLOR_R, BGCOLOR_G, BGCOLOR_B);
+ db->draw.isbg = 1;
+ db->draw.dtd = 1;
+
+ db = &(ctx->db[1]);
+ SetDefDispEnv(&(db->disp), SCREEN_XRES, 0, SCREEN_XRES, SCREEN_YRES);
+ SetDefDrawEnv(&(db->draw), 0, 0, SCREEN_XRES, SCREEN_YRES);
+ setRGB0(&(db->draw), BGCOLOR_R, BGCOLOR_G, BGCOLOR_B);
+ db->draw.isbg = 1;
+ db->draw.dtd = 1;
+
+ PutDrawEnv(&(db->draw));
+ //PutDispEnv(&(db->disp));
+
+ // Create a text stream at the top of the screen.
FntLoad(960, 0);
-
- // Open up a test font text stream of 100 characters
- FntOpen(0, 8, 320, 224, 0, 100);
-
- // Initialize the SPU
- SpuInit();
-
- // Set SPU transfer mode to DMA (only mode currently supported)
+ FntOpen(8, 16, 304, 208, 2, 512);
+}
+
+void display(RenderContext *ctx) {
+ Framebuffer *db;
+
+ DrawSync(0);
+ VSync(0);
+ ctx->db_active ^= 1;
+
+ db = &(ctx->db[ctx->db_active]);
+ PutDrawEnv(&(db->draw));
+ PutDispEnv(&(db->disp));
+ SetDispMask(1);
+}
+
+/* .VAG header structure */
+
+typedef struct {
+ uint32_t magic; // 0x70474156 ("VAGp") for mono files
+ uint32_t version;
+ uint32_t interleave; // Unused in mono files
+ uint32_t size; // Big-endian, in bytes
+ uint32_t sample_rate; // Big-endian, in Hertz
+ uint32_t _reserved[3];
+ char name[16];
+} VAG_Header;
+
+#define SWAP_ENDIAN(x) ( \
+ (((uint32_t) (x) & 0x000000ff) << 24) | \
+ (((uint32_t) (x) & 0x0000ff00) << 8) | \
+ (((uint32_t) (x) & 0x00ff0000) >> 8) | \
+ (((uint32_t) (x) & 0xff000000) >> 24) \
+)
+
+/* Helper functions */
+
+// The first 4 KB of SPU RAM are reserved for capture buffers and psxspu
+// additionally uploads a dummy sample (16 bytes) at 0x1000 by default, so the
+// samples must be placed after those.
+#define ALLOC_START_ADDR 0x1010
+
+static int next_channel = 0;
+static int next_sample_addr = ALLOC_START_ADDR;
+
+int upload_sample(const void *data, int size) {
+ // Round the size up to the nearest multiple of 64, as SPU DMA transfers
+ // are done in 64-byte blocks.
+ int _addr = next_sample_addr;
+ int _size = (size + 63) & 0xffffffc0;
+
SpuSetTransferMode(SPU_TRANSFER_BY_DMA);
-
- // Set SPU transfer address (start address for sample upload)
- addr_temp = 0x1000;
- SpuSetTransferStartAddr(addr_temp);
-
- // Upload first sound clip and wait for transfer to finish
- SpuWrite((const uint32_t *) &proyt[48], proyt_size-48);
- SpuIsTransferCompleted(SPU_TRANSFER_WAIT);
-
- // Obtain the address of the sound and advance address for the next one
- // Samples are addressed in 8-byte units, so it'll have to be divided by 8
- proyt_addr = addr_temp/8;
- addr_temp += proyt_size-48;
-
- printf("proyt.vag\t= %02x\n", proyt_addr);
-
- // Upload second sound clip
- SpuSetTransferStartAddr(addr_temp);
- SpuWrite((const uint32_t *) &tdfx[48], tdfx_size-48);
+ SpuSetTransferStartAddr(_addr);
+
+ SpuWrite((const uint32_t *) data, _size);
SpuIsTransferCompleted(SPU_TRANSFER_WAIT);
-
- // Obtain the address of the second sound clip
- tdfx_addr = addr_temp/8;
- addr_temp += tdfx_size-48;
-
- printf("3dfx.vag\t= %02x\n", tdfx_addr);
-
- // Begin pad polling
- InitPAD( pad_buff[0], 34, pad_buff[1], 34 );
+
+ next_sample_addr = _addr + _size;
+ return _addr;
+}
+
+void play_sample(int addr, int sample_rate) {
+ int ch = next_channel;
+
+ // Make sure the channel is stopped.
+ SpuSetKey(0, 1 << ch);
+
+ // Set the channel's sample rate and start address. Note that the SPU
+ // expects the sample rate to be in 4.12 fixed point format (with
+ // 1.0 = 44100 Hz) and the address in 8-byte units; psxspu.h provides the
+ // getSPUSampleRate() and getSPUAddr() macros to convert values to these
+ // units.
+ SPU_CH_FREQ(ch) = getSPUSampleRate(sample_rate);
+ SPU_CH_ADDR(ch) = getSPUAddr(addr);
+
+ // Set the channel's volume and ADSR parameters (0x80ff and 0x1fee are
+ // dummy values that disable the ADSR envelope entirely).
+ SPU_CH_VOL_L(ch) = 0x3fff;
+ SPU_CH_VOL_R(ch) = 0x3fff;
+ SPU_CH_ADSR1(ch) = 0x80ff;
+ SPU_CH_ADSR2(ch) = 0x1fee;
+
+ // Start the channel.
+ SpuSetKey(1, 1 << ch);
+
+ next_channel = (ch + 1) % 24;
+}
+
+/* Main */
+
+static RenderContext ctx;
+
+int main(int argc, const char* argv[]) {
+ init_context(&ctx);
+ SpuInit();
+
+ // Upload the samples to the SPU and parse their headers.
+ VAG_Header *proyt_vag = (VAG_Header *) proyt;
+ VAG_Header *tdfx_vag = (VAG_Header *) tdfx;
+
+ int proyt_addr = upload_sample(&proyt_vag[1], SWAP_ENDIAN(proyt_vag->size));
+ int tdfx_addr = upload_sample(&tdfx_vag[1], SWAP_ENDIAN(tdfx_vag->size));
+ int proyt_sr = SWAP_ENDIAN(proyt_vag->sample_rate);
+ int tdfx_sr = SWAP_ENDIAN(tdfx_vag->sample_rate);
+
+ // Set up controller polling.
+ uint8_t pad_buff[2][34];
+ InitPAD(pad_buff[0], 34, pad_buff[1], 34);
StartPAD();
ChangeClearPAD(0);
-} /* init */
-
-// Display function
-void display(void)
-{
- // Flip buffer index
- db = !db;
-
- // Wait for all drawing to complete
- DrawSync(0);
-
- // Wait for vertical sync to cap the logic to 60fps (or 50 in PAL mode)
- // and prevent screen tearing
- VSync(0);
- // Switch pages
- PutDispEnv(&disp[db]);
- PutDrawEnv(&draw[db]);
-
- // Enable display output, ResetGraph() disables it by default
- SetDispMask(1);
-
-} /* main */
-
-// Main function, program entrypoint
-int main(int argc, const char *argv[])
-{
- int counter,nextchan;
- int cross_pressed;
- int circle_pressed;
- PADTYPE *pad;
-
- // Init stuff
- init();
-
- // Main loop
- counter = 0;
- nextchan = 0;
- cross_pressed = 0;
- circle_pressed = 0;
-
- while(1)
- {
- pad = (PADTYPE*)&pad_buff[0][0];
-
- if( pad->stat == 0 )
- {
- // For digital pad, dual-analog and dual-shock
- if( ( pad->type == 0x4 ) || ( pad->type == 0x5 ) || ( pad->type == 0x7 ) )
- {
- // Plays the first sound
- if( !(pad->btn&PAD_CROSS) )
- {
- if( !cross_pressed )
- {
- // Voice frequency
- // (800h = 22.05KHz)
- SPU_CH_FREQ(nextchan) = 0x800;
- // Voice start playback address
- // (transfer address / 8)
- SPU_CH_ADDR(nextchan) = proyt_addr;
- // Voice loop address
- // (transfer address / 8)
- SPU_CH_LOOP_ADDR(nextchan) = proyt_addr;
- // Voice volume and envelope
- SPU_CH_VOL_L(nextchan) = 0x3fff;
- SPU_CH_VOL_R(nextchan) = 0x3fff;
- SPU_CH_ADSR(nextchan) = 0x1fee80ff;
-
- // Set voice to key-off to allow restart
- SPU_KEY_OFF = 1 << nextchan;
- // Set voice to key-on
- SPU_KEY_ON = 1 << nextchan;
-
- // Advance to next voice
- nextchan++;
- if( nextchan > 23 )
- nextchan = 0;
-
- cross_pressed = 1;
- }
- }
- else
- {
- cross_pressed = 0;
- }
-
- // Plays the second sound
- if( !(pad->btn&PAD_CIRCLE) )
- {
- if( !circle_pressed )
- {
- // Voice frequency
- // (1000h = 44.1KHz)
- SPU_CH_FREQ(nextchan) = 0x1000;
- // Voice start playback address
- // (transfer address / 8)
- SPU_CH_ADDR(nextchan) = tdfx_addr;
- // Voice loop address
- // (transfer address / 8)
- SPU_CH_LOOP_ADDR(nextchan) = tdfx_addr;
- // Voice volume and envelope
- SPU_CH_VOL_L(nextchan) = 0x3fff;
- SPU_CH_VOL_R(nextchan) = 0x3fff;
- SPU_CH_ADSR(nextchan) = 0x1fee80ff;
-
- // Set voice to key-off to allow restart
- SPU_KEY_OFF = 1 << nextchan;
- // Set voice to key-on
- SPU_KEY_ON = 1 << nextchan;
-
- // Advance to next voice
- nextchan++;
- if( nextchan > 23 )
- nextchan = 0;
-
- circle_pressed = 1;
- }
- }
- else
- {
- circle_pressed = 0;
- }
- }
- }
- else
- {
- cross_pressed = 0;
- circle_pressed = 0;
- }
-
- // Print the obligatory hello world and counter to show that the
- // program isn't locking up to the last created text stream
- FntPrint(-1, "VAG SAMPLE - PRESS X OR O TO PLAY\n");
- FntPrint(-1, "COUNTER=%d\n", counter);
-
- // Draw the last created text stream
+ uint16_t last_buttons = 0xffff;
+
+ while (1) {
+ FntPrint(-1, "SPU SAMPLE PLAYBACK DEMO\n\n");
+ FntPrint(-1, "[X] PLAY FIRST SAMPLE\n");
+ FntPrint(-1, "[O] PLAY SECOND SAMPLE\n");
+
FntFlush(-1);
-
- // Update display
- display();
-
- // Increment the counter
- counter++;
+ display(&ctx);
+
+ // Check if a compatible controller is connected and handle button
+ // presses.
+ PADTYPE *pad = (PADTYPE *) pad_buff[0];
+ if (pad->stat)
+ continue;
+ if (
+ (pad->type != PAD_ID_DIGITAL) &&
+ (pad->type != PAD_ID_ANALOG_STICK) &&
+ (pad->type != PAD_ID_ANALOG)
+ )
+ continue;
+
+ if ((last_buttons & PAD_CROSS) && !(pad->btn & PAD_CROSS))
+ play_sample(proyt_addr, proyt_sr);
+ if ((last_buttons & PAD_CIRCLE) && !(pad->btn & PAD_CIRCLE))
+ play_sample(tdfx_addr, tdfx_sr);
+
+ last_buttons = pad->btn;
}
-
+
return 0;
-
-} /* main */
+}
diff --git a/examples/sound/vagsample/proyt.vag b/examples/sound/vagsample/proyt.vag
index 663828d..b8d68d6 100644
--- a/examples/sound/vagsample/proyt.vag
+++ b/examples/sound/vagsample/proyt.vag
Binary files differ
diff --git a/examples/system/dynlink/main.c b/examples/system/dynlink/main.c
index fcce5b1..d813c07 100644
--- a/examples/system/dynlink/main.c
+++ b/examples/system/dynlink/main.c
@@ -7,8 +7,8 @@
* symbol map file, which is generated at compile time by GCC's nm command and
* included into the CD image. The symbol map lists all functions/variables in
* the executable and their type, address and size. Currently only searching
- * for a symbol's address by its name (DL_GetSymbolByName()) is supported,
- * however this may be expanded in the future.
+ * for a symbol's address by its name (DL_GetMapSymbol()) is supported, however
+ * this may be expanded in the future.
*
* Being able to introspect local symbols at runtime, in turn, allows us to use
* another set of APIs to load, link and execute code from an external file
@@ -140,18 +140,18 @@ void display(RenderContext *ctx) {
/* Symbol overriding example */
-static volatile uint32_t resolve_counter = 0;
+static volatile int resolve_counter = 0;
// This function will override printf(), i.e. DLLs will use this instead of the
// "real" printf() present in the executable, thanks to the custom resolver
-// defined below. We'll use this to redirect the DLL's output to the debug text
-// window.
+// defined below. We'll use this to redirect the DLL's output to be shown on
+// screen.
int dll_printf(const char *format, ...) {
va_list args;
va_start(args, format);
- char buffer[256];
- int32_t return_value = vsprintf(buffer, format, args);
+ char buffer[256];
+ int return_value = vsprintf(buffer, format, args);
va_end(args);
FntPrint(-1, "DLL: %s", buffer);
@@ -163,7 +163,7 @@ int dll_printf(const char *format, ...) {
// This function will be called by the linker for each undefined symbol
// (function or variable) in the DLL, and should return the address of the
// symbol so the dynamic linker can patch it in. The default resolver tries to
-// find them in the currently loaded symbol map using DL_GetSymbolByName().
+// find them in the currently loaded symbol map using DL_GetMapSymbol().
void *custom_resolver(DLL *dll, const char *name) {
if (!strcmp(name, "printf")) {
printf("Resolving printf() -> dll_printf() (#%d)\n", resolve_counter++);
@@ -173,7 +173,7 @@ void *custom_resolver(DLL *dll, const char *name) {
printf("Resolving %s() (#%d)\n", name, resolve_counter++);
// Custom resolvers should always fall back to the default behavior.
- return DL_GetSymbolByName(name);
+ return DL_GetMapSymbol(name);
}
/* Global variables and structs */
@@ -187,7 +187,7 @@ typedef struct {
void (*render)(RenderContext *, uint16_t buttons);
} DLL_API;
-static DLL *dll = 0;
+static DLL dll;
static DLL_API dll_api;
static RenderContext ctx;
@@ -225,20 +225,18 @@ size_t load_file(const char *filename, void **ptr) {
}
void load_dll(const char *filename) {
- // As we're passing RTLD_FREE_ON_DESTROY to DL_CreateDLL(), calling
+ // As we're passing DL_FREE_ON_DESTROY to DL_CreateDLL(), calling
// DL_DestroyDLL() will also deallocate the buffer the DLL was loaded into.
- if (dll)
- DL_DestroyDLL(dll);
+ DL_DestroyDLL(&dll);
void *ptr;
size_t len = load_file(filename, &ptr);
- dll = DL_CreateDLL(ptr, len, RTLD_LAZY | RTLD_FREE_ON_DESTROY);
- if (!dll)
- SHOW_ERROR("FAILED TO PARSE %s\nERROR=%d\n", filename, (int32_t) DL_GetLastError());
+ if (!DL_CreateDLL(&dll, ptr, len, DL_LAZY | DL_FREE_ON_DESTROY))
+ SHOW_ERROR("FAILED TO PARSE %s\n", filename);
- dll_api.init = DL_GetDLLSymbol(dll, "init");
- dll_api.render = DL_GetDLLSymbol(dll, "render");
+ dll_api.init = DL_GetDLLSymbol(&dll, "init");
+ dll_api.render = DL_GetDLLSymbol(&dll, "render");
printf("DLL init() @ %08x, render() @ %08x\n", dll_api.init, dll_api.render);
@@ -266,14 +264,14 @@ int main(int argc, const char* argv[]) {
size_t len = load_file("\\MAIN.MAP;1", &ptr);
if (!DL_ParseSymbolMap(ptr, len))
- SHOW_ERROR("FAILED TO PARSE SYMBOL MAP\nERROR=%d\n", (int32_t) DL_GetLastError());
+ SHOW_ERROR("FAILED TO PARSE SYMBOL MAP\n");
free(ptr);
// Try to obtain a reference to a local function.
- void (*_display)() = DL_GetSymbolByName("display");
+ void (*_display)() = DL_GetMapSymbol("display");
if (!_display)
- SHOW_ERROR("FAILED TO LOOK UP LOCAL FUNCTION\nERROR=%d\n", (int32_t) DL_GetLastError());
+ SHOW_ERROR("FAILED TO LOOK UP LOCAL FUNCTION\n");
printf("Symbol map test, display() @ %08x\n", _display);
@@ -295,7 +293,7 @@ int main(int argc, const char* argv[]) {
DL_PRE_CALL(dll_api.render);
dll_api.render(&ctx, last_buttons);
- FntPrint(-1, "MAIN: DLL ADDR=%08x SIZE=%d\n", dll->ptr, dll->size);
+ FntPrint(-1, "MAIN: DLL ADDR=%08x SIZE=%d\n", dll.ptr, dll.size);
FntPrint(-1, "MAIN: %d FUNCTIONS RESOLVED\n", resolve_counter);
FntPrint(-1, "[START] LOAD NEXT DLL\n");
FntFlush(-1);
@@ -320,7 +318,7 @@ int main(int argc, const char* argv[]) {
last_buttons = pad->btn;
}
- //DL_DestroyDLL(dll);
+ //DL_DestroyDLL(&dll);
//DL_UnloadSymbolMap();
return 0;
}
diff --git a/libpsn00b/CMakeLists.txt b/libpsn00b/CMakeLists.txt
index 602b3c8..a6b6df3 100644
--- a/libpsn00b/CMakeLists.txt
+++ b/libpsn00b/CMakeLists.txt
@@ -48,6 +48,8 @@ foreach(_library IN LISTS PSN00BSDK_LIBRARIES)
${_library} INTERFACE
$<$<STREQUAL:$<UPPER_CASE:$<TARGET_PROPERTY:PSN00BSDK_TARGET_TYPE>>,${_type}>:${_name}>
)
+
+ target_compile_definitions(${_name} PRIVATE SDK_LIBRARY_NAME="${_library}")
endforeach()
endforeach()
@@ -102,6 +104,6 @@ install(
# once the debug and release builds are merged into the same installation tree.
install(
EXPORT libpsn00b
- DESTINATION ${CMAKE_INSTALL_LIBDIR}/libpsn00b
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/libpsn00b/cmake
#EXPORT_LINK_INTERFACE_LIBRARIES
)
diff --git a/libpsn00b/cmake/internal_setup.cmake b/libpsn00b/cmake/internal_setup.cmake
index e78355f..b21451e 100644
--- a/libpsn00b/cmake/internal_setup.cmake
+++ b/libpsn00b/cmake/internal_setup.cmake
@@ -34,7 +34,7 @@ if(NOT DEFINED PSN00BSDK_VERSION)
string(JSON PSN00BSDK_GIT_COMMIT GET ${_json} git_commit)
endif()
-include(${CMAKE_CURRENT_LIST_DIR}/../libpsn00b.cmake OPTIONAL)
+include(${CMAKE_CURRENT_LIST_DIR}/libpsn00b.cmake OPTIONAL)
if(TARGET psn00bsdk)
link_libraries(psn00bsdk)
endif()
diff --git a/libpsn00b/include/assert.h b/libpsn00b/include/assert.h
index 32301e2..12212af 100644
--- a/libpsn00b/include/assert.h
+++ b/libpsn00b/include/assert.h
@@ -1,20 +1,35 @@
/*
- * PSn00bSDK assert macro
+ * PSn00bSDK assert macro and internal logging
* (C) 2022 spicyjpeg - MPL licensed
+ *
+ * Note that the _sdk_log() macro is used internally by PSn00bSDK to output
+ * debug messages and warnings.
*/
#ifndef __ASSERT_H
#define __ASSERT_H
+#include <stdio.h>
+
void _assert_abort(const char *file, int line, const char *expr);
#ifdef NDEBUG
-#define assert(x)
+
+#define assert(expr)
+#define _sdk_log(fmt, ...)
+
#else
+
#define assert(expr) { \
- if (!(expr)) \
- _assert_abort(__FILE__, __LINE__, #expr); \
+ if (!(expr)) _assert_abort(__FILE__, __LINE__, #expr); \
}
+
+#ifdef SDK_LIBRARY_NAME
+#define _sdk_log(fmt, ...) printf(SDK_LIBRARY_NAME ": " fmt, ##__VA_ARGS__)
+#else
+#define _sdk_log(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#endif
+
#endif
#endif
diff --git a/libpsn00b/include/dlfcn.h b/libpsn00b/include/dlfcn.h
index 5848a95..3c5260d 100644
--- a/libpsn00b/include/dlfcn.h
+++ b/libpsn00b/include/dlfcn.h
@@ -7,38 +7,31 @@
#define __DLFCN_H
#include <stdint.h>
+#include <stddef.h>
#include <elf.h>
-/* Helper macro for setting $t9 before calling a function */
+/* Macros */
-#define DL_PRE_CALL(func) { \
- __asm__ volatile("move $t9, %0;" :: "r"(func) : "$t9"); \
-}
+/**
+ * @brief Prepares for a DLL function call.
+ *
+ * @details Sets the $t9 register to the specified value (which should be a
+ * pointer to a DLL function obtained using DL_GetDLLSymbol()). This must be
+ * done prior to calling a DLL function from the main executable to ensure the
+ * DLL can correctly invoke the symbol resolver if necessary.
+ *
+ * This macro is not required when calling a DLL function from another DLL, as
+ * GCC will generate code to set $t9 appropriately.
+ */
+#define DL_PRE_CALL(func) \
+ __asm__ volatile("move $t9, %0;" :: "r"(func) : "$t9");
-/* Types */
-
-#define RTLD_DEFAULT ((DLL *) 0)
-
-typedef enum _DL_Error {
- RTLD_E_NONE = 0, // No error
- RTLD_E_FILE_OPEN = 1, // Unable to find or open file
- RTLD_E_FILE_ALLOC = 2, // Unable to allocate buffer to load file into
- RTLD_E_FILE_READ = 3, // Failed to read file
- RTLD_E_NO_MAP = 4, // No symbol map has been loaded yet
- RTLD_E_MAP_ALLOC = 5, // Unable to allocate symbol map structures
- RTLD_E_NO_SYMBOLS = 6, // No symbols found in symbol map
- RTLD_E_DLL_NULL = 7, // Unable to initialize DLL from null pointer
- RTLD_E_DLL_ALLOC = 8, // Unable to allocate DLL metadata structures
- RTLD_E_DLL_FORMAT = 9, // Unsupported DLL type or format
- RTLD_E_MAP_SYMBOL = 10, // Symbol not found in symbol map
- RTLD_E_DLL_SYMBOL = 11, // Symbol not found in DLL
- RTLD_E_HASH_LOOKUP = 12 // Hash table lookup failed due to internal error
-} DL_Error;
+/* Structure and enum definitions */
typedef enum _DL_ResolveMode {
- RTLD_LAZY = 1, // Resolve functions when they are first called (default)
- RTLD_NOW = 2, // Resolve all symbols immediately on load
- RTLD_FREE_ON_DESTROY = 4 // Automatically free DLL buffer when closing DLL
+ DL_LAZY = 1, // Resolve functions when they are first called (default)
+ DL_NOW = 2, // Resolve all symbols immediately on load
+ DL_FREE_ON_DESTROY = 4 // Automatically free DLL buffer when closing DLL
} DL_ResolveMode;
// Members of this struct should not be accessed directly in most cases, but
@@ -55,151 +48,171 @@ typedef struct _DLL {
uint16_t got_length;
} DLL;
-/* API */
+/* Public API */
#ifdef __cplusplus
extern "C" {
#endif
/**
- * @brief Reads the symbol table from the provided string buffer (which may or
- * may not be null-terminated), parses it and stores the parsed entries into a
- * private hash table; the buffer won't be further referenced and can be safely
- * deallocated after parsing. Returns the number of entries successfully parsed
- * or -1 if an error occurred.
+ * @brief Creates an empty symbol map in memory.
*
- * This function expects the string buffer to contain one more lines, each of
- * which must follow this format:
+ * @details Initializes the internal symbol hash table to contain at most the
+ * given number of symbols. Once this function is called, symbols can be
+ * registered using DL_AddMapSymbol() and then looked up using
+ * DL_GetMapSymbol(). The default DLL resolver will search the hash table for
+ * external symbols required by DLLs.
*
- * <SYMBOL_NAME> <T|R|D|B> <HEX_ADDRESS> <HEX_SIZE> [DEBUG_INFO...]
+ * This function is normally not required when loading a map file through
+ * DL_ParseSymbolMap(), but it can be used alongside DL_AddMapSymbol() to
+ * implement a custom symbol map parser.
*
- * The "nm" tool included in the GCC toolchain can be used to generate a map
- * file in the appropriate format after building the executable, by using this
- * command:
+ * @param num_entries
+ * @return 0 or -1 in case of error
*
- * mipsel-none-elf-nm -f posix -l -n executable.elf >executable.map
+ * @see DL_AddMapSymbol(), DL_GetMapSymbol()
+ */
+int DL_InitSymbolMap(int num_entries);
+
+/**
+ * @brief Destroys the currently loaded symbol map.
*
- * @param ptr
- * @param size
- * @return -1 or number of entries parsed
+ * @details Frees the internal hash table allocated by DL_InitSymbolMap() or
+ * DL_ParseSymbolMap(), containing the currently loaded symbol map. Freeing the
+ * table manually before loading a new symbol map is normally unnecessary as it
+ * is done automatically, however this function can be useful to recover heap
+ * space once the map is no longer needed.
*/
-int32_t DL_ParseSymbolMap(const char *ptr, size_t size);
+void DL_UnloadSymbolMap(void);
/**
- * @brief File wrapper around DL_ParseSymbolMap(). Allocates a temporary buffer
- * then loads the specified map file into it (using BIOS APIs) and calls
- * DL_ParseSymbolMap() to parse it. The buffer is deallocated immediately after
- * parsing.
+ * @brief Adds a symbol to the currently loaded symbol map.
*
- * @param filename Must always contain device name, e.g. "cdrom:MODULE.DLL;1"
- * @return -1 or number of entries parsed
+ * @details Registers a new symbol (function or variable) with the given name
+ * and address, and adds it to the internal hash table. The symbol can then be
+ * looked up using DL_GetMapSymbol(). The default DLL resolver will search the
+ * hash table for external symbols required by DLLs.
+ *
+ * This function shall only be called after DL_InitSymbolMap() or
+ * DL_ParseSymbolMap() is called.
+ *
+ * @param name
+ * @param ptr
+ *
+ * @see DL_GetMapSymbol()
*/
-//int32_t DL_LoadSymbolMapFromFile(const char *filename);
+void DL_AddMapSymbol(const char *name, void *ptr);
/**
- * @brief Frees internal buffers containing the currently loaded symbol map.
- * This is automatically done before loading a new symbol map so there is no
- * need to call this function in most cases, however it can still be useful to
- * free up space on the heap once the symbol map is no longer needed.
+ * @brief Creates a symbol map in memory from a map file in text format.
+ *
+ * @details Initializes the internal symbol hash table, then parses entries
+ * from the provided string buffer (which may or may not be null-terminated)
+ * and adds each one to the table. The string buffer won't be further
+ * referenced and can be safely deallocated after parsing. Returns the number
+ * of entries successfully parsed.
+ *
+ * The string buffer shall contain one or more lines, each of which must follow
+ * this format:
+ *
+ * <SYMBOL_NAME> <T|R|D|B> <HEX_ADDRESS> <HEX_SIZE> [...]
+ *
+ * The "nm" tool included in the GCC toolchain can be used to generate a map
+ * file in the appropriate format after building the executable:
+ *
+ * mipsel-none-elf-nm -f posix -l -n executable.elf >executable.map
+ *
+ * @param ptr
+ * @param size
+ * @return Number of entries parsed, -1 in case of failure
+ *
+ * @see DL_UnloadSymbolMap(), DL_GetMapSymbol()
*/
-void DL_UnloadSymbolMap(void);
+int DL_ParseSymbolMap(const char *ptr, size_t size);
/**
- * @brief Queries the currently loaded symbol map for the symbol with the given
- * name and returns a pointer to it, which can then be used to directly access
- * the symbol. If the symbol can't be found, null is returned instead.
+ * @brief Gets a pointer to a symbol in the currently loaded map by its name.
+ *
+ * @details Queries the currently loaded symbol map for the symbol with the
+ * given name and returns a pointer to it, which can then be used to directly
+ * access the symbol. If the symbol can't be found, a null pointer is returned.
*
* @param name
- * @return NULL or pointer to symbol (any type)
+ * @return NULL or pointer to symbol
*/
-void *DL_GetSymbolByName(const char *name);
+void *DL_GetMapSymbol(const char *name);
/**
- * @brief Sets a custom function to be called for resolving symbols in DLLs.
+ * @brief Sets a custom handler for resolving symbols in DLLs.
+ *
+ * @details Sets a custom function to be called for resolving symbols in DLLs.
* The function will be given a pointer to the current DLL and the unresolved
* symbol's name, and should return the address of the symbol in the executable
* (the dynamic linker will lock up if it returns null). Passing null instead
- * of a function resets the default behavior of calling DL_GetSymbolByName() to
+ * of a function resets the default behavior of calling DL_GetMapSymbol() to
* find the symbol in the currently loaded symbol map.
- *
+ *
* @param callback NULL or pointer to callback function
+ * @return Previously set callback or NULL
*/
-void DL_SetResolveCallback(void *(*callback)(DLL *, const char *));
+void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *));
/**
- * @brief Initializes a buffer holding the contents of a dynamically-loaded
+ * @brief Initializes a DLL structure.
+ *
+ * @details Initializes a buffer holding the contents of a dynamically-loaded
* library file (compiled with the dll.ld linker script and converted to a raw
- * binary) *in-place*. A new DLL struct is allocated to store metadata but,
+ * binary) *in-place*. Metadata is written to the provided DLL struct but,
* unlike DL_ParseSymbolMap(), the DLL's actual code, data and tables are
* referenced directly from the provided buffer. The buffer must not be moved
* or deallocated, at least not before calling DL_DestroyDLL() on the DLL
* struct returned by this function.
*
* The mode argument specifies when symbols in the DLL should be resolved.
- * Setting it to RTLD_LAZY defers resolution of undefined functions to when
- * they are first called, while RTLD_NOW forces all symbols to be resolved
- * immediately. If a custom resolver has been set via DL_SetResolveCallback(),
- * it will be called for each symbol to resolve.
+ * Setting it to DL_LAZY defers resolution of undefined functions to when they
+ * are first called, while DL_NOW forces all symbols to be resolved
+ * immediately. Either mode can be OR'd with DL_FREE_ON_DESTROY to
+ * automatically deallocate the provided buffer when DL_DestroyDLL() is called.
*
+ * If a custom resolver has been set via DL_SetResolveCallback(), it will be
+ * called for each symbol to resolve.
+ *
+ * @param dll
* @param ptr
* @param size
- * @param mode RTLD_LAZY or RTLD_NOW
- * @return NULL or pointer to a new DLL struct
- */
-DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode);
-
-/**
- * @brief File wrapper around dlinit(). Allocates a new buffer, loads the
- * specified file into it (using BIOS APIs) and calls dlinit() on that. When
- * calling dlclose() on a DLL loaded from a file, the file buffer is
- * automatically destroyed.
- *
- * @param filename Must always contain device name, e.g. "cdrom:MODULE.DLL;1"
- * @param mode RTLD_LAZY or RTLD_NOW + optionally RTLD_FREE_ON_DESTROY
- * @return NULL or pointer to a new DLL struct
+ * @param mode DL_LAZY or DL_NOW, optionally with DL_FREE_ON_DESTROY
+ * @return Pointer to DLL structure or NULL in case of failure
+ *
+ * @see DL_DestroyDLL(), DL_GetDLLSymbol()
*/
-//DLL *DL_LoadDLLFromFile(const char *filename, DL_ResolveMode mode);
+DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode);
/**
- * @brief Destroys a loaded DLL by calling its global destructors and freeing
- * the buffer it's loaded in. Any pointer passed to DL_DestroyDLL() should no
- * longer be used after the call. If the DLL was initialized in-place using
- * DL_CreateDLL(), DL_DestroyDLL() will only free the buffer initially passed
- * to DL_CreateDLL() if RTLD_FREE_ON_DESTROY was used.
+ * @brief Destroys a DLL structure.
+ *
+ * @details Destroys a loaded DLL by calling its global destructors. If the DLL
+ * was initialized with the DL_FREE_ON_DESTROY flag, the buffer associated with
+ * the DLL is also deallocated. Note that the DLL structure itself is *not*
+ * deallocated.
*
* @param dll
*/
void DL_DestroyDLL(DLL *dll);
/**
- * @brief Returns a pointer to the DLL symbol with the given name, or null if
- * it can't be found. If null or RTLD_DEFAULT is passed as first argument, the
- * executable itself is searched instead using the symbol map (behaving the
- * same as DL_GetSymbolByName()).
+ * @brief Gets a pointer to a symbol in a DLL by its name.
+ *
+ * @details Returns a pointer to the DLL symbol with the given name, or null if
+ * it can't be found. If a null pointer is passed as first argument, the
+ * executable itself is searched instead using the symbol map (behaving
+ * identically to DL_GetMapSymbol()).
*
- * @param dll DLL struct or RTLD_DEFAULT
+ * @param dll Pointer to DLL structure or NULL
* @param name
* @return NULL or pointer to symbol (any type)
*/
void *DL_GetDLLSymbol(const DLL *dll, const char *name);
-/**
- * @brief Returns a code describing the last error that occurred, or DL_E_NONE
- * if no error has occurred since the last call to dlerror() (i.e. calling this
- * also resets the internal error flags).
- *
- * @return NULL or member of DL_Error enum
- */
-DL_Error DL_GetLastError(void);
-
-/* POSIX "compatibility" macros */
-
-#define dlinit(ptr, size, mode) DL_CreateDLL(ptr, size, mode)
-//#define dlopen(filename, mode) DL_LoadDLLFromFile(filename, mode)
-#define dlsym(dll, name) DL_GetDLLSymbol(dll, name)
-#define dlclose(dll) DL_DestroyDLL(dll)
-#define dlerror() DL_GetLastError()
-
#ifdef __cplusplus
}
#endif
diff --git a/libpsn00b/include/hwregs_a.inc b/libpsn00b/include/hwregs_a.inc
index c78b41a..ca38542 100644
--- a/libpsn00b/include/hwregs_a.inc
+++ b/libpsn00b/include/hwregs_a.inc
@@ -32,12 +32,18 @@
.set SPU_MASTER_VOL_R, 0x1d82
.set SPU_REVERB_VOL_L, 0x1d84
.set SPU_REVERB_VOL_R, 0x1d86
-.set SPU_KEY_ON, 0x1d88
-.set SPU_KEY_OFF, 0x1d8c
-.set SPU_FM_MODE, 0x1d90
-.set SPU_NOISE_MODE, 0x1d94
-.set SPU_REVERB_ON, 0x1d98
-.set SPU_CHAN_STATUS, 0x1d9c
+.set SPU_KEY_ON1, 0x1d88
+.set SPU_KEY_ON2, 0x1d8a
+.set SPU_KEY_OFF1, 0x1d8c
+.set SPU_KEY_OFF2, 0x1d8e
+.set SPU_FM_MODE1, 0x1d90
+.set SPU_FM_MODE2, 0x1d92
+.set SPU_NOISE_MODE1, 0x1d94
+.set SPU_NOISE_MODE2, 0x1d96
+.set SPU_REVERB_ON1, 0x1d98
+.set SPU_REVERB_ON2, 0x1d9a
+.set SPU_CHAN_STATUS1, 0x1d9c
+.set SPU_CHAN_STATUS2, 0x1d9e
.set SPU_REVERB_ADDR, 0x1da2
.set SPU_IRQ_ADDR, 0x1da4
@@ -59,8 +65,8 @@
.set SPU_VOICE_VOL_R, 0x02
.set SPU_VOICE_FREQ, 0x04
.set SPU_VOICE_ADDR, 0x06
-.set SPU_VOICE_ADSR_L, 0x08
-.set SPU_VOICE_ADSR_H, 0x0a
+.set SPU_VOICE_ADSR1, 0x08
+.set SPU_VOICE_ADSR2, 0x0a
.set SPU_VOICE_LOOP, 0x0e
## MDEC
diff --git a/libpsn00b/include/hwregs_c.h b/libpsn00b/include/hwregs_c.h
index b205b87..0e21922 100644
--- a/libpsn00b/include/hwregs_c.h
+++ b/libpsn00b/include/hwregs_c.h
@@ -38,12 +38,18 @@
#define SPU_MASTER_VOL_R _MMIO16(IOBASE | 0x1d82)
#define SPU_REVERB_VOL_L _MMIO16(IOBASE | 0x1d84)
#define SPU_REVERB_VOL_R _MMIO16(IOBASE | 0x1d86)
-#define SPU_KEY_ON _MMIO32(IOBASE | 0x1d88)
-#define SPU_KEY_OFF _MMIO32(IOBASE | 0x1d8c)
-#define SPU_FM_MODE _MMIO32(IOBASE | 0x1d90)
-#define SPU_NOISE_MODE _MMIO32(IOBASE | 0x1d94)
-#define SPU_REVERB_ON _MMIO32(IOBASE | 0x1d98)
-#define SPU_CHAN_STATUS _MMIO32(IOBASE | 0x1d9c)
+#define SPU_KEY_ON1 _MMIO16(IOBASE | 0x1d88)
+#define SPU_KEY_ON2 _MMIO16(IOBASE | 0x1d8a)
+#define SPU_KEY_OFF1 _MMIO16(IOBASE | 0x1d8c)
+#define SPU_KEY_OFF2 _MMIO16(IOBASE | 0x1d8e)
+#define SPU_FM_MODE1 _MMIO16(IOBASE | 0x1d90)
+#define SPU_FM_MODE2 _MMIO16(IOBASE | 0x1d92)
+#define SPU_NOISE_MODE1 _MMIO16(IOBASE | 0x1d94)
+#define SPU_NOISE_MODE2 _MMIO16(IOBASE | 0x1d96)
+#define SPU_REVERB_ON1 _MMIO16(IOBASE | 0x1d98)
+#define SPU_REVERB_ON2 _MMIO16(IOBASE | 0x1d9a)
+#define SPU_CHAN_STATUS1 _MMIO16(IOBASE | 0x1d9c)
+#define SPU_CHAN_STATUS2 _MMIO16(IOBASE | 0x1d9e)
#define SPU_REVERB_ADDR _MMIO16(IOBASE | 0x1da2)
#define SPU_IRQ_ADDR _MMIO16(IOBASE | 0x1da4)
@@ -67,7 +73,8 @@
#define SPU_CH_VOL_R(N) _MMIO16(IOBASE | 0x1c02 + 16 * (N))
#define SPU_CH_FREQ(N) _MMIO16(IOBASE | 0x1c04 + 16 * (N))
#define SPU_CH_ADDR(N) _MMIO16(IOBASE | 0x1c06 + 16 * (N))
-#define SPU_CH_ADSR(N) _MMIO32(IOBASE | 0x1c08 + 16 * (N))
+#define SPU_CH_ADSR1(N) _MMIO16(IOBASE | 0x1c08 + 16 * (N))
+#define SPU_CH_ADSR2(N) _MMIO16(IOBASE | 0x1c0a + 16 * (N))
#define SPU_CH_LOOP_ADDR(N) _MMIO16(IOBASE | 0x1c0e + 16 * (N))
/* MDEC */
diff --git a/libpsn00b/include/inline_c.h b/libpsn00b/include/inline_c.h
index c5eaa59..5facc1c 100644
--- a/libpsn00b/include/inline_c.h
+++ b/libpsn00b/include/inline_c.h
@@ -3,9 +3,17 @@
* (C) 2019 Lameguy64
* (C) 2021-2022 Soapy (tweaked by spicyjpeg)
*
- * This header is basically identical to Nugget's inline_n.h. All GTE commands
- * can be used right away without having to run DMPSX or any other tool on
- * object files.
+ * This header is basically identical to Nugget's inline_n.h.
+ */
+
+/**
+ * @file inline_c.h
+ * @brief Inline GTE macro header
+ *
+ * @details This header provides a set of macros for making use of GTE commands
+ * and registers from C or C++ code. Unlike the official SDK, all commands can
+ * be used right away without having to run any other post-processing tool on
+ * compiled object files.
*/
#ifndef _INLINE_C_H
@@ -13,7 +21,11 @@
/* GTE load macros */
-/* Load a SVECTOR (passed as a pointer) to GTE V0
+/**
+ * @brief Loads a single SVECTOR to GTE vector register V0
+ *
+ * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY0
+ * and C2_VZ0.
*/
#define gte_ldv0( r0 ) __asm__ volatile ( \
"lwc2 $0, 0( %0 );" \
@@ -22,7 +34,11 @@
: "r"( r0 ) \
: "$t0" )
-/* Load a SVECTOR (passed as a pointer) to GTE V1
+/**
+ * @brief Loads a single SVECTOR to GTE vector register V1
+ *
+ * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY1
+ * and C2_VZ1.
*/
#define gte_ldv1( r0 ) __asm__ volatile ( \
"lwc2 $2, 0( %0 );" \
@@ -31,7 +47,11 @@
: "r"( r0 ) \
: "$t0" )
-/* Load a SVECTOR (passed as a pointer) to GTE V2
+/**
+ * @brief Loads a single SVECTOR to GTE vector register V2
+ *
+ * @details Loads values from an SVECTOR struct to GTE data registers C2_VXY2
+ * and C2_VZ2.
*/
#define gte_ldv2( r0 ) __asm__ volatile ( \
"lwc2 $4, 0( %0 );" \
@@ -40,7 +60,11 @@
: "r"( r0 ) \
: "$t0" )
-/* Load three SVECTORs (passed as a pointer) to the GTE at once
+/**
+ * @brief Load three SVECTORs to GTE vector registers at once
+ *
+ * @details Loads values from three SVECTOR structs to GTE data registers
+ * C2_VXY0 and C2_VZ0, C2_VXY1 and C2_VZ1, C2_VXY2 and C2_VZ2 at once.
*/
#define gte_ldv3( r0, r1, r2 ) __asm__ volatile ( \
"lwc2 $0, 0( %0 );" \
@@ -88,6 +112,14 @@
: \
: "r"( r0 ) )
+/**
+ * @brief Load a CVECTOR to GTE register C2_RGBC
+ *
+ * @details Loads a CVECTOR value to GTE data register C2_RGBC. The primitive
+ * code (the last byte of a CVECTOR) is passed to the color FIFO registers when
+ * performing lighting compute operations, so it can be stored to the RGBC
+ * field of a primitive directly without any additional operation required.
+ */
#define gte_ldrgb( r0 ) __asm__ volatile ( \
"lwc2 $6 , 0( %0 );" \
: \
@@ -224,6 +256,12 @@
: "r"( r0 ) \
: "$12", "$13", "$14" )
+/**
+ * @brief Loads values to GTE registers C2_IR1-3
+ *
+ * @details Loads three 32-bit values to GTE data registers C2_IR1, C2_IR2 and
+ * C2_IR3.
+ */
#define gte_ldopv2( r0 ) __asm__ volatile ( \
"lwc2 $11, 8( %0 );" \
"lwc2 $9 , 0( %0 );" \
@@ -253,6 +291,14 @@
: \
: "r"( r0 ), "r"( r1 ), "r"( r2 ) )
+/**
+ * @brief Sets an RGB color value to the GTE
+ *
+ * @details Sets the specified RGB value to GTE control registers C2_RBK,
+ * C2_GBK and C2_BBK. This specifies the color value to use when a normal faces
+ * away from the direction of the light source. This can be considered as the
+ * ambient light color.
+ */
#define gte_SetBackColor( r0, r1, r2 ) __asm__ volatile ( \
"sll $t0, %0, 4;" \
"sll $t1, %1, 4;" \
@@ -282,6 +328,13 @@
: "r"( r0 ), "r"( r1 ), "r"( r2 ) \
: "$12", "$13", "$14" )
+/**
+ * @brief Sets the GTE screen offset
+ *
+ * @details Sets the values of the GTE screen offset which is applied to 2D
+ * projected coordinates when performing perspective transformation. The values
+ * are set to GTE control registers C2_OFX and C2_OFY.
+ */
#define gte_SetGeomOffset( r0, r1 ) __asm__ volatile ( \
"sll $t0, %0, 16;" \
"sll $t1, %1, 16;" \
@@ -291,6 +344,13 @@
: "r"( r0 ), "r"( r1 ) \
: "$t0", "$t1" )
+/**
+ * @brief Sets the distance of the projection plane
+ *
+ * @details Sets the specified value to GTE control register C2_H which
+ * determines the projection plane distance, otherwise known as the field of
+ * view.
+ */
#define gte_SetGeomScreen( r0 ) __asm__ volatile ( \
"ctc2 %0, $26;" \
: \
@@ -305,6 +365,12 @@
: "r"( r0 ) \
: "$12", "$13" )
+/**
+ * @brief Sets a 3x3 rotation matrix portion from a MATRIX to the GTE
+ *
+ * @details Sets the 3x3 rotation matrix coordinates from a MATRIX struct to
+ * GTE control registers C2_R11R12, C2_R13R21, C2_R22R23, C2_R31R32 and C2_R33.
+ */
#define gte_SetRotMatrix( r0 ) __asm__ volatile ( \
"lw $t0, 0( %0 );" \
"lw $t1, 4( %0 );" \
@@ -329,6 +395,17 @@
: "r"( r0 ) \
: "$12", "$13" )
+/**
+ * @brief Sets a 3x3 lighting matrix from a MATRIX to the GTE
+ *
+ * @details Sets the 3x3 lighting matrix coordinates from a MATRIX struct to
+ * GTE control registers C2_L11L12, C2_L13L21, C2_L22L23, C2_L31L32 and C2_L33.
+ *
+ * The lighting matrix is essentially a triplet of three light direction
+ * vectors. L11, L12 and L13 represent the X, Y and Z coordinates of light
+ * source 0 for example. Coordinates must be normalized to ensure correct
+ * results.
+ */
#define gte_SetLightMatrix( r0 ) __asm__ volatile ( \
"lw $t0, 0( %0 );" \
"lw $t1, 4( %0 );" \
@@ -353,6 +430,17 @@
: "r"( r0 ) \
: "$12", "$13" )
+/**
+ * @brief Sets a 3x3 color matrix from a MATRIX to the GTE
+ *
+ * @details Sets the 3x3 color matrix values from a MATRIX struct to GTE
+ * control registers C2_LR1LR2, C2_LR3LG1, C2_LG2LG3, C2_LB1LB2 and C2_LB3.
+ *
+ * The light color matrix is essentially a triplet of three RGB colors for each
+ * of the three light sources. LR1, LG1 and LB1 represent the R, G and B color
+ * values for light source 0 for example. Values are of range 0 to 4095, higher
+ * values will be saturated.
+ */
#define gte_SetColorMatrix( r0 ) __asm__ volatile ( \
"lw $t0, 0( %0 );" \
"lw $t1, 4( %0 );" \
@@ -368,6 +456,12 @@
: "r"( r0 ) \
: "$t2" )
+/**
+ * @brief Sets the translation portion of a MATRIX to the GTE
+ *
+ * @details Sets the translation coordinates from a MATRIX struct to GTE
+ * control registers C2_TRX, C2_TRY and C2_TRZ respectively.
+ */
#define gte_SetTransMatrix( r0 ) __asm__ volatile ( \
"lw $t0, 20( %0 );" \
"lw $t1, 24( %0 );" \
@@ -1044,11 +1138,39 @@
/* GTE operation macros */
+/**
+ * @brief Rotate, Translate and Perspective Single (15 cycles)
+ *
+ * @details Performs rotation, translation and perspective calculation of a
+ * single vertex. Divide overflows are simply saturated allowing for crude Z
+ * clipping. Check C2_FLAG to determine which overflow error has occurred
+ * during calculation.
+ *
+ * The following equation is performed when executing this GTE command:
+ *
+ * IR1 = MAC1 = (TRX*4096 + R11*VX0 + R12*VY0 + R13*VZ0) / 4096
+ * IR2 = MAC2 = (TRY*4096 + R21*VX0 + R22*VY0 + R23*VZ0) / 4096
+ * IR3 = MAC3 = (TRZ*4096 + R31*VX0 + R32*VY0 + R33*VZ0) / 4096
+ * SZ3 = MAC3
+ *
+ * MAC0 = (((H*131072/SZ3)+1)/2) * IR1 + OFX, SX2 = MAC0 / 65536
+ * MAC0 = (((H*131072/SZ3)+1)/2) * IR2 + OFY, SY2 = MAC0 / 65536
+ * MAC0 = (((H*131072/SZ3)+1)/2) * DQA + DQB, IR0 = MAC0 / 4096
+ */
#define gte_rtps() __asm__ volatile ( \
"nop;" \
"nop;" \
"cop2 0x0180001;" )
+/**
+ * @brief Rotate, Translate and Perspective Triple (23 cycles)
+ *
+ * @details Performs rotation, translation and perspective calculation of three
+ * vertices at once. The equation performed is the same as gte_rtps(), only
+ * repeated once for each vertex. The result for the first vertex is stored
+ * in GTE data register C2_SXY0, the second in C2_SXY1 and the third in
+ * C2_SXY2.
+ */
#define gte_rtpt() __asm__ volatile ( \
"nop;" \
"nop;" \
@@ -1325,16 +1447,53 @@
"nop;" \
"cop2 0x0138041C;" )
+/**
+ * @brief Normal clipping (8 cycles)
+ *
+ * @details Computes the sign of three screen coordinates (C2_SXY0-2) used for
+ * backface culling. If the value of C2_MAC0 is negative, the coordinates are
+ * inverted and thus the triangle is back facing.
+ *
+ * The following equation is performed when executing this GTE command:
+ *
+ * MAC0 = SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1
+ */
#define gte_nclip() __asm__ volatile ( \
"nop;" \
"nop;" \
"cop2 0x01400006;" )
+/**
+ * @brief Average screen Z result (5 cycles)
+ *
+ * @details Averages the values of GTE registers C2_SZ1, C2_SZ2 and C2_SZ3,
+ * multiplies it by C2_ZSF3 and divides the result by 0x1000 before storing to
+ * C2_OTZ. Used to compute the ordering table depth level for a three-vertex
+ * primitive.
+ *
+ * The following equation is performed when executing this GTE command:
+ *
+ * MAC0 = ZSF3 * (SZ1+SZ2+SZ3)
+ * OTZ = MAC0 / 4096
+ */
#define gte_avsz3() __asm__ volatile ( \
"nop;" \
"nop;" \
"cop2 0x0158002D;" )
+/**
+ * @brief Average screen Z result (6 cycles)
+ *
+ * @details Averages the values of GTE registers C2_SZ0, C2_SZ1, C2_SZ2 and
+ * C2_SZ3, multiplies it by C2_ZSF4 and divides the result by 0x1000 before
+ * storing to C2_OTZ. Used to compute the ordering table depth level for a
+ * four-vertex primitive.
+ *
+ * The following equation is performed when executing this GTE command:
+ *
+ * MAC0 = ZSF4 * (SZ0+SZ1+SZ2+SZ3)
+ * OTZ = MAC0 / 4096
+ */
#define gte_avsz4() __asm__ volatile ( \
"nop;" \
"nop;" \
diff --git a/libpsn00b/include/psxcd.h b/libpsn00b/include/psxcd.h
index 0460f20..8150703 100644
--- a/libpsn00b/include/psxcd.h
+++ b/libpsn00b/include/psxcd.h
@@ -3,174 +3,792 @@
* (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
*/
+/**
+ * @file psxcd.h
+ * @brief CD-ROM library header
+ *
+ * @details The PSn00bSDK CD-ROM library provides facilities for using the
+ * CD-ROM hardware of the PS1. Unlike the CD-ROM library of the official SDK,
+ * psxcd is immune to the 30 file and directory limit and is capable of parsing
+ * directories containing as many files as the ISO9660 file system can support,
+ * unless the records are too large to be loaded into memory. However, to
+ * maintain compatibility with the PS1 BIOS, the root directory must not exceed
+ * the 30 file limit and the entire disc should contain no more than 45
+ * directories total.
+ *
+ * Whilst psxcd is not constrained by the 30 file per directory limit, it does
+ * not support Joliet CD-ROM extensions to support long file names. However, a
+ * library extension is considered for future development.
+ */
+
#ifndef __PSXCD_H
#define __PSXCD_H
#include <stdint.h>
-/*
- * CD-ROM control commands
- */
-#define CdlNop 0x01 /* a.k.a. Getstat */
-#define CdlSetloc 0x02
-#define CdlPlay 0x03
-#define CdlForward 0x04
-#define CdlBackward 0x05
-#define CdlReadN 0x06
-#define CdlStandby 0x07 /* a.k.a. MotorOn */
-#define CdlStop 0x08
-#define CdlPause 0x09
-#define CdlInit 0x0A
-#define CdlMute 0x0B
-#define CdlDemute 0x0C
-#define CdlSetfilter 0x0D
-#define CdlSetmode 0x0E
-#define CdlGetparam 0x0F
-#define CdlGetlocL 0x10
-#define CdlGetlocP 0x11
-#define CdlSetsession 0x12 /* ORIGINAL CODE */
-#define CdlGetTN 0x13
-#define CdlGetTD 0x14
-#define CdlSeekL 0x15
-#define CdlSeekP 0x16
-#define CdlTest 0x19 /* ORIGINAL CODE */
-#define CdlReadS 0x1B
+/* Enum definitions */
-/*
- * CD-ROM status bits
- */
-#define CdlStatError 0x01
-#define CdlStatStandby 0x02
-#define CdlStatSeekError 0x04
-#define CdlStatIdError 0x08 /* ORIGINAL CODE */
-#define CdlStatShellOpen 0x10
-#define CdlStatRead 0x20
-#define CdlStatSeek 0x40
-#define CdlStatPlay 0x80
+typedef enum _CdlCommand {
+ CdlNop = 0x01,
+ CdlSetloc = 0x02,
+ CdlPlay = 0x03,
+ CdlForward = 0x04,
+ CdlBackward = 0x05,
+ CdlReadN = 0x06,
+ CdlStandby = 0x07,
+ CdlStop = 0x08,
+ CdlPause = 0x09,
+ CdlInit = 0x0a,
+ CdlMute = 0x0b,
+ CdlDemute = 0x0c,
+ CdlSetfilter = 0x0d,
+ CdlSetmode = 0x0e,
+ CdlGetparam = 0x0f,
+ CdlGetlocL = 0x10,
+ CdlGetlocP = 0x11,
+ CdlSetsession = 0x12,
+ CdlGetTN = 0x13,
+ CdlGetTD = 0x14,
+ CdlSeekL = 0x15,
+ CdlSeekP = 0x16,
+ CdlTest = 0x19,
+ CdlGetID = 0x1a,
+ CdlReadS = 0x1b,
+ CdlReset = 0x1c
+} CdlCommand;
-/*
- * CD-ROM mode bits
- */
-#define CdlModeDA 0x01
-#define CdlModeAP 0x02
-#define CdlModeRept 0x04
-#define CdlModeSF 0x08
-//#define CdlModeSize0 0x10
-//#define CdlModeSize1 0x20
-#define CdlModeIgnore 0x10
-#define CdlModeSize 0x20
-#define CdlModeRT 0x40
-#define CdlModeSpeed 0x80
+typedef enum _CdlStatFlag {
+ CdlStatError = 1 << 0,
+ CdlStatStandby = 1 << 1,
+ CdlStatSeekError = 1 << 2,
+ CdlStatIdError = 1 << 3,
+ CdlStatShellOpen = 1 << 4,
+ CdlStatRead = 1 << 5,
+ CdlStatSeek = 1 << 6,
+ CdlStatPlay = 1 << 7
+} CdlStatFlag;
-/*
- * CD-ROM interrupt result values
+typedef enum _CdlModeFlag {
+ CdlModeDA = 1 << 0,
+ CdlModeAP = 1 << 1,
+ CdlModeRept = 1 << 2,
+ CdlModeSF = 1 << 3,
+ //CdlModeSize0 = 1 << 4,
+ //CdlModeSize1 = 1 << 5,
+ CdlModeIgnore = 1 << 4,
+ CdlModeSize = 1 << 5,
+ CdlModeRT = 1 << 6,
+ CdlModeSpeed = 1 << 7
+} CdlModeFlag;
+
+typedef enum _CdlIntrResult {
+ CdlNoIntr = 0,
+ CdlDataReady = 1,
+ CdlComplete = 2,
+ CdlAcknowledge = 3,
+ CdlDataEnd = 4,
+ CdlDiskError = 5
+} CdlIntrResult;
+
+typedef enum _CdlIsoError {
+ CdlIsoOkay = 0,
+ CdlIsoSeekError = 1,
+ CdlIsoReadError = 2,
+ CdlIsoInvalidFs = 3,
+ CdlIsoLidOpen = 4
+} CdlIsoError;
+
+/**
+ * @brief Translates a BCD format value to decimal
+ *
+ * @details Translates a specified value in BCD format (ie. 32/0x20 = 20) into
+ * a decimal integer, as the CD-ROM controller returns integer values only in
+ * BCD format.
*/
-#define CdlNoIntr 0x00
-#define CdlDataReady 0x01
-#define CdlComplete 0x02
-#define CdlAcknowledge 0x03
-#define CdlDataEnd 0x04
-#define CdlDiskError 0x05
+#define btoi(b) ((b)/16*10+(b)%16)
-/*
- * CD-ROM file system error codes (original)
+/**
+ * @brief Translates a decimal value to BCD
+ *
+ * @details Translates a decimal integer into a BCD format value (ie.
+ * 20 = 32/0x20), as the CD-ROM controller only accepts values in BCD format.
*/
-#define CdlIsoOkay 0x00
-#define CdlIsoSeekError 0x01
-#define CdlIsoReadError 0x02
-#define CdlIsoInvalidFs 0x03
-#define CdlIsoLidOpen 0x04
+#define itob(i) ((i)/10*16+(i)%10)
-#define btoi(b) ((b)/16*10+(b)%16) /* Convert BCD value to integer */
-#define itob(i) ((i)/10*16+(i)%10) /* Convert integer to BCD value */
+/* Structure and type definitions */
-/*
- * CD-ROM disc location struct
- */
-typedef struct _CdlLOC
-{
- uint8_t minute;
- uint8_t second;
- uint8_t sector;
- uint8_t track;
+/**
+ * @brief CD-ROM positional coordinates
+ *
+ * @details This structure is used to specify CD-ROM positional coordinates for
+ * CdlSetloc, CdlReadN and CdlReadS CD-ROM commands. Use CdIntToPos() to set
+ * parameters from a logical sector number.
+ *
+ * @see CdIntToPos(), CdControl()
+ */
+typedef struct _CdlLOC {
+ uint8_t minute; // Minutes (BCD)
+ uint8_t second; // Seconds (BCD)
+ uint8_t sector; // Sector or frame (BCD)
+ uint8_t track; // Track number (not used)
} CdlLOC;
-/*
- * CD-ROM audio attenuation struct (volume)
- */
-typedef struct _CdlATV
-{
- uint8_t val0; /* L -> SPU L */
- uint8_t val1; /* L -> SPU R */
- uint8_t val2; /* R -> SPU R */
- uint8_t val3; /* R -> SPU L */
+/**
+ * @brief CD-ROM attenuation parameters
+ *
+ * @details This structure specifies parameters for the CD-ROM attenuation.
+ * Each value is an 8-bit volume level, with 0x80 being the normal level.
+ *
+ * The CD-ROM attenuation can be used to set the CD-ROM audio output to mono
+ * (0x40, 0x40, 0x40, 0x40) or reversed stereo (0x00, 0x80, 0x00, 0x80). It can
+ * also be used to play one of two stereo channels to both speakers.
+ *
+ * The CD-ROM attenuation affects CD-DA and CD-XA audio.
+ *
+ * @see CdMix()
+ */
+typedef struct _CdlATV {
+ uint8_t val0; // CD to SPU L-to-L volume
+ uint8_t val1; // CD to SPU L-to-R volume
+ uint8_t val2; // CD to SPU R-to-R volume
+ uint8_t val3; // CD to SPU R-to-L volume
} CdlATV;
-/*
- * CD-ROM file information struct
+/**
+ * @brief File entry structure
+ *
+ * @details Used to store basic information of a file such as logical block
+ * location and size. Currently, CdSearchFile() is the only function that uses
+ * this struct but it will be used in directory listing functions that may be
+ * implemented in the future.
+ *
+ * @see CdSearchFile()
*/
-typedef struct _CdlFILE
-{
- CdlLOC pos;
- uint32_t size;
- char name[16];
+typedef struct _CdlFILE {
+ CdlLOC pos; // CD-ROM position coordinates of file
+ uint32_t size; // Size of file in bytes
+ char name[16]; // File name
} CdlFILE;
-typedef struct _CdlFILTER
-{
- uint8_t file;
- uint8_t chan;
- uint16_t pad;
+/**
+ * @brief Structure used to set CD-ROM XA filter
+ *
+ * @details This structure is used to specify stream filter parameters for
+ * CD-ROM XA audio streaming using the CdlSetfilter command. This only affects
+ * CD-ROM XA audio streaming.
+ *
+ * CD-ROM XA audio is normally comprised of up to 8 or more ADPCM compressed
+ * audio streams interleaved into one continuous stream of data. The data
+ * stream is normally read at 2x speed but only one of eight XA audio streams
+ * can be played at a time. The XA stream to play is specified by the
+ * CdlSetfilter command and this struct.
+ *
+ * The CD-ROM XA filter can be changed during CD-ROM XA audio playback with
+ * zero audio interruption. This can be used to achieve dynamic music effects
+ * by switching to alternate versions of a theme to fit specific scenes
+ * seamlessly.
+ *
+ * @see CdControl()
+ */
+typedef struct _CdlFILTER {
+ uint8_t file; // File number to fetch (usually 1)
+ uint8_t chan; // Channel number (0 through 7)
+ uint16_t pad; // Padding
} CdlFILTER;
-/* Directory query context */
-typedef void* CdlDIR;
+/**
+ * @brief CD-ROM directory query context handle
+ *
+ * @details Used to store a directory context created by CdOpenDir(). An open
+ * context can then be used with CdReadDir() and closed with CdCloseDir().
+ *
+ * @see CdOpenDir()
+ */
+typedef void *CdlDIR;
-/* Data callback */
typedef void (*CdlCB)(int, uint8_t *);
+/* Public API */
+
#ifdef __cplusplus
extern "C" {
#endif
+/**
+ * @brief Initializes the CD-ROM library
+ *
+ * @details Initializes the CD-ROM subsystem which includes hooking the
+ * required IRQ handler, sets up internal variables of the CD-ROM library and
+ * attempts to initialize the CD-ROM controller. This function currently takes
+ * no parameters.
+ *
+ * This function must be called after ResetGraph and before any other CD-ROM
+ * library function that interfaces with the CD-ROM controller. It must not be
+ * called more than once, as doing so may cause instability or a crash.
+ *
+ * @return Always 1. May change in the future.
+ */
int CdInit(void);
+/**
+ * @brief Translates a logical sector number to CD-ROM positional coordinates
+ *
+ * @details This function translates the logical sector number from i to CD-ROM
+ * positional coordinates stored to a CdlLOC struct specified by p. The
+ * translation takes the lead-in offset into account so the first logical
+ * sector begins at 0 and the result will be offset by 150 sectors.
+ *
+ * @param i Logical sector number
+ * @param p Pointer to a CdlLOC structure
+ * @return The pointer p, whose contents include the 150 sector offset.
+ */
CdlLOC* CdIntToPos(int i, CdlLOC *p);
-int CdPosToInt(CdlLOC *p);
+
+/**
+ * @brief Translates CD-ROM positional coordinates to a logical sector number
+ *
+ * @details Translates the CD-ROM position parameters from a CdlLOC struct
+ * specified by p to a logical sector number. The translation takes the lead-in
+ * offset of 150 sectors into account so the logical sector number returned
+ * would begin at zero.
+ *
+ * @param p Pointer to a CdlLOC struct
+ * @return Logical sector number minus the 150 sector lead-in.
+ */
+int CdPosToInt(const CdlLOC *p);
+
+/**
+ * @brief Gets CD-ROM TOC information
+ *
+ * @details Retrieves the track entries from a CD's table of contents (TOC). The
+ * function can return up to 99 track entries, which is the maximum number of
+ * audio tracks the CD standard supports.
+ *
+ * This function only retrieves the minutes and seconds of an audio track's
+ * position as the CD-ROM controller only returns the minutes and seconds of a
+ * track, which may result in the end of the previous track being played
+ * instead of the intended track to be played. This can be remedied by having a
+ * 2 second pregap on each audio track on your disc.
+ *
+ * @param toc Pointer to an array of CdlLOC entries
+ * @return Number of tracks on the disc, zero on error.
+ *
+ * @see CdControl()
+ */
int CdGetToc(CdlLOC *toc);
+/**
+ * @brief Issues a control command to the CD-ROM controller
+ *
+ * @details Sends a CD-ROM command specified by com to the CD-ROM controller,
+ * waits for an acknowledge interrupt (very fast) then returns. It will also
+ * issue parameters from param to the CD-ROM controller if the command accepts
+ * parameters. Response data from the CD-ROM controller is stored to result on
+ * commands that produce response data.
+ *
+ * Because this function waits for an acknowledge interrupt from the CD-ROM
+ * controller, this function should not be used in a callback. Instead, use
+ * CdControlF().
+ *
+ * Commands that are blocking require the use of CdSync() to wait for the
+ * command to fully complete.
+ *
+ * CD-ROM control commands:
+ *
+ * | Command | Value | Parameter | Blocking | Description |
+ * | :------------ | ----: | :-------- | :------- | :----------------------------------------------------------------------------------------------------------------- |
+ * | CdlNop | 0x01 | | No | Also known as Getstat. Normally used to query the CD-ROM status, which is retrieved using CdStatus(). |
+ * | CdlSetloc | 0x02 | CdlLOC | No | Sets the seek target location, but does not seek. Actual seeking begins upon issuing a seek or read command. |
+ * | CdlPlay | 0x03 | uint8_t | No | Begins CD Audio playback. Parameter specifies an optional track number to play (some emulators do not support it). |
+ * | CdlForward | 0x04 | | No | Fast forward (CD Audio only), issue CdlPlay to stop fast forward. |
+ * | CdlBackward | 0x05 | | No | Rewind (CD Audio only), issue CdlPlay to stop rewind. |
+ * | CdlReadN | 0x06 | CdlLOC | No | Begin reading data sectors. Used in conjunction with CdReadCallback(). |
+ * | CdlStandby | 0x07 | | Yes | Also known as MotorOn, starts CD motor and remains idle. |
+ * | CdlStop | 0x08 | | Yes | Stops playback and the disc itself. |
+ * | CdlPause | 0x09 | | Yes | Stops playback or data reading, but leaves the disc on standby. |
+ * | CdlInit | 0x0A | | Yes | Initialize the CD-ROM controller. |
+ * | CdlMute | 0x0B | | No | Mutes CD audio (both DA and XA). |
+ * | CdlDemute | 0x0C | | No | Unmutes CD audio (both DA and XA). |
+ * | CdlSetfilter | 0x0D | CdlFILTER | No | Set XA audio filter. |
+ * | CdlSetmode | 0x0E | uint8_t | No | Set CD-ROM mode. |
+ * | CdlGetparam | 0x0F | | No | Returns current CD-ROM mode and file/channel filter settings. |
+ * | CdlGetlocL | 0x10 | | No | Returns current logical CD position, mode and XA filter parameters. |
+ * | CdlGetlocP | 0x11 | | No | Returns current physical CD position (using SubQ location data). |
+ * | CdlSetsession | 0x12 | uint8_t | Yes | Seek to specified session on a multi-session disc. |
+ * | CdlGetTN | 0x13 | | No | Get CD-ROM track count. |
+ * | CdlGetTD | 0x14 | uint8_t | No | Get specified track position. |
+ * | CdlSeekL | 0x15 | | Yes | Logical seek to target position, set by last CdlSetloc command. |
+ * | CdlSeekP | 0x16 | | Yes | Physical seek to target position, set by last CdlSetloc command. |
+ * | CdlTest | 0x19 | (varies) | Yes | Special test command not disclosed to official developers (see nocash documents for more info). |
+ * | CdlReadS | 0x1B | CdlLOC | No | Begin reading data sectors without pausing for error correction. |
+ *
+ * CD-ROM return values:
+ *
+ * | Command | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
+ * | :---------- | :---- | :---- | :----- | :--- | :------ | :------ | :--- | :----- |
+ * | CdlGetparam | stat | mode | 0 | file | channel | | | |
+ * | CdlGetlocL | amin | asec | aframe | mode | file | channel | sm | ci |
+ * | CdlGetlocP | track | index | min | sec | frame | amin | asec | aframe |
+ * | CdlGetTN | stat | first | last | | | | | |
+ * | CdlGetTD | stat | min | sec | | | | | |
+ *
+ * NOTE: Values are in BCD format.
+ *
+ * @param com Command value
+ * @param param Command parameters
+ * @param result Pointer of buffer to store result
+ * @return 1 if the command was issued successfully. Otherwise 0 if a
+ * previously issued command has not yet finished processing.
+ *
+ * @see CdSync(), CdControlF()
+ */
int CdControl(uint8_t com, const void *param, uint8_t *result);
+
+/**
+ * @brief Issues a CD-ROM command to the CD-ROM controller (blocking)
+ *
+ * @details This function works just like CdControl(), but blocks on blocking
+ * commands until said blocking command has completed.
+ *
+ * Because this function waits for an acknowledge interrupt from the CD-ROM
+ * controller, this function should not be used in a callback. Use CdControlF()
+ * instead.
+ *
+ * @param com Command value
+ * @param param Command parameters
+ * @param result Pointer of buffer to store result
+ * @return 1 if the command was issued successfully. Otherwise 0 if a
+ * previously issued command has not yet finished processing.
+ *
+ * @see CdControl(), CdControlF()
+ */
int CdControlB(uint8_t com, const void *param, uint8_t *result);
+
+/**
+ * @brief Issues a CD-ROM command to the CD-ROM controller (does not block)
+ *
+ * @details This function works more or less the same as CdControl() but it
+ * does not block even for the acknowledge interrupt from the CD-ROM
+ * controller. Since this function is non-blocking it can be used in a callback
+ * function.
+ *
+ * When using this function in a callback, a maximum of two commands can be
+ * issued at once and only the first command can have parameters. This is
+ * because the CD-ROM controller can only queue up to two commands and the
+ * parameter FIFO is not cleared until the last command is acknowledged. But
+ * waiting for acknowledgment in a callback is not possible.
+ *
+ * @param com Command value
+ * @param param Command parameters
+ * @return 1 if the command was issued successfully. Otherwise 0 if a
+ * previously issued command has not yet finished processing.
+ *
+ * @see CdControl()
+ */
int CdControlF(uint8_t com, const void *param);
+
+/**
+ * @brief Waits for blocking command or blocking status
+ *
+ * @details If mode is zero the function blocks if a blocking command was
+ * issued earlier until the command has finished. If mode is non-zero the
+ * function returns a command status value.
+ *
+ * A buffer specified by result will be set with the most recent CD-ROM status
+ * value from the last command issued.
+ *
+ * @param mode Mode
+ * @param result Pointer to store most recent CD-ROM status
+ * @return Command status is returned as one of the following definitions:
+ *
+ * | Definition | Description |
+ * | :----------- | :-------------------------- |
+ * | CdlComplete | Command completed. |
+ * | CdlNoIntr | No interrupt, command busy. |
+ * | CdlDiskError | CD-ROM error occurred. |
+ *
+ * @see CdControl()
+ */
int CdSync(int mode, uint8_t *result);
+
+/**
+ * @brief Sets a callback function
+ *
+ * @details Sets a callback with the specified function func. The callback is
+ * executed whenever a blocking command has completed.
+ *
+ * status is the CD-ROM status from the command that has completed processing.
+ * *result corresponds to the *result parameter on CdControl()/CdControlB() and
+ * returns the pointer to the buffer last set with that function.
+ *
+ * @param func Callback function
+ * @return Pointer to last callback function set, or NULL if none was set.
+ *
+ * @see CdControl, CdControlB, CdSync
+ */
uint32_t CdSyncCallback(CdlCB func);
+/**
+ * @brief Sets a callback function
+ *
+ * @details Sets a callback with the specified function func. The callback is
+ * executed whenever there's an incoming data sector from the CD-ROM controller
+ * during CdlReadN or CdlReadS. The pending sector data can be retrieved using
+ * CdGetSector().
+ *
+ * status is the CD-ROM status code from the last CD command that has finished
+ * processing. *result corresponds to the result pointer that was passed by the
+ * last CdControl()/CdControlB() call.
+ *
+ * This callback cannot be used in conjunction with CdRead() because it also
+ * uses this callback hook for its own internal use. The previously set
+ * callback is restored after read completion however.
+ *
+ * @param func Callback function
+ * @return Pointer to last callback function set, or NULL if none was set.
+ *
+ * @see CdControl(), CdControlB(), CdGetSector()
+ */
int CdReadyCallback(CdlCB func);
+
+/**
+ * @brief Gets data from the CD-ROM sector buffer
+ *
+ * @details Reads sector data that is pending in the CD-ROM sector buffer and
+ * stores it to *madr. Uses DMA to transfer the sector data and blocks very
+ * briefly until said transfer completes.
+ *
+ * This function is intended to be called within a callback routine set using
+ * CdReadyCallback() to fetch read data sectors from the CD-ROM sector buffer.
+ *
+ * @param madr Pointer to memory buffer to store sector data
+ * @param size Number of 32-bit words to retrieve
+ * @return Always 1.
+ *
+ * @see CdReadyCallback()
+ */
int CdGetSector(void *madr, int size);
+
+/**
+ * @brief Gets data from the CD-ROM sector buffer (non-blocking)
+ *
+ * @details Reads sector data that is pending in the CD-ROM sector buffer and
+ * stores it to *madr. Uses DMA to transfer the sector data in the background
+ * while keeping the CPU running (one word is transferred every 16 CPU cycles).
+ * Note this is much slower than the blocking transfer performed by
+ * CdGetSector().
+ *
+ * This function is intended to be called within a callback routine set using
+ * CdReadyCallback() to fetch read data sectors from the CD-ROM sector buffer.
+ * Since the transfer is asynchronous, CdDataSync() should be used to wait
+ * until the whole sector has been read.
+ *
+ * @param madr Pointer to memory buffer to store sector data
+ * @param size Number of 32-bit words to retrieve
+ * @return Always 1.
+ *
+ * @see CdReadyCallback(), CdDataSync()
+ */
int CdGetSector2(void *madr, int size);
+
+/**
+ * @brief Waits for sector transfer to finish
+ *
+ * @details If mode is zero the function blocks until any sector DMA transfer
+ * initiated by calling CdGetSector2() has finished. If mode is non-zero the
+ * function returns a boolean value representing whether a transfer is
+ * currently in progress.
+ *
+ * @param mode Mode
+ * @return 0 if the transfer has finished, 1 if it is still in progress or -1
+ * in case of a timeout.
+ *
+ * @see CdGetSector2()
+ */
int CdDataSync(int mode);
+/**
+ * @brief Locates a file in the CD-ROM file system
+ *
+ * @details Searches for the file specified by filename (path and name) in the
+ * CD-ROM file system and returns information about the file if found. The file
+ * information acquired will be stored to loc.
+ *
+ * Directories can be separated with slashes (/) or backslashes (\), a leading
+ * slash or backslash is optional but paths must be absolute. File version
+ * identifier (;1) at the end of the file name is also optional. File and
+ * directory names are case insensitive.
+ *
+ * The ISO9660 file system routines of libpsxcd do not support long file names
+ * currently. Only MS-DOS style 8.3 file names are supported; extensions such
+ * as Joliet and Rock Ridge are ignored.
+ *
+ * Upon calling this function for the first time, the ISO descriptor of the
+ * disc is read and the whole path table is cached into memory. Next the
+ * directory descriptor of the particular directory specified is loaded and
+ * cached to locate the file specified. The directory descriptor is kept in
+ * memory as long as consecutive searches target files stored in the same
+ * directory. When a file in another directory is searched for, the cached
+ * directory descriptor is unloaded and a new directory descriptor is read
+ * from the disc and cached. Therefore, locating files in the same directory is
+ * faster as the relevant directory descriptor is already in memory and no disc
+ * reads are issued.
+ *
+ * As of Revision 66 of PSn00bSDK, media change is detected by checking the
+ * CD-ROM lid open status bit and attempting to acknowledge it with a CdlNop
+ * command, to discriminate the status from an open lid or changed disc.
+ *
+ * @param loc Pointer to a CdlFILE struct to store file information
+ * @param filename Path and name of file to locate
+ * @return Pointer to the specified CdlFILE struct. Otherwise NULL is returned
+ * when the file is not found.
+ */
CdlFILE* CdSearchFile(CdlFILE *loc, const char *filename);
+/**
+ * @brief Reads sectors from the CD-ROM
+ *
+ * @details Reads the number of sectors specified by sectors from the location
+ * set by the last CdlSetloc command. The read sectors are then stored to the
+ * buffer specified by buf. mode specifies the CD-ROM mode to use for the read
+ * operation.
+ *
+ * The size of the sector varies depending on the sector read mode specified by
+ * mode. For standard data sectors it is multiples of 2048 bytes. If
+ * CdlModeSize0 is specified the sector size is 2328 bytes which includes the
+ * whole sector minus sync, address, mode and sub header bytes. CdlModeSize1
+ * makes the sector size 2340 which is the entire sector minus sync bytes.
+ * Ideally, CdlModeSpeed must be specified to read data sectors at double
+ * CD-ROM speed.
+ *
+ * This function blocks very briefly to issue the necessary commands to start
+ * CD-ROM reading. To determine if reading has completed use CdReadSync or
+ * CdReadCallback.
+ *
+ * @param sectors Number of sectors to read
+ * @param buf Pointer to buffer to store sectors read
+ * @param mode CD-ROM mode for reading
+ * @return Always returns 0 even on errors. This may change in future versions.
+ *
+ * @see CdReadSync(), CdReadCallback()
+ */
int CdRead(int sectors, uint32_t *buf, int mode);
+
+/**
+ * @brief Waits for CD-ROM read completion or returns read status
+ *
+ * @details This function works more or less like CdSync() but for CdRead(). If
+ * mode is zero the function blocks if CdRead() was issued earlier until
+ * reading has completed. If mode is non-zero the function completes
+ * immediately and returns number of sectors remaining.
+ *
+ * A buffer specified by result will be set with the most recent CD-ROM status
+ * value from the last read issued.
+ *
+ * @param mode Mode
+ * @param result Pointer to store most recent CD-ROM status
+ * @return Number of sectors remaining. If reading is completed, 0 is returned.
+ * On error, -1 is returned.
+ *
+ * @see CdRead()
+ */
int CdReadSync(int mode, uint8_t *result);
+
+/**
+ * @brief Sets a callback function for read completion
+ *
+ * @details Works much the same as CdSyncCallback() but for CdRead(). Sets a
+ * callback with the specified function func. The callback is executed whenever
+ * a read operation initiated by CdRead() has completed.
+ *
+ * status is the CD-ROM status from the command that has completed processing.
+ * *result points to a read result buffer.
+ *
+ * @param func Callback function
+ * @return Pointer to last callback function set, or NULL if none was set.
+ *
+ * @see CdRead()
+ */
uint32_t CdReadCallback(CdlCB func);
+/**
+ * @brief Gets the most recent CD-ROM status
+ *
+ * @details Returns the CD-ROM status since the last command issued. The status
+ * value is updated by most CD-ROM commands.
+ *
+ * To get the current CD-ROM status you can issue CdlNop commands at regular
+ * intervals to update the CD-ROM status this function returns.
+ *
+ * @return CD-ROM status from last command issued.
+ *
+ * @see CdControl()
+ */
int CdStatus(void);
+
+/**
+ * @brief Gets the last CD-ROM mode
+ *
+ * @details Returns the CD-ROM mode last set when issuing a CdlSetmode command.
+ * The function returns instantly as it merely returns a value stored in an
+ * internal variable.
+ *
+ * Since the value is simply a copy of what was specified from the last
+ * CdlSetmode command, the mode value may become inaccurate if CdlInit or other
+ * commands that affect the CD-ROM mode have been issued previously.
+ *
+ * @return Last CD-ROM mode value.
+ */
int CdMode(void);
-int CdMix(CdlATV *vol);
+/**
+ * @brief Sets CD-ROM mixer or attenuation
+ *
+ * @details Sets the CD-ROM attenuation parameters from a CdlATV struct
+ * specified by vol. The CD-ROM attenuation settings are different from the SPU
+ * CD-ROM volume.
+ *
+ * Normally used to configure CD and XA audio playback for mono or reverse
+ * stereo output, though this was rarely used in practice.
+ *
+ * @param vol CD-ROM attenuation parameters
+ * @return Always 1.
+ */
+int CdMix(const CdlATV *vol);
-/* ORIGINAL CODE */
+/**
+ * @brief Opens a directory on the CD-ROM file system
+ *
+ * @details Opens a directory on the CD-ROM file system to read the contents of
+ * a directory.
+ *
+ * A path name can use a slash (/) or backslash character (\) as the directory
+ * name separator. The path must be absolute and should begin with a slash or
+ * backslash. It should also not be prefixed with a device name (ie.
+ * \MYDIR1\MYDIR2 will work but not cdrom:\MYDIR1\MYDIR2). The file system
+ * routines in libpsxcd can query directory paths of up to 128 characters.
+ *
+ * The ISO9660 file system routines of libpsxcd do not support long file names
+ * currently. Only MS-DOS style 8.3 file names are supported; extensions such
+ * as Joliet and Rock Ridge are ignored.
+ *
+ * @param path Directory path to open
+ * @return Pointer to a CdlDIR context, NULL if an error occurred.
+ *
+ * @see CdReadDir(), CdCloseDir()
+ */
CdlDIR* CdOpenDir(const char* path);
+
+/**
+ * @brief Reads a directory entry from an open directory context
+ *
+ * @details Retrieves a file entry from an open directory context and stores it
+ * to a CdlFILE struct specified by file. Repeated calls of this function
+ * retrieve the next directory entry available until there are no more
+ * directory entries that follow.
+ *
+ * @param dir Open directory context (from CdOpenDir())
+ * @param file Pointer to a CdlFILE struct
+ * @return 1 if there are more directory entries that follow, otherwise 0.
+ *
+ * @see CdOpenDir()
+ */
int CdReadDir(CdlDIR* dir, CdlFILE* file);
+
+/**
+ * @brief Closes a directory context created by CdOpenDir()
+ *
+ * @details Closes a directory query context created by CdOpenDir(). Behavior
+ * is undefined when closing a previously closed directory context.
+ *
+ * @param dir Directory context
+ *
+ * @see CdOpenDir()
+ */
void CdCloseDir(CdlDIR* dir);
int CdGetVolumeLabel(char* label);
+/**
+ * @brief Sets a callback function for auto pause
+ *
+ * @details The callback function specified in *func is executed when an auto
+ * pause interrupt occurs when the current CD-ROM mode is set with CdlModeAP.
+ * Auto pause interrupt occurs when CD Audio playback reaches the end of the
+ * audio track. Specifying 0 disables the callback.
+ *
+ * This can be used to easily loop CD audio automatically without requiring any
+ * intervention in your software loop.
+ *
+ * @param func Callback function
+ * @return Pointer to the last callback function set. Zero if no callback was
+ * set previously.
+ *
+ * @see CdControl()
+ */
int* CdAutoPauseCallback(void(*func)());
-int CdIsoError();
+/**
+ * @brief Retrieves CD-ROM ISO9660 parser status
+ *
+ * @details Returns the status of the file system parser from the last call of
+ * a file system related function, such as CdSearchFile(), CdGetVolumeLabel()
+ * and CdOpenDir(). Use this function to retrieve the exact error that occurred
+ * when any of those functions fails.
+ *
+ * @return CD-ROM ISO9660 parser error code, as listed below:
+ *
+ * | Value | Description |
+ * | :-------------- | :-------------------------------------------------------------------------------------------------- |
+ * | CdlIsoOkay | File system parser okay. |
+ * | CdlIsoSeekError | Logical seek error occurred. May occur when attempting to query the filesystem on an audio-only CD. |
+ * | CdlIsoReadError | Read error occurred while reading the CD-ROM file system descriptor. |
+ * | CdlIsoInvalidFs | Disc does not contain a standard ISO9660 file system. |
+ * | CdlIsoLidOpen | Lid is open when attempting to parse the CD-ROM file system. |
+ */
+int CdIsoError(void);
+
+/**
+ * @brief Locates and parses the specified disc session
+ *
+ * @details Loads a session specified by session on a multi-session disc. Uses
+ * CdlSetsession to seek to the specified disc session, then scans the
+ * following 512 sectors for an ISO volume descriptor. If a volume descriptor
+ * is found the file system of that session is parsed and files inside the new
+ * session can be accessed using regular CD-ROM file and directory querying
+ * functions (CdSearchFile(), CdOpenDir(), CdReadDir(), CdCloseDir()). No
+ * special consideration is required when reading files from a new session.
+ *
+ * Loading a session takes 5-10 seconds to complete depending on the distance
+ * between the beginning of the disc and the start of the specified session. If
+ * the session specified does not exist, the disc will stop and would take
+ * 15-20 seconds to restart. The function does not support loading the most
+ * recent session of a disc automatically due to limitations of the CD-ROM
+ * hardware, so the user must be prompted to specify which session to load and
+ * to keep a record of the number of sessions that have been written to the
+ * disc.
+ *
+ * This function can also be used to update the Table of Contents (TOC) and
+ * reparse the file system regardless of the media change status by simply
+ * loading the first session. This is most useful for accessing files or audio
+ * tracks on a disc that was inserted using the swap trick method (it is
+ * recommended to stop the disc using CdlStop then restart it with CdlStandby
+ * after a button prompt for convenience, if you wish to implement this
+ * capability). Seeking to sessions other than the first session does not work
+ * with the swap trick however, so a chipped or unlockable console is desired
+ * for reading multi-session discs.
+ *
+ * NOTE: When the lid has been opened, the current CD-ROM session is reset to
+ * the first session on the disc. The console may produce an audible click
+ * sound when executing this function. This is normal, and the click sound is
+ * no different to the click heard on disc spin-up in older models of the
+ * console.
+ *
+ * @param session Session number (1 = first session)
+ * @return 0 on success. On failure due to open lid, bad session number or no
+ * volume descriptor found in specified session, returns -1 and return value of
+ * CdIsoError() is updated.
+ */
int CdLoadSession(int session);
#ifdef __cplusplus
diff --git a/libpsn00b/include/psxetc.h b/libpsn00b/include/psxetc.h
index fcfec06..ae4611e 100644
--- a/libpsn00b/include/psxetc.h
+++ b/libpsn00b/include/psxetc.h
@@ -3,20 +3,45 @@
* (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
*/
+/**
+ * @file psxetc.h
+ * @brief Interrupt management library header
+ *
+ * @details This library provides basic facilities (such as interrupt handling)
+ * used by all other PSn00bSDK libraries, as well as some additional
+ * functionality including a dynamic linker (whose API is however defined in a
+ * separate header).
+ */
+
#ifndef __PSXETC_H
#define __PSXETC_H
-/* Macros */
+/* IRQ and DMA channel definitions */
-// This macro is used internally by PSn00bSDK to log debug messages to a buffer
-// which is then printed to stdout when calling VSync().
-#ifdef NDEBUG
-#define _sdk_log(...)
-#define _sdk_dump_log()
-#else
-#define _sdk_log(...) _sdk_log_inner(__VA_ARGS__)
-#define _sdk_dump_log() _sdk_dump_log_inner()
-#endif
+typedef enum _IRQ_Channel {
+ IRQ_VBLANK = 0,
+ IRQ_GPU = 1,
+ IRQ_CD = 2,
+ IRQ_DMA = 3,
+ IRQ_TIMER0 = 4,
+ IRQ_TIMER1 = 5,
+ IRQ_TIMER2 = 6,
+ IRQ_SIO0 = 7,
+ IRQ_SIO1 = 8,
+ IRQ_SPU = 9,
+ IRQ_GUN = 10,
+ IRQ_PIO = 10
+} IRQ_Channel;
+
+typedef enum _DMA_Channel {
+ DMA_MDEC_IN = 0,
+ DMA_MDEC_OUT = 1,
+ DMA_GPU = 2,
+ DMA_CD = 3,
+ DMA_SPU = 4,
+ DMA_PIO = 5,
+ DMA_OTC = 6
+} DMA_Channel;
/* Public API */
@@ -24,16 +49,155 @@
extern "C" {
#endif
-void _sdk_log_inner(const char *fmt, ...);
-void _sdk_dump_log_inner(void);
+/**
+ * @brief Sets a callback for an interrupt.
+ *
+ * @details Registers a function to be called whenever the specified interrupt
+ * is fired. A previously registered callback can be removed by passing a null
+ * pointer instead. The IRQ controller is automatically configured to only
+ * enable interrupts for which a callback is registered.
+ *
+ * The callback will run in the exception handler's context, so it should be as
+ * fast as possible and shall not call any function that relies on interrupts
+ * being enabled. Each interrupt is acknowledged automatically before the
+ * callback is invoked.
+ *
+ * The following interrupt channels are available (the ones already used
+ * internally by libraries shall not be overridden to avoid conflicts):
+ *
+ * | ID | Channel | Used by |
+ * | --: | :--------------- | :-------------------------------------- |
+ * | 0 | IRQ_VBLANK | psxgpu (use VSyncCallback() instead) |
+ * | 1 | IRQ_GPU | |
+ * | 2 | IRQ_CD | psxcd (use CdReadyCallback() instead) |
+ * | 3 | IRQ_DMA | psxetc (use DMACallback() instead) |
+ * | 4 | IRQ_TIMER0 | |
+ * | 5 | IRQ_TIMER1 | |
+ * | 6 | IRQ_TIMER2 | |
+ * | 7 | IRQ_SIO0 | |
+ * | 8 | IRQ_SIO1 | psxsio (use SIO_ReadCallback() instead) |
+ * | 9 | IRQ_SPU | |
+ * | 10 | IRQ_GUN, IRQ_PIO | |
+ *
+ * WARNING: even though interrupts are acknowledged automatically at the IRQ
+ * controller side, most IRQ channels (1, 2, 3, 7, 8, 9) additionally require
+ * acknowledging at the device side, which must be done by the callback. The
+ * exact way to acknowledge interrupts varies for each device, however it
+ * usually involves setting or clearing a bit in a register. See the nocash
+ * documentation for more details.
+ *
+ * @param irq
+ * @param func
+ * @return Previously set callback for the channel or NULL
+ */
+void *InterruptCallback(IRQ_Channel irq, void (*func)(void));
+
+/**
+ * @brief Gets the callback for an interrupt.
+ *
+ * @details Returns a pointer to the callback currently registered to handle
+ * the specified interrupt, or a null pointer if none is set.
+ *
+ * @param irq
+ * @return Currently set callback for the channel or NULL
+ *
+ * @see InterruptCallback()
+ */
+void *GetInterruptCallback(IRQ_Channel irq);
-void *InterruptCallback(int irq, void (*func)(void));
-void *GetInterruptCallback(int irq);
-void *DMACallback(int dma, void (*func)(void));
-void *GetDMACallback(int dma);
+/**
+ * @brief Sets a callback for a DMA interrupt.
+ *
+ * @details Registers a function to be called whenever the specified DMA
+ * channel goes from busy to idle, i.e. when a transfer is completed. A
+ * previously registered callback can be removed by passing a null pointer
+ * instead. The DMA controller is automatically configured to only enable DMA
+ * interrupts for which a callback is registered.
+ *
+ * This function uses InterruptCallback() to register a "master handler" for
+ * DMA interrupts, which then dispatches the IRQ to the appropriate callback
+ * depending on the channel that triggered it.
+ *
+ * The callback will run in the exception handler's context, so it should be as
+ * fast as possible and shall not call any function that relies on interrupts
+ * being enabled. Each interrupt is acknowledged automatically before the
+ * callback is invoked.
+ *
+ * The following DMA channels are available (the ones already used internally
+ * by libraries shall not be overridden to avoid conflicts):
+ *
+ * | ID | Channel | Used by |
+ * | --: | :----------- | :-------------------------------------- |
+ * | 0 | DMA_MDEC_IN | |
+ * | 1 | DMA_MDEC_OUT | |
+ * | 2 | DMA_GPU | psxgpu (use DrawSyncCallback() instead) |
+ * | 3 | DMA_CD | |
+ * | 4 | DMA_SPU | |
+ * | 5 | DMA_PIO | |
+ * | 6 | DMA_OTC | |
+ *
+ * @param dma
+ * @param func
+ * @return Previously set callback for the channel or NULL
+ */
+void *DMACallback(DMA_Channel dma, void (*func)(void));
+/**
+ * @brief Gets the callback for a DMA interrupt.
+ *
+ * @details Returns a pointer to the callback currently registered to handle
+ * the specified DMA interrupt, or a null pointer if none is set.
+ *
+ * @param dma
+ * @return Currently set callback for the channel or NULL
+ *
+ * @see DMACallback()
+ */
+void *GetDMACallback(DMA_Channel dma);
+
+/**
+ * @brief Initializes the interrupt dispatcher.
+ *
+ * @details Sets up the interrupt handling system, hooks the BIOS to dispatch
+ * interrupts to the library and clears all registered callbacks. This function
+ * must be called once at the beginning of the program, prior to registering
+ * any IRQ or DMA callbacks.
+ *
+ * ResetCallback() is called by psxgpu's ResetGraph(), so invoking it manually
+ * is usually not required. Calling ResetCallback() after ResetGraph() will
+ * actually result in improper initialization, as ResetGraph() registers
+ * several callbacks used internally by psxgpu.
+ *
+ * @return 0 or -1 if the dispatcher was already initialized
+ */
int ResetCallback(void);
+
+/**
+ * @brief Restores the interrupt dispatcher.
+ *
+ * @details Restores the IRQ and DMA controller state saved by StopCallback()
+ * and reinstalls BIOS hooks for interrupt dispatching. All callbacks
+ * previously set before StopCallback() was called are preserved.
+ *
+ * @see StopCallback()
+ */
void RestartCallback(void);
+
+/**
+ * @brief Temporarily disables the interrupt dispatcher.
+ *
+ * @details Saves the state of the IRQ and DMA controllers, then disables them
+ * and removes BIOS hooks. This function must be called prior to launching a
+ * new executable or DLL that calls ResetCallback() or ResetGraph(), or an
+ * executable not built with PSn00bSDK that uses its own interrupt handling
+ * subsystem (such as a retail game). The saved state can be restored after the
+ * executable returns using RestartCallback().
+ *
+ * Note that interrupts are (obviously) disabled until RestartCallback() is
+ * called.
+ *
+ * @see RestartCallback()
+ */
void StopCallback(void);
#ifdef __cplusplus
diff --git a/libpsn00b/include/psxgpu.h b/libpsn00b/include/psxgpu.h
index f2568b0..68e3bff 100644
--- a/libpsn00b/include/psxgpu.h
+++ b/libpsn00b/include/psxgpu.h
@@ -31,65 +31,65 @@ typedef enum _GPU_VideoMode {
/* Structure macros */
#define setVector(v, _x, _y, _z) \
- (v)->vx = _x, (v)->vy = _y, (v)->vz = _z
+ (v)->vx = (_x), (v)->vy = (_y), (v)->vz = (_z)
#define setRECT(v, _x, _y, _w, _h) \
- (v)->x = _x, (v)->y = _y, (v)->w = _w, (v)->h = _h
+ (v)->x = (_x), (v)->y = (_y), (v)->w = (_w), (v)->h = (_h)
#define setTPage(p, tp, abr, x, y) ((p)->tpage = getTPage(tp, abr, x, y))
#define setClut(p, x, y) ((p)->clut = getClut(x, y))
-#define setRGB0(p, r, g, b) ((p)->r0 = r, (p)->g0 = g, (p)->b0 = b)
-#define setRGB1(p, r, g, b) ((p)->r1 = r, (p)->g1 = g, (p)->b1 = b)
-#define setRGB2(p, r, g, b) ((p)->r2 = r, (p)->g2 = g, (p)->b2 = b)
-#define setRGB3(p, r, g, b) ((p)->r3 = r, (p)->g3 = g, (p)->b3 = b)
+#define setRGB0(p, r, g, b) ((p)->r0 = (r), (p)->g0 = (g), (p)->b0 = (b))
+#define setRGB1(p, r, g, b) ((p)->r1 = (r), (p)->g1 = (g), (p)->b1 = (b))
+#define setRGB2(p, r, g, b) ((p)->r2 = (r), (p)->g2 = (g), (p)->b2 = (b))
+#define setRGB3(p, r, g, b) ((p)->r3 = (r), (p)->g3 = (g), (p)->b3 = (b))
#define setXY0(p, _x0, _y0) \
- (p)->x0 = _x0, (p)->y0 = _y0
+ (p)->x0 = (_x0), (p)->y0 = (_y0)
#define setXY2(p, _x0, _y0, _x1, _y1) \
- (p)->x0 = _x0, (p)->y0 = _y0, \
- (p)->x1 = _x1, (p)->y1 = _y1
+ (p)->x0 = (_x0), (p)->y0 = (_y0), \
+ (p)->x1 = (_x1), (p)->y1 = (_y1)
#define setXY3(p, _x0, _y0, _x1, _y1, _x2, _y2) \
- (p)->x0 = _x0, (p)->y0 = _y0, \
- (p)->x1 = _x1, (p)->y1 = _y1, \
- (p)->x2 = _x2, (p)->y2 = _y2
+ (p)->x0 = (_x0), (p)->y0 = (_y0), \
+ (p)->x1 = (_x1), (p)->y1 = (_y1), \
+ (p)->x2 = (_x2), (p)->y2 = (_y2)
#define setXY4(p, _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3) \
- (p)->x0 = _x0, (p)->y0 = _y0, \
- (p)->x1 = _x1, (p)->y1 = _y1, \
- (p)->x2 = _x2, (p)->y2 = _y2, \
- (p)->x3 = _x3, (p)->y3 = _y3
+ (p)->x0 = (_x0), (p)->y0 = (_y0), \
+ (p)->x1 = (_x1), (p)->y1 = (_y1), \
+ (p)->x2 = (_x2), (p)->y2 = (_y2), \
+ (p)->x3 = (_x3), (p)->y3 = (_y3)
#define setWH(p, _w, _h) \
- (p)->w = _w, (p)->h = _h
+ (p)->w = (_w), (p)->h = (_h)
#define setXYWH(p, _x0, _y0, _w, _h) \
- (p)->x0 = _x0, (p)->y0 = _y0, \
- (p)->x1 = (_x0 + (_w)), (p)->y1 = _y0, \
- (p)->x2 = _x0, (p)->y2 = (_y0 + (_h)), \
- (p)->x3 = (_x0 + (_w)), (p)->y3 = (_y0 + (_h))
+ (p)->x0 = (_x0), (p)->y0 = (_y0), \
+ (p)->x1 = ((_x0) + (_w)), (p)->y1 = (_y0), \
+ (p)->x2 = (_x0), (p)->y2 = ((_y0) + (_h)), \
+ (p)->x3 = ((_x0) + (_w)), (p)->y3 = ((_y0) + (_h))
#define setUV0(p, _u0, _v0) \
- (p)->u0 = _u0, (p)->v0 = _v0
+ (p)->u0 = (_u0), (p)->v0 = (_v0)
#define setUV3(p, _u0, _v0, _u1, _v1, _u2, _v2) \
- (p)->u0 = _u0, (p)->v0 = _v0, \
- (p)->u1 = _u1, (p)->v1 = _v1, \
- (p)->u2 = _u2, (p)->v2 = _v2
+ (p)->u0 = (_u0), (p)->v0 = (_v0), \
+ (p)->u1 = (_u1), (p)->v1 = (_v1), \
+ (p)->u2 = (_u2), (p)->v2 = (_v2)
#define setUV4(p, _u0, _v0, _u1, _v1, _u2, _v2, _u3, _v3) \
- (p)->u0 = _u0, (p)->v0 = _v0, \
- (p)->u1 = _u1, (p)->v1 = _v1, \
- (p)->u2 = _u2, (p)->v2 = _v2, \
- (p)->u3 = _u3, (p)->v3 = _v3
+ (p)->u0 = (_u0), (p)->v0 = (_v0), \
+ (p)->u1 = (_u1), (p)->v1 = (_v1), \
+ (p)->u2 = (_u2), (p)->v2 = (_v2), \
+ (p)->u3 = (_u3), (p)->v3 = (_v3)
#define setUVWH(p, _u0, _v0, _w, _h) \
- (p)->u0 = _u0, (p)->v0 = _v0, \
- (p)->u1 = (_u0 + (_w)), (p)->v1 = _v0, \
- (p)->u2 = _u0, (p)->v2 = (_v0 + (_h)), \
- (p)->u3 = (_u0 + (_w)), (p)->v3 = (_v0 + (_h))
+ (p)->u0 = (_u0), (p)->v0 = (_v0), \
+ (p)->u1 = ((_u0) + (_w)), (p)->v1 = (_v0), \
+ (p)->u2 = (_u0), (p)->v2 = ((_v0) + (_h)), \
+ (p)->u3 = ((_u0) + (_w)), (p)->v3 = ((_v0) + (_h))
/* Primitive handling macros */
@@ -113,8 +113,13 @@ typedef enum _GPU_VideoMode {
#define setShadeTex(p, tge) \
((tge) ? (getcode(p) |= 1) : (getcode(p) &= ~1))
-#define getTPage(tp, abr, x, y) \
- ((((x) & 0x3ff) >> 6) | (((y) >> 8) << 4) | (((abr) & 3) << 5) | (((tp) & 3) << 7))
+#define getTPage(tp, abr, x, y) ( \
+ (((x) / 64) & 15) | \
+ ((((y) / 256) & 1) << 4) | \
+ (((abr) & 3) << 5) | \
+ (((tp) & 3) << 7) | \
+ ((((y) / 512) & 1) << 11) \
+)
#define getClut(x, y) (((y) << 6) | (((x) >> 4) & 0x3f))
@@ -151,24 +156,42 @@ typedef enum _GPU_VideoMode {
#define setDrawTPage(p, dfe, dtd, tpage) \
setlen(p, 1), \
- (p)->code[0] = 0xe1000000 | tpage | (dfe << 10) | (dtd << 9)
+ (p)->code[0] = (0xe1000000 | \
+ (tpage) | \
+ ((dtd) << 9) | \
+ ((dfe) << 10) \
+ )
#define setDrawOffset(p, _x, _y) \
setlen(p, 1), \
- (p)->code[0] = 0xe5000000 | (_x & 0x3ff) | ((_y & 0x3ff) << 11)
+ (p)->code[0] = (0xe5000000 | \
+ ((_x) % 1024) | \
+ (((_y) % 1024) << 11) \
+ )
#define setDrawMask(p, sb, mt) \
setlen(p, 1), \
- (p)->code[0] = 0xe6000000 | sb | (mt << 1)
+ (p)->code[0] = (0xe6000000 | (sb) | ((mt) << 1))
#define setDrawArea(p, r) \
setlen(p, 2), \
- (p)->code[0] = 0xe3000000 | ((r)->x & 0x3ff) | (((r)->y & 0x1ff) << 10), \
- (p)->code[1] = 0xe4000000 | (((r)->x + (r)->w - 1) & 0x3ff) | ((((r)->y + (r)->h - 1) & 0x1ff) << 10)
+ (p)->code[0] = (0xe3000000 | \
+ ((r)->x % 1024) | \
+ (((r)->y % 1024) << 10) \
+ ), \
+ (p)->code[1] = (0xe4000000 | \
+ (((r)->x + (r)->w - 1) % 1024) | \
+ ((((r)->y + (r)->h - 1) % 1024) << 10) \
+ )
#define setTexWindow(p, r) \
setlen(p, 1), \
- (p)->code[0] = 0xe2000000 | ((r)->w & 0x1f) | (((r)->h & 0x1f) << 5) | (((r)->x & 0x1f) << 10) | (((r)->y & 0x1f) << 15)
+ (p)->code[0] = (0xe2000000 | \
+ ((r)->w % 32) | \
+ (((r)->h % 32) << 5) | \
+ (((r)->x % 32) << 10) | \
+ (((r)->y % 32) << 15) \
+ )
/* Primitive structure definitions */
diff --git a/libpsn00b/include/psxgte.h b/libpsn00b/include/psxgte.h
index ddc988d..3c1d5a4 100644
--- a/libpsn00b/include/psxgte.h
+++ b/libpsn00b/include/psxgte.h
@@ -3,6 +3,17 @@
* (C) 2019-2022 Lameguy64 - MPL licensed
*/
+/**
+ * @file psxgte.h
+ * @brief GTE library header
+ *
+ * @details The Geometry Transformation Engine, often referred to as the GTE,
+ * is most responsible for providing 3D capabilities to the PS1. This is
+ * effectively an all-integer math co-processor connected directly to the CPU,
+ * as it is accessed using COP2 and related MIPS instructions to access
+ * registers and issue commands to the GTE.
+ */
+
#ifndef __PSXGTE_H
#define __PSXGTE_H
@@ -35,50 +46,216 @@ typedef struct _DVECTOR {
/* Public API */
+#define csin(a) isin(a)
+#define ccos(a) icos(a)
+#define rsin(a) isin(a)
+#define rcos(a) icos(a)
+
#ifdef __cplusplus
extern "C" {
#endif
-void InitGeom(void);
-
-// Integer SIN/COS functions (4096 = 360 degrees)
-// Does not use tables!
+/**
+ * @brief Gets sine of angle (fixed-point)
+ *
+ * @details Returns the sine of angle a.
+ *
+ * @param a Angle in fixed-point format (4096 = 360 degrees)
+ * @return Sine value in 20.12 fixed-point format (4096 = 1.0).
+ */
int isin(int a);
+
+/**
+ * @brief Gets cosine of angle (fixed-point)
+ *
+ * @details Returns the cosine of angle a.
+ *
+ * @param a Angle in fixed-point format (4096 = 360 degrees)
+ * @return Cosine value in 20.12 fixed-point format (4096 = 1.0).
+ */
int icos(int a);
-// Higher precision integer sin/cos functions (131072 = 360 degrees)
-// Does not use tables!
+/**
+ * @brief Gets sine of angle (fixed-point, high precision version)
+ *
+ * @details Returns the sine of angle a.
+ *
+ * @param a Angle in fixed-point format (131072 = 360 degrees)
+ * @return Sine value in 20.12 fixed-point format (4096 = 1.0).
+ */
int hisin(int a);
+
+/**
+ * @brief Gets cosine of angle (fixed-point, high precision version)
+ *
+ * @details Returns the cosine of angle a.
+ *
+ * @param a Angle in fixed-point format (131072 = 360 degrees)
+ * @return Cosine value in 20.12 fixed-point format (4096 = 1.0).
+ */
int hicos(int a);
+/**
+ * @brief Initializes the GTE
+ *
+ * @details Resets, enables and initializes the GTE. Must be called prior to
+ * using any GTE function or macro.
+ */
+void InitGeom(void);
+
+/**
+ * @brief Gets square root (fixed-point)
+ *
+ * @details Returns the square root of value v.
+ *
+ * @param v Value in 20.12 fixed-point format (4096 = 1.0)
+ * @return Square root in 20.12 fixed-point format (4096 = 1.0).
+ */
+int SquareRoot12(int v);
+
+/**
+ * @brief Gets square root (integer)
+ *
+ * @details Returns the square root of value v.
+ *
+ * @param v Value in integer format
+ * @return Square root in integer format.
+ */
+int SquareRoot0(int v);
+
+/**
+ * @brief Pushes the current GTE matrix to the matrix stack
+ *
+ * @details Pushes the current GTE rotation matrix and translation vector to
+ * the internal matrix stack. Only one matrix stack level is currently
+ * supported.
+ */
void PushMatrix(void);
+
+/**
+ * @brief Pops the last matrix pushed into the matrix stack back to the GTE
+ *
+ * @details Pops the last inserted matrix in the internal matrix stack back to
+ * the GTE. Only one matrix stack level is currently supported.
+ */
void PopMatrix(void);
+/**
+ * @brief Defines the rotation matrix of a MATRIX
+ *
+ * @details Defines the rotation matrix of m from rotation coordinates of r.
+ * The matrix is computed as follows:
+ *
+ * [ 1 0 0 ] [ cy 0 sy] [ cz -sz 0 ]
+ * [ 0 cx -sx] * [ 0 1 0 ] * [ sz cz 0 ]
+ * [ 0 sx cx] [-sy 0 cy] [ 0 0 1 ]
+ *
+ * where:
+ *
+ * sx = sin(r.x) sy = sin(r.y) sz = sin(r.z)
+ * cx = cos(r.x) cy = cos(r.y) cz = cos(r.z)
+ *
+ * @param r Rotation vector (input)
+ * @param m Matrix (output)
+ * @return Pointer to m.
+ *
+ * @see TransMatrix(), CompMatrixLV()
+ */
MATRIX *RotMatrix(SVECTOR *r, MATRIX *m);
+
+/**
+ * @brief Defines the rotation matrix of a MATRIX (high precision version)
+ *
+ * @details Defines the rotation matrix of m from rotation coordinates of r.
+ * This function is a variant of RotMatrix() that uses hisin()/hicos() instead
+ * of isin()/icos().
+ *
+ * See RotMatrix() for more details.
+ *
+ * @param r Rotation vector (input)
+ * @param m Matrix (output)
+ * @return Pointer to m.
+ *
+ * @see RotMatrix()
+ */
MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m);
+/**
+ * @brief Defines the translation vector of a MATRIX
+ *
+ * @details Simply sets the translation vector of MATRIX m. To perform
+ * accumulative translation operations, see CompMatrixLV().
+ *
+ * @param m Matrix (output)
+ * @param r Translation vector (input)
+ * @return Pointer to m.
+ *
+ * @see RotMatrix(), CompMatrixLV()
+ */
MATRIX *TransMatrix(MATRIX *m, VECTOR *r);
+
MATRIX *ScaleMatrix(MATRIX *m, VECTOR *s);
MATRIX *ScaleMatrixL(MATRIX *m, VECTOR *s);
MATRIX *MulMatrix(MATRIX *m0, MATRIX *m1);
MATRIX *MulMatrix0(MATRIX *m0, MATRIX *m1, MATRIX *m2);
+/**
+ * @brief Composite coordinate matrix transform
+ *
+ * @details Performs vector multiply by matrix with vector addition from v0 to
+ * the translation vector of v1. Then, multiplies the rotation matrix of v0 by
+ * the rotation matrix of v1. The result of both operations is then stored in
+ * v2. Replaces the current GTE rotation matrix and translation vector with v0.
+ *
+ * Often used to adjust the matrix (includes rotation and translation) of an
+ * object relative to a world matrix, so the object would render relative to
+ * the world matrix.
+ *
+ * @param v0 Input matrix A
+ * @param v1 Input matrix B
+ * @param v2 Output matrix
+ * @return Pointer to v2.
+ */
MATRIX *CompMatrixLV(MATRIX *v0, MATRIX *v1, MATRIX *v2);
+
+/**
+ * @brief Multiplies a vector by a matrix
+ *
+ * @details Multiplies vector v0 with matrix m, result is stored to v1.
+ * Replaces the current GTE rotation matrix and translation vector with m.
+ *
+ * Often used to calculate a translation vector in relation to the rotation
+ * matrix for first person or vector camera perspectives.
+ *
+ * @param m Input matrix
+ * @param v0 Input vector
+ * @param v1 Output vector
+ * @return Pointer to v1.
+ */
VECTOR *ApplyMatrixLV(MATRIX *m, VECTOR *v0, VECTOR *v1);
+/**
+ * @brief Normalizes a VECTOR into SVECTOR format
+ *
+ * Normalizes a 32-bit vector into a 16-bit vector in 4.12 fixed-point format
+ * (4096 = 1.0, 2048 = 0.5).
+ *
+ * @param v0 Input (raw) 32-bit vector
+ * @param v1 Output (normalized) 16-bit vector
+ */
void VectorNormalS(VECTOR *v0, SVECTOR *v1);
+/**
+ * @brief Calculates the square of a VECTOR
+ *
+ * @details Calculates the square of vector v0 and stores the result to v1.
+ *
+ * @param v0 Input vector
+ * @param v1 Output vector
+ */
void Square0(VECTOR *v0, VECTOR *v1);
-int SquareRoot12(int v);
-int SquareRoot0(int v);
-
-#define csin(a) isin(a)
-#define ccos(a) icos(a)
-#define rsin(a) isin(a)
-#define rcos(a) icos(a)
-
#ifdef __cplusplus
}
#endif
diff --git a/libpsn00b/include/psxpress.h b/libpsn00b/include/psxpress.h
index 2106a53..dc1d52c 100644
--- a/libpsn00b/include/psxpress.h
+++ b/libpsn00b/include/psxpress.h
@@ -3,6 +3,23 @@
* (C) 2022 spicyjpeg - MPL licensed
*/
+/**
+ * @file psxpress.h
+ * @brief MDEC library header
+ *
+ * @details This is a fully original reimplementation of the official SDK's
+ * "data compression" library. This library is made up of two parts, the MDEC
+ * API and functions to decompress Huffman-encoded bitstreams (.BS files, or
+ * frames in .STR files) into data to be fed to the MDEC. Two different
+ * implementations of the latter are provided, one using the GTE and scratchpad
+ * region and an older one using a large lookup table in main RAM.
+ *
+ * FMV playback is not part of this library per se, but can be implemented using
+ * the APIs defined here alongside some code to stream data from the CD drive.
+ *
+ * Currently only version 1 and 2 .BS files are supported.
+ */
+
#ifndef __PSXPRESS_H
#define __PSXPRESS_H
@@ -70,7 +87,9 @@ extern "C" {
#endif
/**
- * @brief Resets the MDEC and aborts any MDEC DMA transfers. If mode = 0, the
+ * @brief Resets and optionally initializes the MDEC.
+ *
+ * @details Resets the MDEC and aborts any MDEC DMA transfers. If mode = 0, the
* default IDCT matrix and quantization tables are also loaded and the MDEC is
* put into color output mode, discarding any custom environment previously set
* with DecDCTPutEnv().
@@ -78,15 +97,19 @@ extern "C" {
* DecDCTReset(0) must be called at least once prior to using the MDEC.
*
* @param mode
+ *
+ * @see DecDCTPutEnv()
*/
void DecDCTReset(int mode);
/**
- * @brief Uploads the specified decoding environment's quantization tables and
- * IDCT matrix to the MDEC, or restores the default tables if a null pointer is
- * passed. Calling this function is normally not required as DecDCTReset(0)
- * initializes the MDEC with the default tables, but it may be useful for e.g.
- * decoding JPEG or a format with custom quantization tables.
+ * @brief Loads default or custom quantization and IDCT tables into the MDEC.
+ *
+ * @details Uploads the specified decoding environment's quantization tables
+ * and IDCT matrix to the MDEC, or restores the default tables if a null
+ * pointer is passed. Calling this function is normally not required as
+ * DecDCTReset(0) initializes the MDEC with the default tables, but it may be
+ * useful for e.g. decoding JPEG or a format with custom quantization tables.
*
* The second argument, not present in the official SDK, specifies whether the
* MDEC shall be put into color (0) or monochrome (1) output mode. In
@@ -103,12 +126,16 @@ void DecDCTReset(int mode);
void DecDCTPutEnv(const DECDCTENV *env, int mono);
/**
- * @brief Sets up the MDEC to start fetching and decoding the given buffer.
- * This function is meant to be used with buffers generated by DecDCTvlc(): the
- * first 32-bit word of the buffer is initially copied to the MDEC0 register,
- * then all subsequent data is read in 128-byte (32-word) chunks. The length of
- * the stream (in 32-bit units, minus the first word) is encoded by DecDCTvlc()
- * in the lower 16 bits of the first word.
+ * @brief Feeds the MDEC with a run-length code buffer from the specified
+ * location.
+ *
+ * @details Sets up the MDEC to start fetching and decoding the given buffer.
+ * This function is meant to be used with buffers generated by DecDCTvlc(),
+ * DecDCTvlc2() or their variants: the first 32-bit word of the buffer is
+ * initially copied to the MDEC0 register, then all subsequent data is read in
+ * 128-byte (32-word) chunks. The length of the stream (in 32-bit units, minus
+ * the first word) is encoded by DecDCTvlc() in the lower 16 bits of the first
+ * word.
*
* The mode argument optionally specifies the output color depth (0 for 16bpp,
* 1 for 24bpp) if not already set in the first word. Passing -1 will result in
@@ -117,11 +144,15 @@ void DecDCTPutEnv(const DECDCTENV *env, int mono);
*
* @param data
* @param mode DECDCT_MODE_* or -1
+ *
+ * @see DecDCTinRaw(), DecDCTinSync()
*/
void DecDCTin(const uint32_t *data, int mode);
/**
- * @brief Configures the MDEC to automatically fetch data (the input stream,
+ * @brief Feeds the MDEC with raw data from the specified location.
+ *
+ * @details Configures the MDEC to automatically fetch data (the input stream,
* IDCT matrix or quantization tables) in 128-byte (32-word) chunks from the
* specified address in main RAM. The transfer is stopped, and any callback
* registered with DMACallback(0) is fired, once a certain number of 32-bit
@@ -135,13 +166,17 @@ void DecDCTin(const uint32_t *data, int mode);
*
* @param data
* @param length Number of 32-bit words to read (must be multiple of 32)
+ *
+ * @see DecDCTin(), DecDCTinSync()
*/
void DecDCTinRaw(const uint32_t *data, size_t length);
/**
- * @brief Waits for the MDEC to finish decoding the input stream (if mode = 0)
- * or returns whether it is busy (if mode = 1). MDEC commands can be issued
- * only when the MDEC isn't busy.
+ * @brief Waits for an MDEC input transfer to finish or returns its status.
+ *
+ * @details Waits for the MDEC to finish decoding the input stream (if
+ * mode = 0) or returns whether it is busy (if mode = 1). MDEC commands can be
+ * issued only when the MDEC isn't busy.
*
* WARNING: DecDCTinSync(0) might time out and return -1 if the MDEC can't
* output decoded data, e.g. if the length passed DecDCTout() was too small and
@@ -155,7 +190,9 @@ void DecDCTinRaw(const uint32_t *data, size_t length);
int DecDCTinSync(int mode);
/**
- * @brief Configures the MDEC to automatically transfer decoded image data in
+ * @brief Writes image data decoded by the MDEC to the specified location.
+ *
+ * @details Configures the MDEC to automatically transfer decoded image data in
* 128-byte (32-word) chunks to the specified address in main RAM. MDEC
* operation is paused once a certain number of 32-bit words have been output
* and can be resumed by calling DecDCTout() again: the MDEC will continue
@@ -168,12 +205,16 @@ int DecDCTinSync(int mode);
*
* @param data
* @param length Number of 32-bit words to output (must be multiple of 32)
+ *
+ * @see DecDCToutSync()
*/
void DecDCTout(uint32_t *data, size_t length);
/**
- * @brief Waits until the transfer set up by DecDCTout() finishes (if mode = 0)
- * or returns whether it is still in progress (if mode = 1).
+ * @brief Waits for an MDEC output transfer to finish or returns its status.
+ *
+ * @details Waits until the transfer set up by DecDCTout() finishes (if
+ * mode = 0) or returns whether it is still in progress (if mode = 1).
*
* WARNING: DecDCToutSync(0) might time out and return -1 if the MDEC is unable
* to consume enough input data in order to produce the desired amount of data.
@@ -186,7 +227,9 @@ void DecDCTout(uint32_t *data, size_t length);
int DecDCToutSync(int mode);
/**
- * @brief Begins decompressing the contents of a .BS file (or of a single .STR
+ * @brief Decompresses or begins decompressing a .BS file into MDEC codes.
+ *
+ * @details Begins decompressing the contents of a .BS file (or of a single STR
* frame) into a buffer that can be passed to DecDCTin(). This function uses a
* small (<1 KB) lookup table combined with the GTE to accelerate the process;
* performance is roughly on par with DecDCTvlcStart2() if the lookup table
@@ -212,11 +255,15 @@ int DecDCToutSync(int mode);
* @param max_size Maximum number of 32-bit words to output
* @param bs
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcContinue(), DecDCTvlcCopyTable()
*/
int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs);
/**
- * @brief Resumes the decompression process started by DecDCTvlcStart(). The
+ * @brief Resumes or finishes decompressing a .BS file into MDEC codes.
+ *
+ * @details Resumes the decompression process started by DecDCTvlcStart(). The
* state of the decompressor is contained entirely in the VLC_Context structure
* so an arbitrary number of bitstreams can be decoded concurrently (although
* the limited CPU power makes it impractical to do so) by keeping a separate
@@ -236,14 +283,18 @@ int DecDCTvlcStart(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint3
* @param buf
* @param max_size Maximum number of 32-bit words to output
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcStart()
*/
int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size);
/**
- * A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for compatibility
- * with the official SDK. This function uses an internal context; additionally,
- * the maximum output buffer size is not passed as an argument but is instead
- * set by calling DecDCTvlcSize().
+ * @brief Decompresses a .BS file into MDEC codes.
+ *
+ * @details A wrapper around DecDCTvlcStart() and DecDCTvlcContinue() for
+ * compatibility with the official SDK. This function uses an internal context;
+ * additionally, the maximum output buffer size is not passed as an argument
+ * but is instead set by calling DecDCTvlcSize().
*
* This function behaves identically to DecDCTvlcContinue() if bs = 0 and
* DecDCTvlcStart() otherwise.
@@ -257,21 +308,34 @@ int DecDCTvlcContinue(VLC_Context *ctx, uint32_t *buf, size_t max_size);
* @param bs Pointer to bitstream data or 0 to resume decoding
* @param buf
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcSize(), DecDCTvlcCopyTable()
*/
int DecDCTvlc(const uint32_t *bs, uint32_t *buf);
/**
- * @brief Sets the maximum number of 32-bit words that a single call to
+ * @brief Sets the maximum amount of data to be decompressed.
+ *
+ * @details Sets the maximum number of 32-bit words that a single call to
* DecDCTvlc() will output. If size = 0, the entire frame will always be
* decoded in one shot.
*
+ * Note that DecDCTvlcStart() and DecDCTvlcContinue() do not use the value set
+ * by this function and instead expect the maximum size to be passed as an
+ * argument.
+ *
* @param size Maximum number of 32-bit words to output
- * @return Previously set value
+ * @return Previously set value
+ *
+ * @see DecDCTvlc()
*/
size_t DecDCTvlcSize(size_t size);
/**
- * @brief Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(),
+ * @brief Copies the lookup table used by the .BS decompressor to the scratchpad
+ * region.
+ *
+ * @details Copies the small (<1 KB) lookup table used by DecDCTvlcContinue(),
* DecDCTvlcStart() and DecDCTvlc() (a DECDCTTAB structure) to the specified
* address. A copy of this table is always present in main RAM, however this
* function can be used to copy it to the scratchpad region to boost
@@ -287,7 +351,10 @@ size_t DecDCTvlcSize(size_t size);
void DecDCTvlcCopyTable(DECDCTTAB *addr);
/**
- * @brief Begins decompressing the contents of a .BS file (or of a single .STR
+ * @brief Decompresses or begins decompressing a .BS file into MDEC codes
+ * (alternate implementation).
+ *
+ * @details Begins decompressing the contents of a .BS file (or of a single STR
* frame) into a buffer that can be passed to DecDCTin(). This function uses a
* large (34 KB) lookup table that must be loaded into main RAM beforehand by
* calling DecDCTvlcBuild(), but does not use the GTE nor the scratchpad.
@@ -311,11 +378,16 @@ void DecDCTvlcCopyTable(DECDCTTAB *addr);
* @param max_size Maximum number of 32-bit words to output
* @param bs
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcContinue2(), DecDCTvlcBuild()
*/
int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint32_t *bs);
/**
- * @brief Resumes the decompression process started by DecDCTvlcStart2(). The
+ * @brief Resumes or finishes decompressing a .BS file into MDEC codes
+ * (alternate implementation).
+ *
+ * @details Resumes the decompression process started by DecDCTvlcStart2(). The
* state of the decompressor is contained entirely in the VLC_Context structure
* so an arbitrary number of bitstreams can be decoded concurrently (although
* the limited CPU power makes it impractical to do so) by keeping a separate
@@ -333,11 +405,15 @@ int DecDCTvlcStart2(VLC_Context *ctx, uint32_t *buf, size_t max_size, const uint
* @param buf
* @param max_size Maximum number of 32-bit words to output
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcStart2()
*/
int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size);
/**
- * A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for
+ * @brief Decompresses a .BS file into MDEC codes (alternate implementation).
+ *
+ * @details A wrapper around DecDCTvlcStart2() and DecDCTvlcContinue2() for
* compatibility with the official SDK. This function uses an internal context;
* additionally, the maximum output buffer size is not passed as an argument
* but is instead set by calling DecDCTvlcSize2().
@@ -353,21 +429,35 @@ int DecDCTvlcContinue2(VLC_Context *ctx, uint32_t *buf, size_t max_size);
* @param buf
* @param table Pointer to decompressed table or 0 to use last table used
* @return 0, 1 if more data needs to be output or -1 in case of failure
+ *
+ * @see DecDCTvlcSize2(), DecDCTvlcBuild()
*/
int DecDCTvlc2(const uint32_t *bs, uint32_t *buf, DECDCTTAB2 *table);
/**
- * @brief Sets the maximum number of 32-bit words that a single call to
+ * @brief Sets the maximum amount of data to be decompressed (alternate
+ * implementation).
+ *
+ * @details Sets the maximum number of 32-bit words that a single call to
* DecDCTvlc2() will output. If size = 0, the entire frame will always be
* decoded in one shot.
*
+ * Note that DecDCTvlcStart2() and DecDCTvlcContinue2() do not use the value
+ * set by this function and instead expect the maximum size to be passed as an
+ * argument.
+ *
* @param size Maximum number of 32-bit words to output
- * @return Previously set value
+ * @return Previously set value
+ *
+ * @see DecDCTvlc2()
*/
size_t DecDCTvlcSize2(size_t size);
/**
- * @brief Generates the lookup table required by DecDCTvlcStart2(),
+ * @brief Generates the lookup table used by the alternate implementation of
+ * the .BS decompressor.
+ *
+ * @details Generates the lookup table required by DecDCTvlcStart2(),
* DecDCTvlcContinue2() and DecDCTvlc2() (a DECDCTTAB2 structure) into the
* specified buffer. Since the table is relatively large (34 KB), it is
* recommended to only generate it in a dynamically-allocated buffer when
diff --git a/libpsn00b/include/psxsio.h b/libpsn00b/include/psxsio.h
index 3f571d7..449e43a 100644
--- a/libpsn00b/include/psxsio.h
+++ b/libpsn00b/include/psxsio.h
@@ -1,64 +1,281 @@
+/*
+ * PSn00bSDK serial port library
+ * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ */
+
+/**
+ * @file psxsio.h
+ * @brief Serial port library header
+ *
+ * @details This library provides a custom API to access the PS1's serial port.
+ * Sending and receiving data is done fully asynchronously using a pair of
+ * 128-byte FIFOs kept in main RAM, with optional hardware flow control. More
+ * advanced use cases such as custom callbacks for each byte received are also
+ * supported.
+ *
+ * A BIOS TTY driver to redirect stdin/stdout (including BIOS messages as well
+ * as PSn00bSDK's own debug logging) to the serial port is also provided for
+ * debugging purposes.
+ */
+
#ifndef __PSXSIO_H
#define __PSXSIO_H
-#define SR_TXRDY 0x1
-#define SR_RXRDY 0x2
-#define SR_TXU 0x4
-#define SR_PERROR 0x8
-#define SR_OE 0x10
-#define SR_FE 0x20
-#define SR_DSR 0x80
-#define SR_CTS 0x100
-#define SR_IRQ 0x200
-
-#define SIO_TXRDY 0x1
-#define SIO_RXRDY 0x2
-#define SIO_TXU 0x4
-#define SIO_PERROR 0x8
-#define SIO_OE 0x10
-#define SIO_FE 0x20
-#define SIO_DSR 0x80
-#define SIO_CTS 0x100
-#define SIO_IRQ 0x200
-
-#define MR_CHLEN_5 0x00
-#define MR_CHLEN_6 0x04
-#define MR_CHLEN_7 0x08
-#define MR_CHLEN_8 0x0C
-#define MR_PEN 0x10
-#define MR_P_EVEN 0x30
-#define MR_SB_01 0x40
-#define MR_SB_10 0x80
-#define MR_SB_11 0xc0
-
-#define CR_TXEN 0x1
-#define CR_DTR 0x2
-#define CR_RXEN 0x4
-#define CR_BRK 0x8
-#define CR_INTRST 0x10
-#define CR_RTS 0x20
-#define CR_ERRRST 0x40
-#define CR_BUFSZ_1 0x00
-#define CR_BUFSZ_2 0x100
-#define CR_BUFSZ_4 0x200
-#define CR_BUFSZ_8 0x300
-#define CR_TXIEN 0x400
-#define CR_RXIEN 0x800
-#define CR_DSRIEN 0x1000
+#include <stdint.h>
+
+/* Enum and register definitions */
+
+typedef enum _SIO_StatusRegFlag {
+ SR_TXRDY = 1 << 0,
+ SR_RXRDY = 1 << 1,
+ SR_TXU = 1 << 2,
+ SR_PERROR = 1 << 3,
+ SR_OE = 1 << 4,
+ SR_FE = 1 << 5,
+ SR_DSR = 1 << 7,
+ SR_CTS = 1 << 8,
+ SR_IRQ = 1 << 9
+} SIO_StatusRegFlag;
+
+typedef enum _SIO_ModeRegFlag {
+ MR_BR_1 = 1 << 0,
+ MR_BR_16 = 2 << 0,
+ MR_BR_64 = 3 << 0,
+ MR_CHLEN_5 = 0 << 2,
+ MR_CHLEN_6 = 1 << 2,
+ MR_CHLEN_7 = 2 << 2,
+ MR_CHLEN_8 = 3 << 2,
+ MR_PEN = 1 << 4,
+ MR_P_EVEN = 1 << 5,
+ MR_SB_01 = 1 << 6,
+ MR_SB_10 = 2 << 6,
+ MR_SB_11 = 3 << 6
+} SIO_ModeRegFlag;
+
+typedef enum _SIO_ControlRegFlag {
+ CR_TXEN = 1 << 0,
+ CR_DTR = 1 << 1,
+ CR_RXEN = 1 << 2,
+ CR_BRK = 1 << 3,
+ CR_INTRST = 1 << 4,
+ CR_RTS = 1 << 5,
+ CR_ERRRST = 1 << 6,
+ CR_BUFSZ_1 = 0 << 8,
+ CR_BUFSZ_2 = 1 << 8,
+ CR_BUFSZ_4 = 2 << 8,
+ CR_BUFSZ_8 = 3 << 8,
+ CR_TXIEN = 1 << 10,
+ CR_RXIEN = 1 << 11,
+ CR_DSRIEN = 1 << 12
+} SIO_ControlRegFlag;
+
+typedef enum _SIO_FlowControl {
+ SIO_FC_NONE = 0,
+ SIO_FC_RTS_CTS = 1
+ //SIO_FC_DTR_DSR = 2
+} SIO_FlowControl;
+
+/* Public API */
#ifdef __cplusplus
extern "C" {
#endif
-int _sio_control(int cmd, int arg, int param);
-void AddSIO(int baud);
-void DelSIO(void);
+/**
+ * @brief Initializes the serial port driver.
+ *
+ * @details Resets the serial port, initializes the library's internal ring
+ * buffers and installs a serial IRQ handler. The given mode value (normally
+ * MR_CHLEN_8|MR_SB_01 for 8 data bits, 1 stop bit and no parity) is copied to
+ * the SIO_MODE register. Flow control is disabled by default (see
+ * SIO_SetFlowControl() for more details).
+ *
+ * This function must be called prior to using SIO_ReadByte(), SIO_ReadSync(),
+ * SIO_WriteByte(), SIO_WriteSync() or SIO_SetFlowControl(), and must not be
+ * called from an IRQ callback.
+ *
+ * @param baud Baud rate in bits per second
+ * @param mode Binary OR of SIO_ModeRegFlag enum members
+ *
+ * @see SIO_Quit()
+ */
+void SIO_Init(int baud, uint16_t mode);
+
+/**
+ * @brief Uninstalls the serial port driver.
+ *
+ * @details Resets the serial port and removes the IRQ callback added by
+ * SIO_Init(), restoring the previously installed handler if any. If SIO_Init()
+ * was previously invoked, calling SIO_Quit() before accessing serial port
+ * registers manually is highly recommended.
+ *
+ * @see SIO_Init()
+ */
+void SIO_Quit(void);
-void *Sio1Callback(void (*func)(void));
+/**
+ * @brief Sets the flow control mode.
+ *
+ * @details Changes the serial port's flow control mode. The following modes
+ * are available:
+ *
+ * - SIO_FC_NONE (default): do not assert RTS or DTR automatically and ignore
+ * DSR. Note that the hardware will still wait for CTS to be asserted before
+ * sending any data; there is no way to disable this behavior.
+ * - SIO_FC_RTS_CTS: assert RTS when the RX buffer is full and wait for CTS to
+ * be asserted before sending any data.
+ *
+ * The flow control mode shall only be changed while the TX and RX buffers are
+ * empty.
+ *
+ * @param mode
+ */
+void SIO_SetFlowControl(SIO_FlowControl mode);
-// ORIGINAL
-void WaitSIO(void);
-int kbhit();
+/**
+ * @brief Reads a byte from the RX buffer (blocking).
+ *
+ * @details Reads a byte from the RX buffer. If the buffer is empty, blocks
+ * indefinitely until a byte is received.
+ *
+ * WARNING: this function shall not be used in a critical section or IRQ
+ * callback as no data is sent or received while interrupts are disabled. It
+ * also lacks a timeout, so consider polling for new data using SIO_ReadByte2()
+ * or SIO_ReadSync(1) and implementing a timeout instead.
+ *
+ * @return Received byte
+ *
+ * @see SIO_ReadByte2(), SIO_ReadSync()
+ */
+int SIO_ReadByte(void);
+
+/**
+ * @brief Reads a byte from the RX buffer (non-blocking).
+ *
+ * @details Non-blocking variant of SIO_ReadByte(). Reads a byte from the RX
+ * buffer or returns -1 if the buffer is empty. Unlike SIO_ReadByte() this
+ * function is safe to use in a critical section (although no data will be
+ * received while interrupts are disabled).
+ *
+ * @return Received byte, -1 if no data is available
+ *
+ * @see SIO_ReadByte()
+ */
+int SIO_ReadByte2(void);
+
+/**
+ * @brief Waits for a byte to be received or returns the RX buffer's length.
+ *
+ * @details Waits for at least one byte to be available in the RX buffer (if
+ * mode = 0) or returns the length of the RX buffer (if mode = 1).
+ *
+ * WARNING: this function shall not be used in a critical section or IRQ
+ * callback as no data is sent or received while interrupts are disabled. Using
+ * SIO_ReadSync(0) is additionally discouraged as it lacks a timeout; consider
+ * polling for new data using SIO_ReadByte2() or SIO_ReadSync(1) and
+ * implementing a timeout instead.
+ *
+ * @param mode
+ * @return Number of RX bytes in the buffer
+ */
+int SIO_ReadSync(int mode);
+
+/**
+ * @brief Sets a callback for received bytes.
+ *
+ * @details Registers a function to be called whenever a byte is received. The
+ * received byte is passed as an argument to the callback, which shall then
+ * return a zero value to also store the byte in the RX buffer or a non-zero
+ * value to drop it. This can be used to e.g. filter or validate incoming data,
+ * or to bypass the library's RX buffer for custom buffering purposes.
+ *
+ * The callback will run in the exception handler's context, so it should be as
+ * fast as possible and shall not call any function that relies on interrupts
+ * being enabled.
+ *
+ * @param func
+ * @return Previously set callback or NULL
+ */
+void *SIO_ReadCallback(int (*func)(uint8_t));
+
+/**
+ * @brief Writes a byte to the TX buffer (blocking).
+ *
+ * @details Sends the given byte, or appends it to the TX buffer if the serial
+ * port is busy. If the buffer is full, blocks until the byte can be stored in
+ * the buffer (with a timeout).
+ *
+ * WARNING: this function shall not be used in a critical section or IRQ
+ * callback as no data is sent or received while interrupts are disabled.
+ *
+ * @param value
+ * @return Number of TX bytes previously pending, -1 in case of a timeout
+ *
+ * @see SIO_WriteByte2(), SIO_WriteSync()
+ */
+int SIO_WriteByte(uint8_t value);
+
+/**
+ * @brief Writes a byte to the TX buffer (non-blocking).
+ *
+ * @details Non-blocking variant of SIO_WriteByte(). Sends the given byte, or
+ * appends it to the TX buffer if the serial port is busy. If the buffer is
+ * full, returns -1 without actually sending the byte. Unlike SIO_WriteByte()
+ * this function is safe to use in a critical section (although no data will be
+ * sent while interrupts are disabled).
+ *
+ * @param value
+ * @return Number of TX bytes previously pending, -1 in case of failure
+ *
+ * @see SIO_WriteByte()
+ */
+int SIO_WriteByte2(uint8_t value);
+
+/**
+ * @brief Waits for all bytes to be sent or returns the TX buffer's length.
+ *
+ * @details Waits for all bytes pending in the TX buffer to be sent (if
+ * mode = 0) or returns the length of the TX buffer (if mode = 1).
+ *
+ * WARNING: this function shall not be used in a critical section or IRQ
+ * callback as no data is sent or received while interrupts are disabled.
+ *
+ * @param mode
+ * @return Number of TX bytes pending, -1 in case of a timeout (mode = 0)
+ */
+int SIO_WriteSync(int mode);
+
+/**
+ * @brief Installs the serial port TTY driver.
+ *
+ * @details Installs a BIOS file driver to redirect TTY stdin/stdout to the
+ * serial port. Uses SIO_Init() internally. The port is configured for 8 data
+ * bits, 1 stop bit and no parity.
+ *
+ * This function shall only be used for debugging purposes. Picking a high baud
+ * rate is recommended as all TTY writes are blocking and bypass the TX buffer.
+ *
+ * NOTE: some executable loaders, such as Unirom and Caetla, already replace
+ * the BIOS TTY driver with a custom one. Calling AddSIO() will break the
+ * built-in TTY functionality of these loaders.
+ *
+ * @param baud Baud rate in bits per second
+ *
+ * @see DelSIO()
+ */
+void AddSIO(int baud);
+
+/**
+ * @brief Removes the serial port TTY driver.
+ *
+ * @details Uninstalls the BIOS driver installed by AddSIO() and attempts to
+ * restore the default "dummy" TTY driver. Uses SIO_Quit() internally. Calling
+ * this function is not recommended as any further TTY usage may crash the
+ * system.
+ *
+ * @see AddSIO()
+ */
+void DelSIO(void);
#ifdef __cplusplus
}
diff --git a/libpsn00b/include/psxspu.h b/libpsn00b/include/psxspu.h
index cf78e3d..cdc3ac7 100644
--- a/libpsn00b/include/psxspu.h
+++ b/libpsn00b/include/psxspu.h
@@ -73,6 +73,20 @@ typedef struct _SpuCommonAttr {
SpuExtAttr cd, ext;
} SpuCommonAttr;
+/* Macros */
+
+#define getSPUAddr(addr) ((uint16_t) (((addr) + 7) / 8))
+#define getSPUSampleRate(rate) ((uint16_t) (((rate) * (1 << 12)) / 44100))
+
+#define getSPUADSR(ar, dr, sr, rr, sl) ( \
+ (sl) | \
+ ((dr) << 4) | \
+ ((ar) << 8) | \
+ ((rr) << 16) | \
+ ((sr) << 22) | \
+ (1 << 30) \
+)
+
/* "Useless" macros for official SDK compatibility */
#define SpuSetCommonMasterVolume(left, right) \
@@ -87,21 +101,29 @@ typedef struct _SpuCommonAttr {
((enable) ? (SPU_CTRL |= 0x0002) : (SPU_CTRL &= 0xfffd))
#define SpuSetReverbAddr(addr) \
- (SPU_REVERB_ADDR = ((addr) + 7) / 8)
+ (SPU_REVERB_ADDR = getSPUAddr(addr))
#define SpuSetIRQAddr(addr) \
- (SPU_IRQ_ADDR = ((addr) + 7) / 8)
+ (SPU_IRQ_ADDR = getSPUAddr(addr))
#define SpuSetVoiceVolume(ch, left, right) \
(SPU_CH_VOL_L(ch) = (left), SPU_CH_VOL_R(ch) = (right))
#define SpuSetVoicePitch(ch, pitch) \
(SPU_CH_FREQ(ch) = (pitch))
#define SpuSetVoiceStartAddr(ch, addr) \
- (SPU_CH_ADDR(ch) = ((addr) + 7) / 8)
-#define SpuSetVoiceADSR(ch, ar, dr, sr, rr, sl) \
- (SPU_CH_ADSR(ch) = ((sl)) | ((dr) << 4) | ((ar) << 8) | ((rr) << 16) | ((sr) << 22) | (1 << 30))
+ (SPU_CH_ADDR(ch) = getSPUAddr(addr))
+#define SpuSetVoiceADSR(ch, ar, dr, sr, rr, sl) ( \
+ SPU_CH_ADSR1(ch) = (sl) | ((dr) << 4) | ((ar) << 8), \
+ SPU_CH_ADSR2(ch) = (rr) | ((sr) << 6) | (1 << 14) \
+)
#define SpuSetKey(enable, voice_bit) \
- ((enable) ? (SPU_KEY_ON = (voice_bit)) : (SPU_KEY_OFF = (voice_bit)))
+ ((enable) ? ( \
+ SPU_KEY_ON1 = (uint16_t) (voice_bit), \
+ SPU_KEY_ON2 = (uint16_t) ((voice_bit) >> 16) \
+ ) : ( \
+ SPU_KEY_OFF1 = (uint16_t) (voice_bit), \
+ SPU_KEY_OFF2 = (uint16_t) ((voice_bit) >> 16) \
+ ))
/* Public API */
@@ -111,8 +133,9 @@ extern "C" {
void SpuInit(void);
-void SpuRead(uint32_t *data, size_t size);
-void SpuWrite(const uint32_t *data, size_t size);
+size_t SpuRead(uint32_t *data, size_t size);
+size_t SpuWrite(const uint32_t *data, size_t size);
+size_t SpuWritePartly(const uint32_t *data, size_t size);
SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode);
uint32_t SpuSetTransferStartAddr(uint32_t addr);
int SpuIsTransferCompleted(int mode);
diff --git a/libpsn00b/include/stdlib.h b/libpsn00b/include/stdlib.h
index f0753c1..049d067 100644
--- a/libpsn00b/include/stdlib.h
+++ b/libpsn00b/include/stdlib.h
@@ -31,17 +31,19 @@ extern "C" {
extern int __argc;
extern const char **__argv;
+void abort(void);
+
int rand(void);
-void srand(unsigned long seed);
+void srand(int seed);
int abs(int j);
long labs(long i);
-long long strtoll(const char *nptr, char **endptr, int base);
-long strtol(const char *nptr, char **endptr, int base);
-long double strtold(const char *nptr, char **endptr);
-double strtod(const char *nptr, char **endptr);
+long strtol(const char *nptr, char **endptr, int base);
+long long strtoll(const char *nptr, char **endptr, int base);
float strtof(const char *nptr, char **endptr);
+double strtod(const char *nptr, char **endptr);
+long double strtold(const char *nptr, char **endptr);
void InitHeap(void *addr, size_t size);
void *sbrk(ptrdiff_t incr);
diff --git a/libpsn00b/libc/_start.s b/libpsn00b/libc/_start.s
index 56075c8..fcd4c4c 100644
--- a/libpsn00b/libc/_start.s
+++ b/libpsn00b/libc/_start.s
@@ -2,17 +2,16 @@
# (C) 2022 spicyjpeg - MPL licensed
#
# This file provides a weak function that can be easily overridden to e.g. set
-# $sp or perform additional initialization before the "real" _start()
+# $sp or perform additional initialization before the "real" _start() function
# (_start_inner()) is called.
.set noreorder
-.section .text
+.section .text._start
.global _start
.type _start, @function
.weak _start
_start:
la $gp, _gp
-
j _start_inner
nop
diff --git a/libpsn00b/libc/abort.c b/libpsn00b/libc/abort.c
index 2db5016..0a3c325 100644
--- a/libpsn00b/libc/abort.c
+++ b/libpsn00b/libc/abort.c
@@ -1,19 +1,26 @@
+/*
+ * PSn00bSDK assert macro and internal logging
+ * (C) 2022 spicyjpeg - MPL licensed
+ */
-#include <psxetc.h>
+#undef SDK_LIBRARY_NAME
-/* Standard abort */
+#include <assert.h>
+#include <psxapi.h>
-void abort(void) {
- _sdk_log("abort()\n");
+/* Internal function used by assert() macro */
+
+void _assert_abort(const char *file, int line, const char *expr) {
+ _sdk_log("%s:%d: assert(%s)\n", file, line, expr);
for (;;)
__asm__ volatile("");
}
-/* Internal function used by assert() macro */
+/* Standard abort */
-void _assert_abort(const char *file, int line, const char *expr) {
- _sdk_log("%s:%d: assert(%s)\n", file, line, expr);
+void abort(void) {
+ _sdk_log("abort()\n");
for (;;)
__asm__ volatile("");
diff --git a/libpsn00b/libc/memset.s b/libpsn00b/libc/memset.s
index 5a1589d..6ef84ec 100644
--- a/libpsn00b/libc/memset.s
+++ b/libpsn00b/libc/memset.s
@@ -38,8 +38,9 @@ memset:
sb $a1, 0xc($a0)
sb $a1, 0xd($a0)
sb $a1, 0xe($a0)
- jr $ra
sb $a1, 0xf($a0)
+ jr $ra
+ nop
.Llarge_fill:
# Initialize fast filling by repeating the fill byte 4 times, so it can be
diff --git a/libpsn00b/psxcd/getsector.c b/libpsn00b/psxcd/getsector.c
index 31d0ac7..a214d7a 100644
--- a/libpsn00b/psxcd/getsector.c
+++ b/libpsn00b/psxcd/getsector.c
@@ -4,7 +4,7 @@
*/
#include <stdint.h>
-#include <psxetc.h>
+#include <assert.h>
#include <psxcd.h>
#include <hwregs_c.h>
@@ -46,6 +46,6 @@ int CdDataSync(int mode) {
return 0;
}
- _sdk_log("psxcd: CdDataSync() timeout\n");
+ _sdk_log("CdDataSync() timeout\n");
return -1;
}
diff --git a/libpsn00b/psxcd/isofs.c b/libpsn00b/psxcd/isofs.c
index 0425c0d..e00ddeb 100644
--- a/libpsn00b/psxcd/isofs.c
+++ b/libpsn00b/psxcd/isofs.c
@@ -1,10 +1,14 @@
+
+#undef SDK_LIBRARY_NAME
+#define SDK_LIBRARY_NAME "psxcd/iso"
+
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include <psxgpu.h>
#include <psxapi.h>
-#include <psxetc.h>
-#include "psxcd.h"
+#include <psxcd.h>
#include "isofs.h"
#define DEFAULT_PATH_SEP '\\'
@@ -43,7 +47,7 @@ static int _CdReadIsoDescriptor(int session_offs)
CdControl(CdlNop, 0, 0);
if( (CdStatus()&0x10) )
{
- _sdk_log("psxcd: Lid is still open.\n");
+ _sdk_log("Lid is still open.\n");
_cd_iso_error = CdlIsoLidOpen;
return -1;
@@ -58,45 +62,45 @@ static int _CdReadIsoDescriptor(int session_offs)
return 0;
}
- _sdk_log("psxcd: Parsing ISO file system.\n");
+ _sdk_log("Parsing ISO file system.\n");
// Seek to volume descriptor
CdIntToPos(16+session_offs, &loc);
if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) )
{
- _sdk_log("psxcd: Could not set seek destination.\n");
+ _sdk_log("Could not set seek destination.\n");
_cd_iso_error = CdlIsoSeekError;
return -1;
}
- _sdk_log("psxcd: Read sectors.\n");
+ _sdk_log("Read sectors.\n");
// Read volume descriptor
CdRead(1, (uint32_t*)_cd_iso_descriptor_buff, CdlModeSpeed);
if( CdReadSync(0, 0) )
{
- _sdk_log("psxcd: Error reading ISO volume descriptor.\n");
+ _sdk_log("Error reading ISO volume descriptor.\n");
_cd_iso_error = CdlIsoReadError;
return -1;
}
- _sdk_log("psxcd: Read complete.\n");
+ _sdk_log("Read complete.\n");
// Verify if volume descriptor is present
descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff;
if( strncmp("CD001", descriptor->header.id, 5) )
{
- _sdk_log("psxcd: Disc does not contain a ISO9660 file system.\n");
+ _sdk_log("Disc does not contain a ISO9660 file system.\n");
_cd_iso_error = CdlIsoInvalidFs;
return -1;
}
- _sdk_log("psxcd: Path table LBA = %d\n", descriptor->pathTable1Offs);
- _sdk_log("psxcd: Path table len = %d\n", descriptor->pathTableSize.lsb);
+ _sdk_log("Path table LBA = %d\n", descriptor->pathTable1Offs);
+ _sdk_log("Path table len = %d\n", descriptor->pathTableSize.lsb);
// Allocate path table buffer
i = ((2047+descriptor->pathTableSize.lsb)>>11)<<11;
@@ -106,7 +110,7 @@ static int _CdReadIsoDescriptor(int session_offs)
}
_cd_iso_pathtable_buff = (uint8_t*)malloc(i);
- _sdk_log("psxcd: Allocated %d bytes for path table.\n", i);
+ _sdk_log("Allocated %d bytes for path table.\n", i);
// Read path table
CdIntToPos(descriptor->pathTable1Offs, &loc);
@@ -114,7 +118,7 @@ static int _CdReadIsoDescriptor(int session_offs)
CdRead(i>>11, (uint32_t*)_cd_iso_pathtable_buff, CdlModeSpeed);
if( CdReadSync(0, 0) )
{
- _sdk_log("psxcd: Error reading ISO path table.\n");
+ _sdk_log("Error reading ISO path table.\n");
_cd_iso_error = CdlIsoReadError;
return -1;
@@ -142,11 +146,11 @@ static int _CdReadIsoDirectory(int lba)
CdIntToPos(lba, &loc);
i = CdPosToInt(&loc);
- _sdk_log("psxcd: Seek to sector %d\n", i);
+ _sdk_log("Seek to sector %d\n", i);
if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) )
{
- _sdk_log("psxcd: Could not set seek destination.\n");
+ _sdk_log("Could not set seek destination.\n");
_cd_iso_error = CdlIsoSeekError;
return -1;
@@ -162,7 +166,7 @@ static int _CdReadIsoDirectory(int lba)
CdRead(1, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed);
if( CdReadSync(0, 0) )
{
- _sdk_log("psxcd: Error reading initial directory record.\n");
+ _sdk_log("Error reading initial directory record.\n");
_cd_iso_error = CdlIsoReadError;
return -1;
@@ -171,14 +175,14 @@ static int _CdReadIsoDirectory(int lba)
direntry = (ISO_DIR_ENTRY*)_cd_iso_directory_buff;
_cd_iso_directory_len = direntry->entrySize.lsb;
- _sdk_log("psxcd: Location of directory record = %d\n", direntry->entryOffs.lsb);
- _sdk_log("psxcd: Size of directory record = %d\n", _cd_iso_directory_len);
+ _sdk_log("Location of directory record = %d\n", direntry->entryOffs.lsb);
+ _sdk_log("Size of directory record = %d\n", _cd_iso_directory_len);
if( _cd_iso_directory_len > 2048 )
{
if( !CdControl(CdlSetloc, (uint8_t*)&loc, 0) )
{
- _sdk_log("psxcd: Could not set seek destination.\n");
+ _sdk_log("Could not set seek destination.\n");
_cd_iso_error = CdlIsoSeekError;
return -1;
@@ -188,12 +192,12 @@ static int _CdReadIsoDirectory(int lba)
i = ((2047+_cd_iso_directory_len)>>11)<<11;
_cd_iso_directory_buff = (uint8_t*)malloc(i);
- _sdk_log("psxcd: Allocated %d bytes for directory record.\n", i);
+ _sdk_log("Allocated %d bytes for directory record.\n", i);
CdRead(i>>11, (uint32_t*)_cd_iso_directory_buff, CdlModeSpeed);
if( CdReadSync(0, 0) )
{
- _sdk_log("psxcd: Error reading remaining directory record.\n");
+ _sdk_log("Error reading remaining directory record.\n");
_cd_iso_error = CdlIsoReadError;
return -1;
@@ -215,7 +219,7 @@ static void dump_directory(void)
ISO_DIR_ENTRY *dir_entry;
char namebuff[16];
- _sdk_log("psxcd: Cached directory record contents:\n");
+ _sdk_log("Cached directory record contents:\n");
i = 0;
dir_pos = 0;
@@ -226,7 +230,7 @@ static void dump_directory(void)
strncpy(namebuff,
_cd_iso_directory_buff+dir_pos+sizeof(ISO_DIR_ENTRY), dir_entry->identifierLen);
- _sdk_log("psxcd: P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff);
+ _sdk_log("P:%d L:%d %s\n", dir_pos, dir_entry->identifierLen, namebuff);
dir_pos += dir_entry->entryLength;
i++;
@@ -245,7 +249,7 @@ static void dump_directory(void)
}
}
- _sdk_log("psxcd: --\n");
+ _sdk_log("--\n");
}
@@ -256,7 +260,7 @@ static void dump_pathtable(void)
ISO_DESCRIPTOR *descriptor;
char namebuff[16];
- _sdk_log("psxcd: Path table entries:\n");
+ _sdk_log("Path table entries:\n");
descriptor = (ISO_DESCRIPTOR*)_cd_iso_descriptor_buff;
@@ -270,7 +274,7 @@ static void dump_pathtable(void)
tbl_pos+sizeof(ISO_PATHTABLE_ENTRY),
tbl_entry->nameLength);
- _sdk_log("psxcd: %s\n", namebuff);
+ _sdk_log("%s\n", namebuff);
// Advance to next entry
tbl_pos += sizeof(ISO_PATHTABLE_ENTRY)
@@ -366,7 +370,7 @@ static int find_dir_entry(const char *name, ISO_DIR_ENTRY *dirent)
ISO_DIR_ENTRY *dir_entry;
char namebuff[16];
- _sdk_log("psxcd: Locating file %s.\n", name);
+ _sdk_log("Locating file %s.\n", name);
i = 0;
dir_pos = 0;
@@ -459,11 +463,11 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
// Read ISO descriptor and path table
if( _CdReadIsoDescriptor(0) )
{
- _sdk_log("psxcd: Could not read ISO file system.\n");
+ _sdk_log("Could not read ISO file system.\n");
return NULL;
}
- // _sdk_log("psxcd: ISO file system cache updated.\n");
+ // _sdk_log("ISO file system cache updated.\n");
// _cd_media_changed = 0;
//}
@@ -471,23 +475,23 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
num_dirs = get_pathtable_entry(0, NULL, NULL);
#ifndef NDEBUG
- _sdk_log("psxcd: Directories in path table: %d\n", num_dirs);
+ _sdk_log("Directories in path table: %d\n", num_dirs);
rbuff = resolve_pathtable_path(num_dirs-1, tpath_rbuff+127);
if( !rbuff )
{
- _sdk_log("psxcd: Could not resolve path.\n");
+ _sdk_log("Could not resolve path.\n");
}
else
{
- _sdk_log("psxcd: Longest path: %s|\n", rbuff);
+ _sdk_log("Longest path: %s|\n", rbuff);
}
#endif
if( get_pathname(search_path, filename) )
{
- _sdk_log("psxcd: Search path = %s|\n", search_path);
+ _sdk_log("Search path = %s|\n", search_path);
}
// Search the pathtable for a matching path
@@ -495,7 +499,7 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
for(i=1; i<num_dirs; i++)
{
rbuff = resolve_pathtable_path(i, tpath_rbuff+127);
- _sdk_log("psxcd: Found = %s|\n", rbuff);
+ _sdk_log("Found = %s|\n", rbuff);
if( rbuff )
{
@@ -509,14 +513,14 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
if( !found_dir )
{
- _sdk_log("psxcd: Directory path not found.\n");
+ _sdk_log("Directory path not found.\n");
return NULL;
}
- _sdk_log("psxcd: Found directory at record %d!\n", found_dir);
+ _sdk_log("Found directory at record %d!\n", found_dir);
get_pathtable_entry(found_dir, &tbl_entry, NULL);
- _sdk_log("psxcd: Directory LBA = %d\n", tbl_entry.dirOffs);
+ _sdk_log("Directory LBA = %d\n", tbl_entry.dirOffs);
_CdReadIsoDirectory(tbl_entry.dirOffs);
get_filename(fp->name, filename);
@@ -533,12 +537,12 @@ CdlFILE *CdSearchFile(CdlFILE *fp, const char *filename)
if( find_dir_entry(fp->name, &dir_entry) )
{
- _sdk_log("psxcd: Could not find file.\n");
+ _sdk_log("Could not find file.\n");
return NULL;
}
- _sdk_log("psxcd: Located file at LBA %d.\n", dir_entry.entryOffs.lsb);
+ _sdk_log("Located file at LBA %d.\n", dir_entry.entryOffs.lsb);
CdIntToPos(dir_entry.entryOffs.lsb, &fp->pos);
fp->size = dir_entry.entrySize.lsb;
@@ -562,11 +566,11 @@ CdlDIR *CdOpenDir(const char* path)
// Read ISO descriptor and path table
if( _CdReadIsoDescriptor( 0 ) )
{
- _sdk_log( "psxcd: Could not read ISO file system.\n" );
+ _sdk_log( "Could not read ISO file system.\n" );
return NULL;
}
-// _sdk_log( "psxcd: ISO file system cache updated.\n" );
+// _sdk_log( "ISO file system cache updated.\n" );
// _cd_media_changed = 0;
// }
@@ -576,7 +580,7 @@ CdlDIR *CdOpenDir(const char* path)
for( i=1; i<num_dirs; i++ )
{
rbuff = resolve_pathtable_path( i, tpath_rbuff+127 );
- _sdk_log( "psxcd: Found = %s|\n", rbuff );
+ _sdk_log( "Found = %s|\n", rbuff );
if( rbuff )
{
@@ -590,14 +594,14 @@ CdlDIR *CdOpenDir(const char* path)
if( !found_dir )
{
- _sdk_log( "psxcd: Directory path not found.\n" );
+ _sdk_log( "Directory path not found.\n" );
return NULL;
}
- _sdk_log( "psxcd: Found directory at record %d!\n", found_dir );
+ _sdk_log( "Found directory at record %d!\n", found_dir );
get_pathtable_entry( found_dir, &tbl_entry, NULL );
- _sdk_log( "psxcd: Directory LBA = %d\n", tbl_entry.dirOffs );
+ _sdk_log( "Directory LBA = %d\n", tbl_entry.dirOffs );
_CdReadIsoDirectory( tbl_entry.dirOffs );
@@ -662,11 +666,11 @@ int CdReadDir(CdlDIR *dir, CdlFILE* file)
file->size = dir_entry->entrySize.lsb;
- _sdk_log("psxcd: dir_entry->entryLength = %d, ", dir_entry->entryLength);
+ _sdk_log("dir_entry->entryLength = %d, ", dir_entry->entryLength);
d_dir->_pos += dir_entry->entryLength;
- _sdk_log("psxcd: d_dir->_pos = %d\n", d_dir->_pos);
+ _sdk_log("d_dir->_pos = %d\n", d_dir->_pos);
// Check if padding is reached (end of record sector)
if( d_dir->_dir[d_dir->_pos] == 0 )
@@ -770,13 +774,13 @@ int CdLoadSession(int session)
int i;
// Seek to specified session
- _sdk_log("psxcd: CdLoadSession(): Seeking to session %d...\n", session);
+ _sdk_log("CdLoadSession(): Seeking to session %d...\n", session);
CdControl(CdlSetsession, (unsigned char*)&session,
(unsigned char*)&resultbuff);
if( CdSync(0, 0) == CdlDiskError )
{
- _sdk_log("psxcd: CdLoadSession(): Session seek failed, session does not exist. Restarting CD-ROM...\n");
+ _sdk_log("CdLoadSession(): Session seek failed, session does not exist. Restarting CD-ROM...\n");
// Restart CD-ROM on session seek failure
CdControl(CdlNop, 0, 0);
@@ -797,7 +801,7 @@ int CdLoadSession(int session)
_ses_scanbuff = scanbuff;
// Begin scan for an ISO volume descriptor
- _sdk_log("psxcd: CdLoadSession(): Scanning for ISO9660 volume descriptor.\n");
+ _sdk_log("CdLoadSession(): Scanning for ISO9660 volume descriptor.\n");
i = CdlModeSpeed;
CdControl(CdlSetmode, (unsigned char*)&i, 0);
@@ -812,7 +816,7 @@ int CdLoadSession(int session)
if( !_ses_scanfound )
{
- _sdk_log("psxcd: CdLoadSession(): Did not find volume descriptor.\n");
+ _sdk_log("CdLoadSession(): Did not find volume descriptor.\n");
_cd_iso_error = CdlIsoInvalidFs;
EnterCriticalSection();
@@ -841,11 +845,11 @@ int CdLoadSession(int session)
loc = (CdlLOC*)resultbuff;
- _sdk_log("psxcd: CdLoadSession(): Session found in %02d:%02d:%02d (LBA=%d)\n",
+ _sdk_log("CdLoadSession(): Session found in %02d:%02d:%02d (LBA=%d)\n",
btoi(loc->minute), btoi(loc->second), btoi(loc->sector), CdPosToInt(loc));
i = CdPosToInt(loc)-17;
- _sdk_log("psxcd: CdLoadSession(): Session starting at LBA=%d\n", i);
+ _sdk_log("CdLoadSession(): Session starting at LBA=%d\n", i);
_cd_media_changed = 1;
diff --git a/libpsn00b/psxcd/psxcd.c b/libpsn00b/psxcd/psxcd.c
index b914b5e..9392d30 100644
--- a/libpsn00b/psxcd/psxcd.c
+++ b/libpsn00b/psxcd/psxcd.c
@@ -1,8 +1,8 @@
#include <stdint.h>
+#include <assert.h>
#include <psxgpu.h>
-#include <psxetc.h>
#include <psxapi.h>
-#include "psxcd.h"
+#include <psxcd.h>
#define READ_TIMEOUT 600 // 10 seconds for NTSC
@@ -39,9 +39,9 @@ int CdInit(void) {
if(CdSync(0, 0) != CdlDiskError) {
CdControl(CdlDemute, 0, 0);
- _sdk_log("psxcd: setup done\n");
+ _sdk_log("setup done\n");
} else {
- _sdk_log("psxcd: setup error, bad disc/drive or no disc inserted\n");
+ _sdk_log("setup error, bad disc/drive or no disc inserted\n");
}
return 1;
@@ -208,7 +208,7 @@ CdlLOC *CdIntToPos(int i, CdlLOC *p) {
}
-int CdPosToInt(CdlLOC *p)
+int CdPosToInt(const CdlLOC *p)
{
return ((75*(btoi(p->minute)*60))+(75*btoi(p->second))+btoi(p->sector))-150;
}
@@ -305,7 +305,7 @@ static void CdDoRetry()
{
int cb;
- _sdk_log("psxcd: retrying read...\n");
+ _sdk_log("retrying read...\n");
// Stop reading
CdControl(CdlPause, 0, 0);
diff --git a/libpsn00b/psxetc/dl.c b/libpsn00b/psxetc/dl.c
index b85a7df..ccf7a7c 100644
--- a/libpsn00b/psxetc/dl.c
+++ b/libpsn00b/psxetc/dl.c
@@ -1,6 +1,6 @@
/*
* PSn00bSDK dynamic linker
- * (C) 2021 spicyjpeg - MPL licensed
+ * (C) 2021-2022 spicyjpeg - MPL licensed
*
* The bulk of this code is MIPS-specific but not PS1-specific, so the whole
* dynamic linker could be ported to other MIPS platforms that do not have one
@@ -23,23 +23,21 @@
* of entries
*/
+#undef SDK_LIBRARY_NAME
+#define SDK_LIBRARY_NAME "psxetc/dl"
+
#include <stdint.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <ctype.h>
+#include <assert.h>
#include <elf.h>
#include <dlfcn.h>
#include <string.h>
-#include <psxetc.h>
#include <psxapi.h>
-/* Compile options */
-
-// Comment before building to disable functions that rely on BIOS file APIs,
-// i.e. DL_LoadSymbolMapFromFile() and DL_LoadDLLFromFile().
-// FIXME: those seem to be broken currently, and shouldn't be used anyway
-//#define USE_FILE_API
-
/* Private types */
typedef struct {
@@ -48,17 +46,15 @@ typedef struct {
} MapEntry;
typedef struct {
- uint32_t nbucket;
- uint32_t nchain;
+ int nbucket, nchain, index;
MapEntry *entries;
uint32_t *bucket;
uint32_t *chain;
} SymbolMap;
-/* Data */
+/* Internal globals */
-static DL_Error _error_code = RTLD_E_NONE;
static SymbolMap _symbol_map;
// Accessed by _dl_resolve_helper, stores the pointer to the current resolver
@@ -67,11 +63,6 @@ void *(*_dl_resolve_callback)(DLL *, const char *) = 0;
/* Private utilities */
-#define _ERROR(code, ret) { \
- _error_code = code; \
- return ret; \
-}
-
void _dl_resolve_wrapper(void);
// Called by _dl_resolve_wrapper() (which is in turn called by GCC stubs) to
@@ -79,29 +70,28 @@ void _dl_resolve_wrapper(void);
void *_dl_resolve_helper(DLL *dll, uint32_t index) {
Elf32_Sym *sym = &(dll->symtab[index]);
const char *_name = &(dll->strtab[sym->st_name]);
- void *address;
+ void *addr;
if (_dl_resolve_callback)
- address = _dl_resolve_callback(dll, _name);
+ addr = _dl_resolve_callback(dll, _name);
else
- address = DL_GetSymbolByName(_name);
+ addr = DL_GetMapSymbol(_name);
- if (!address) {
- _sdk_log("psxetc: FATAL! can't resolve %s, locking up\n", _name);
- while (1)
- __asm__ volatile("nop");
+ if (!addr) {
+ _sdk_log("FATAL! can't resolve %s, aborting\n", _name);
+ abort();
}
// Patch the GOT entry to "cache" the resolved address. This can probably
// be implemented in a faster way, but this thing is already too complex.
- for (uint32_t i = 0; i < dll->got_length; i++) {
+ for (int i = 0; i < dll->got_length; i++) {
if (dll->got[2 + i] == (uint32_t) sym->st_value) {
- dll->got[2 + i] = (uint32_t) address;
+ dll->got[2 + i] = (uint32_t) addr;
break;
}
}
- return address;
+ return addr;
}
// Implementation of the weird obscure hashing function used in the ELF .hash
@@ -124,142 +114,121 @@ static uint32_t _elf_hash(const char *str) {
return value;
}
-#ifdef USE_FILE_API
-static uint8_t *_dl_load_file(const char *filename, size_t *size_output) {
- int32_t fd = open(filename, 1);
- if (fd < 0) {
- _sdk_log("psxetc: can't open %s, error = %d\n", filename, fd);
- _ERROR(RTLD_E_FILE_OPEN, 0);
- }
+/* Symbol map loading/introspection API */
- // Extract file size from the file's associated control block.
- // https://problemkaputt.de/psx-spx.htm#biosmemorymap
- FCB *fcb = (FCB *) *((FCB **) 0x80000140);
- size_t size = fcb[fd].filesize;
+int DL_InitSymbolMap(int num_entries) {
+ if (_symbol_map.entries)
+ DL_UnloadSymbolMap();
- uint8_t *buffer = malloc(size);
- if (!buffer) {
- _sdk_log("psxetc: unable to allocate %d bytes for %s\n", size, filename);
- _ERROR(RTLD_E_FILE_ALLOC, 0);
- }
+ // TODO: find a way to calculate the optimal number of hash table "buckets"
+ // in order to minimize hash table size
+ _symbol_map.nbucket = num_entries;
+ _symbol_map.nchain = num_entries;
+ _symbol_map.index = 0;
+ _sdk_log(
+ "allocating nbucket = %d, nchain = %d\n",
+ _symbol_map.nbucket, num_entries
+ );
- //_sdk_log("psxetc: loading %s (%d bytes)..", filename, size);
+ _symbol_map.entries = malloc(sizeof(MapEntry) * num_entries);
+ _symbol_map.bucket = malloc(sizeof(uint32_t) * num_entries);
+ _symbol_map.chain = malloc(sizeof(uint32_t) * num_entries);
- for (uint32_t offset = 0; offset < size; ) {
- int32_t length = read(fd, &(buffer[offset]), 0x800);
+ if (!_symbol_map.entries || !_symbol_map.bucket || !_symbol_map.chain) {
+ _sdk_log("unable to allocate symbol map table\n");
+ return -1;
+ }
- if (length <= 0) {
- close(fd);
- free(buffer);
+ memset(_symbol_map.bucket, 0xff, sizeof(uint32_t) * num_entries);
+ memset(_symbol_map.chain, 0xff, sizeof(uint32_t) * num_entries);
- _sdk_log("failed, error = %d\n", length);
- _ERROR(RTLD_E_FILE_READ, 0);
- }
+ return 0;
+}
- //_sdk_log(".");
- offset += length;
- }
+void DL_UnloadSymbolMap(void) {
+ if (!_symbol_map.entries)
+ return;
- close(fd);
- _sdk_log(" done\n");
+ free(_symbol_map.entries);
+ free(_symbol_map.bucket);
+ free(_symbol_map.chain);
- if (size_output)
- *size_output = size;
- return buffer;
+ _symbol_map.entries = 0;
+ _symbol_map.bucket = 0;
+ _symbol_map.chain = 0;
}
-#endif
-/* Symbol map loading/parsing API */
+void DL_AddMapSymbol(const char *name, void *ptr) {
+ uint32_t hash = _elf_hash(name);
+ int index = _symbol_map.index;
+ _symbol_map.index = index + 1;
+
+ MapEntry *entry = &(_symbol_map.entries[index]);
+ entry->hash = hash;
+ entry->ptr = ptr;
-int32_t DL_ParseSymbolMap(const char *ptr, size_t size) {
- DL_UnloadSymbolMap();
+ // Append a reference to the entry to the hash table's chain.
+ uint32_t *hash_entry = &(_symbol_map.bucket[hash % _symbol_map.nbucket]);
+ while (*hash_entry != 0xffffffff)
+ hash_entry = &(_symbol_map.chain[*hash_entry]);
+
+ *hash_entry = index;
+}
+
+int DL_ParseSymbolMap(const char *ptr, size_t size) {
+ int entries = 0;
// Perform a quick scan over the entire map text and count the number of
// newlines. This allows us to (over)estimate the number of entries and
- // allocate a sufficiently large hash/entry table.
- uint32_t entries = 0;
- for (uint32_t pos = 0; pos < size; pos++) {
+ // allocate a sufficiently large hash table.
+ for (int pos = 0; pos < size; pos++) {
if (ptr[pos] == '\n')
entries++;
}
- // TODO: find a way to calculate the optimal number of hash table "buckets"
- // in order to minimize hash table size
- _symbol_map.nbucket = entries;
- _symbol_map.nchain = entries;
- _sdk_log(
- "psxetc: allocating nbucket = %d, nchain = %d\n",
- _symbol_map.nbucket,
- entries
- );
+ int err = DL_InitSymbolMap(entries);
+ if (err)
+ return err;
- // Allocate an entry table to store parsed symbols in, and an associated
- // hash table (same format as .hash section, with 8-byte header).
- _symbol_map.entries = malloc(sizeof(MapEntry) * entries);
- _symbol_map.bucket = malloc(sizeof(uint32_t) * _symbol_map.nbucket);
- _symbol_map.chain = malloc(sizeof(uint32_t) * entries);
+ // Go again through the symbol map and fill in the hash table by calling
+ // DL_AddMapSymbol() for each valid entry.
+ entries = 0;
- if (!_symbol_map.entries || !_symbol_map.bucket || !_symbol_map.chain) {
- _sdk_log("psxetc: unable to allocate symbol map table\n");
- _ERROR(RTLD_E_MAP_ALLOC, -1);
- }
-
- for (uint32_t i = 0; i < _symbol_map.nbucket; i++)
- _symbol_map.bucket[i] = 0xffffffff;
- for (uint32_t i = 0; i < entries; i++)
- _symbol_map.chain[i] = 0xffffffff;
-
- // Go again through the symbol map and fill in the hash table.
- uint32_t index = 0;
- for (uint32_t pos = 0; (pos < size) && ptr[pos]; pos++) {
- char name[64];
- char type_string[2];
- uint64_t address64;
+ for (int pos = 0; (pos < size) && ptr[pos]; pos++) {
+ uint64_t full_addr;
+ char name[64], type_string[4];
size_t _size;
// e.g. "main T ffffffff80000000 100 ...\n"
- int32_t parsed = sscanf(
+ int parsed = sscanf(
&(ptr[pos]),
"%63s %1s %Lx %x",
name,
type_string,
- &address64,
+ &full_addr,
&_size // Optional, unused (yet)
);
if (parsed >= 3) {
// Drop the upper 32 bits of the address (for some reason MIPS nm
- // insists on printing 64-bit addresses... wtf) and normalize the
- // type letter to upper case, then check if the entry is valid and
- // non-null.
- void *address = (void *) address64;
- char _type = toupper(type_string[0]);
- uint32_t hash = _elf_hash(name);
- uint32_t hash_mod = hash % _symbol_map.nbucket;
-
- if (address && (
+ // insists on printing 64-bit addresses... wtf) and check if the
+ // entry is valid and non-null.
+ void *addr = (void *) ((uint32_t) full_addr);
+ char _type = toupper(type_string[0]);
+
+ if (addr && (
(_type == 'T') || // .text
(_type == 'R') || // .rodata
(_type == 'D') || // .data
(_type == 'B') // .bss
)) {
//_sdk_log(
- //"psxetc: map sym: %08x,%08x [%c %s]\n",
- //address, _size, _type, name
+ //"map sym: %08x,%08x [%c %s]\n",
+ //addr, _size, _type, name
//);
- MapEntry *entry = &(_symbol_map.entries[index]);
- entry->hash = hash;
- entry->ptr = address;
-
- // Append a reference to the entry to the hash table's chain
- // for the current hash_mod. I can't explain this properly.
- uint32_t *hash_entry = &(_symbol_map.bucket[hash_mod]);
- while (*hash_entry != 0xffffffff)
- hash_entry = &(_symbol_map.chain[*hash_entry]);
-
- *hash_entry = index;
- index++;
+ DL_AddMapSymbol(name, addr);
+ entries++;
}
}
@@ -269,92 +238,61 @@ int32_t DL_ParseSymbolMap(const char *ptr, size_t size) {
pos++;
}
- _sdk_log("psxetc: parsed %d symbols\n", entries);
- if (!entries)
- _ERROR(RTLD_E_NO_SYMBOLS, -1);
-
- return entries;
-}
-
-#ifdef USE_FILE_API
-int32_t DL_LoadSymbolMapFromFile(const char *filename) {
- size_t size;
- char *ptr = _dl_load_file(filename, &size);
- if (!ptr)
- return -1;
-
- int32_t entries = DL_ParseSymbolMap(ptr, size);
- free(ptr);
-
+ _sdk_log("parsed %d symbols\n", entries);
return entries;
}
-#endif
-
-void DL_UnloadSymbolMap(void) {
- if (!_symbol_map.entries)
- return;
- free(_symbol_map.entries);
- free(_symbol_map.bucket);
- free(_symbol_map.chain);
- _symbol_map.entries = 0;
-}
-
-void *DL_GetSymbolByName(const char *name) {
+void *DL_GetMapSymbol(const char *name) {
if (!_symbol_map.entries) {
- _sdk_log("psxetc: attempted lookup with no map loaded\n");
- _ERROR(RTLD_E_NO_MAP, 0);
+ _sdk_log("DL_GetMapSymbol() with no map loaded\n");
+ return 0;
}
- // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
- uint32_t hash = _elf_hash(name);
- uint32_t hash_mod = hash % _symbol_map.nbucket;
-
// Go through the hash table's chain until the symbol hash matches the one
// calculated.
- for (uint32_t i = _symbol_map.bucket[hash_mod]; i != 0xffffffff;) {
+ // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
+ uint32_t hash = _elf_hash(name);
+
+ for (int i = _symbol_map.bucket[hash % _symbol_map.nbucket]; i != 0xffffffff;) {
if (i >= _symbol_map.nchain) {
_sdk_log(
- "psxetc: GetSymbolByName() index out of bounds (%d >= %d)\n",
+ "DL_GetMapSymbol() index out of bounds (%d >= %d)\n",
i, _symbol_map.nchain
);
- _ERROR(RTLD_E_HASH_LOOKUP, 0);
+ return 0;
}
MapEntry *entry = &(_symbol_map.entries[i]);
if (hash == entry->hash) {
- //_sdk_log("psxetc: map lookup [%s = %08x]\n", name, entry->ptr);
+ //_sdk_log("map lookup [%s = %08x]\n", name, entry->ptr);
return entry->ptr;
}
i = _symbol_map.chain[i];
}
- _sdk_log("psxetc: map lookup [%s not found]\n", name);
- _ERROR(RTLD_E_MAP_SYMBOL, 0);
+ _sdk_log("map lookup [%s not found]\n", name);
+ return 0;
}
-void DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) {
+void *DL_SetResolveCallback(void *(*callback)(DLL *, const char *)) {
+ void *old_callback = _dl_resolve_callback;
_dl_resolve_callback = callback;
+
+ return old_callback;
}
/* Library loading and linking API */
-DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
- if (!ptr)
- _ERROR(RTLD_E_DLL_NULL, 0);
-
- DLL *dll = malloc(sizeof(DLL));
- if (!dll) {
- _sdk_log("psxetc: unable to allocate DLL struct\n");
- _ERROR(RTLD_E_DLL_ALLOC, 0);
- }
+DLL *DL_CreateDLL(DLL *dll, void *ptr, size_t size, DL_ResolveMode mode) {
+ if (!dll || !ptr)
+ return 0;
dll->ptr = ptr;
- dll->malloc_ptr = (mode & RTLD_FREE_ON_DESTROY) ? ptr : 0;
+ dll->malloc_ptr = (mode & DL_FREE_ON_DESTROY) ? ptr : 0;
dll->size = size;
- _sdk_log("psxetc: initializing DLL at %08x\n", ptr);
+ _sdk_log("initializing DLL at %08x\n", ptr);
// Interpret the key-value pairs in the .dynamic section to obtain info
// about all the other sections. The pairs are null-terminated, which makes
@@ -363,52 +301,35 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
uint32_t first_got_sym = 0;
for (Elf32_Dyn *dyn = (Elf32_Dyn *) ptr; dyn->d_tag; dyn++) {
- //_sdk_log("psxetc: .dynamic %08x=%08x ", dyn->d_tag, dyn->d_un.d_val);
+ //_sdk_log(".dynamic %08x=%08x ", dyn->d_tag, dyn->d_un.d_val);
switch (dyn->d_tag) {
// Offset of .got section
case DT_PLTGOT:
- //_sdk_log("[PLTGOT]\n");
-
dll->got = (void *) (ptr + dyn->d_un.d_val);
break;
// Offset of .hash section
case DT_HASH:
- //_sdk_log("[HASH]\n");
-
dll->hash = (void *) (ptr + dyn->d_un.d_val);
break;
// Offset of .dynstr (NOT .strtab) section
case DT_STRTAB:
- //_sdk_log("[STRTAB]\n");
-
dll->strtab = (void *) (ptr + dyn->d_un.d_val);
break;
// Offset of .dynsym (NOT .symtab) section
case DT_SYMTAB:
- //_sdk_log("[SYMTAB]\n");
-
dll->symtab = (void *) (ptr + dyn->d_un.d_val);
break;
- // Length of .dynstr section
- //case DT_STRSZ:
- //_sdk_log("[STRSZ]\n");
- //break;
-
// Length of each .dynsym entry
case DT_SYMENT:
- //_sdk_log("[SYMENT]\n");
-
// Only 16-byte symbol table entries are supported.
if (dyn->d_un.d_val != sizeof(Elf32_Sym)) {
- free(dll);
-
- _sdk_log("psxetc: invalid DLL symtab entry size %d\n", dyn->d_un.d_val);
- _ERROR(RTLD_E_DLL_FORMAT, 0);
+ _sdk_log("invalid DLL symtab entry size %d\n", dyn->d_un.d_val);
+ return 0;
}
break;
@@ -418,73 +339,44 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
// Versions other than 1 are unsupported (do they even exist?).
if (dyn->d_un.d_val != 1) {
- free(dll);
-
- _sdk_log("psxetc: invalid DLL version %d\n", dyn->d_un.d_val);
- _ERROR(RTLD_E_DLL_FORMAT, 0);
+ _sdk_log("invalid DLL version %d\n", dyn->d_un.d_val);
+ return 0;
}
break;
// DLL/ABI flags
case DT_MIPS_FLAGS:
- //_sdk_log("[MIPS_FLAGS]\n");
-
// Shortcut pointers (whatever they are) are not supported.
if (dyn->d_un.d_val & RHF_QUICKSTART) {
- free(dll);
-
- _sdk_log("psxetc: invalid DLL flags\n");
- _ERROR(RTLD_E_DLL_FORMAT, 0);
+ _sdk_log("invalid DLL flags\n");
+ return 0;
}
break;
// Number of local (not to resolve) GOT entries
case DT_MIPS_LOCAL_GOTNO:
- //_sdk_log("[MIPS_LOCAL_GOTNO]\n");
-
local_got_len = dyn->d_un.d_val;
break;
// Base address DLL was compiled for
case DT_MIPS_BASE_ADDRESS:
- //_sdk_log("[MIPS_BASE_ADDRESS]\n");
-
// Base addresses other than zero are not supported. It would
// be easy enough to support them, but why?
if (dyn->d_un.d_val) {
- free(dll);
-
- _sdk_log("psxetc: invalid DLL base address %08x\n", dyn->d_un.d_val);
- _ERROR(RTLD_E_DLL_FORMAT, 0);
+ _sdk_log("invalid DLL base address %08x\n", dyn->d_un.d_val);
+ return 0;
}
break;
// Number of symbol table entries
case DT_MIPS_SYMTABNO:
- //_sdk_log("[MIPS_SYMTABNO]\n");
-
dll->symbol_count = dyn->d_un.d_val;
break;
- // Index of first unresolved symbol table entry
- //case DT_MIPS_UNREFEXTNO:
- //_sdk_log("[MIPS_UNREFEXTNO]\n");
- //break;
-
// Index of first symbol table entry which has a matching GOT entry
case DT_MIPS_GOTSYM:
- //_sdk_log("[MIPS_GOTSYM]\n");
-
first_got_sym = dyn->d_un.d_val;
break;
-
- // Number of pages the GOT is split into (does not apply to PS1)
- //case DT_MIPS_HIPAGENO:
- //_sdk_log("[MIPS_HIPAGENO]\n");
- //break;
-
- //default:
- //_sdk_log("[ignored]\n");
}
}
@@ -497,7 +389,7 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
dll->got_length = local_got_len + (dll->symbol_count - first_got_sym) - 2;
_sdk_log(
- "psxetc: %d symbols, %d GOT entries\n",
+ "%d symbols, %d GOT entries\n",
dll->symbol_count, dll->got_length
);
@@ -510,14 +402,14 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
dll->got[0] = (uint32_t) &_dl_resolve_wrapper;
dll->got[1] = (uint32_t) dll;
- for (uint32_t i = 0; i < dll->got_length; i++)
+ for (int i = 0; i < dll->got_length; i++)
dll->got[2 + i] += (uint32_t) ptr;
// Fix addresses in the symbol table.
// TODO: clean this shit up
uint32_t got_offset = first_got_sym;
- for (uint32_t i = 0; i < dll->symbol_count; i++) {
+ for (int i = 0; i < dll->symbol_count; i++) {
Elf32_Sym *sym = &(dll->symtab[i]);
const char *_name = &(dll->strtab[sym->st_name]);
@@ -526,16 +418,16 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
sym->st_value += (uint32_t) ptr;
//_sdk_log(
- //"psxetc: DLL sym: %08x,%08x [%s]\n",
+ //"DLL sym: %08x,%08x [%s]\n",
//sym->st_value, sym->st_size, _name
//);
- // If RTLD_NOW was passed, resolve GOT entries ahead of time by
+ // If DL_NOW was passed, resolve GOT entries ahead of time by
// cross-referencing them with the symbol table.
- if (!(mode & RTLD_NOW))
+ if (!(mode & DL_NOW))
continue;
- for (uint32_t j = got_offset; j < dll->got_length; j++) {
+ for (int j = got_offset; j < dll->got_length; j++) {
if (dll->got[2 + j] != (uint32_t) sym->st_value)
continue;
@@ -550,10 +442,8 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
)) {
dll->got[2 + j] = (uint32_t) _dl_resolve_callback(dll, _name);
- if (!dll->got[2 + j]) {
- free(dll);
- _ERROR(RTLD_E_MAP_SYMBOL, 0);
- }
+ if (!dll->got[2 + j])
+ return 0;
}
break;
@@ -570,7 +460,7 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
// DLL itself.
const uint32_t *ctor_list = DL_GetDLLSymbol(dll, "__CTOR_LIST__");
if (ctor_list) {
- for (uint32_t i = ((uint32_t) ctor_list[0]); i >= 1; i--) {
+ for (int i = ((int) ctor_list[0]); i >= 1; i--) {
void (*ctor)(void) = (void (*)(void)) ctor_list[i];
DL_PRE_CALL(ctor);
ctor();
@@ -580,84 +470,60 @@ DLL *DL_CreateDLL(void *ptr, size_t size, DL_ResolveMode mode) {
return dll;
}
-#ifdef USE_FILE_API
-DLL *DL_LoadDLLFromFile(const char *filename, DL_ResolveMode mode) {
- size_t size;
- char *ptr = _dl_load_file(filename, &size);
- if (!ptr)
- return 0;
-
- DLL *dll = DL_CreateDLL(ptr, size, mode | RTLD_FREE_ON_DESTROY);
- if (!dll)
- free(ptr);
-
- return dll;
-}
-#endif
-
void DL_DestroyDLL(DLL *dll) {
- if (dll == RTLD_DEFAULT)
+ if (!dll)
return;
if (dll->ptr) {
// Call the DLL's global destructors.
const uint32_t *dtor_list = DL_GetDLLSymbol(dll, "__DTOR_LIST__");
if (dtor_list) {
- for (uint32_t i = 0; i < ((uint32_t) dtor_list[0]); i++) {
+ for (int i = 0; i < ((int) dtor_list[0]); i++) {
void (*dtor)(void) = (void (*)(void)) dtor_list[i + 1];
DL_PRE_CALL(dtor);
dtor();
}
}
+
+ dll->ptr = 0;
}
- // If the DLL is associated to a buffer allocated by DL_LoadDLLFromFile(),
- // free that buffer.
- if (dll->malloc_ptr)
+ // If the DLL is associated to a buffer, free that buffer.
+ if (dll->malloc_ptr) {
free(dll->malloc_ptr);
-
- free(dll);
+ dll->malloc_ptr = 0;
+ }
}
void *DL_GetDLLSymbol(const DLL *dll, const char *name) {
- if (dll == RTLD_DEFAULT)
- return DL_GetSymbolByName(name);
- //return _dl_resolve_callback(RTLD_DEFAULT, name);
+ if (!dll)
+ return DL_GetMapSymbol(name);
+ //return _dl_resolve_callback(0, name);
- // https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
uint32_t nbucket = dll->hash[0];
uint32_t nchain = dll->hash[1];
const uint32_t *bucket = &(dll->hash[2]);
const uint32_t *chain = &(dll->hash[2 + nbucket]);
- uint32_t hash_mod = _elf_hash(name) % nbucket;
-
// Go through the hash table's chain until the symbol name matches the one
// provided.
- for (uint32_t i = bucket[hash_mod]; i != 0xffffffff;) {
+ for (int i = bucket[_elf_hash(name) % nbucket]; i != 0xffffffff;) {
if (i >= nchain) {
- _sdk_log("psxetc: DL_GetDLLSymbol() index out of bounds (%d >= %d)\n", i, nchain);
- _ERROR(RTLD_E_HASH_LOOKUP, 0);
+ _sdk_log("DL_GetDLLSymbol() index out of bounds (%d >= %d)\n", i, nchain);
+ return 0;
}
Elf32_Sym *sym = &(dll->symtab[i]);
const char *_name = &(dll->strtab[sym->st_name]);
if (!strcmp(name, _name)) {
- //_sdk_log("psxetc: DLL lookup [%s = %08x]\n", name, sym->st_value);
+ //_sdk_log("DLL lookup [%s = %08x]\n", name, sym->st_value);
return sym->st_value;
}
i = chain[i];
}
- _sdk_log("psxetc: DLL lookup [%s not found]\n", name);
- _ERROR(RTLD_E_DLL_SYMBOL, 0);
-}
-
-DL_Error DL_GetLastError(void) {
- DL_Error last = _error_code;
- _error_code = RTLD_E_NONE;
-
- return last;
+ _sdk_log("DLL lookup [%s not found]\n", name);
+ return 0;
}
diff --git a/libpsn00b/psxetc/interrupts.c b/libpsn00b/psxetc/interrupts.c
index cc9d12c..0d926c4 100644
--- a/libpsn00b/psxetc/interrupts.c
+++ b/libpsn00b/psxetc/interrupts.c
@@ -98,7 +98,7 @@ static void _global_dma_handler(void) {
/* IRQ and DMA handler API */
-void *InterruptCallback(int irq, void (*func)(void)) {
+void *InterruptCallback(IRQ_Channel irq, void (*func)(void)) {
if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS))
return 0;
@@ -115,14 +115,14 @@ void *InterruptCallback(int irq, void (*func)(void)) {
return old_callback;
}
-void *GetInterruptCallback(int irq) {
+void *GetInterruptCallback(IRQ_Channel irq) {
if ((irq < 0) || (irq >= NUM_IRQ_CHANNELS))
return 0;
return _irq_handlers[irq];
}
-void *DMACallback(int dma, void (*func)(void)) {
+void *DMACallback(DMA_Channel dma, void (*func)(void)) {
if ((dma < 0) || (dma >= NUM_DMA_CHANNELS))
return 0;
@@ -150,7 +150,7 @@ void *DMACallback(int dma, void (*func)(void)) {
return old_callback;
}
-void *GetDMACallback(int dma) {
+void *GetDMACallback(DMA_Channel dma) {
if ((dma < 0) || (dma >= NUM_DMA_CHANNELS))
return 0;
diff --git a/libpsn00b/psxgpu/common.c b/libpsn00b/psxgpu/common.c
index a262472..9f45f10 100644
--- a/libpsn00b/psxgpu/common.c
+++ b/libpsn00b/psxgpu/common.c
@@ -4,6 +4,7 @@
*/
#include <stdint.h>
+#include <assert.h>
#include <psxetc.h>
#include <psxapi.h>
#include <psxgpu.h>
@@ -37,6 +38,9 @@ static volatile uint16_t _last_hblank;
/* Private interrupt handlers */
+#define _ENTER_CRITICAL() uint16_t mask = IRQ_MASK; IRQ_MASK = 0; // Saves IRQ_MASK in a new local named `mask`, then writes 0 to it; declares a variable, so usable once per scope.
+#define _EXIT_CRITICAL() IRQ_MASK = mask; // Writes back the `mask` local created by _ENTER_CRITICAL() in the same scope.
+
static void _vblank_handler(void) {
_vblank_counter++;
@@ -50,9 +54,10 @@ static void _gpu_dma_handler(void) {
__asm__ volatile("");
if (--_queue_length) {
- volatile QueueEntry *entry = &_draw_queue[_queue_head++];
- _queue_head %= QUEUE_LENGTH;
+ int head = _queue_head;
+ _queue_head = (head + 1) % QUEUE_LENGTH;
+ volatile QueueEntry *entry = &_draw_queue[head];
entry->func(entry->arg1, entry->arg2, entry->arg3);
} else {
GPU_GP1 = 0x04000000; // Disable DMA request
@@ -75,7 +80,7 @@ void ResetGraph(int mode) {
_gpu_video_mode = (GPU_GP1 >> 20) & 1;
ExitCriticalSection();
- _sdk_log("psxgpu: setup done, default mode is %s\n", _gpu_video_mode ? "PAL" : "NTSC");
+ _sdk_log("setup done, default mode is %s\n", _gpu_video_mode ? "PAL" : "NTSC");
}
if (mode == 3) {
@@ -113,8 +118,7 @@ static void _default_vsync_halt(void) {
return;
}
- _sdk_log("psxgpu: VSync() timeout\n");
- _sdk_dump_log();
+ _sdk_log("VSync() timeout\n");
ChangeClearPAD(0);
ChangeClearRCnt(3, 0);
}
@@ -130,7 +134,6 @@ int VSync(int mode) {
// Wait for at least one vertical blank event to occur.
do {
- _sdk_dump_log();
_vsync_halt_func();
// If interlaced mode is enabled, wait until the GPU starts displaying
@@ -146,19 +149,22 @@ int VSync(int mode) {
}
void *VSyncHaltFunction(void (*func)(void)) {
+ //_ENTER_CRITICAL();
+
void *old_callback = _vsync_halt_func;
_vsync_halt_func = func;
+ //_EXIT_CRITICAL();
return old_callback;
}
void *VSyncCallback(void (*func)(void)) {
- EnterCriticalSection();
+ _ENTER_CRITICAL();
void *old_callback = _vsync_callback;
_vsync_callback = func;
- ExitCriticalSection();
+ _EXIT_CRITICAL();
return old_callback;
}
@@ -177,37 +183,36 @@ int EnqueueDrawOp(
// to checking if DMA is busy; disabling them afterwards would create a
// race condition where the DMA transfer could end while interrupts are
// being disabled. Interrupts are disabled through the IRQ_MASK register
- // rather than by calling EnterCriticalSection() for performance reasons.
- uint16_t mask = IRQ_MASK;
- IRQ_MASK = 0;
-
- if (_queue_length) {
- if (_queue_length >= QUEUE_LENGTH) {
- IRQ_MASK = mask;
- _sdk_log("psxgpu: draw queue overflow, dropping commands\n");
- return -1;
- }
+ // rather than via syscalls for performance reasons.
+ _ENTER_CRITICAL();
+ int length = _queue_length;
- int length = _queue_length;
- _queue_length = length + 1;
+ if (!length) {
+ _queue_length = 1;
+ _EXIT_CRITICAL();
- volatile QueueEntry *entry = &_draw_queue[_queue_tail++];
- _queue_tail %= QUEUE_LENGTH;
-
- entry->func = func;
- entry->arg1 = arg1;
- entry->arg2 = arg2;
- entry->arg3 = arg3;
+ func(arg1, arg2, arg3);
+ return 0;
+ }
+ if (length >= QUEUE_LENGTH) {
+ _EXIT_CRITICAL();
- IRQ_MASK = mask;
- return length;
+ _sdk_log("draw queue overflow, dropping commands\n");
+ return -1;
}
- _queue_length = 1;
+ int tail = _queue_tail;
+ _queue_tail = (tail + 1) % QUEUE_LENGTH;
+ _queue_length = length + 1;
+
+ volatile QueueEntry *entry = &_draw_queue[tail];
+ entry->func = func;
+ entry->arg1 = arg1;
+ entry->arg2 = arg2;
+ entry->arg3 = arg3;
- IRQ_MASK = mask;
- func(arg1, arg2, arg3);
- return 0;
+ _EXIT_CRITICAL();
+ return length;
}
int DrawSync(int mode) {
@@ -230,20 +235,19 @@ int DrawSync(int mode) {
while (!(GPU_GP1 & (1 << 26)))
__asm__ volatile("");
} else {
- _sdk_log("psxgpu: DrawSync() timeout\n");
- _sdk_dump_log();
+ _sdk_log("DrawSync() timeout\n");
}
return _queue_length;
}
void *DrawSyncCallback(void (*func)(void)) {
- EnterCriticalSection();
+ _ENTER_CRITICAL();
void *old_callback = _drawsync_callback;
_drawsync_callback = func;
- ExitCriticalSection();
+ _EXIT_CRITICAL();
return old_callback;
}
diff --git a/libpsn00b/psxgpu/image.c b/libpsn00b/psxgpu/image.c
index 968dde5..bbdb7c8 100644
--- a/libpsn00b/psxgpu/image.c
+++ b/libpsn00b/psxgpu/image.c
@@ -4,7 +4,7 @@
*/
#include <stdint.h>
-#include <psxetc.h>
+#include <assert.h>
#include <psxgpu.h>
#include <hwregs_c.h>
@@ -15,11 +15,11 @@
static void _dma_transfer(const RECT *rect, uint32_t *data, int write) {
size_t length = rect->w * rect->h;
if (length % 2)
- _sdk_log("psxgpu: can't transfer an odd number of pixels\n");
+ _sdk_log("can't transfer an odd number of pixels\n");
length /= 2;
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
- _sdk_log("psxgpu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
+ _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
length += DMA_CHUNK_LENGTH - 1;
}
diff --git a/libpsn00b/psxgte/applymatrixlv.s b/libpsn00b/psxgte/applymatrixlv.s
deleted file mode 100644
index 3180d0f..0000000
--- a/libpsn00b/psxgte/applymatrixlv.s
+++ /dev/null
@@ -1,40 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global ApplyMatrixLV
-.type ApplyMatrixLV, @function
-ApplyMatrixLV:
-
- # Load matrix to GTE
- lw $t0, 0($a0)
- lw $t1, 4($a0)
- ctc2 $t0, $0
- ctc2 $t1, $1
- lw $t0, 8($a0)
- lw $t1, 12($a0)
- lhu $t2, 16($a0)
- ctc2 $t0, $2
- ctc2 $t1, $3
- ctc2 $t2, $4
-
- lw $t0, 0($a1)
- lw $t1, 4($a1)
- mtc2 $t0, C2_IR1
- lw $t0, 8($a1)
- mtc2 $t1, C2_IR2
- mtc2 $t0, C2_IR3
-
- nMVMVA(1, 0, 3, 3, 0)
-
- swc2 C2_IR1, 0($a2)
- swc2 C2_IR2, 4($a2)
- swc2 C2_IR3, 8($a2)
-
- jr $ra
- move $v0, $a2
- \ No newline at end of file
diff --git a/libpsn00b/psxgte/compmatrixlv.s b/libpsn00b/psxgte/compmatrixlv.s
deleted file mode 100644
index 2908eb9..0000000
--- a/libpsn00b/psxgte/compmatrixlv.s
+++ /dev/null
@@ -1,100 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.set MATRIX_r11r12, 0
-.set MATRIX_r13r21, 4
-.set MATRIX_r22r23, 8
-.set MATRIX_r31r32, 12
-.set MATRIX_r33, 16
-.set MATRIX_trx, 20
-.set MATRIX_try, 24
-.set MATRIX_trz, 28
-
-
-.global CompMatrixLV
-.type CompMatrixLV, @function
-CompMatrixLV:
-
- # Load matrix v0 to GTE
- lw $t0, MATRIX_r11r12($a0)
- lw $t1, MATRIX_r13r21($a0)
- ctc2 $t0, C2_R11R12
- ctc2 $t1, C2_R13R21
- lw $t0, MATRIX_r22r23($a0)
- lw $t1, MATRIX_r31r32($a0)
- lhu $t2, MATRIX_r33($a0)
- ctc2 $t0, C2_R22R23
- lw $t0, MATRIX_trx($a0)
- ctc2 $t1, C2_R31R32
- lw $t1, MATRIX_try($a0)
- ctc2 $t2, C2_R33
- lw $t2, MATRIX_trz($a0)
- ctc2 $t0, C2_TRX
- ctc2 $t1, C2_TRY
- ctc2 $t2, C2_TRZ
-
- lw $t0, MATRIX_trx($a1)
- lw $t1, MATRIX_try($a1)
- mtc2 $t0, C2_IR1
- lw $t0, MATRIX_trz($a1)
- mtc2 $t1, C2_IR2
- mtc2 $t0, C2_IR3
-
- nMVMVA(1, 0, 3, 0, 0)
-
- swc2 C2_IR1, MATRIX_trx($a2)
- swc2 C2_IR2, MATRIX_try($a2)
- swc2 C2_IR3, MATRIX_trz($a2)
-
- lhu $t1, 2*(0+(3*1))($a1) # Load values for first
- lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(0+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- lhu $t1, 2*(1+(3*1))($a1) # Load values for second
- lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
- MVMVA(1, 0, 0, 3, 0) # First multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(1+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of first
- mfc2 $t1, C2_IR2
- sh $t0, 2*(0+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(0+(3*1))($a2)
- sh $t0, 2*(0+(3*2))($a2)
-
- lhu $t1, 2*(2+(3*1))($a1) # Load values for third
- lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
- MVMVA(1, 0, 0, 3, 0) # Second multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(2+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of second
- mfc2 $t1, C2_IR2
- sh $t0, 2*(1+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(1+(3*1))($a2)
- sh $t0, 2*(1+(3*2))($a2)
- MVMVA(1, 0, 0, 3, 0) # Third multiply
-
- mfc2 $t0, C2_IR1 # Store results of third
- mfc2 $t1, C2_IR2
- sh $t0, 2*(2+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(2+(3*1))($a2)
- sh $t0, 2*(2+(3*2))($a2)
-
- jr $ra
- move $v0, $a2 \ No newline at end of file
diff --git a/libpsn00b/psxgte/hirotmatrix.c b/libpsn00b/psxgte/hirotmatrix.c
deleted file mode 100644
index 56516b0..0000000
--- a/libpsn00b/psxgte/hirotmatrix.c
+++ /dev/null
@@ -1,35 +0,0 @@
-#include <psxgte.h>
-
-MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) {
-
- short s[3],c[3];
- MATRIX tm[3];
-
- s[0] = hisin(r->vx); s[1] = hisin(r->vy); s[2] = hisin(r->vz);
- c[0] = hicos(r->vx); c[1] = hicos(r->vy); c[2] = hicos(r->vz);
-
- // mX
- m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0;
- m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0];
- m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0];
-
- // mY
- tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1];
- tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0;
- tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1];
-
- // mZ
- tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0;
- tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0;
- tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE;
-
- PushMatrix();
-
- MulMatrix0( m, &tm[0], &tm[2] );
- MulMatrix0( &tm[2], &tm[1], m );
-
- PopMatrix();
-
- return m;
-
-}
diff --git a/libpsn00b/psxgte/hisin.c b/libpsn00b/psxgte/hisin.c
deleted file mode 100644
index 68d5d28..0000000
--- a/libpsn00b/psxgte/hisin.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Based on isin_S4 implementation from coranac:
- * http://www.coranac.com/2009/07/sines/
- *
- */
-
-#define qN 15
-#define qA 12
-#define B 19900
-#define C 3516
-
-int hisin(int x) {
-
- int c, x2, y;
-
- c= x<<(30-qN); // Semi-circle info into carry.
- x -= 1<<qN; // sine -> cosine calc
-
- x= x<<(31-qN); // Mask with PI
- x= x>>(31-qN); // Note: SIGNED shift! (to qN)
- x= x*x>>(2*qN-14); // x=x^2 To Q14
-
- y= B - (x*C>>14); // B - x^2*C
- y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C)
-
- return c>=0 ? y : -y;
-
-}
-
-int hicos(int x) {
-
- return hisin( x+32768 );
-
-}
diff --git a/libpsn00b/psxgte/initgeom.s b/libpsn00b/psxgte/initgeom.s
index d004ecc..6d1e36a 100644
--- a/libpsn00b/psxgte/initgeom.s
+++ b/libpsn00b/psxgte/initgeom.s
@@ -2,9 +2,7 @@
.include "gtereg.inc"
-.section .text
-
-
+.section .text.InitGeom
.global InitGeom
.type InitGeom, @function
InitGeom:
diff --git a/libpsn00b/psxgte/isin.c b/libpsn00b/psxgte/isin.c
index 79e2970..a0397bd 100644
--- a/libpsn00b/psxgte/isin.c
+++ b/libpsn00b/psxgte/isin.c
@@ -1,34 +1,46 @@
-/* Based on isin_S4 implementation from coranac:
- * http://www.coranac.com/2009/07/sines/
+/*
+ * PSn00bSDK (incomplete) trigonometry library
+ * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
*
+ * Based on isin_S4 implementation from coranac:
+ * https://www.coranac.com/2009/07/sines
*/
-#define qN 10
-#define qA 12
-#define B 19900
-#define C 3516
+#define qN_l 10
+#define qN_h 15
+#define qA 12
+#define B 19900
+#define C 3516
-int isin(int x) {
-
- int c, x2, y;
+static inline int _isin(int qN, int x) {
+ int c, x2, y;
- c= x<<(30-qN); // Semi-circle info into carry.
- x -= 1<<qN; // sine -> cosine calc
+ c = x << (30 - qN); // Semi-circle info into carry.
+ x -= 1 << qN; // sine -> cosine calc
- x= x<<(31-qN); // Mask with PI
- x= x>>(31-qN); // Note: SIGNED shift! (to qN)
+ x <<= (31 - qN); // Mask with PI
+ x >>= (31 - qN); // Note: SIGNED shift! (to qN)
+ x *= x;
+ x >>= (2 * qN - 14); // x=x^2 To Q14
- x= x*x>>(2*qN-14); // x=x^2 To Q14
+ y = B - (x * C >> 14); // B - x^2*C
+ y = (1 << qA) - (x * y >> 16); // A - x^2*(B-x^2*C)
- y= B - (x*C>>14); // B - x^2*C
- y= (1<<qA)-(x*y>>16); // A - x^2*(B-x^2*C)
-
- return c>=0 ? y : -y;
+ return (c >= 0) ? y : (-y);
+}
+int isin(int x) { // Fixed-point sine: forwards to _isin() with qN_l (10), the same step icos() adds to turn sine into cosine.
+ return _isin(qN_l, x);
}
int icos(int x) {
+ return _isin(qN_l, x + (1 << qN_l)); // Cosine computed as the sine of the angle advanced by 1 << qN_l.
+}
- return isin( x+1024 );
+int hisin(int x) { // Higher angular resolution variant of isin(): same _isin() helper, but with qN_h (15) instead of qN_l (10).
+ return _isin(qN_h, x);
+}
+int hicos(int x) { // Cosine counterpart of hisin().
+ return _isin(qN_h, x + (1 << qN_h)); // Sine of the angle advanced by 1 << qN_h.
}
diff --git a/libpsn00b/psxgte/matrix.c b/libpsn00b/psxgte/matrix.c
index b4dea12..805fd1b 100644
--- a/libpsn00b/psxgte/matrix.c
+++ b/libpsn00b/psxgte/matrix.c
@@ -1,7 +1,6 @@
#include <psxgte.h>
MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) {
-
short s[3],c[3];
MATRIX tm[3];
@@ -24,22 +23,47 @@ MATRIX *RotMatrix(SVECTOR *r, MATRIX *m) {
tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE;
PushMatrix();
-
MulMatrix0( m, &tm[0], &tm[2] );
MulMatrix0( &tm[2], &tm[1], m );
-
PopMatrix();
return m;
+}
+
+MATRIX *HiRotMatrix(VECTOR *r, MATRIX *m) { // Fills m->m with a rotation built from the angles r->vx, r->vy, r->vz via hisin()/hicos(); returns m.
+ short s[3],c[3]; // sines and cosines of the X, Y and Z angles
+ MATRIX tm[3]; // tm[0] = Y rotation, tm[1] = Z rotation, tm[2] = intermediate product
+
+ s[0] = hisin(r->vx); s[1] = hisin(r->vy); s[2] = hisin(r->vz);
+ c[0] = hicos(r->vx); c[1] = hicos(r->vy); c[2] = hicos(r->vz);
+ // mX: rotation about the X axis, written directly into m
+ m->m[0][0] = ONE; m->m[0][1] = 0; m->m[0][2] = 0;
+ m->m[1][0] = 0; m->m[1][1] = c[0]; m->m[1][2] = -s[0];
+ m->m[2][0] = 0; m->m[2][1] = s[0]; m->m[2][2] = c[0];
+
+ // mY: rotation about the Y axis
+ tm[0].m[0][0] = c[1]; tm[0].m[0][1] = 0; tm[0].m[0][2] = s[1];
+ tm[0].m[1][0] = 0; tm[0].m[1][1] = ONE; tm[0].m[1][2] = 0;
+ tm[0].m[2][0] = -s[1]; tm[0].m[2][1] = 0; tm[0].m[2][2] = c[1];
+
+ // mZ: rotation about the Z axis
+ tm[1].m[0][0] = c[2]; tm[1].m[0][1] = -s[2]; tm[1].m[0][2] = 0;
+ tm[1].m[1][0] = s[2]; tm[1].m[1][1] = c[2]; tm[1].m[1][2] = 0;
+ tm[1].m[2][0] = 0; tm[1].m[2][1] = 0; tm[1].m[2][2] = ONE;
+
+ PushMatrix(); // MulMatrix0() loads its first operand into GTE control registers, so save them first
+ MulMatrix0( m, &tm[0], &tm[2] ); // tm[2] = mX combined with mY
+ MulMatrix0( &tm[2], &tm[1], m ); // m = that result combined with mZ
+ PopMatrix(); // restore the registers saved above
+
+ return m;
}
MATRIX *TransMatrix(MATRIX *m, VECTOR *r) {
-
m->t[0] = r->vx;
m->t[1] = r->vy;
m->t[2] = r->vz;
return m;
-
}
diff --git a/libpsn00b/psxgte/matrix.s b/libpsn00b/psxgte/matrix.s
new file mode 100644
index 0000000..9de0ccd
--- /dev/null
+++ b/libpsn00b/psxgte/matrix.s
@@ -0,0 +1,439 @@
+.set noreorder
+
+.include "gtereg.inc"
+.include "inline_s.inc"
+
+.set MATRIX_r11r12, 0 # Byte offsets used below as load/store displacements on MATRIX pointers.
+.set MATRIX_r13r21, 4
+.set MATRIX_r22r23, 8
+.set MATRIX_r31r32, 12
+.set MATRIX_r33, 16 # read with lhu (a single halfword) in CompMatrixLV
+.set MATRIX_trx, 20 # translation words, accessed with lw/swc2
+.set MATRIX_try, 24
+.set MATRIX_trz, 28
+
+.section .text.ApplyMatrixLV
+.global ApplyMatrixLV
+.type ApplyMatrixLV, @function
+ApplyMatrixLV: # In: $a0 = matrix, $a1 = input vector (3 words), $a2 = output vector (3 words). Returns $a2 in $v0.
+ # Load matrix to GTE
+ lw $t0, 0($a0)
+ lw $t1, 4($a0)
+ ctc2 $t0, $0 # GTE control regs $0-$4; presumably the ones CompMatrixLV names C2_R11R12..C2_R33 -- confirm in gtereg.inc
+ ctc2 $t1, $1
+ lw $t0, 8($a0)
+ lw $t1, 12($a0)
+ lhu $t2, 16($a0) # fifth entry is read as a zero-extended halfword
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ lw $t0, 0($a1) # the three input words go to IR1..IR3
+ lw $t1, 4($a1)
+ mtc2 $t0, C2_IR1
+ lw $t0, 8($a1)
+ mtc2 $t1, C2_IR2
+ mtc2 $t0, C2_IR3
+
+ nMVMVA(1, 0, 3, 3, 0) # macro from inline_s.inc; NOTE(review): argument meanings are not visible here
+
+ swc2 C2_IR1, 0($a2) # store IR1..IR3 from the GTE directly into the output vector
+ swc2 C2_IR2, 4($a2)
+ swc2 C2_IR3, 8($a2)
+
+ jr $ra
+ move $v0, $a2 # branch delay slot (.set noreorder): return the output pointer
+
+.section .text.CompMatrixLV
+.global CompMatrixLV
+.type CompMatrixLV, @function
+CompMatrixLV: # In: $a0 = matrix loaded into the GTE (rotation and translation), $a1 = second matrix, $a2 = output matrix. Returns $a2 in $v0.
+ # Load matrix v0 to GTE
+ lw $t0, MATRIX_r11r12($a0)
+ lw $t1, MATRIX_r13r21($a0)
+ ctc2 $t0, C2_R11R12
+ ctc2 $t1, C2_R13R21
+ lw $t0, MATRIX_r22r23($a0)
+ lw $t1, MATRIX_r31r32($a0)
+ lhu $t2, MATRIX_r33($a0)
+ ctc2 $t0, C2_R22R23
+ lw $t0, MATRIX_trx($a0)
+ ctc2 $t1, C2_R31R32
+ lw $t1, MATRIX_try($a0)
+ ctc2 $t2, C2_R33
+ lw $t2, MATRIX_trz($a0)
+ ctc2 $t0, C2_TRX
+ ctc2 $t1, C2_TRY
+ ctc2 $t2, C2_TRZ
+
+ lw $t0, MATRIX_trx($a1) # translation of the second matrix goes to IR1..IR3
+ lw $t1, MATRIX_try($a1)
+ mtc2 $t0, C2_IR1
+ lw $t0, MATRIX_trz($a1)
+ mtc2 $t1, C2_IR2
+ mtc2 $t0, C2_IR3
+
+ nMVMVA(1, 0, 3, 0, 0) # macro from inline_s.inc; NOTE(review): argument meanings are not visible here
+
+ swc2 C2_IR1, MATRIX_trx($a2) # resulting IR1..IR3 become the output translation
+ swc2 C2_IR2, MATRIX_try($a2)
+ swc2 C2_IR3, MATRIX_trz($a2)
+
+ lhu $t1, 2*(0+(3*1))($a1) # Load values for first column
+ lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
+ sll $t1, 16 # pack the row-1 element into the upper halfword
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ lhu $t1, 2*(1+(3*1))($a1) # Load values for second column
+ lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a2)
+ sh $t0, 2*(0+(3*2))($a2)
+
+ lhu $t1, 2*(2+(3*1))($a1) # Load values for third column
+ lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a2)
+ sh $t0, 2*(1+(3*2))($a2)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a2)
+ sh $t0, 2*(2+(3*2))($a2)
+
+ jr $ra
+ move $v0, $a2 # branch delay slot (.set noreorder): return the output pointer
+
+.section .text.MulMatrix
+.global MulMatrix
+.type MulMatrix, @function
+MulMatrix: # In: $a0 = matrix read column by column and overwritten with the result, $a1 = matrix loaded into the GTE. Returns $a0 in $v0.
+ # Load m1 to GTE
+ lw $t0, 0($a1)
+ lw $t1, 4($a1)
+ ctc2 $t0, $0 # GTE control regs $0-$4; presumably the ones CompMatrixLV names C2_R11R12..C2_R33 -- confirm in gtereg.inc
+ ctc2 $t1, $1
+ lw $t0, 8($a1)
+ lw $t1, 12($a1)
+ lhu $t2, 16($a1)
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ lhu $t1, 2*(0+(3*1))($a0) # Load values for first column
+ lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31
+ sll $t1, 16 # pack the row-1 element into the upper halfword
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ lhu $t1, 2*(1+(3*1))($a0) # Load values for second column
+ lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first column (back into $a0)
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a0)
+ sh $t0, 2*(0+(3*2))($a0)
+
+ lhu $t1, 2*(2+(3*1))($a0) # Load values for third column
+ lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a0)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a0)
+ sh $t0, 2*(1+(3*2))($a0)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a0)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a0)
+ sh $t0, 2*(2+(3*2))($a0)
+
+ jr $ra
+ move $v0, $a0 # branch delay slot (.set noreorder): return the in-place result pointer
+
+.section .text.MulMatrix0
+.global MulMatrix0
+.type MulMatrix0, @function
+MulMatrix0: # In: $a0 = matrix loaded into the GTE, $a1 = matrix read column by column, $a2 = output matrix. Returns $a2 in $v0.
+ # Load the matrix at $a0 (first argument) to GTE
+ lw $t0, 0($a0)
+ lw $t1, 4($a0)
+ ctc2 $t0, $0 # GTE control regs $0-$4; presumably the ones CompMatrixLV names C2_R11R12..C2_R33 -- confirm in gtereg.inc
+ ctc2 $t1, $1
+ lw $t0, 8($a0)
+ lw $t1, 12($a0)
+ lhu $t2, 16($a0)
+ ctc2 $t0, $2
+ ctc2 $t1, $3
+ ctc2 $t2, $4
+
+ lhu $t1, 2*(0+(3*1))($a1) # Load values for first column
+ lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
+ sll $t1, 16 # pack the row-1 element into the upper halfword
+ or $t0, $t1
+ lhu $t1, 2*(0+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ lhu $t1, 2*(1+(3*1))($a1) # Load values for second column
+ lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
+ MVMVA(1, 0, 0, 3, 0) # First multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(1+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of first column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(0+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(0+(3*1))($a2)
+ sh $t0, 2*(0+(3*2))($a2)
+
+ lhu $t1, 2*(2+(3*1))($a1) # Load values for third column
+ lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
+ MVMVA(1, 0, 0, 3, 0) # Second multiply
+ sll $t1, 16
+ or $t0, $t1
+ lhu $t1, 2*(2+(3*2))($a1)
+ mtc2 $t0, C2_VXY0
+ mtc2 $t1, C2_VZ0
+
+ mfc2 $t0, C2_IR1 # Store results of second column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(1+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(1+(3*1))($a2)
+ sh $t0, 2*(1+(3*2))($a2)
+ MVMVA(1, 0, 0, 3, 0) # Third multiply
+
+ mfc2 $t0, C2_IR1 # Store results of third column
+ mfc2 $t1, C2_IR2
+ sh $t0, 2*(2+(3*0))($a2)
+ mfc2 $t0, C2_IR3
+ sh $t1, 2*(2+(3*1))($a2)
+ sh $t0, 2*(2+(3*2))($a2)
+
+ jr $ra
+ move $v0, $a2 # branch delay slot (.set noreorder): return the output pointer
+
+.section .text.ScaleMatrix
+.global ScaleMatrix
+.type ScaleMatrix, @function
+ScaleMatrix: # In: $a0 = matrix modified in place, $a1 = three scale words (X, Y, Z). Each word is applied to one column. Returns $a0 in $v0.
+ lwc2 C2_IR0, 0($a1) # X
+
+ lh $v0, 2*(0+(3*0))($a0) # column 0: elements [0][0], [1][0], [2][0]
+ lh $v1, 2*(0+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(0+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1) # NOTE(review): presumably multiplies IR1..IR3 by IR0 -- confirm against GTE docs / inline_s.inc
+
+ mfc2 $v0, C2_IR1 # write the results back over the same elements
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(0+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(0+(3*1))($a0)
+ sh $v0, 2*(0+(3*2))($a0)
+
+ lwc2 C2_IR0, 4($a1) # Y
+
+ lh $v0, 2*(1+(3*0))($a0) # column 1
+ lh $v1, 2*(1+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(1+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)
+
+ mfc2 $v0, C2_IR1
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(1+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(1+(3*1))($a0)
+ sh $v0, 2*(1+(3*2))($a0)
+
+ lwc2 C2_IR0, 8($a1) # Z
+
+ lh $v0, 2*(2+(3*0))($a0) # column 2
+ lh $v1, 2*(2+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(2+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)
+
+ mfc2 $v0, C2_IR1
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(2+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(2+(3*1))($a0)
+ sh $v0, 2*(2+(3*2))($a0)
+
+ jr $ra
+ move $v0, $a0 # branch delay slot (.set noreorder): return the matrix pointer
+
+.section .text.ScaleMatrixL
+.global ScaleMatrixL
+.type ScaleMatrixL, @function
+ScaleMatrixL: # In: $a0 = matrix modified in place, $a1 = three scale words (X, Y, Z). Each word is applied to one row (ScaleMatrix uses columns). Returns $a0 in $v0.
+ lwc2 C2_IR0, 0($a1) # X
+
+ lh $v0, 2*(0+(3*0))($a0) # row 0: elements [0][0], [0][1], [0][2]
+ lh $v1, 2*(1+(3*0))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(2+(3*0))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1) # NOTE(review): presumably multiplies IR1..IR3 by IR0 -- confirm against GTE docs / inline_s.inc
+
+ mfc2 $v0, C2_IR1 # write the results back over the same elements
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(0+(3*0))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(1+(3*0))($a0)
+ sh $v0, 2*(2+(3*0))($a0)
+
+ lwc2 C2_IR0, 4($a1) # Y
+
+ lh $v0, 2*(0+(3*1))($a0) # row 1
+ lh $v1, 2*(1+(3*1))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(2+(3*1))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)
+
+ mfc2 $v0, C2_IR1
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(0+(3*1))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(1+(3*1))($a0)
+ sh $v0, 2*(2+(3*1))($a0)
+
+ lwc2 C2_IR0, 8($a1) # Z
+
+ lh $v0, 2*(0+(3*2))($a0) # row 2
+ lh $v1, 2*(1+(3*2))($a0)
+ mtc2 $v0, C2_IR1
+ lh $v0, 2*(2+(3*2))($a0)
+ mtc2 $v1, C2_IR2
+ mtc2 $v0, C2_IR3
+
+ nGPF(1)
+
+ mfc2 $v0, C2_IR1
+ mfc2 $v1, C2_IR2
+ sh $v0, 2*(0+(3*2))($a0)
+ mfc2 $v0, C2_IR3
+ sh $v1, 2*(1+(3*2))($a0)
+ sh $v0, 2*(2+(3*2))($a0)
+
+ jr $ra
+ move $v0, $a0 # branch delay slot (.set noreorder): return the matrix pointer
+
+.section .text.PushMatrix
+.global PushMatrix
+.type PushMatrix, @function
+PushMatrix: # Copies eight GTE control registers (R11R12..R33 and TRX/TRY/TRZ) into _matrix_stack. Takes no arguments.
+ la $a0, _matrix_stack # always the same address, so only one saved matrix is kept at a time
+ cfc2 $v0, C2_R11R12
+ cfc2 $v1, C2_R13R21
+ sw $v0, 0($a0)
+ cfc2 $v0, C2_R22R23
+ sw $v1, 4($a0)
+ sw $v0, 8($a0)
+ cfc2 $v0, C2_R31R32
+ cfc2 $v1, C2_R33
+ sw $v0, 12($a0)
+ sw $v1, 16($a0)
+ cfc2 $v0, C2_TRX
+ cfc2 $v1, C2_TRY
+ sw $v0, 20($a0)
+ cfc2 $v0, C2_TRZ
+ sw $v1, 24($a0)
+ jr $ra
+ sw $v0, 28($a0) # branch delay slot (.set noreorder): the final store still executes
+
+.section .text.PopMatrix
+.global PopMatrix
+.type PopMatrix, @function
+PopMatrix: # Reloads the eight GTE control registers from _matrix_stack, in the layout PushMatrix writes. Takes no arguments.
+ la $a0, _matrix_stack
+ lw $v0, 0($a0)
+ lw $v1, 4($a0)
+ ctc2 $v0, C2_R11R12
+ ctc2 $v1, C2_R13R21
+ lw $v0, 8($a0)
+ lw $v1, 12($a0)
+ ctc2 $v0, C2_R22R23
+ lw $v0, 16($a0)
+ ctc2 $v1, C2_R31R32
+ ctc2 $v0, C2_R33
+ lw $v0, 20($a0)
+ lw $v1, 24($a0)
+ ctc2 $v0, C2_TRX
+ lw $v0, 28($a0)
+ ctc2 $v1, C2_TRY
+ ctc2 $v0, C2_TRZ
+ jr $ra
+ nop # branch delay slot left empty
+
+.section .data._matrix_stack
+.type _matrix_stack, @object
+_matrix_stack: # save area shared by PushMatrix and PopMatrix; no .global directive, so it stays local to this file
+ .word 0, 0, 0, 0, 0, 0, 0, 0 # eight words, matching the offsets 0..28 those routines access
diff --git a/libpsn00b/psxgte/mulmatrix.s b/libpsn00b/psxgte/mulmatrix.s
deleted file mode 100644
index 08c79c2..0000000
--- a/libpsn00b/psxgte/mulmatrix.s
+++ /dev/null
@@ -1,74 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global MulMatrix
-.type MulMatrix, @function
-MulMatrix:
-
- # Load m1 to GTE
- lw $t0, 0($a1)
- lw $t1, 4($a1)
- ctc2 $t0, $0
- ctc2 $t1, $1
- lw $t0, 8($a1)
- lw $t1, 12($a1)
- lhu $t2, 16($a1)
- ctc2 $t0, $2
- ctc2 $t1, $3
- ctc2 $t2, $4
-
- lhu $t1, 2*(0+(3*1))($a0) # Load values for first
- lhu $t0, 2*(0+(3*0))($a0) # R11 R21 R31
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(0+(3*2))($a0)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- lhu $t1, 2*(1+(3*1))($a0) # Load values for second
- lhu $t0, 2*(1+(3*0))($a0) # R12 R22 R32
- MVMVA(1, 0, 0, 3, 0) # First multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(1+(3*2))($a0)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of first
- mfc2 $t1, C2_IR2
- sh $t0, 2*(0+(3*0))($a0)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(0+(3*1))($a0)
- sh $t0, 2*(0+(3*2))($a0)
-
- lhu $t1, 2*(2+(3*1))($a0) # Load values for third
- lhu $t0, 2*(2+(3*0))($a0) # R13 R23 R33
- MVMVA(1, 0, 0, 3, 0) # Second multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(2+(3*2))($a0)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of second
- mfc2 $t1, C2_IR2
- sh $t0, 2*(1+(3*0))($a0)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(1+(3*1))($a0)
- sh $t0, 2*(1+(3*2))($a0)
- MVMVA(1, 0, 0, 3, 0) # Third multiply
-
- mfc2 $t0, C2_IR1 # Store results of third
- mfc2 $t1, C2_IR2
- sh $t0, 2*(2+(3*0))($a0)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(2+(3*1))($a0)
- sh $t0, 2*(2+(3*2))($a0)
-
- jr $ra
- move $v0, $a0
diff --git a/libpsn00b/psxgte/mulmatrix0.s b/libpsn00b/psxgte/mulmatrix0.s
deleted file mode 100644
index c2fd859..0000000
--- a/libpsn00b/psxgte/mulmatrix0.s
+++ /dev/null
@@ -1,74 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global MulMatrix0
-.type MulMatrix0, @function
-MulMatrix0:
-
- # Load m1 to GTE
- lw $t0, 0($a0)
- lw $t1, 4($a0)
- ctc2 $t0, $0
- ctc2 $t1, $1
- lw $t0, 8($a0)
- lw $t1, 12($a0)
- lhu $t2, 16($a0)
- ctc2 $t0, $2
- ctc2 $t1, $3
- ctc2 $t2, $4
-
- lhu $t1, 2*(0+(3*1))($a1) # Load values for first
- lhu $t0, 2*(0+(3*0))($a1) # R11 R21 R31
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(0+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- lhu $t1, 2*(1+(3*1))($a1) # Load values for second
- lhu $t0, 2*(1+(3*0))($a1) # R12 R22 R32
- MVMVA(1, 0, 0, 3, 0) # First multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(1+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of first
- mfc2 $t1, C2_IR2
- sh $t0, 2*(0+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(0+(3*1))($a2)
- sh $t0, 2*(0+(3*2))($a2)
-
- lhu $t1, 2*(2+(3*1))($a1) # Load values for third
- lhu $t0, 2*(2+(3*0))($a1) # R13 R23 R33
- MVMVA(1, 0, 0, 3, 0) # Second multiply
- sll $t1, 16
- or $t0, $t1
- lhu $t1, 2*(2+(3*2))($a1)
- mtc2 $t0, C2_VXY0
- mtc2 $t1, C2_VZ0
-
- mfc2 $t0, C2_IR1 # Store results of second
- mfc2 $t1, C2_IR2
- sh $t0, 2*(1+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(1+(3*1))($a2)
- sh $t0, 2*(1+(3*2))($a2)
- MVMVA(1, 0, 0, 3, 0) # Third multiply
-
- mfc2 $t0, C2_IR1 # Store results of third
- mfc2 $t1, C2_IR2
- sh $t0, 2*(2+(3*0))($a2)
- mfc2 $t0, C2_IR3
- sh $t1, 2*(2+(3*1))($a2)
- sh $t0, 2*(2+(3*2))($a2)
-
- jr $ra
- move $v0, $a2
diff --git a/libpsn00b/psxgte/pushpopmatrix.s b/libpsn00b/psxgte/pushpopmatrix.s
deleted file mode 100644
index ca6b992..0000000
--- a/libpsn00b/psxgte/pushpopmatrix.s
+++ /dev/null
@@ -1,68 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global PushMatrix
-.type PushMatrix, @function
-PushMatrix:
- la $a0, _matrix_stack
- cfc2 $v0, C2_R11R12
- cfc2 $v1, C2_R13R21
- sw $v0, 0($a0)
- cfc2 $v0, C2_R22R23
- sw $v1, 4($a0)
- sw $v0, 8($a0)
- cfc2 $v0, C2_R31R32
- cfc2 $v1, C2_R33
- sw $v0, 12($a0)
- sw $v1, 16($a0)
- cfc2 $v0, C2_TRX
- cfc2 $v1, C2_TRY
- sw $v0, 20($a0)
- cfc2 $v0, C2_TRZ
- sw $v1, 24($a0)
- jr $ra
- sw $v0, 28($a0)
-
-.global PopMatrix
-.type PopMatrix, @function
-PopMatrix:
- la $a0, _matrix_stack
- lw $v0, 0($a0)
- lw $v1, 4($a0)
- ctc2 $v0, C2_R11R12
- ctc2 $v1, C2_R13R21
- lw $v0, 8($a0)
- lw $v1, 12($a0)
- ctc2 $v0, C2_R22R23
- lw $v0, 16($a0)
- ctc2 $v1, C2_R31R32
- ctc2 $v0, C2_R33
- lw $v0, 20($a0)
- lw $v1, 24($a0)
- ctc2 $v0, C2_TRX
- lw $v0, 28($a0)
- ctc2 $v1, C2_TRY
- ctc2 $v0, C2_TRZ
- jr $ra
- nop
-
-
-.section .data
-
-
-.type matrix_stack, @object
-_matrix_stack:
- .word 0
- .word 0
- .word 0
- .word 0
- .word 0
- .word 0
- .word 0
- .word 0
-
diff --git a/libpsn00b/psxgte/scalematrix.s b/libpsn00b/psxgte/scalematrix.s
deleted file mode 100644
index 1b2b6dd..0000000
--- a/libpsn00b/psxgte/scalematrix.s
+++ /dev/null
@@ -1,68 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global ScaleMatrix
-.type ScaleMatrix, @function
-ScaleMatrix:
-
- lwc2 C2_IR0, 0($a1) # X
-
- lh $v0, 2*(0+(3*0))($a0)
- lh $v1, 2*(0+(3*1))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(0+(3*2))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(0+(3*0))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(0+(3*1))($a0)
- sh $v0, 2*(0+(3*2))($a0)
-
- lwc2 C2_IR0, 4($a1) # Y
-
- lh $v0, 2*(1+(3*0))($a0)
- lh $v1, 2*(1+(3*1))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(1+(3*2))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(1+(3*0))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(1+(3*1))($a0)
- sh $v0, 2*(1+(3*2))($a0)
-
- lwc2 C2_IR0, 8($a1) # Z
-
- lh $v0, 2*(2+(3*0))($a0)
- lh $v1, 2*(2+(3*1))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(2+(3*2))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(2+(3*0))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(2+(3*1))($a0)
- sh $v0, 2*(2+(3*2))($a0)
-
- jr $ra
- move $v0, $a0
diff --git a/libpsn00b/psxgte/scalematrixl.s b/libpsn00b/psxgte/scalematrixl.s
deleted file mode 100644
index 53c2d14..0000000
--- a/libpsn00b/psxgte/scalematrixl.s
+++ /dev/null
@@ -1,68 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global ScaleMatrixL
-.type ScaleMatrixL, @function
-ScaleMatrixL:
-
- lwc2 C2_IR0, 0($a1) # X
-
- lh $v0, 2*(0+(3*0))($a0)
- lh $v1, 2*(1+(3*0))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(2+(3*0))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(0+(3*0))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(1+(3*0))($a0)
- sh $v0, 2*(2+(3*0))($a0)
-
- lwc2 C2_IR0, 4($a1) # Y
-
- lh $v0, 2*(0+(3*1))($a0)
- lh $v1, 2*(1+(3*1))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(2+(3*1))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(0+(3*1))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(1+(3*1))($a0)
- sh $v0, 2*(2+(3*1))($a0)
-
- lwc2 C2_IR0, 8($a1) # Z
-
- lh $v0, 2*(0+(3*2))($a0)
- lh $v1, 2*(1+(3*2))($a0)
- mtc2 $v0, C2_IR1
- lh $v0, 2*(2+(3*2))($a0)
- mtc2 $v1, C2_IR2
- mtc2 $v0, C2_IR3
-
- nGPF(1)
-
- mfc2 $v0, C2_IR1
- mfc2 $v1, C2_IR2
- sh $v0, 2*(0+(3*2))($a0)
- mfc2 $v0, C2_IR3
- sh $v1, 2*(1+(3*2))($a0)
- sh $v0, 2*(2+(3*2))($a0)
-
- jr $ra
- move $v0, $a0
diff --git a/libpsn00b/psxgte/square0.s b/libpsn00b/psxgte/square0.s
deleted file mode 100644
index a8ca107..0000000
--- a/libpsn00b/psxgte/square0.s
+++ /dev/null
@@ -1,27 +0,0 @@
-.set noreorder
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-
-.global Square0
-.type Square0, @function
-Square0:
-
- # a0 - Pointer to input vector (v0)
- # a1 - Pointer to output vector (v1)
-
- lwc2 C2_IR1, 0($a0)
- lwc2 C2_IR2, 4($a0)
- lwc2 C2_IR3, 8($a0)
-
- nSQR(0)
-
- swc2 C2_IR1, 0($a1)
- swc2 C2_IR2, 4($a1)
- swc2 C2_IR3, 8($a1)
-
- jr $ra
- nop
diff --git a/libpsn00b/psxgte/squareroot.s b/libpsn00b/psxgte/squareroot.s
index 72198b4..4377ee4 100644
--- a/libpsn00b/psxgte/squareroot.s
+++ b/libpsn00b/psxgte/squareroot.s
@@ -3,10 +3,7 @@
.include "gtereg.inc"
.include "inline_s.inc"
-.section .text
-
-# Implementation based from Sony libs
-
+.section .text.SquareRoot12
.global SquareRoot12
.type SquareRoot12, @function
SquareRoot12:
@@ -34,27 +31,26 @@ SquareRoot12:
.Lvalue_greater12:
addi $t4, -64
sll $t4, 1
- la $t5, sqrt_table
+ la $t5, _sqrt_table
addu $t5, $t4
lh $t5, 0($t5)
nop
-
+
bltz $t1, .L1594c
nop
jr $ra
sllv $v0, $t5, $t1
-
-.L1594c:
+.L1594c:
sub $t1, $0 , $t1
jr $ra
srl $v0, $t5, $t1
-
+
.Lbad_sqr12:
jr $ra
move $v0, $0
-
-
+
+.section .text.SquareRoot0
.global SquareRoot0
.type SquareRoot0, @function
SquareRoot0:
@@ -82,7 +78,7 @@ SquareRoot0:
.Lvalue_greater:
addi $t4, -64
sll $t4, 1
- la $t5, sqrt_table
+ la $t5, _sqrt_table
addu $t5, $t4
lh $t5, 0($t5)
nop
@@ -92,32 +88,31 @@ SquareRoot0:
.Lbad_sqr:
jr $ra
move $v0, $0
-
-.section .data
-
-sqrt_table:
- .hword 0x1000,0x101f,0x103f,0x105e,0x107e,0x109c,0x10bb,0x10da
- .hword 0x10f8,0x1116,0x1134,0x1152,0x116f,0x118c,0x11a9,0x11c6
- .hword 0x11e3,0x1200,0x121c,0x1238,0x1254,0x1270,0x128c,0x12a7
- .hword 0x12c2,0x12de,0x12f9,0x1314,0x132e,0x1349,0x1364,0x137e
- .hword 0x1398,0x13b2,0x13cc,0x13e6,0x1400,0x1419,0x1432,0x144c
- .hword 0x1465,0x147e,0x1497,0x14b0,0x14c8,0x14e1,0x14f9,0x1512
- .hword 0x152a,0x1542,0x155a,0x1572,0x158a,0x15a2,0x15b9,0x15d1
- .hword 0x15e8,0x1600,0x1617,0x162e,0x1645,0x165c,0x1673,0x1689
- .hword 0x16a0,0x16b7,0x16cd,0x16e4,0x16fa,0x1710,0x1726,0x173c
- .hword 0x1752,0x1768,0x177e,0x1794,0x17aa,0x17bf,0x17d5,0x17ea
- .hword 0x1800,0x1815,0x182a,0x183f,0x1854,0x1869,0x187e,0x1893
- .hword 0x18a8,0x18bd,0x18d1,0x18e6,0x18fa,0x190f,0x1923,0x1938
- .hword 0x194c,0x1960,0x1974,0x1988,0x199c,0x19b0,0x19c4,0x19d8
- .hword 0x19ec,0x1a00,0x1a13,0x1a27,0x1a3a,0x1a4e,0x1a61,0x1a75
- .hword 0x1a88,0x1a9b,0x1aae,0x1ac2,0x1ad5,0x1ae8,0x1afb,0x1b0e
- .hword 0x1b21,0x1b33,0x1b46,0x1b59,0x1b6c,0x1b7e,0x1b91,0x1ba3
- .hword 0x1bb6,0x1bc8,0x1bdb,0x1bed,0x1c00,0x1c12,0x1c24,0x1c36
- .hword 0x1c48,0x1c5a,0x1c6c,0x1c7e,0x1c90,0x1ca2,0x1cb4,0x1cc6
- .hword 0x1cd8,0x1ce9,0x1cfb,0x1d0d,0x1d1e,0x1d30,0x1d41,0x1d53
- .hword 0x1d64,0x1d76,0x1d87,0x1d98,0x1daa,0x1dbb,0x1dcc,0x1ddd
- .hword 0x1dee,0x1e00,0x1e11,0x1e22,0x1e33,0x1e43,0x1e54,0x1e65
- .hword 0x1e76,0x1e87,0x1e98,0x1ea8,0x1eb9,0x1eca,0x1eda,0x1eeb
- .hword 0x1efb,0x1f0c,0x1f1c,0x1f2d,0x1f3d,0x1f4e,0x1f5e,0x1f6e
- .hword 0x1f7e,0x1f8f,0x1f9f,0x1faf,0x1fbf,0x1fcf,0x1fdf,0x1fef \ No newline at end of file
+.section .data._sqrt_table
+.type _sqrt_table, @object
+_sqrt_table:
+ .hword 0x1000, 0x101f, 0x103f, 0x105e, 0x107e, 0x109c, 0x10bb, 0x10da
+ .hword 0x10f8, 0x1116, 0x1134, 0x1152, 0x116f, 0x118c, 0x11a9, 0x11c6
+ .hword 0x11e3, 0x1200, 0x121c, 0x1238, 0x1254, 0x1270, 0x128c, 0x12a7
+ .hword 0x12c2, 0x12de, 0x12f9, 0x1314, 0x132e, 0x1349, 0x1364, 0x137e
+ .hword 0x1398, 0x13b2, 0x13cc, 0x13e6, 0x1400, 0x1419, 0x1432, 0x144c
+ .hword 0x1465, 0x147e, 0x1497, 0x14b0, 0x14c8, 0x14e1, 0x14f9, 0x1512
+ .hword 0x152a, 0x1542, 0x155a, 0x1572, 0x158a, 0x15a2, 0x15b9, 0x15d1
+ .hword 0x15e8, 0x1600, 0x1617, 0x162e, 0x1645, 0x165c, 0x1673, 0x1689
+ .hword 0x16a0, 0x16b7, 0x16cd, 0x16e4, 0x16fa, 0x1710, 0x1726, 0x173c
+ .hword 0x1752, 0x1768, 0x177e, 0x1794, 0x17aa, 0x17bf, 0x17d5, 0x17ea
+ .hword 0x1800, 0x1815, 0x182a, 0x183f, 0x1854, 0x1869, 0x187e, 0x1893
+ .hword 0x18a8, 0x18bd, 0x18d1, 0x18e6, 0x18fa, 0x190f, 0x1923, 0x1938
+ .hword 0x194c, 0x1960, 0x1974, 0x1988, 0x199c, 0x19b0, 0x19c4, 0x19d8
+ .hword 0x19ec, 0x1a00, 0x1a13, 0x1a27, 0x1a3a, 0x1a4e, 0x1a61, 0x1a75
+ .hword 0x1a88, 0x1a9b, 0x1aae, 0x1ac2, 0x1ad5, 0x1ae8, 0x1afb, 0x1b0e
+ .hword 0x1b21, 0x1b33, 0x1b46, 0x1b59, 0x1b6c, 0x1b7e, 0x1b91, 0x1ba3
+ .hword 0x1bb6, 0x1bc8, 0x1bdb, 0x1bed, 0x1c00, 0x1c12, 0x1c24, 0x1c36
+ .hword 0x1c48, 0x1c5a, 0x1c6c, 0x1c7e, 0x1c90, 0x1ca2, 0x1cb4, 0x1cc6
+ .hword 0x1cd8, 0x1ce9, 0x1cfb, 0x1d0d, 0x1d1e, 0x1d30, 0x1d41, 0x1d53
+ .hword 0x1d64, 0x1d76, 0x1d87, 0x1d98, 0x1daa, 0x1dbb, 0x1dcc, 0x1ddd
+ .hword 0x1dee, 0x1e00, 0x1e11, 0x1e22, 0x1e33, 0x1e43, 0x1e54, 0x1e65
+ .hword 0x1e76, 0x1e87, 0x1e98, 0x1ea8, 0x1eb9, 0x1eca, 0x1eda, 0x1eeb
+ .hword 0x1efb, 0x1f0c, 0x1f1c, 0x1f2d, 0x1f3d, 0x1f4e, 0x1f5e, 0x1f6e
+ .hword 0x1f7e, 0x1f8f, 0x1f9f, 0x1faf, 0x1fbf, 0x1fcf, 0x1fdf, 0x1fef
diff --git a/libpsn00b/psxgte/vector.s b/libpsn00b/psxgte/vector.s
new file mode 100644
index 0000000..2f2f8d6
--- /dev/null
+++ b/libpsn00b/psxgte/vector.s
@@ -0,0 +1,123 @@
+.set noreorder
+.set noat
+
+.include "gtereg.inc"
+.include "inline_s.inc"
+# Square0: runs the GTE SQR operation on a three-component vector.
+.section .text.Square0
+.global Square0
+.type Square0, @function
+Square0:
+ # a0 - Pointer to input vector (v0)
+ # a1 - Pointer to output vector (v1)
+ # Load the three 32-bit input components directly into IR1-IR3.
+ lwc2 C2_IR1, 0($a0)
+ lwc2 C2_IR2, 4($a0)
+ lwc2 C2_IR3, 8($a0)
+
+ nSQR(0) # macro from inline_s.inc (not shown here); presumably issues the GTE SQR command
+ # Store IR1-IR3 back to the output vector as three 32-bit words.
+ swc2 C2_IR1, 0($a1)
+ swc2 C2_IR2, 4($a1)
+ swc2 C2_IR3, 8($a1)
+
+ jr $ra
+ nop # branch delay slot
+# VectorNormalS: scales a vector of three 32-bit words (a0) using a factor looked up in _norm_table and writes three 16-bit results (a1).
+.section .text.VectorNormalS
+.global VectorNormalS
+.type VectorNormalS, @function
+VectorNormalS:
+ # a0 - pointer to three input words, a1 - pointer to three output halfwords
+ lw $t0, 0($a0)
+ lw $t1, 4($a0)
+ lw $t2, 8($a0)
+ # t0-t2 keep the original components; they are reloaded into the GTE further down.
+ mtc2 $t0, C2_IR1
+ mtc2 $t1, C2_IR2
+ mtc2 $t2, C2_IR3
+
+ nSQR(0)
+ # v0 = MAC1 + MAC2 + MAC3 (presumably the squared length of the vector)
+ mfc2 $t3, C2_MAC1
+ mfc2 $t4, C2_MAC2
+ mfc2 $t5, C2_MAC3
+
+ add $t3, $t4
+ add $v0, $t3, $t5
+ mtc2 $v0, C2_LZCS
+ nop
+ nop
+ mfc2 $v1, C2_LZCR # v1 = leading-bit count of v0 as reported by the GTE
+ # Clear bit 0 so the count is even.
+ addiu $at, $0 , -2
+ and $v1, $at
+ # t6 = (31 - v1) >> 1 is the final right shift; t3 = v1 - 24 selects the normalization direction.
+ addiu $t6, $0 , 0x1f
+ sub $t6, $v1
+ sra $t6, 1
+ addiu $t3, $v1, -24
+ # NOTE(review): if v0 is 0 the value shifted below stays 0 and the table offset becomes negative - confirm callers never pass a zero vector.
+ bltz $t3, .Lvalue_neg
+ nop
+ b .Lvalue_pos
+ sllv $t4, $v0, $t3 # delay slot: t4 = v0 << (v1 - 24)
+.Lvalue_neg:
+ addiu $t3, $0 , 24
+ sub $t3, $v1
+ srav $t4, $v0, $t3 # t4 = v0 >> (24 - v1)
+.Lvalue_pos:
+ addi $t4, -64 # table index = t4 - 64
+ sll $t4, 1 # halfword entries: byte offset = index * 2
+
+ la $t5, _norm_table
+ addu $t5, $t4
+ lh $t5, 0($t5)
+ # Multiply the original components (IR1-IR3) by the table value (IR0).
+ mtc2 $t0, C2_IR1
+ mtc2 $t1, C2_IR2
+ mtc2 $t2, C2_IR3
+ mtc2 $t5, C2_IR0
+
+ nGPF(0)
+
+ mfc2 $t0, C2_MAC1
+ mfc2 $t1, C2_MAC2
+ mfc2 $t2, C2_MAC3
+ # Undo the normalization with an arithmetic right shift by t6.
+ sra $t0, $t6
+ sra $t1, $t6
+ sra $t2, $t6
+
+ sh $t0, 0($a1)
+ sh $t1, 2($a1)
+ jr $ra
+ sh $t2, 4($a1) # delay slot: store the third component
+# 192-entry halfword lookup table read by VectorNormalS (index = normalized value - 64); values look like 4096 / sqrt(i / 64) for i = 64..255 - TODO confirm.
+.section .data._norm_table
+.type _norm_table, @object
+_norm_table:
+ .hword 0x1000, 0x0fe0, 0x0fc1, 0x0fa3, 0x0f85, 0x0f68, 0x0f4c, 0x0f30
+ .hword 0x0f15, 0x0efb, 0x0ee1, 0x0ec7, 0x0eae, 0x0e96, 0x0e7e, 0x0e66
+ .hword 0x0e4f, 0x0e38, 0x0e22, 0x0e0c, 0x0df7, 0x0de2, 0x0dcd, 0x0db9
+ .hword 0x0da5, 0x0d91, 0x0d7e, 0x0d6b, 0x0d58, 0x0d45, 0x0d33, 0x0d21
+ .hword 0x0d10, 0x0cff, 0x0cee, 0x0cdd, 0x0ccc, 0x0cbc, 0x0cac, 0x0c9c
+ .hword 0x0c8d, 0x0c7d, 0x0c6e, 0x0c5f, 0x0c51, 0x0c42, 0x0c34, 0x0c26
+ .hword 0x0c18, 0x0c0a, 0x0bfd, 0x0bef, 0x0be2, 0x0bd5, 0x0bc8, 0x0bbb
+ .hword 0x0baf, 0x0ba2, 0x0b96, 0x0b8a, 0x0b7e, 0x0b72, 0x0b67, 0x0b5b
+ .hword 0x0b50, 0x0b45, 0x0b39, 0x0b2e, 0x0b24, 0x0b19, 0x0b0e, 0x0b04
+ .hword 0x0af9, 0x0aef, 0x0ae5, 0x0adb, 0x0ad1, 0x0ac7, 0x0abd, 0x0ab4
+ .hword 0x0aaa, 0x0aa1, 0x0a97, 0x0a8e, 0x0a85, 0x0a7c, 0x0a73, 0x0a6a
+ .hword 0x0a61, 0x0a59, 0x0a50, 0x0a47, 0x0a3f, 0x0a37, 0x0a2e, 0x0a26
+ .hword 0x0a1e, 0x0a16, 0x0a0e, 0x0a06, 0x09fe, 0x09f6, 0x09ef, 0x09e7
+ .hword 0x09e0, 0x09d8, 0x09d1, 0x09c9, 0x09c2, 0x09bb, 0x09b4, 0x09ad
+ .hword 0x09a5, 0x099e, 0x0998, 0x0991, 0x098a, 0x0983, 0x097c, 0x0976
+ .hword 0x096f, 0x0969, 0x0962, 0x095c, 0x0955, 0x094f, 0x0949, 0x0943
+ .hword 0x093c, 0x0936, 0x0930, 0x092a, 0x0924, 0x091e, 0x0918, 0x0912
+ .hword 0x090d, 0x0907, 0x0901, 0x08fb, 0x08f6, 0x08f0, 0x08eb, 0x08e5
+ .hword 0x08e0, 0x08da, 0x08d5, 0x08cf, 0x08ca, 0x08c5, 0x08bf, 0x08ba
+ .hword 0x08b5, 0x08b0, 0x08ab, 0x08a6, 0x08a1, 0x089c, 0x0897, 0x0892
+ .hword 0x088d, 0x0888, 0x0883, 0x087e, 0x087a, 0x0875, 0x0870, 0x086b
+ .hword 0x0867, 0x0862, 0x085e, 0x0859, 0x0855, 0x0850, 0x084c, 0x0847
+ .hword 0x0843, 0x083e, 0x083a, 0x0836, 0x0831, 0x082d, 0x0829, 0x0824
+ .hword 0x0820, 0x081c, 0x0818, 0x0814, 0x0810, 0x080c, 0x0808, 0x0804
diff --git a/libpsn00b/psxgte/vectornormals.s b/libpsn00b/psxgte/vectornormals.s
deleted file mode 100644
index 85e94e6..0000000
--- a/libpsn00b/psxgte/vectornormals.s
+++ /dev/null
@@ -1,110 +0,0 @@
-.set noreorder
-.set noat
-
-.include "gtereg.inc"
-.include "inline_s.inc"
-
-.section .text
-
-# Implementation based from Sony libs
-
-.global VectorNormalS
-.type VectorNormalS, @function
-VectorNormalS:
-
- lw $t0, 0($a0)
- lw $t1, 4($a0)
- lw $t2, 8($a0)
-
- mtc2 $t0, C2_IR1
- mtc2 $t1, C2_IR2
- mtc2 $t2, C2_IR3
-
- nSQR(0)
-
- mfc2 $t3, C2_MAC1
- mfc2 $t4, C2_MAC2
- mfc2 $t5, C2_MAC3
-
- add $t3, $t4
- add $v0, $t3, $t5
- mtc2 $v0, C2_LZCS
- nop
- nop
- mfc2 $v1, C2_LZCR
-
- addiu $at, $0 , -2
- and $v1, $at
-
- addiu $t6, $0 , 0x1f
- sub $t6, $v1
- sra $t6, 1
- addiu $t3, $v1, -24
-
- bltz $t3, .Lvalue_neg
- nop
- b .Lvalue_pos
- sllv $t4, $v0, $t3
-.Lvalue_neg:
- addiu $t3, $0 , 24
- sub $t3, $v1
- srav $t4, $v0, $t3
-.Lvalue_pos:
- addi $t4, -64
- sll $t4, 1
-
- la $t5, _norm_table
- addu $t5, $t4
- lh $t5, 0($t5)
-
- mtc2 $t0, C2_IR1
- mtc2 $t1, C2_IR2
- mtc2 $t2, C2_IR3
- mtc2 $t5, C2_IR0
-
- nGPF(0)
-
- mfc2 $t0, C2_MAC1
- mfc2 $t1, C2_MAC2
- mfc2 $t2, C2_MAC3
-
- sra $t0, $t6
- sra $t1, $t6
- sra $t2, $t6
-
- sh $t0, 0($a1)
- sh $t1, 2($a1)
- jr $ra
- sh $t2, 4($a1)
-
-
-.section .data
-
-.global _norm_table
-.type _norm_table, @object
-_norm_table:
- .hword 0x1000, 0x0FE0, 0x0FC1, 0x0FA3, 0x0F85, 0x0F68, 0x0F4C, 0x0F30
- .hword 0x0F15, 0x0EFB, 0x0EE1, 0x0EC7, 0x0EAE, 0x0E96, 0x0E7E, 0x0E66
- .hword 0x0E4F, 0x0E38, 0x0E22, 0x0E0C, 0x0DF7, 0x0DE2, 0x0DCD, 0x0DB9
- .hword 0x0DA5, 0x0D91, 0x0D7E, 0x0D6B, 0x0D58, 0x0D45, 0x0D33, 0x0D21
- .hword 0x0D10, 0x0CFF, 0x0CEE, 0x0CDD, 0x0CCC, 0x0CBC, 0x0CAC, 0x0C9C
- .hword 0x0C8D, 0x0C7D, 0x0C6E, 0x0C5F, 0x0C51, 0x0C42, 0x0C34, 0x0C26
- .hword 0x0C18, 0x0C0A, 0x0BFD, 0x0BEF, 0x0BE2, 0x0BD5, 0x0BC8, 0x0BBB
- .hword 0x0BAF, 0x0BA2, 0x0B96, 0x0B8A, 0x0B7E, 0x0B72, 0x0B67, 0x0B5B
- .hword 0x0B50, 0x0B45, 0x0B39, 0x0B2E, 0x0B24, 0x0B19, 0x0B0E, 0x0B04
- .hword 0x0AF9, 0x0AEF, 0x0AE5, 0x0ADB, 0x0AD1, 0x0AC7, 0x0ABD, 0x0AB4
- .hword 0x0AAA, 0x0AA1, 0x0A97, 0x0A8E, 0x0A85, 0x0A7C, 0x0A73, 0x0A6A
- .hword 0x0A61, 0x0A59, 0x0A50, 0x0A47, 0x0A3F, 0x0A37, 0x0A2E, 0x0A26
- .hword 0x0A1E, 0x0A16, 0x0A0E, 0x0A06, 0x09FE, 0x09F6, 0x09EF, 0x09E7
- .hword 0x09E0, 0x09D8, 0x09D1, 0x09C9, 0x09C2, 0x09BB, 0x09B4, 0x09AD
- .hword 0x09A5, 0x099E, 0x0998, 0x0991, 0x098A, 0x0983, 0x097C, 0x0976
- .hword 0x096F, 0x0969, 0x0962, 0x095C, 0x0955, 0x094F, 0x0949, 0x0943
- .hword 0x093C, 0x0936, 0x0930, 0x092A, 0x0924, 0x091E, 0x0918, 0x0912
- .hword 0x090D, 0x0907, 0x0901, 0x08FB, 0x08F6, 0x08F0, 0x08EB, 0x08E5
- .hword 0x08E0, 0x08DA, 0x08D5, 0x08CF, 0x08CA, 0x08C5, 0x08BF, 0x08BA
- .hword 0x08B5, 0x08B0, 0x08AB, 0x08A6, 0x08A1, 0x089C, 0x0897, 0x0892
- .hword 0x088D, 0x0888, 0x0883, 0x087E, 0x087A, 0x0875, 0x0870, 0x086B
- .hword 0x0867, 0x0862, 0x085E, 0x0859, 0x0855, 0x0850, 0x084C, 0x0847
- .hword 0x0843, 0x083E, 0x083A, 0x0836, 0x0831, 0x082D, 0x0829, 0x0824
- .hword 0x0820, 0x081C, 0x0818, 0x0814, 0x0810, 0x080C, 0x0808, 0x0804
- \ No newline at end of file
diff --git a/libpsn00b/psxpress/mdec.c b/libpsn00b/psxpress/mdec.c
index d43436f..d55dbbb 100644
--- a/libpsn00b/psxpress/mdec.c
+++ b/libpsn00b/psxpress/mdec.c
@@ -4,7 +4,7 @@
*/
#include <stdint.h>
-#include <psxetc.h>
+#include <assert.h>
#include <psxapi.h>
#include <psxpress.h>
#include <hwregs_c.h>
@@ -127,7 +127,7 @@ void DecDCTin(const uint32_t *data, int mode) {
// the stream.
void DecDCTinRaw(const uint32_t *data, size_t length) {
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
- _sdk_log("psxpress: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
+ _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
length += DMA_CHUNK_LENGTH - 1;
}
@@ -149,8 +149,7 @@ int DecDCTinSync(int mode) {
return 0;
}
- _sdk_log("psxpress: DecDCTinSync() timeout\n");
- _sdk_dump_log();
+ _sdk_log("DecDCTinSync() timeout\n");
return -1;
}
@@ -158,7 +157,7 @@ void DecDCTout(uint32_t *data, size_t length) {
DecDCToutSync(0);
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
- _sdk_log("psxpress: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
+ _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
length += DMA_CHUNK_LENGTH - 1;
}
@@ -180,7 +179,6 @@ int DecDCToutSync(int mode) {
return 0;
}
- _sdk_log("psxpress: DecDCToutSync() timeout\n");
- _sdk_dump_log();
+ _sdk_log("DecDCToutSync() timeout\n");
return -1;
}
diff --git a/libpsn00b/psxpress/vlc.s b/libpsn00b/psxpress/vlc.s
index 885a3f7..f3a1c67 100644
--- a/libpsn00b/psxpress/vlc.s
+++ b/libpsn00b/psxpress/vlc.s
@@ -29,17 +29,17 @@
.set VLC_Context_block_index, 20
.set VLC_Context_coeff_index, 21
-.set DECDCTSMALLTAB_lut0, 0
-.set DECDCTSMALLTAB_lut2, 4
-.set DECDCTSMALLTAB_lut3, 36
-.set DECDCTSMALLTAB_lut4, 292
-.set DECDCTSMALLTAB_lut5, 308
-.set DECDCTSMALLTAB_lut7, 324
-.set DECDCTSMALLTAB_lut8, 356
-.set DECDCTSMALLTAB_lut9, 420
-.set DECDCTSMALLTAB_lut10, 484
-.set DECDCTSMALLTAB_lut11, 548
-.set DECDCTSMALLTAB_lut12, 612
+.set DECDCTTAB_lut0, 0
+.set DECDCTTAB_lut2, 4
+.set DECDCTTAB_lut3, 36
+.set DECDCTTAB_lut4, 292
+.set DECDCTTAB_lut5, 308
+.set DECDCTTAB_lut7, 324
+.set DECDCTTAB_lut8, 356
+.set DECDCTTAB_lut9, 420
+.set DECDCTTAB_lut10, 484
+.set DECDCTTAB_lut11, 548
+.set DECDCTTAB_lut12, 612
.section .text.DecDCTvlcStart
.global DecDCTvlcStart
@@ -115,7 +115,7 @@ _vlc_skip_context_load:
# Obtain the addresses of the lookup table and jump area in advance so that
# they don't have to be retrieved for each coefficient decoded.
lw $t8, _vlc_huffman_table
- la $t9, .Lac_jump_area
+ la $t9, .Lac_prefix_10
beqz $a2, .Lstop_processing
addiu $a1, 4 # output = (uint16_t *) &output[1]
@@ -123,67 +123,67 @@ _vlc_skip_context_load:
.Lprocess_next_code_loop: # while (max_size)
# This is the "hot" part of the decoder, executed for each code in the
# bitstream. The first step is to determine if the next code is a DC or AC
- # coefficient. The GTE is also given the task of counting the number of
- # leading zeroes/ones, which takes 2 more cycles.
+ # coefficient.
bnez $t7, .Lprocess_ac_coefficient
- mtc2 $t0, $30
+ addiu $t7, 1 # coeff_index++
bnez $t4, .Lprocess_dc_v3_coefficient
- #nop
+ li $v1, 0x01ff
.Lprocess_dc_v2_coefficient: # if (!coeff_index && !is_v3)
# The DC coefficient in version 2 frames is not compressed. Value 0x1ff is
# used to signal the end of the bitstream.
srl $v0, $t0, 22 # prefix = (window >> (32 - 10))
- li $v1, 0x01ff
beq $v0, $v1, .Lstop_processing # if (prefix == 0x1ff) break
or $v0, $t3 # *output = prefix | quant_scale
sll $t0, 10 # window <<= 10
- addiu $t5, -10 # bit_offset -= 10
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -10 # bit_offset -= 10
.Lprocess_dc_v3_coefficient: # if (!coeff_index && is_v3)
# TODO: version 3 is currently not supported.
jr $ra
li $v0, -1
- #b .Lwrite_value
.Lprocess_ac_coefficient: # if (coeff_index)
- # Check whether the prefix code is one of the shorter, more common ones.
+ # Check whether the prefix code is one of the shorter, more common ones,
+ # and start counting the number of leading zeroes/ones using the GTE (which
+ # takes 2 more cycles).
srl $v0, $t0, 30
li $v1, 3
beq $v0, $v1, .Lac_prefix_11
li $v1, 2
beq $v0, $v1, .Lac_prefix_10
li $v1, 1
+ mtc2 $t0, $30
beq $v0, $v1, .Lac_prefix_01
- #srl $v0, $t0, 29
- #beq $v0, $v1, .Lac_prefix_001
- #nop
+ nop
# If the code is longer, retrieve the number of leading zeroes from the GTE
# and use it as an index into the jump area. Each block in the area is 8
# instructions long and handles decoding a specific prefix.
mfc2 $v0, $31
- nop
- andi $v0, 15 # jump_addr = &ac_jump_area[(prefix % 16) * 8 * sizeof(u32)]
- sll $v0, 5
+ li $v1, 11
+ bgt $v0, $v1, .Lreturn_error # if (prefix > 11) return -1
+ sll $v0, 5 # jump_addr = &ac_jump_area[prefix * 8 * sizeof(u32)]
addu $v0, $t9
jr $v0
nop
+.Lreturn_error:
+ jr $ra
+ li $v0, -1
+
.Lac_prefix_11:
# Prefix 11 is followed by a single bit.
srl $v0, $t0, 28 # index = ((window >> (32 - 2 - 1)) & 1) * sizeof(u16)
andi $v0, 2
addu $v0, $t8 # value = table->lut0[index]
- lhu $v0, DECDCTSMALLTAB_lut0($v0)
+ lhu $v0, DECDCTTAB_lut0($v0)
sll $t0, 3 # window <<= 3
- addiu $t5, -3 # bit_offset -= 3
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -3 # bit_offset -= 3
+ #.word 0
-.Lac_jump_area:
.Lac_prefix_10:
# Prefix 10 marks the end of a block.
li $v0, 0xfe00 # value = 0xfe00
@@ -202,11 +202,10 @@ _vlc_skip_context_load:
srl $v0, $t0, 25 # index = ((window >> (32 - 2 - 3)) & 7) * sizeof(u32)
andi $v0, 28
addu $v0, $t8 # value = table->lut2[index]
- lw $v0, DECDCTSMALLTAB_lut2($v0)
- addiu $t7, 1 # coeff_index++
+ lw $v0, DECDCTTAB_lut2($v0)
b .Lupdate_window_and_write
srl $v1, $v0, 16 # length = value >> 16
- .word 0
+ .word 0, 0
.Lac_prefix_001:
# Prefix 001 can be followed by a 6-bit lookup index starting with 00, or a
@@ -214,136 +213,106 @@ _vlc_skip_context_load:
srl $v0, $t0, 21 # index = ((window >> (32 - 3 - 6)) & 63) * sizeof(u32)
andi $v0, 252
addu $v0, $t8 # value = table->lut3[index]
- lw $v0, DECDCTSMALLTAB_lut3($v0)
- addiu $t7, 1 # coeff_index++
+ lw $v0, DECDCTTAB_lut3($v0)
b .Lupdate_window_and_write
srl $v1, $v0, 16 # length = value >> 16
- .word 0
+ .word 0, 0
.Lac_prefix_0001:
# Prefix 0001 is followed by a 3-bit lookup index.
srl $v0, $t0, 24 # index = ((window >> (32 - 4 - 3)) & 7) * sizeof(u16)
andi $v0, 14
addu $v0, $t8 # value = table->lut4[index]
- lhu $v0, DECDCTSMALLTAB_lut4($v0)
+ lhu $v0, DECDCTTAB_lut4($v0)
sll $t0, 7 # window <<= 4 + 3
- addiu $t5, -7 # bit_offset -= 4 + 3
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -7 # bit_offset -= 4 + 3
+ .word 0
.Lac_prefix_00001:
# Prefix 00001 is followed by a 3-bit lookup index.
srl $v0, $t0, 23 # index = ((window >> (32 - 5 - 3)) & 7) * sizeof(u16)
andi $v0, 14
addu $v0, $t8 # value = table->lut5[index]
- lhu $v0, DECDCTSMALLTAB_lut5($v0)
+ lhu $v0, DECDCTTAB_lut5($v0)
sll $t0, 8 # window <<= 5 + 3
- addiu $t5, -8 # bit_offset -= 5 + 3
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -8 # bit_offset -= 5 + 3
+ .word 0
.Lac_prefix_000001:
# Prefix 000001 is an escape code followed by a full 16-bit MDEC value.
srl $v0, $t0, 10 # value = window >> (32 - 6 - 16)
sll $t0, 22 # window <<= 6 + 16
- addiu $t5, -22 # bit_offset -= 6 + 16
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
- .word 0, 0, 0
+ addiu $t5, -22 # bit_offset -= 6 + 16
+ .word 0, 0, 0, 0
.Lac_prefix_0000001:
# Prefix 0000001 is followed by a 4-bit lookup index.
srl $v0, $t0, 20 # index = ((window >> (32 - 7 - 4)) & 15) * sizeof(u16)
andi $v0, 30
addu $v0, $t8 # value = table->lut7[index]
- lhu $v0, DECDCTSMALLTAB_lut7($v0)
+ lhu $v0, DECDCTTAB_lut7($v0)
sll $t0, 11 # window <<= 7 + 4
- addiu $t5, -11 # bit_offset -= 7 + 4
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -11 # bit_offset -= 7 + 4
+ .word 0
.Lac_prefix_00000001:
# Prefix 00000001 is followed by a 5-bit lookup index.
srl $v0, $t0, 18 # index = ((window >> (32 - 8 - 5)) & 31) * sizeof(u16)
andi $v0, 62
addu $v0, $t8 # value = table->lut8[index]
- lhu $v0, DECDCTSMALLTAB_lut8($v0)
+ lhu $v0, DECDCTTAB_lut8($v0)
sll $t0, 13 # window <<= 8 + 5
- addiu $t5, -13 # bit_offset -= 8 + 5
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -13 # bit_offset -= 8 + 5
+ .word 0
.Lac_prefix_000000001:
# Prefix 000000001 is followed by a 5-bit lookup index.
srl $v0, $t0, 17 # index = ((window >> (32 - 9 - 5)) & 31) * sizeof(u16)
andi $v0, 62
addu $v0, $t8 # value = table->lut9[index]
- lhu $v0, DECDCTSMALLTAB_lut9($v0)
+ lhu $v0, DECDCTTAB_lut9($v0)
sll $t0, 14 # window <<= 9 + 5
- addiu $t5, -14 # bit_offset -= 9 + 5
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -14 # bit_offset -= 9 + 5
+ .word 0
.Lac_prefix_0000000001:
# Prefix 0000000001 is followed by a 5-bit lookup index.
srl $v0, $t0, 16 # index = ((window >> (32 - 10 - 5)) & 31) * sizeof(u16)
andi $v0, 62
addu $v0, $t8 # value = table->lut10[index]
- lhu $v0, DECDCTSMALLTAB_lut10($v0)
+ lhu $v0, DECDCTTAB_lut10($v0)
sll $t0, 15 # window <<= 10 + 5
- addiu $t5, -15 # bit_offset -= 10 + 5
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -15 # bit_offset -= 10 + 5
+ .word 0
.Lac_prefix_00000000001:
# Prefix 00000000001 is followed by a 5-bit lookup index.
srl $v0, $t0, 15 # index = ((window >> (32 - 11 - 5)) & 31) * sizeof(u16)
andi $v0, 62
addu $v0, $t8 # value = table->lut11[index]
- lhu $v0, DECDCTSMALLTAB_lut11($v0)
+ lhu $v0, DECDCTTAB_lut11($v0)
sll $t0, 16 # window <<= 11 + 5
- addiu $t5, -16 # bit_offset -= 11 + 5
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
+ addiu $t5, -16 # bit_offset -= 11 + 5
+ .word 0
.Lac_prefix_000000000001:
# Prefix 000000000001 is followed by a 5-bit lookup index.
srl $v0, $t0, 14 # index = ((window >> (32 - 12 - 5)) & 31) * sizeof(u16)
andi $v0, 62
addu $v0, $t8 # value = table->lut12[index]
- lhu $v0, DECDCTSMALLTAB_lut12($v0)
+ lhu $v0, DECDCTTAB_lut12($v0)
sll $t0, 17 # window <<= 12 + 5
- addiu $t5, -17 # bit_offset -= 12 + 5
b .Lwrite_value
- addiu $t7, 1 # coeff_index++
-
- # Prefix 0000000000001 is not valid.
- beqz $t0, .Lstop_processing
- nop
- jr $ra
- li $v0, -1
- .word 0, 0, 0, 0
-
- # Prefix 00000000000001 is not valid.
- beqz $t0, .Lstop_processing
- nop
- jr $ra
- li $v0, -1
- .word 0, 0, 0, 0
-
- # Prefix 000000000000001 is not valid.
- beqz $t0, .Lstop_processing
- nop
- jr $ra
- li $v0, -1
- .word 0, 0, 0, 0
-
- # Prefix 0000000000000001 is not valid.
- beqz $t0, .Lstop_processing
- nop
- jr $ra
- li $v0, -1
- #.word 0, 0, 0, 0
+ addiu $t5, -17 # bit_offset -= 12 + 5
+ .word 0
.Lupdate_window_and_write:
sllv $t0, $t0, $v1 # window <<= length
diff --git a/libpsn00b/psxsio/_sio_control.s b/libpsn00b/psxsio/_sio_control.s
deleted file mode 100644
index 6378def..0000000
--- a/libpsn00b/psxsio/_sio_control.s
+++ /dev/null
@@ -1,184 +0,0 @@
-.set noreorder
-
-.include "hwregs_a.inc"
-
-.section .text
-
-# Currently implemented serial control functions:
-#
-# cmd(a0) sub(a1)
-# 0 0 Get serial status
-# 0 1 Get control line status
-# 0 2 Get serial mode
-# 0 3 Get baud rate
-# 0 4 Read 1 byte
-# 1 1 Set serial control
-# 1 2 Set serial mode
-# 1 3 Set baud rate
-# 1 4 Write 1 byte
-# 2 0 Reset serial
-# 2 1 Acknowledge serial
-
-.global _sio_control
-.type _sio_control, @function
-_sio_control:
-
- # a0 - command
- # a1 - subcommand
- # a2 - argument
-
- lui $a3, IOBASE
-
- beq $a0, $0, .Lcmd0
- nop
- beq $a0, 1, .Lcmd1
- nop
- beq $a0, 2, .Lcmd2
- nop
- jr $ra
- nop
-
-
-.Lcmd0:
-
- beq $a1, $0, .Lcmd0arg0
- nop
- beq $a1, 1, .Lcmd0arg1
- nop
- beq $a1, 2, .Lcmd0arg2
- nop
- beq $a1, 3, .Lcmd0arg3
- nop
- beq $a1, 4, .Lcmd0arg4
- nop
- jr $ra
- nop
-
-.Lcmd0arg0: # Get SIO status
-
- lhu $v0, SIO_STAT($a3)
- nop
-
- jr $ra
- andi $v0, 0x3FF
-
-.Lcmd0arg1: # Get control line status
-
- lhu $v1, SIO_CTRL($a3)
- nop
- srl $v0, $v1, 1
- andi $v0, 0x1
- srl $v1, 4
- andi $v1, 0x2
-
- jr $ra
- or $v0, $v1
-
-
-.Lcmd0arg2: # Get serial mode
-
- lhu $v0, SIO_MODE($a3)
- nop
- jr $ra
- andi $v0, 0xFF
-
-.Lcmd0arg3:
-
- lui $v1, 0x1f
- lhu $v0, SIO_BAUD($a3)
- ori $v1, 0xa400
- div $v1, $v0
- nop
- nop
- mflo $v0
- jr $ra
- nop
-
-.Lcmd0arg4: # Serial RX read
-
- lbu $v0, SIO_TXRX($a3)
- nop
- jr $ra
- nop
-
-
-.Lcmd1:
-
- beq $a1, 1, .Lcmd1arg1
- nop
- beq $a1, 2, .Lcmd1arg2
- nop
- beq $a1, 3, .Lcmd1arg3
- nop
- beq $a1, 4, .Lcmd1arg4
- nop
- jr $ra
- nop
-
-.Lcmd1arg1:
-
- andi $v0, $a2, 0x1CFF
- sh $a2, SIO_CTRL($a3)
- jr $ra
- nop
-
-.Lcmd1arg2:
-
- sh $a2, SIO_MODE($a3)
- jr $ra
- nop
-
-.Lcmd1arg3:
-
- lui $v0, 0x1f
- ori $v0, 0xa400
- divu $v0, $a2
- bnez $a2, .Lgood_baud
- nop
- jr $ra
- nop
-
-.Lgood_baud:
-
- mflo $v0
- sh $v0, SIO_BAUD($a3)
- nop
- jr $ra
- nop
-
-.Lcmd1arg4:
-
- sb $a2, SIO_TXRX($a3)
- nop
- jr $ra
- nop
-
-.Lcmd2:
-
- beq $a1, $0 , .Lcmd2arg0
- li $v0, 1
- beq $a1, $v0, .Lcmd2arg1
- nop
- jr $ra
- nop
-
-.Lcmd2arg0:
-
- li $v0, 0x40
- sh $v0, SIO_CTRL($a3)
- sh $0 , SIO_MODE($a3)
- sh $0 , SIO_BAUD($a3)
- nop
- jr $ra
- nop
-
-.Lcmd2arg1:
-
- lhu $v0, SIO_CTRL($a3)
- nop
- ori $v0, 0x10
- sh $v0, SIO_CTRL($a3)
- jr $ra
- nop
-
- \ No newline at end of file
diff --git a/libpsn00b/psxsio/sio.c b/libpsn00b/psxsio/sio.c
new file mode 100644
index 0000000..6b80352
--- /dev/null
+++ b/libpsn00b/psxsio/sio.c
@@ -0,0 +1,269 @@
+/*
+ * PSn00bSDK buffered serial port driver
+ * (C) 2022 spicyjpeg - MPL licensed
+ *
+ * TODO: add proper support for DTR/DSR flow control
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <psxetc.h>
+#include <psxapi.h>
+#include <psxsio.h>
+#include <hwregs_c.h>
+
+#define BUFFER_LENGTH 128
+#define SIO_SYNC_TIMEOUT 0x100000
+
+/* Private types */
+// Fixed-capacity byte FIFO shared between the interrupt handler and the public API.
+typedef struct {
+ uint8_t data[BUFFER_LENGTH]; // storage, indexed modulo BUFFER_LENGTH
+ uint8_t head, tail, length; // next index to pop, next index to push, bytes currently stored
+} RingBuffer;
+
+/* Internal globals */
+
+static SIO_FlowControl _flow_control;
+static uint16_t _ctrl_reg_flag;
+
+static int (*_read_callback)(uint8_t) = (void *) 0;
+static void (*_old_sio_handler)(void) = (void *) 0;
+
+static volatile RingBuffer _tx_buffer, _rx_buffer;
+
+/* Private interrupt handler */
+/*
+ * Lightweight critical section: _ENTER_CRITICAL() saves IRQ_MASK into a new
+ * local named "mask" and writes 0 to the register; _EXIT_CRITICAL() writes
+ * the saved value back. Because the first macro declares a variable, it can
+ * be used only once per scope and must be paired with _EXIT_CRITICAL() in
+ * that same scope. Both expansions already end with a semicolon.
+ */
+#define _ENTER_CRITICAL() uint16_t mask = IRQ_MASK; IRQ_MASK = 0;
+#define _EXIT_CRITICAL() IRQ_MASK = mask;
+/*
+ * Serial port interrupt handler, installed by SIO_Init(). In order, it:
+ *  1. moves every pending received byte into the RX ring buffer (unless the
+ *     user read callback claims it by returning non-zero),
+ *  2. hands the next queued TX byte to the transmitter if it is ready,
+ *  3. acknowledges the interrupt and sets or clears the flow control bit
+ *     held in _ctrl_reg_flag depending on whether the RX buffer has room.
+ */
+static void _sio_handler(void) {
+ // Handle any incoming bytes.
+ while (SIO_STAT & SR_RXRDY) {
+ uint8_t value = SIO_TXRX;
+
+ // Skip storing this byte into the RX buffer if the callback returns a
+ // non-zero value.
+ if (_read_callback) {
+ if (_read_callback(value))
+ continue;
+ }
+
+ int length = _rx_buffer.length;
+
+ if (length >= BUFFER_LENGTH) {
+ //_sdk_log("RX overrun, dropping bytes\n");
+ break; // buffer full: the byte just read is discarded and draining stops
+ }
+
+ int tail = _rx_buffer.tail;
+ _rx_buffer.tail = (tail + 1) % BUFFER_LENGTH;
+ _rx_buffer.length = length + 1;
+
+ _rx_buffer.data[tail] = value;
+ }
+
+ // Send the next byte in the buffer if the TX unit is ready. Note that
+ // checking for CTS is unnecessary as the serial port is already hardwired
+ // to do so.
+ if (SIO_STAT & (SR_TXRDY | SR_TXU)) {
+ int length = _tx_buffer.length;
+
+ if (length) {
+ int head = _tx_buffer.head;
+ _tx_buffer.head = (head + 1) % BUFFER_LENGTH;
+ _tx_buffer.length = length - 1;
+
+ SIO_CTRL |= CR_TXIEN;
+ SIO_TXRX = _tx_buffer.data[head];
+ } else {
+ SIO_CTRL &= CR_TXIEN ^ 0xffff; // nothing queued: clear CR_TXIEN
+ }
+ }
+
+ // Acknowledge the IRQ and update flow control signals.
+ if (_rx_buffer.length < BUFFER_LENGTH)
+ SIO_CTRL = CR_INTRST | (SIO_CTRL | _ctrl_reg_flag);
+ else
+ SIO_CTRL = CR_INTRST | (SIO_CTRL & (_ctrl_reg_flag ^ 0xffff));
+}
+
+/* Serial port initialization API */
+/*
+ * Installs the serial interrupt handler and configures the port.
+ *
+ * baud: desired baud rate; used as a divisor (0x1fa400 / baud), so it must
+ *       be non-zero.
+ * mode: value for SIO_MODE; its two lowest bits are replaced with MR_BR_16.
+ *
+ * The previously installed handler for interrupt 8 is remembered so that
+ * SIO_Quit() can restore it. Both ring buffers are emptied and flow control
+ * is reset to SIO_FC_NONE.
+ */
+void SIO_Init(int baud, uint16_t mode) {
+ EnterCriticalSection();
+ _old_sio_handler = InterruptCallback(8, &_sio_handler);
+
+ SIO_CTRL = CR_ERRRST;
+ SIO_MODE = (mode & 0xfffc) | MR_BR_16;
+ SIO_BAUD = (uint16_t) ((int) 0x1fa400 / baud);
+ SIO_CTRL = CR_TXEN | CR_RXEN | CR_RXIEN;
+
+ _tx_buffer.head = 0;
+ _tx_buffer.tail = 0;
+ _tx_buffer.length = 0;
+ _rx_buffer.head = 0;
+ _rx_buffer.tail = 0;
+ _rx_buffer.length = 0;
+
+ _flow_control = SIO_FC_NONE;
+ _ctrl_reg_flag = 0;
+
+ ExitCriticalSection();
+}
+// Restores the interrupt 8 handler saved by SIO_Init() and writes CR_ERRRST alone to SIO_CTRL, clearing the bits SIO_Init() set.
+void SIO_Quit(void) {
+ EnterCriticalSection();
+ InterruptCallback(8, _old_sio_handler);
+
+ SIO_CTRL = CR_ERRRST;
+
+ ExitCriticalSection();
+}
+/*
+ * Selects the flow control mode. The chosen mode is stored in _flow_control
+ * and the control register bit the interrupt handler toggles is stored in
+ * _ctrl_reg_flag (0 for SIO_FC_NONE, CR_RTS for SIO_FC_RTS_CTS). Both
+ * supported modes clear CR_DSRIEN. Any other value, including the disabled
+ * DTR/DSR mode, leaves the current settings untouched.
+ */
+void SIO_SetFlowControl(SIO_FlowControl mode) {
+ _ENTER_CRITICAL();
+
+ switch (mode) {
+ case SIO_FC_NONE:
+ _flow_control = SIO_FC_NONE;
+ _ctrl_reg_flag = 0;
+
+ SIO_CTRL &= 0xffff ^ CR_DSRIEN;
+ break;
+
+ case SIO_FC_RTS_CTS:
+ _flow_control = SIO_FC_RTS_CTS;
+ _ctrl_reg_flag = CR_RTS;
+
+ SIO_CTRL &= 0xffff ^ CR_DSRIEN;
+ break;
+
+ /*case SIO_FC_DTR_DSR:
+ _flow_control = SIO_FC_DTR_DSR;
+ _ctrl_reg_flag = CR_DTR;
+
+ SIO_CTRL |= CR_DSRIEN;
+ break;*/
+ }
+
+ _EXIT_CRITICAL();
+}
+
+/* Reading API */
+/*
+ * Blocking read: spins (with no timeout) until the RX buffer holds at least
+ * one byte, then returns the value produced by SIO_ReadByte2().
+ */
+int SIO_ReadByte(void) {
+ /*for (int i = SIO_SYNC_TIMEOUT; i; i--) {
+ if (_rx_buffer.length)
+ return SIO_ReadByte2();
+ }*/
+ while (!_rx_buffer.length)
+ __asm__ volatile(""); // empty asm statement inside the busy-wait loop
+
+ return SIO_ReadByte2();
+}
+
+/*
+ * Non-blocking read: removes the oldest byte from the RX ring buffer and
+ * returns it (0-255), or returns -1 if the buffer is currently empty.
+ */
+int SIO_ReadByte2(void) {
+    if (!_rx_buffer.length)
+        return -1;
+
+    _ENTER_CRITICAL();
+
+    int head = _rx_buffer.head;
+
+    // Fetch the byte while interrupts are still masked. When the buffer is
+    // full, head and tail refer to the same slot, so once the slot is released
+    // and interrupts are re-enabled the handler may overwrite it immediately.
+    int value = _rx_buffer.data[head];
+
+    _rx_buffer.head = (head + 1) % BUFFER_LENGTH;
+    _rx_buffer.length--;
+
+    _EXIT_CRITICAL();
+    return value;
+}
+
+/*
+ * Queries or waits for received data.
+ *
+ * mode != 0: returns the number of bytes currently held in the RX buffer.
+ * mode == 0: spins (with no timeout) until at least one byte has been
+ *            received, then returns 0.
+ */
+int SIO_ReadSync(int mode) {
+    if (!mode) {
+        for (;;) {
+            if (_rx_buffer.length)
+                return 0;
+
+            __asm__ volatile("");
+        }
+    }
+
+    return _rx_buffer.length;
+}
+
+/*
+ * Registers func as the callback invoked by the interrupt handler for every
+ * received byte; a non-zero return value from the callback keeps that byte
+ * out of the RX buffer. Passing a null pointer removes the callback.
+ *
+ * Returns the previously registered callback (null if there was none).
+ */
+void *SIO_ReadCallback(int (*func)(uint8_t)) {
+    EnterCriticalSection();
+
+    void *old_callback = _read_callback;
+    _read_callback = func;
+
+    ExitCriticalSection();
+    return old_callback;
+}
+
+/* Writing API */
+
+/*
+ * Queues or sends one byte, polling up to SIO_SYNC_TIMEOUT times for free
+ * space in the TX buffer. Returns the result of SIO_WriteByte2() once space
+ * is available, or -1 if the buffer stayed full for the whole polling period.
+ */
+int SIO_WriteByte(uint8_t value) {
+    int attempts_left = SIO_SYNC_TIMEOUT;
+
+    while (attempts_left) {
+        if (_tx_buffer.length < BUFFER_LENGTH)
+            return SIO_WriteByte2(value);
+
+        attempts_left--;
+    }
+
+    // Timed out without finding room in the buffer.
+    return -1;
+}
+/*
+ * Non-blocking write of a single byte.
+ *
+ * Returns 0 if the byte was written straight to the transmitter, -1 if the
+ * TX buffer was full (the byte is dropped), otherwise the number of bytes
+ * that were already queued ahead of this one.
+ *
+ * NOTE(review): the direct-send path does not look at _tx_buffer.length;
+ * confirm a byte can never overtake data that is still queued.
+ */
+int SIO_WriteByte2(uint8_t value) {
+ // If the TX unit is currently busy, append the byte to the buffer instead
+ // of sending it immediately. Note that interrupts must be disabled *prior*
+ // to checking if TX is busy; disabling them afterwards would create a race
+ // condition where the transfer could end while interrupts are being
+ // disabled. Interrupts are disabled through the IRQ_MASK register rather
+ // than via syscalls for performance reasons.
+ _ENTER_CRITICAL();
+
+ if (SIO_STAT & (SR_TXRDY | SR_TXU)) {
+ SIO_TXRX = value;
+ _EXIT_CRITICAL();
+ return 0;
+ }
+
+ int length = _tx_buffer.length;
+
+ if (length >= BUFFER_LENGTH) {
+ _EXIT_CRITICAL();
+
+ //_sdk_log("TX overrun, dropping bytes\n");
+ return -1;
+ }
+
+ int tail = _tx_buffer.tail;
+ _tx_buffer.tail = (tail + 1) % BUFFER_LENGTH;
+ _tx_buffer.length = length + 1;
+
+ _tx_buffer.data[tail] = value;
+ SIO_CTRL |= CR_TXIEN; // the interrupt handler sends the queued bytes
+
+ _EXIT_CRITICAL();
+ return length;
+}
+/*
+ * Queries or waits for pending transmissions.
+ *
+ * mode != 0: returns the number of bytes still queued in the TX buffer.
+ * mode == 0: polls up to SIO_SYNC_TIMEOUT times for the TX buffer to drain;
+ *            if it did, additionally waits (with no timeout) for the TX
+ *            status flags to be set. Returns the number of bytes left in the
+ *            buffer, i.e. 0 on success and non-zero on timeout.
+ */
+int SIO_WriteSync(int mode) {
+ if (mode)
+ return _tx_buffer.length;
+
+ // Wait for the buffer to become empty.
+ for (int i = SIO_SYNC_TIMEOUT; i; i--) {
+ if (!_tx_buffer.length)
+ break;
+ }
+
+ if (!_tx_buffer.length) {
+ // Wait for the TX unit to finish sending the last byte.
+ while (!(SIO_STAT & (SR_TXRDY | SR_TXU)))
+ __asm__ volatile("");
+ } else {
+ //_sdk_log("SIO_WriteSync() timeout\n");
+ }
+
+ return _tx_buffer.length;
+}
diff --git a/libpsn00b/psxsio/siocons.c b/libpsn00b/psxsio/siocons.c
deleted file mode 100644
index 5937920..0000000
--- a/libpsn00b/psxsio/siocons.c
+++ /dev/null
@@ -1,220 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <ioctl.h>
-#include <psxapi.h>
-#include <psxetc.h>
-#include <psxsio.h>
-
-#define SIO_BUFF_LEN 32
-
-static volatile int _sio_key_pending;
-
-static volatile int _sio_buff_rpos;
-static volatile int _sio_buff_wpos;
-static volatile char _sio_buff[SIO_BUFF_LEN];
-
-static void _sio_init() {
-
- _sio_key_pending = 0;
-
- memset((void*)_sio_buff, 0, SIO_BUFF_LEN);
- _sio_buff_rpos = 0;
- _sio_buff_wpos = 0;
-
-}
-
-static int _sio_open(FCB *fcb, const char* file, int mode) {
-
- fcb->diskid = 1;
-
- return 0;
-
-}
-
-static int _sio_inout(FCB *fcb, int cmd) {
-
- int i;
-
- if(cmd == 2) { // Write
-
- for(i=0; i<fcb->trns_len; i++) {
- while(!(_sio_control(0, 0, 0) & SR_TXU));
- _sio_control(1, 4, ((char*)fcb->trns_addr)[i]);
- }
-
- return fcb->trns_len;
-
- } else if (cmd == 1) { // Read
-
- /*for(i=0; i<fcb->trns_len; i++) {
- while(!(_sio_control(0, 0, 0) & SR_RXRDY));
- ((char*)fcb->trns_addr)[i] = _sio_control(0, 4, 0);
- }*/
-
-
-
- for(i=0; i<fcb->trns_len; i++) {
-
- while( _sio_key_pending == 0 );
-
- ((char*)fcb->trns_addr)[i] = _sio_buff[_sio_buff_rpos];
- _sio_key_pending--;
- _sio_buff_rpos++;
- if( _sio_buff_rpos >= SIO_BUFF_LEN )
- {
- _sio_buff_rpos = 0;
- }
-
- }
-
- return fcb->trns_len;
-
- }
-
- return 0;
-
-}
-
-static int _sio_ioctl(FCB *fcb, int cmd, int arg)
-{
- if( cmd == FIOCSCAN )
- {
- if( _sio_key_pending )
- {
- return 0;
- }
- }
-
- return -1;
-}
-
-static int _sio_close(int h) {
-
- return h;
-
-}
-
-static void _sio_tty_cb(void)
-{
- _sio_key_pending++;
-
- // Get received byte
- if( _sio_key_pending < SIO_BUFF_LEN )
- {
- _sio_buff[_sio_buff_wpos] = _sio_control(0, 4, 0);
- _sio_buff_wpos++;
- if( _sio_buff_wpos >= SIO_BUFF_LEN )
- {
- _sio_buff_wpos = 0;
- }
- }
- else
- {
- _sio_control(0, 4, 0);
- }
-
- // Acknowledge SIO IRQ
- _sio_control(2, 1, 0);
-}
-
-static int _sio_dummy(void)
-{
- return -1;
-}
-
-static DCB _sio_dcb = {
- "tty",
- 0x3,
- 0x1,
- 0x0,
- (void*)_sio_init, // init
- (void*)_sio_open, // open
- (void*)_sio_inout, // inout
- _sio_close, // close
- _sio_ioctl, // ioctl
- _sio_dummy, // read
- _sio_dummy, // write
- _sio_dummy, // erase
- _sio_dummy, // undelete
- _sio_dummy, // firstfile
- _sio_dummy, // nextfile
- _sio_dummy, // format
- _sio_dummy, // chdir
- _sio_dummy, // rename
- _sio_dummy, // remove
- _sio_dummy // testdevice
-};
-
-
-volatile void (*_sio_callback)(void) = NULL;
-
-void AddSIO(int baud) {
-
- _sio_control(2, 0, 0);
- _sio_control(1, 2, MR_SB_01|MR_CHLEN_8|0x02);
- _sio_control(1, 3, baud);
- _sio_control(1, 1, CR_RXEN|CR_TXEN|CR_RXIEN);
-
- close(0);
- close(1);
-
- DelDev(_sio_dcb.name);
- AddDev(&_sio_dcb);
-
- Sio1Callback(_sio_tty_cb);
-
- open(_sio_dcb.name, 2);
- open(_sio_dcb.name, 1);
-
-}
-
-void DelSIO(void) {
-
- Sio1Callback(NULL);
-
- // Reset serial interface
- _sio_control(2, 0, 0);
-
- // Remove TTY device
- DelDev(_sio_dcb.name);
-
- // Add dummy TTY device
- AddDummyTty();
-
-}
-
-void WaitSIO(void) {
-
- while((_sio_control(0, 0, 0)&(SR_RXRDY)) != (SR_RXRDY));
- _sio_control(0, 4, NULL);
-
-}
-
-void *Sio1Callback(void (*func)()) {
-
- void *old_isr; //= *((int*)&_sio_callback);
-
- EnterCriticalSection();
-
- if( func ) {
-
- old_isr = InterruptCallback(8, func);
- //_sio_callback = func;
-
- } else {
-
- old_isr = InterruptCallback(8, NULL);
- //_sio_callback = NULL;
-
- }
-
- ExitCriticalSection();
-
- return old_isr;
-
-}
-
-int kbhit()
-{
- return(_sio_key_pending>0);
-}
diff --git a/libpsn00b/psxsio/tty.c b/libpsn00b/psxsio/tty.c
new file mode 100644
index 0000000..4dc9fd1
--- /dev/null
+++ b/libpsn00b/psxsio/tty.c
@@ -0,0 +1,107 @@
+/*
+ * PSn00bSDK serial port BIOS TTY driver
+ * (C) 2019-2022 Lameguy64, spicyjpeg - MPL licensed
+ *
+ * This driver is designed to be as simple and reliable as possible: as such it
+ * only relies on the SIO_*() API for receiving and sends data synchronously.
+ * This allows printf() to work without issues, albeit slowly, if called from a
+ * critical section or even from an interrupt handler.
+ */
+
+#include <ioctl.h>
+#include <psxapi.h>
+#include <psxsio.h>
+#include <hwregs_c.h>
+
+/* TTY device control block */
+
+// Open handler of the TTY device. The file name and mode are ignored; the
+// only effect is tagging the file control block with disk ID 1. Always
+// returns 0.
+static int _sio_open(FCB *fcb, const char *file, int mode) {
+    (void) file;
+    (void) mode;
+
+    fcb->diskid = 1;
+    return 0;
+}
+
+// Transfer handler of the TTY device. Moves fcb->trns_len bytes between the
+// serial port and the buffer at fcb->trns_addr. cmd selects the direction
+// (1 = read, 2 = write); any other command transfers nothing and returns 0.
+// On a read or write the return value is fcb->trns_len.
+static int _sio_inout(FCB *fcb, int cmd) {
+    char *buffer = (char *) fcb->trns_addr;
+
+    if (cmd == 1) {
+        // Read: one SIO_ReadByte() call per requested byte.
+        for (int pos = 0; pos < fcb->trns_len; pos++)
+            buffer[pos] = (char) SIO_ReadByte();
+
+        return fcb->trns_len;
+    }
+
+    if (cmd == 2) {
+        // Write: data is sent synchronously by polling the status register
+        // before pushing each byte into the data register.
+        for (int pos = 0; pos < fcb->trns_len; pos++) {
+            while ((SIO_STAT & (SR_TXRDY | SR_TXU)) == 0)
+                __asm__ volatile("");
+
+            SIO_TXRX = buffer[pos];
+        }
+
+        return fcb->trns_len;
+    }
+
+    return 0;
+}
+
+// Close handler of the TTY device. There is nothing to release, so this
+// always reports success (0).
+static int _sio_close(FCB *fcb) {
+    (void) fcb;
+
+    return 0;
+}
+
+// ioctl handler of the TTY device. FIOCSCAN is the only supported command:
+// it returns 0 when SIO_ReadSync(1) is nonzero and -1 otherwise. Every other
+// command is rejected with -1. fcb and arg are unused.
+static int _sio_ioctl(FCB *fcb, int cmd, int arg) {
+    (void) fcb;
+    (void) arg;
+
+    if (cmd != FIOCSCAN)
+        return -1;
+
+    if (SIO_ReadSync(1))
+        return 0;
+
+    return -1;
+}
+
+// Placeholder for the device operations the TTY driver does not implement;
+// always returns -1.
+static int _sio_dummy(void) {
+    const int unsupported = -1;
+
+    return unsupported;
+}
+
+// Device control block describing the serial console, registered under the
+// name "tty" by AddSIO(). Only open, inout, close and ioctl have real
+// handlers; every other entry (including init) points at the _sio_dummy()
+// stub, which returns -1.
+static DCB _sio_dcb = {
+    .name         = "tty",
+    .flags        = 3,
+    .ssize        = 1,
+    .desc         = "PSXSIO SERIAL CONSOLE",
+
+    // Implemented operations
+    .f_open       = _sio_open,
+    .f_inout      = _sio_inout,
+    .f_close      = _sio_close,
+    .f_ioctl      = _sio_ioctl,
+
+    // Unsupported operations
+    .f_init       = _sio_dummy,
+    .f_read       = _sio_dummy,
+    .f_write      = _sio_dummy,
+    .f_erase      = _sio_dummy,
+    .f_undelete   = _sio_dummy,
+    .f_firstfile  = _sio_dummy,
+    .f_nextfile   = _sio_dummy,
+    .f_format     = _sio_dummy,
+    .f_chdir      = _sio_dummy,
+    .f_rename     = _sio_dummy,
+    .f_remove     = _sio_dummy,
+    .f_testdevice = _sio_dummy
+};
+
+/* Public API */
+
+// Installs the serial port as the BIOS TTY device.
+//
+// Initializes the serial interface at the given baud rate, closes file
+// descriptors 0 and 1, replaces any device registered under the driver's name
+// with _sio_dcb and opens the new device twice (mode 2, then mode 1).
+// NOTE(review): the two open() calls presumably re-claim descriptors 0 and 1
+// for stdin/stdout - confirm against the BIOS file API.
+void AddSIO(int baud) {
+    SIO_Init(baud, MR_CHLEN_8 | MR_SB_01);
+
+    // Release the descriptors currently bound to the old TTY device.
+    for (int fd = 0; fd < 2; fd++)
+        close(fd);
+
+    // Swap the registered device for the serial driver.
+    DelDev(_sio_dcb.name);
+    AddDev(&_sio_dcb);
+
+    open(_sio_dcb.name, 2);
+    open(_sio_dcb.name, 1);
+}
+
+// Uninstalls the serial TTY driver: shuts down the serial interface via
+// SIO_Quit(), unregisters the device and installs the dummy TTY in its place.
+void DelSIO(void) {
+    // Stop the serial interface before the device goes away.
+    SIO_Quit();
+
+    // Replace the serial console with the dummy TTY device.
+    DelDev(_sio_dcb.name);
+    AddDummyTty();
+}
diff --git a/libpsn00b/psxspu/common.c b/libpsn00b/psxspu/common.c
index 7d90858..d1dabfe 100644
--- a/libpsn00b/psxspu/common.c
+++ b/libpsn00b/psxspu/common.c
@@ -4,10 +4,12 @@
*/
#include <stdint.h>
-#include <psxetc.h>
+#include <assert.h>
#include <psxspu.h>
#include <hwregs_c.h>
+#define _min(x, y) (((x) < (y)) ? (x) : (y))
+
#define WRITABLE_AREA_ADDR 0x200
#define DMA_CHUNK_LENGTH 16
#define STATUS_TIMEOUT 0x100000
@@ -25,16 +27,16 @@ static void _wait_status(uint16_t mask, uint16_t value) {
return;
}
- _sdk_log("psxspu: status register timeout (0x%04x)\n", SPU_STAT);
+ _sdk_log("status register timeout (0x%04x)\n", SPU_STAT);
}
-static void _dma_transfer(uint32_t *data, size_t length, int write) {
+static size_t _dma_transfer(uint32_t *data, size_t length, int write) {
if (length % 4)
- _sdk_log("psxspu: can't transfer a number of bytes that isn't multiple of 4\n");
+ _sdk_log("can't transfer a number of bytes that isn't multiple of 4\n");
length /= 4;
if ((length >= DMA_CHUNK_LENGTH) && (length % DMA_CHUNK_LENGTH)) {
- _sdk_log("psxspu: transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
+ _sdk_log("transfer data length (%d) is not a multiple of %d, rounding\n", length, DMA_CHUNK_LENGTH);
length += DMA_CHUNK_LENGTH - 1;
}
@@ -42,9 +44,11 @@ static void _dma_transfer(uint32_t *data, size_t length, int write) {
_wait_status(0x0030, 0x0000);
// Enable DMA request for writing (2) or reading (3)
+ uint16_t ctrl = write ? 0x0020 : 0x0030;
+
SPU_ADDR = _transfer_addr;
- SPU_CTRL |= write ? 0x0020 : 0x0030;
- _wait_status(0x0400, 0x0000);
+ SPU_CTRL |= ctrl;
+ _wait_status(0x0030, ctrl);
DMA_MADR(4) = (uint32_t) data;
if (length < DMA_CHUNK_LENGTH)
@@ -53,6 +57,42 @@ static void _dma_transfer(uint32_t *data, size_t length, int write) {
DMA_BCR(4) = DMA_CHUNK_LENGTH | ((length / DMA_CHUNK_LENGTH) << 16);
DMA_CHCR(4) = 0x01000200 | write;
+ return length;
+}
+
+// Uploads data to SPU RAM at the current transfer address (_transfer_addr)
+// without DMA, by pushing halfwords into the SPU's transfer buffer.
+//
+// data:   source buffer, read as 16-bit halfwords
+// length: number of bytes to transfer; an odd length is logged and the
+//         trailing byte is dropped
+//
+// Returns the number of 16-bit halfwords written (callers multiply by 2 to
+// get a byte count).
+static size_t _manual_write(const uint16_t *data, size_t length) {
+    if (length % 2)
+        _sdk_log("can't transfer a number of bytes that isn't multiple of 2\n");
+
+    length /= 2;
+
+    // The loop below consumes length, so remember the total for the return
+    // value (returning length itself would always yield 0).
+    size_t total = length;
+
+    SPU_CTRL &= 0xffcf; // Disable DMA request
+    _wait_status(0x0030, 0x0000);
+
+    // Manual transfers have to be done by filling up the SPU's transfer buffer
+    // and then letting the SPU empty it one 64-byte chunk at a time.
+    uint16_t addr = _transfer_addr;
+
+    while (length) {
+        // DMA_CHUNK_LENGTH is in 32-bit words, chunk is in halfwords.
+        size_t chunk = _min(DMA_CHUNK_LENGTH * 2, length);
+        length -= chunk;
+
+        // The address advances by one unit per 4 halfwords (8 bytes).
+        SPU_ADDR = addr;
+        addr += chunk / 4;
+
+        for (; chunk; chunk--)
+            SPU_DATA = *(data++);
+
+        SPU_CTRL |= 0x0010; // Manual transfer mode
+        _wait_status(0x0030, 0x0010);
+        _wait_status(0x0400, 0x0000);
+
+        // This additional delay is required according to nocash docs.
+        for (int i = 0; i < 1000; i++)
+            __asm__ volatile("");
+    }
+
+    return total;
}
/* Public API */
@@ -65,10 +105,14 @@ void SpuInit(void) {
SPU_MASTER_VOL_R = 0;
SPU_REVERB_VOL_L = 0;
SPU_REVERB_VOL_R = 0;
- SPU_KEY_OFF = 0x00ffffff;
- SPU_FM_MODE = 0;
- SPU_NOISE_MODE = 0;
- SPU_REVERB_ON = 0;
+ SPU_KEY_OFF1 = 0xffff;
+ SPU_KEY_OFF2 = 0x00ff;
+ SPU_FM_MODE1 = 0;
+ SPU_FM_MODE2 = 0;
+ SPU_NOISE_MODE1 = 0;
+ SPU_NOISE_MODE2 = 0;
+ SPU_REVERB_ON1 = 0;
+ SPU_REVERB_ON2 = 0;
SPU_REVERB_ADDR = 0xfffe;
SPU_CD_VOL_L = 0;
SPU_CD_VOL_R = 0;
@@ -78,17 +122,16 @@ void SpuInit(void) {
DMA_DPCR |= 0x000b0000; // Enable DMA4
DMA_CHCR(4) = 0x00000201; // Stop DMA4
- SPU_CTRL = 0xc011; // Enable SPU, DAC, CD audio, set manual transfer mode
- _wait_status(0x001f, 0x0011);
+ SPU_DMA_CTRL = 0x0004; // Reset transfer mode
+ SPU_CTRL = 0xc001; // Enable SPU, DAC, CD audio, disable DMA request
+ _wait_status(0x003f, 0x0001);
- // Upload a dummy ADPCM block to the first 16 bytes of SPU RAM. This may be
- // freely used or overwritten.
- SPU_ADDR = WRITABLE_AREA_ADDR;
- _wait_status(0x0400, 0x0000);
+ // Upload a dummy looping ADPCM block to the first 16 bytes of SPU RAM.
+ // This may be freely used or overwritten.
+ uint32_t block[4] = { 0x0500, 0, 0, 0 };
- SPU_DATA = 0x0500;
- for (int i = 7; i; i--)
- SPU_DATA = 0x0000;
+ _transfer_addr = WRITABLE_AREA_ADDR;
+ _manual_write((const uint16_t *) block, 16);
// "Play" the dummy block on all channels. This will reset the start
// address and ADSR envelope status of each channel.
@@ -101,38 +144,34 @@ void SpuInit(void) {
// Sony's implementation leaves everything muted, however it makes sense to
// turn up at least the master and CD audio volume by default.
- SPU_KEY_ON = 0x00ffffff;
+ SPU_KEY_ON1 = 0xffff;
+ SPU_KEY_ON2 = 0x00ff;
SPU_MASTER_VOL_L = 0x3fff;
SPU_MASTER_VOL_R = 0x3fff;
SPU_CD_VOL_L = 0x7fff;
SPU_CD_VOL_R = 0x7fff;
}
-void SpuRead(uint32_t *data, size_t size) {
- _dma_transfer(data, size, 0);
+// Sets up a DMA read (write flag = 0) of size bytes into data and returns the
+// length reported by _dma_transfer(), converted from 32-bit words to bytes.
+size_t SpuRead(uint32_t *data, size_t size) {
+ return _dma_transfer(data, size, 0) * 4;
}
-void SpuWrite(const uint32_t *data, size_t size) {
+// Writes size bytes from data at the current transfer address, using a manual
+// (I/O) transfer when _transfer_mode is nonzero and DMA otherwise. Returns the
+// helper's result scaled to bytes, or 0 if the transfer was refused.
+size_t SpuWrite(const uint32_t *data, size_t size) {
+ // Refuse to write below the writable area; nothing is transferred.
if (_transfer_addr < WRITABLE_AREA_ADDR)
- return;
+ return 0;
// I/O transfer mode is not that useful, but whatever.
- if (_transfer_mode) {
- SPU_ADDR = _transfer_addr;
- SPU_CTRL = (SPU_CTRL & 0xffcf) | 0x0010; // Manual transfer mode
- _wait_status(0x0400, 0x0000);
-
- for (int i = size; i; i -= 4) {
- uint32_t value = *(data++);
-
- SPU_DATA = (uint16_t) value;
- SPU_DATA = (uint16_t) (value >> 16);
- }
+ if (_transfer_mode)
+ return _manual_write((const uint16_t *) data, size) * 2; // halfwords -> bytes
+ else
+ return _dma_transfer((uint32_t *) data, size, 1) * 4; // words -> bytes
+}
- return;
- }
+// Writes size bytes through SpuWrite() and then advances the transfer address
+// past the data written, so consecutive calls upload consecutive chunks.
+// Returns the number of bytes reported by SpuWrite().
+size_t SpuWritePartly(const uint32_t *data, size_t size) {
+    size_t _size = SpuWrite(data, size);
- _dma_transfer((uint32_t *) data, size, 1);
+    // _transfer_addr is written to SPU_ADDR as-is, i.e. it counts 8-byte
+    // units, while _size is in bytes: round up to whole units.
+    _transfer_addr += (_size + 7) / 8;
+    return _size;
}
SPU_TransferMode SpuSetTransferMode(SPU_TransferMode mode) {